From 6ae0f7cd75ee4eef37ae95dec6ee22a8a02a9fa4 Mon Sep 17 00:00:00 2001
From: fanasina
Date: Sun, 4 Feb 2024 00:54:04 +0100
Subject: [PATCH] add product tensor with pthread (normal prod and contract prod)

---
 tensor_t/Makefile                |   2 +-
 tensor_t/src/tensor_t/tensor_t.c | 246 ++++++++++++++++++++++++++++++-
 tensor_t/src/tensor_t/tensor_t.h |   5 +
 tensor_t/test/is_good.c          | 169 +++++++++++++++++++++++++-
 4 files changed, 417 insertions(+), 5 deletions(-)

diff --git a/tensor_t/Makefile b/tensor_t/Makefile
index cdca378..acd00c0 100644
--- a/tensor_t/Makefile
+++ b/tensor_t/Makefile
@@ -9,7 +9,7 @@ INCLUDE_TENS=$(PWD)/src
 INCLUDE_PERMDIR=$(PERMDIR)/src
 INCLUDE_DIMDIR=$(DIMDIR)/src
 INCLUDE_TOOLDIR=$(TOOLDIR)/include
-CFLAGS=-I$(INCLUDE_TOOLDIR) -I$(INCLUDE_PERMDIR) -I$(INCLUDE_DIMDIR) -I$(INCLUDE_TENS)
+CFLAGS=-I$(INCLUDE_TOOLDIR) -I$(INCLUDE_PERMDIR) -I$(INCLUDE_DIMDIR) -I$(INCLUDE_TENS) -pthread
 
 #SRC_DIR=$(ROOT_DIR)/src
 #SRC=$(wildcard */*/*.c)
diff --git a/tensor_t/src/tensor_t/tensor_t.c b/tensor_t/src/tensor_t/tensor_t.c
index 809ed54..5e7bd02 100644
--- a/tensor_t/src/tensor_t/tensor_t.c
+++ b/tensor_t/src/tensor_t/tensor_t.c
@@ -6,7 +6,6 @@ void subArray(size_t* dst, size_t* src, size_t debDst, size_t finDst, size_t deb
   }
 }
 
-
 void concatArray(size_t* dst, size_t* src0, size_t* src1, size_t debDst, size_t debSrc0, size_t finSrc0, size_t debSrc1, size_t finSrc1) {
   size_t i = debDst;
   for (size_t j = debSrc0; j < finSrc0; j++) {
@@ -152,6 +151,7 @@ void tensorProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##typ
   } \
 } \
 \
+\
 void tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1) { \
   dimension *dd; \
   add_dimension(&dd, M0->dim, M1->dim); \
@@ -269,6 +269,250 @@ void tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor_##
     /*printf("\n");*/\
   }\
 }\
+/* Work description for one tensor-product thread: operand and result\
+   buffers, the half-open range [beginRange, endRange) to process, MRank\
+   (rank of M1 when endian is set, rank of M0 otherwise) and M1Rank (rank\
+   of M1, read only by the 2d worker). */\
+struct arg_Prod_##type{\
+  type *M0x;\
+  type *M1x;\
+  type *Mx;\
+  size_t beginRange;\
+  size_t endRange;\
+  size_t MRank;\
+  size_t M1Rank;\
+};\
+/* Thread start routine: for every flat result index i of the slice, split i\
+   into an M0 index and an M1 index and store the product of both elements.\
+   Each index is written exactly once, so the value is assigned instead of\
+   accumulated into whatever the result buffer held before. */\
+void* runProd_thread_##type(void *arg){\
+  struct arg_Prod_##type *arg_t = arg;\
+  size_t a0_id, a1_id;\
+  for (size_t i = arg_t->beginRange; i < arg_t->endRange; i++) {\
+    if(endian){\
+      a0_id=i / arg_t->MRank;\
+      a1_id=i % arg_t->MRank;\
+    }\
+    else{\
+      a0_id=i % arg_t->MRank;\
+      a1_id=i / arg_t->MRank;\
+    }\
+    arg_t->Mx[i] = arg_t->M0x[a0_id] * arg_t->M1x[a1_id];\
+  }\
+  return NULL; /* a start routine has to return a value */\
+}\
+\
+/* Cut [0, whole->endRange) into nbthread contiguous slices, run worker on\
+   each slice in its own thread and wait for all of them. nbthread == 0 is\
+   treated as 1. When the bookkeeping arrays cannot be allocated or a thread\
+   cannot be started, the work still pending is done by the caller itself. */\
+static void runProdSliced_##type(const struct arg_Prod_##type *whole, size_t nbthread, void *(*worker)(void *)){\
+  size_t total = whole->endRange;\
+  if(nbthread == 0) nbthread = 1;\
+  pthread_t *thrd = malloc(nbthread * sizeof *thrd);\
+  struct arg_Prod_##type *arg_th = malloc(nbthread * sizeof *arg_th);\
+  if(thrd == NULL || arg_th == NULL){\
+    struct arg_Prod_##type all = *whole;\
+    worker(&all);\
+    free(thrd);\
+    free(arg_th);\
+    return;\
+  }\
+  size_t started = 0;\
+  for(; started < nbthread; ++started){\
+    arg_th[started] = *whole;\
+    arg_th[started].beginRange = started * total / nbthread;\
+    if(started < nbthread - 1) arg_th[started].endRange = (started + 1) * total / nbthread;\
+    if(pthread_create(&thrd[started], NULL, worker, &arg_th[started]) != 0){\
+      /* no thread could be started: finish what is left in this thread */\
+      arg_th[started].endRange = total;\
+      worker(&arg_th[started]);\
+      break;\
+    }\
+  }\
+  for(size_t i = 0; i < started; ++i){\
+    pthread_join(thrd[i], NULL);\
+  }\
+  free(arg_th);\
+  free(thrd);\
+}\
+\
+/* Tensor product of M0 and M1 on nbthread threads, each one filling a\
+   contiguous slice of the flat result; *MM receives the new tensor. */\
+void tensorProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t nbthread) { \
+  dimension *dd; \
+  add_dimension(&dd, M0->dim, M1->dim); \
+  (*MM)=CREATE_TENSOR_##type(dd); \
+  tensor_##type *M = *MM; \
+  \
+  struct arg_Prod_##type whole;\
+  whole.M0x = M0->x;\
+  whole.M1x = M1->x;\
+  whole.Mx = M->x;\
+  whole.beginRange = 0;\
+  whole.endRange = M->dim->rank;\
+  whole.MRank = endian ? M1->dim->rank : M0->dim->rank;\
+  whole.M1Rank = M1->dim->rank;\
+  runProdSliced_##type(&whole, nbthread, runProd_thread_##type);\
+} \
+\
+/* Thread start routine of the 2d split: [beginRange, endRange) is a range of\
+   M0 elements; each of them is multiplied by every element of M1 and the\
+   product is stored at the matching flat result index. */\
+void* runProd_thread2d_##type(void *arg){\
+  struct arg_Prod_##type *arg_t = arg;\
+  size_t k;\
+  for (size_t i = arg_t->beginRange; i < arg_t->endRange; i++) {\
+    for (size_t j = 0; j < arg_t->M1Rank; j++) {\
+      if(endian){\
+        k = i * arg_t->MRank + j;\
+      }\
+      else{\
+        k =i + arg_t->MRank * j ;\
+      }\
+      arg_t->Mx[k] = arg_t->M0x[i] * arg_t->M1x[j];\
+    }\
+  }\
+  return NULL;\
+}\
+\
+/* Same product as tensorProdThread, but the work is split over the elements\
+   of M0: every thread gets a range of M0 and walks the whole of M1. */\
+void tensorProdThread2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t nbthread) { \
+  dimension *dd; \
+  add_dimension(&dd, M0->dim, M1->dim); \
+  (*MM)=CREATE_TENSOR_##type(dd); \
+  tensor_##type *M = *MM; \
+  \
+  struct arg_Prod_##type whole;\
+  whole.M0x = M0->x;\
+  whole.M1x = M1->x;\
+  whole.Mx = M->x;\
+  whole.beginRange = 0;\
+  whole.endRange = M0->dim->rank; /* slices are ranges of M0 here */\
+  whole.MRank = endian ? M1->dim->rank : M0->dim->rank;\
+  whole.M1Rank = M1->dim->rank;\
+  runProdSliced_##type(&whole, nbthread, runProd_thread2d_##type);\
+} \
+/* Work description for one contracted-product thread: buffers, the half-open\
+   range [beginRange, endRange) of flat result indices, dSubRank (rank of the\
+   kept part of M1 when endian is set, of M0 otherwise) and dMRank (rank of\
+   the contracted part). */\
+struct arg_ProdContract_##type{\
+  type *M0x;\
+  type *M1x;\
+  type *Mx;\
+  size_t beginRange;\
+  size_t endRange;\
+  size_t dSubRank;\
+  size_t dMRank;\
+};\
+/* Thread start routine: every result element of the slice is the sum over k\
+   of M0x[n0_id] * M1x[n1_id] along the contracted indices. */\
+void* runProdContract_thread_##type(void *arg){\
+  struct arg_ProdContract_##type *arg_t = arg;\
+  size_t a0_id, a1_id, n0_id, n1_id;\
+  for (size_t i = arg_t->beginRange; i < arg_t->endRange; i++) {\
+    if(endian){\
+      a0_id=i/ arg_t->dSubRank;\
+      a1_id=i% arg_t->dSubRank;\
+    }\
+    else{\
+      a0_id=i% arg_t->dSubRank;\
+      a1_id=i/ arg_t->dSubRank;\
+    }\
+    arg_t->Mx[i] = 0;\
+    for (size_t k = 0; k < arg_t->dMRank; k++) {\
+      if(endian){\
+        n0_id= a0_id * arg_t->dMRank + k;\
+        n1_id= a1_id + arg_t->dSubRank * k;\
+      }\
+      else{\
+        n0_id= a0_id + arg_t->dSubRank * k;\
+        n1_id= a1_id * arg_t->dMRank + k;\
+      }\
+      arg_t->Mx[i] += arg_t->M0x[n0_id] * arg_t->M1x[n1_id];\
+    }\
+  }\
+  return NULL;\
+}\
+/* M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp]  (deep = l > 0) /exists 1<= l<...=n-l then p=n+m-2l\
+  M[x0,x1,x3..xl x{l+1}...xn] X M[xn,x{n-1},x{n-2}...xl y{l+1} ..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}]  (deep = l > 0)\
+M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]]*/\
+\
+/* Contracted product of M0 and M1 over contractionNumber indices, computed\
+   by nbthread threads that each fill a contiguous slice of the flat result;\
+   *MM receives the new tensor. nbthread == 0 is treated as 1. */\
+void tensorContractnProdThread_##type(tensor_##type** MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t nbthread) {\
+\
+  size_t len0 = M0->dim->size - contractionNumber;\
+  size_t len1 = M1->dim->size - contractionNumber;\
+\
+  size_t* tsub0 = malloc(sizeof(size_t) *len0);\
+  size_t* tsub1 = malloc(sizeof(size_t) *len1);\
+  size_t* tDk1 = malloc(sizeof(size_t) *contractionNumber);\
+  size_t* tDk0 = malloc(sizeof(size_t) *contractionNumber);\
+  subArray(tsub0, M0->dim->perm, 0, len0, 0);\
+  subArray(tsub1, M1->dim->perm, 0, len1, contractionNumber);\
+  subArray(tDk1, M1->dim->perm, 0, contractionNumber, 0);\
+  subArray(tDk0, M0->dim->perm, 0, contractionNumber, len0);\
+  dimension *dSub0 = init_dim(tsub0, len0);\
+  dimension *dSub1 = init_dim(tsub1, len1);\
+  dimension *dM1 = init_dim(tDk1, contractionNumber);\
+  dimension *dM0 = init_dim(tDk0, contractionNumber);\
+  dimension *dM;\
+  min_dimension(&dM, dM0, dM1);\
+  \
+  dimension *dd;\
+  add_dimension(&dd, dSub0, dSub1);\
+  updateRankDim(dd);\
+  *MM = CREATE_TENSOR_##type(dd);\
+  tensor_##type *M= *MM;\
+\
+  /* NOTE(review): tsub0, tsub1, tDk0, tDk1 and the temporary dimensions are\
+     never released here; confirm whether init_dim keeps the arrays before\
+     adding frees. */\
+  if(nbthread == 0) nbthread = 1;\
+  size_t total = M->dim->rank;\
+  struct arg_ProdContract_##type whole;\
+  whole.M0x = M0->x;\
+  whole.M1x = M1->x;\
+  whole.Mx = M->x;\
+  whole.beginRange = 0;\
+  whole.endRange = total;\
+  whole.dSubRank = endian ? dSub1->rank : dSub0->rank;\
+  whole.dMRank = dM->rank;\
+\
+  pthread_t *thrd = malloc(nbthread * sizeof *thrd);\
+  struct arg_ProdContract_##type *arg_th = malloc(nbthread * sizeof *arg_th);\
+  if(thrd == NULL || arg_th == NULL){\
+    /* no room for the bookkeeping: compute everything in this thread */\
+    runProdContract_thread_##type(&whole);\
+    free(thrd);\
+    free(arg_th);\
+    return;\
+  }\
+\
+  size_t started = 0;\
+  for(; started < nbthread; ++started){\
+    arg_th[started] = whole;\
+    arg_th[started].beginRange = started * total / nbthread;\
+    if(started < nbthread - 1) arg_th[started].endRange = (started + 1) * total / nbthread;\
+    if(pthread_create(&thrd[started], NULL, runProdContract_thread_##type, &arg_th[started]) != 0){\
+      /* no thread could be started: finish what is left in this thread */\
+      arg_th[started].endRange = total;\
+      runProdContract_thread_##type(&arg_th[started]);\
+      break;\
+    }\
+  }\
+\
+  for(size_t i = 0; i < started; ++i){\
+    pthread_join(thrd[i], NULL);\
+  }\
+  free(arg_th);\
+  free(thrd);\
+}\
 void tensorContractnProdNotOpt_##type(tensor_##type** MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber) {\
 /*  if (!checkMatchProdtensor(M0->dim, M1->dim, contractionNumber)) {\
     prsize_tf("Deep = %d\n", contractionNumber);\
diff --git a/tensor_t/src/tensor_t/tensor_t.h b/tensor_t/src/tensor_t/tensor_t.h
index 8b491ae..de5a5c7 100644
--- a/tensor_t/src/tensor_t/tensor_t.h
+++ b/tensor_t/src/tensor_t/tensor_t.h
@@ -1,6 +1,8 @@
 #ifndef __TENSOR_T__H__
 #define __TENSOR_T__H__
 
+#include <pthread.h>
+
 #include "dimension_t/dimension_t.h"
 
 void subArray(size_t* dst, size_t* src, size_t debDst, size_t finDst, size_t debSrc);
@@ -19,6 +21,9 @@ tensor_##type * sub_tensor_tail_##type(tensor_##type *rootens, size_t subdim, si
 void tensorProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
 void tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
 void tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \
+void tensorProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1,size_t nbthread); \
+void tensorProdThread2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1,size_t nbthread); \
+void tensorContractnProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t nbthread); \
 void tensorContractnProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \
 
 
diff --git a/tensor_t/test/is_good.c b/tensor_t/test/is_good.c
index 28ca5c1..c322d95 100644
--- a/tensor_t/test/is_good.c
+++ b/tensor_t/test/is_good.c
@@ -442,23 +442,186 @@ TEST(VStensorContractnProd_TYPE_DOUBLE2 ){
   //print_tensor_double(M1,"M1");
 
   tensor_TYPE_DOUBLE *M;
-  //tensor_TYPE_DOUBLE *MnO;
+  tensor_TYPE_DOUBLE *MnO;
 
   tensorContractnProd_TYPE_DOUBLE(&M, M0,M1,2);
   //print_tensor_double(M,"M");
   //cl_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2);
-  //tensorContractnProdNotOpt_TYPE_DOUBLE(&MnO, M0,M1,2);
+  tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2);
 
   //print_tensor_double(MnO,"MnO");
 
 //  for(size_t i=0;i<M->dim->rank;++i)
 //    EXPECT_EQ_TYPE_DOUBLE(M->x[i],MnO->x[i]);
 
-  //EXPECT_ARRAY_EQ_TYPE_DOUBLE(M->x,M->dim->rank,MnO->x,MnO->dim->rank);
+  EXPECT_ARRAY_EQ_TYPE_DOUBLE(M->x,M->dim->rank,MnO->x,MnO->dim->rank);
 
 
 }
+TEST(Pthread_tensorContractnProd_TYPE_DOUBLE2 ){
+  dimension *d0=create_dim(3);
+  dimension *d1=create_dim(3);
+
+  d0->perm[0]=125;
+  d0->perm[1]=52; //3;
+  d0->perm[2]=63;
+
+  d1->perm[0]=52;
+  d1->perm[1]=63;//3;
+  d1->perm[2]=154;
+
+  updateRankDim(d0);
+  updateRankDim(d1);
+
+
+  tensor_TYPE_DOUBLE *M0 = CREATE_TENSOR_TYPE_DOUBLE(d0);
+  tensor_TYPE_DOUBLE *M1 = CREATE_TENSOR_TYPE_DOUBLE(d1);
+
+  LOG("M0->dim->rank = %zu\n",M0->dim->rank);
+  LOG("M1->dim->rank = %zu\n",M1->dim->rank);
+  for(size_t i=0; i<M0->dim->rank;++i) M0->x[i]=i*0.1 +1;
+  for(size_t i=0; i<M1->dim->rank;++i) M1->x[i]=i*0.003 + 2;
+
+  //print_tensor_double(M0,"M0");
+  //print_tensor_double(M1,"M1");
+
+  tensor_TYPE_DOUBLE *M;
+  tensor_TYPE_DOUBLE *MnO;
+
+  size_t nbthread = 5;
+
+  tensorContractnProd_TYPE_DOUBLE(&M, M0,M1,2);
+  //print_tensor_double(M,"M");
+  //cl_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2);
+  tensorContractnProdThread_TYPE_DOUBLE(&MnO, M0,M1,2,nbthread);
+
+  //print_tensor_double(MnO,"MnO");
+
+//  for(size_t i=0;i<M->dim->rank;++i)
+//    EXPECT_EQ_TYPE_DOUBLE(M->x[i],MnO->x[i]);
+
+  EXPECT_ARRAY_EQ_TYPE_DOUBLE(M->x,M->dim->rank,MnO->x,MnO->dim->rank);
+
+
+}
+TEST(tensorProd_vs ){
+  dimension *d0=create_dim(3);
+  dimension *d1=create_dim(2);
+
+  d0->perm[0]=12;
+  d0->perm[1]=13;
+  d0->perm[2]=12;
+
+  d1->perm[0]=21;
+  d1->perm[1]=23;
+
+  updateRankDim(d0);
+  updateRankDim(d1);
+
+
+  tensor_TYPE_FLOAT *M0 = CREATE_TENSOR_TYPE_FLOAT(d0);
+  tensor_TYPE_FLOAT *M1 = CREATE_TENSOR_TYPE_FLOAT(d1);
+
+  LOG("M0->dim->rank = %zu\n",M0->dim->rank);
+  LOG("M1->dim->rank = %zu\n",M1->dim->rank);
+  for(size_t i=0; i<M0->dim->rank;++i) M0->x[i]=i*0.1 +1;
+  for(size_t i=0; i<M1->dim->rank;++i) M1->x[i]=i*0.003 + 2;
+
+
+
+  tensor_TYPE_FLOAT *M;
+  tensor_TYPE_FLOAT *Mn;
+
+  tensorProd_TYPE_FLOAT(&M,M0,M1);
+  tensorProdNotOpt_TYPE_FLOAT(&Mn,M0,M1);
+  LOG("M->dim->rank = %zu\n",M->dim->rank);
+
+
+  EXPECT_ARRAY_EQ_TYPE_FLOAT(M->x,M->dim->rank,Mn->x,Mn->dim->rank);
+
+}
+TEST(tensorProd_vsThread ){
+  dimension *d0=create_dim(3);
+  dimension *d1=create_dim(2);
+
+  d0->perm[0]=12;
+  d0->perm[1]=13;
+  d0->perm[2]=12;
+
+  d1->perm[0]=21;
+  d1->perm[1]=23;
+
+  updateRankDim(d0);
+  updateRankDim(d1);
+
+
+  tensor_TYPE_FLOAT *M0 = CREATE_TENSOR_TYPE_FLOAT(d0);
+  tensor_TYPE_FLOAT *M1 = CREATE_TENSOR_TYPE_FLOAT(d1);
+
+  LOG("M0->dim->rank = %zu\n",M0->dim->rank);
+  LOG("M1->dim->rank = %zu\n",M1->dim->rank);
+  for(size_t i=0; i<M0->dim->rank;++i) M0->x[i]=i*0.1 +1;
+  for(size_t i=0; i<M1->dim->rank;++i) M1->x[i]=i*0.003 + 2;
+
+
+
+  tensor_TYPE_FLOAT *M;
+  tensor_TYPE_FLOAT *Mn;
+
+  size_t nbthread = 5;
+
+  tensorProdThread_TYPE_FLOAT(&M,M0,M1,nbthread);
+  tensorProdNotOpt_TYPE_FLOAT(&Mn,M0,M1);
+  LOG("M->dim->rank = %zu\n",M->dim->rank);
+
+
+  EXPECT_ARRAY_EQ_TYPE_FLOAT(M->x,M->dim->rank,Mn->x,Mn->dim->rank);
+
+}
+
+TEST(tensorProd_vsThread2d ){
+  dimension *d0=create_dim(3);
+  dimension *d1=create_dim(2);
+
+  d0->perm[0]=12;
+  d0->perm[1]=13;
+  d0->perm[2]=12;
+
+  d1->perm[0]=21;
+  d1->perm[1]=23;
+
+  updateRankDim(d0);
+  updateRankDim(d1);
+
+
+  tensor_TYPE_FLOAT *M0 = CREATE_TENSOR_TYPE_FLOAT(d0);
+  tensor_TYPE_FLOAT *M1 = CREATE_TENSOR_TYPE_FLOAT(d1);
+
+  LOG("M0->dim->rank = %zu\n",M0->dim->rank);
+  LOG("M1->dim->rank = %zu\n",M1->dim->rank);
+  for(size_t i=0; i<M0->dim->rank;++i) M0->x[i]=i*0.1 +1;
+  for(size_t i=0; i<M1->dim->rank;++i) M1->x[i]=i*0.003 + 2;
+
+
+
+  tensor_TYPE_FLOAT *M;
+  tensor_TYPE_FLOAT *Mn;
+
+  size_t nbthread = 5;
+
+  tensorProdThread2d_TYPE_FLOAT(&M,M0,M1,nbthread);
+  tensorProdNotOpt_TYPE_FLOAT(&Mn,M0,M1);
+  LOG("M->dim->rank = %zu\n",M->dim->rank);
+
+
+  EXPECT_ARRAY_EQ_TYPE_FLOAT(M->x,M->dim->rank,Mn->x,Mn->dim->rank);
+
+}
+
+
+
+
 int main(int argc, char **argv){