Add pthread-based tensor products (plain product and contracted product)

2024-02-04 00:54:04 +01:00
parent 5fc1681e19
commit 6ae0f7cd75
4 changed files with 371 additions and 5 deletions
@@ -6,7 +6,6 @@ void subArray(size_t* dst, size_t* src, size_t debDst, size_t finDst, size_t deb
    }
 }

-
 void concatArray(size_t* dst, size_t* src0, size_t* src1, size_t debDst, size_t debSrc0, size_t finSrc0, size_t debSrc1, size_t finSrc1) {
    size_t i = debDst;
    for (size_t j = debSrc0; j < finSrc0; j++) {
@@ -152,6 +151,7 @@ void tensorProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##typ
    }  \
 }  \
 \
+\
 void tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1) {  \
    dimension *dd;  \
    add_dimension(&dd, M0->dim, M1->dim); \
@@ -269,6 +269,204 @@ void tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor_##
      /*printf("\n");*/\
    }\
 }\
+/* Per-thread work description for the threaded tensor product: a pointer to */\
+/* one of these is handed to the worker through its void* argument. */\
+struct arg_Prod_##type{\
+  /* element buffers of the two operands (filled from M0->x and M1->x) */\
+  type *M0x;\
+  type *M1x;\
+  /* element buffer of the result tensor (filled from M->x) */\
+  type *Mx;\
+  /* half-open range [beginRange, endRange) of loop indices owned by the worker */\
+  size_t beginRange;\
+  size_t endRange;\
+  /* divisor/stride used to relate a flat result index to the operand indices; */\
+  /* the launchers set it to M1->dim->rank when endian is true, else M0->dim->rank */\
+  size_t MRank;\
+};\
+/* pthread start routine for the threaded outer product.\
+ * arg points to a struct arg_Prod_##type; the worker fills the result elements\
+ * Mx[i] for i in [beginRange, endRange) with M0x[a0] * M1x[a1], where a0 and a1\
+ * are the quotient and remainder of i by MRank (which one is the quotient\
+ * depends on endian). Always returns NULL. */\
+void* runProd_thread_##type(void *arg){\
+  struct arg_Prod_##type *arg_t = arg;\
+  const size_t stride = arg_t->MRank;\
+  for (size_t i = arg_t->beginRange; i < arg_t->endRange; i++) {\
+    /* endian true: M0 index is the quotient; otherwise it is the remainder */\
+    const size_t a0_id = endian ? i / stride : i % stride;\
+    const size_t a1_id = endian ? i % stride : i / stride;\
+    /* plain store: each result element is produced exactly once, so the */\
+    /* previous content of the freshly created buffer must not be read */\
+    arg_t->Mx[i] = arg_t->M0x[a0_id] * arg_t->M1x[a1_id];\
+  }\
+  /* a start routine is declared to return void*; do not fall off the end */\
+  return NULL;\
+}\
+\
+/* Threaded outer product: creates *MM with the dimension built by\
+ * add_dimension(M0->dim, M1->dim) and fills it with nbthread workers running\
+ * runProd_thread_##type, each on a contiguous slice of the flat result index\
+ * range [0, M->dim->rank).\
+ *   MM       out: receives the newly created result tensor\
+ *   M0, M1   operands (only read)\
+ *   nbthread number of worker threads; 0 is treated as 1\
+ * If bookkeeping memory or a thread cannot be obtained, the affected slice is\
+ * computed in the calling thread, so the result is always complete. */\
+void tensorProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t nbthread) {  \
+    dimension *dd;  \
+    add_dimension(&dd, M0->dim, M1->dim); \
+    (*MM)=CREATE_TENSOR_##type(dd);  \
+    tensor_##type *M = *MM; \
+    \
+    const size_t total = M->dim->rank;\
+    /* the worker splits a flat index by this value; see runProd_thread_##type */\
+    const size_t stride = endian ? M1->dim->rank : M0->dim->rank;\
+    if(nbthread == 0){\
+      nbthread = 1;\
+    }\
+    \
+    pthread_t *thrd = malloc(nbthread * sizeof *thrd);\
+    struct arg_Prod_##type *arg_th = malloc(nbthread * sizeof *arg_th);\
+    if(thrd == NULL || arg_th == NULL){\
+      /* no memory for the thread bookkeeping: do the whole range inline */\
+      struct arg_Prod_##type whole = { .M0x = M0->x, .M1x = M1->x, .Mx = M->x, .beginRange = 0, .endRange = total, .MRank = stride };\
+      (void)runProd_thread_##type(&whole);\
+      free(arg_th);\
+      free(thrd);\
+      return;\
+    }\
+    \
+    /* thrd[] is filled compactly so that only started threads are joined */\
+    size_t nbStarted = 0;\
+    for(size_t i = 0; i < nbthread; ++i){\
+      arg_th[i].M0x = M0->x;\
+      arg_th[i].M1x = M1->x;\
+      arg_th[i].Mx = M->x;\
+      arg_th[i].beginRange = i * total / nbthread;\
+      /* the last slice absorbs the remainder of the integer division */\
+      arg_th[i].endRange = (i + 1 < nbthread) ? (i + 1) * total / nbthread : total;\
+      arg_th[i].MRank = stride;\
+      if(pthread_create(&thrd[nbStarted], NULL, runProd_thread_##type, &arg_th[i]) == 0){\
+        ++nbStarted;\
+      }\
+      else{\
+        /* thread creation failed: compute this slice in the calling thread */\
+        (void)runProd_thread_##type(&arg_th[i]);\
+      }\
+    }\
+    \
+    for(size_t i = 0; i < nbStarted; ++i){\
+      pthread_join(thrd[i], NULL);\
+    }\
+    \
+    /* workers are finished, their argument blocks are no longer referenced */\
+    free(arg_th);\
+    free(thrd);\
+}  \
+\
+/* pthread start routine for tensorProdThread2d_##type.\
+ * arg points to a struct arg_Prod_##type. The outer index i runs over\
+ * [beginRange, endRange) and the inner index j over [0, MRank); the element\
+ * written is Mx[i * MRank + j], so every worker fills one contiguous run.\
+ * With endian true, i indexes M0x and j indexes M1x; otherwise i indexes M1x\
+ * and j indexes M0x. These are the same positions and factors that\
+ * runProd_thread_##type obtains with / and % on the flat index.\
+ * Always returns NULL. */\
+void* runProd_thread2d_##type(void *arg){\
+  struct arg_Prod_##type *arg_t = arg;\
+  const size_t inner = arg_t->MRank;\
+  for (size_t i = arg_t->beginRange; i < arg_t->endRange; i++) {\
+    for (size_t j = 0; j < inner; j++) {\
+      const size_t k = i * inner + j;\
+      if(endian){\
+        arg_t->Mx[k] = arg_t->M0x[i] * arg_t->M1x[j];\
+      }\
+      else{\
+        arg_t->Mx[k] = arg_t->M0x[j] * arg_t->M1x[i];\
+      }\
+    }\
+  }\
+  /* a start routine is declared to return void*; do not fall off the end */\
+  return NULL;\
+}\
+\
+/* Threaded outer product, two-level loop variant: creates *MM with the\
+ * dimension built by add_dimension(M0->dim, M1->dim) and fills it with\
+ * nbthread workers running runProd_thread2d_##type. The slow axis of the result\
+ * (M0's elements when endian is true, M1's otherwise) is split between the\
+ * workers; each worker walks the whole fast axis.\
+ *   MM       out: receives the newly created result tensor\
+ *   M0, M1   operands (only read)\
+ *   nbthread number of worker threads; 0 is treated as 1\
+ * NOTE(review): assumes M->dim->rank == M0->dim->rank * M1->dim->rank, as the\
+ * index arithmetic of runProd_thread_##type already does - confirm.\
+ * If bookkeeping memory or a thread cannot be obtained, the affected slice is\
+ * computed in the calling thread, so the result is always complete. */\
+void tensorProdThread2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t nbthread) {  \
+    dimension *dd;  \
+    add_dimension(&dd, M0->dim, M1->dim); \
+    (*MM)=CREATE_TENSOR_##type(dd);  \
+    tensor_##type *M = *MM; \
+    \
+    /* partition the slow axis, not the flat result range: the 2d worker */\
+    /* uses its range to index an operand buffer directly */\
+    const size_t outerRank = endian ? M0->dim->rank : M1->dim->rank;\
+    const size_t innerRank = endian ? M1->dim->rank : M0->dim->rank;\
+    if(nbthread == 0){\
+      nbthread = 1;\
+    }\
+    \
+    pthread_t *thrd = malloc(nbthread * sizeof *thrd);\
+    struct arg_Prod_##type *arg_th = malloc(nbthread * sizeof *arg_th);\
+    if(thrd == NULL || arg_th == NULL){\
+      /* no memory for the thread bookkeeping: do the whole range inline */\
+      struct arg_Prod_##type whole = { .M0x = M0->x, .M1x = M1->x, .Mx = M->x, .beginRange = 0, .endRange = outerRank, .MRank = innerRank };\
+      (void)runProd_thread2d_##type(&whole);\
+      free(arg_th);\
+      free(thrd);\
+      return;\
+    }\
+    \
+    /* thrd[] is filled compactly so that only started threads are joined */\
+    size_t nbStarted = 0;\
+    for(size_t i = 0; i < nbthread; ++i){\
+      arg_th[i].M0x = M0->x;\
+      arg_th[i].M1x = M1->x;\
+      arg_th[i].Mx = M->x;\
+      arg_th[i].beginRange = i * outerRank / nbthread;\
+      /* the last slice absorbs the remainder of the integer division */\
+      arg_th[i].endRange = (i + 1 < nbthread) ? (i + 1) * outerRank / nbthread : outerRank;\
+      arg_th[i].MRank = innerRank;\
+      if(pthread_create(&thrd[nbStarted], NULL, runProd_thread2d_##type, &arg_th[i]) == 0){\
+        ++nbStarted;\
+      }\
+      else{\
+        /* thread creation failed: compute this slice in the calling thread */\
+        (void)runProd_thread2d_##type(&arg_th[i]);\
+      }\
+    }\
+    \
+    for(size_t i = 0; i < nbStarted; ++i){\
+      pthread_join(thrd[i], NULL);\
+    }\
+    \
+    /* workers are finished, their argument blocks are no longer referenced */\
+    free(arg_th);\
+    free(thrd);\
+}  \
+/* Per-thread work description for the threaded contracted product: a pointer */\
+/* to one of these is handed to the worker through its void* argument. */\
+struct arg_ProdContract_##type{\
+  /* element buffers of the two operands (filled from M0->x and M1->x) */\
+  type *M0x;\
+  type *M1x;\
+  /* element buffer of the result tensor (filled from M->x) */\
+  type *Mx;\
+  /* half-open range [beginRange, endRange) of flat result indices owned by the worker */\
+  size_t beginRange;\
+  size_t endRange;\
+  /* divisor used to split a flat result index into its two parts; the launcher */\
+  /* sets it to dSub1->rank when endian is true, else dSub0->rank */\
+  size_t dSubRank;\
+  /* number of terms of the summation loop (filled from dM->rank) */\
+  size_t dMRank;\
+};\
+/* pthread start routine for the threaded contracted product.\
+ * arg points to a struct arg_ProdContract_##type. For every flat result index\
+ * i in [beginRange, endRange) the worker splits i by dSubRank into (a0, a1)\
+ * and stores in Mx[i] the sum over k in [0, dMRank) of M0x[n0] * M1x[n1]:\
+ *   endian true : n0 = a0 * dMRank + k,   n1 = a1 + dSubRank * k\
+ *   otherwise   : n0 = a0 + dSubRank * k, n1 = a1 * dMRank + k\
+ * Always returns NULL. */\
+void* runProdContract_thread_##type(void *arg){\
+  struct arg_ProdContract_##type *arg_t = arg;\
+  const size_t sub = arg_t->dSubRank;\
+  const size_t red = arg_t->dMRank;\
+  for (size_t i = arg_t->beginRange; i < arg_t->endRange; i++) {\
+    /* starting offsets and per-term steps of the two operand walks; the */\
+    /* endian test is done once per element instead of once per term */\
+    size_t n0_id, n1_id, step0, step1;\
+    if(endian){\
+      n0_id = (i / sub) * red;\
+      step0 = 1;\
+      n1_id = i % sub;\
+      step1 = sub;\
+    }\
+    else{\
+      n0_id = i % sub;\
+      step0 = sub;\
+      n1_id = (i / sub) * red;\
+      step1 = 1;\
+    }\
+    arg_t->Mx[i] = 0;\
+    for (size_t k = 0; k < red; k++) {\
+      arg_t->Mx[i] += arg_t->M0x[n0_id] * arg_t->M1x[n1_id];\
+      n0_id += step0;\
+      n1_id += step1;\
+    }\
+  }\
+  /* a start routine is declared to return void*; do not fall off the end */\
+  return NULL;\
+}\
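+/* Contracted product with contraction depth l > 0:\
+ *   M0[x0,x1,..,xn] X M1[y0,y1,..,ym] = M[z0,z1,..,zp]\
+ * There is an l with 1 <= l <= n such that the trailing indices of M0 coincide\
+ * with the leading indices of M1 (x{l} = y0, x{l+1} = y1, .., x{n} = y{l});\
+ * the result has zi = xi for i < n-l and zj = y{j-(n-l)} for j >= n-l,\
+ * hence p = n+m-2l.\
+ *   M0[x0,x1,..,xl,x{l+1},..,xn] X M1[xn,x{n-1},x{n-2},..,xl,y{l+1},..,ym] = M[x0,x1,..,xl,y{l+1},..,y{n+m-2l}]\
+ * In block-index form: M[[i][j]] = sum_{[k]} M0[[i][k]] * M1[[k][j]] */\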
+\
+/* Threaded contracted product: creates *MM and fills it with nbthread workers\
+ * running runProdContract_thread_##type, each on a contiguous slice of the flat\
+ * result index range [0, M->dim->rank).\
+ *   MM                out: receives the newly created result tensor\
+ *   M0, M1            operands (only read)\
+ *   contractionNumber number of axes summed over\
+ *   nbthread          number of worker threads; 0 is treated as 1\
+ * NOTE(review): len0/len1 are unsigned and wrap around if contractionNumber\
+ * exceeds M0->dim->size or M1->dim->size; callers must guarantee it does not.\
+ * If bookkeeping memory or a thread cannot be obtained, the affected slice is\
+ * computed in the calling thread, so the result is always complete. */\
+void tensorContractnProdThread_##type(tensor_##type** MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t nbthread) {\
+\
+    size_t len0 = M0->dim->size - contractionNumber;\
+    size_t len1 = M1->dim->size - contractionNumber;\
+\
+    /* NOTE(review): subArray/init_dim/min_dimension are defined elsewhere; from */\
+    /* the arguments, dSub0/dSub1 look like the kept axes of M0/M1 (leading part */\
+    /* of M0, trailing part of M1) and dM0/dM1 like the contracted axes - confirm */\
+    size_t* tsub0 = malloc(sizeof(size_t) *len0);\
+    size_t* tsub1 = malloc(sizeof(size_t) *len1);\
+    size_t* tDk1 = malloc(sizeof(size_t) *contractionNumber);\
+    size_t* tDk0 = malloc(sizeof(size_t) *contractionNumber);\
+    subArray(tsub0, M0->dim->perm, 0, len0, 0);\
+    subArray(tsub1, M1->dim->perm, 0, len1, contractionNumber);\
+    subArray(tDk1, M1->dim->perm, 0, contractionNumber, 0);\
+    subArray(tDk0, M0->dim->perm, 0, contractionNumber, len0);\
+    dimension *dSub0 = init_dim(tsub0, len0);\
+    dimension *dSub1 = init_dim(tsub1, len1);\
+    dimension *dM1 = init_dim(tDk1, contractionNumber);\
+    dimension *dM0 = init_dim(tDk0, contractionNumber);\
+    dimension *dM;\
+    min_dimension(&dM, dM0, dM1);\
+    \
+    dimension *dd;\
+    add_dimension(&dd, dSub0, dSub1);\
+    updateRankDim(dd);\
+    *MM = CREATE_TENSOR_##type(dd);\
+    tensor_##type *M= *MM;\
+\
+    const size_t total = M->dim->rank;\
+    /* the worker splits a flat result index by this value */\
+    const size_t subRank = endian ? dSub1->rank : dSub0->rank;\
+    const size_t sumRank = dM->rank;\
+    /* NOTE(review): who owns tsub0/tsub1/tDk0/tDk1 and dSub0/dSub1/dM0/dM1/dM */\
+    /* after the calls above is not visible here, so they are not released - */\
+    /* verify whether they leak */\
+    if(nbthread == 0){\
+      nbthread = 1;\
+    }\
+    \
+    pthread_t *thrd = malloc(nbthread * sizeof *thrd);\
+    struct arg_ProdContract_##type *arg_th = malloc(nbthread * sizeof *arg_th);\
+    if(thrd == NULL || arg_th == NULL){\
+      /* no memory for the thread bookkeeping: do the whole range inline */\
+      struct arg_ProdContract_##type whole = { .M0x = M0->x, .M1x = M1->x, .Mx = M->x, .beginRange = 0, .endRange = total, .dSubRank = subRank, .dMRank = sumRank };\
+      (void)runProdContract_thread_##type(&whole);\
+      free(arg_th);\
+      free(thrd);\
+      return;\
+    }\
+    \
+    /* thrd[] is filled compactly so that only started threads are joined */\
+    size_t nbStarted = 0;\
+    for(size_t i = 0; i < nbthread; ++i){\
+      arg_th[i].M0x = M0->x;\
+      arg_th[i].M1x = M1->x;\
+      arg_th[i].Mx = M->x;\
+      arg_th[i].beginRange = i * total / nbthread;\
+      /* the last slice absorbs the remainder of the integer division */\
+      arg_th[i].endRange = (i + 1 < nbthread) ? (i + 1) * total / nbthread : total;\
+      arg_th[i].dSubRank = subRank;\
+      arg_th[i].dMRank = sumRank;\
+      if(pthread_create(&thrd[nbStarted], NULL, runProdContract_thread_##type, &arg_th[i]) == 0){\
+        ++nbStarted;\
+      }\
+      else{\
+        /* thread creation failed: compute this slice in the calling thread */\
+        (void)runProdContract_thread_##type(&arg_th[i]);\
+      }\
+    }\
+    \
+    for(size_t i = 0; i < nbStarted; ++i){\
+      pthread_join(thrd[i], NULL);\
+    }\
+    \
+    /* workers are finished, their argument blocks are no longer referenced */\
+    free(arg_th);\
+    free(thrd);\
+}\
 void tensorContractnProdNotOpt_##type(tensor_##type** MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber) {\
   /* if (!checkMatchProdtensor(M0->dim, M1->dim, contractionNumber)) {\
        prsize_tf("Deep = %d\n", contractionNumber);\
@@ -1,6 +1,8 @@
 #ifndef __TENSOR_T__H__
 #define __TENSOR_T__H__

+#include <pthread.h>
+
 #include "dimension_t/dimension_t.h"

 void subArray(size_t* dst, size_t* src, size_t debDst, size_t finDst, size_t debSrc);
@@ -19,6 +21,9 @@ tensor_##type * sub_tensor_tail_##type(tensor_##type *rootens, size_t subdim, si
 void tensorProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
 void tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
 void tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \
+/* Threaded product of M0 and M1: creates *MM and fills it using nbthread pthreads. */ \
+void tensorProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1,size_t nbthread); \
+/* Variant of tensorProdThread_##type with the same parameters; creates *MM and fills it using nbthread pthreads. */ \
+void tensorProdThread2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1,size_t nbthread); \
+/* Threaded contracted product of M0 and M1 over contractionNumber axes: creates *MM and fills it using nbthread pthreads. */ \
+void tensorContractnProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t nbthread); \
 void tensorContractnProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \