Debug all memory leaks with valgrind in ftest and tensor rep

This commit is contained in:
2024-02-08 23:57:10 +01:00
parent 6ae0f7cd75
commit e1171d720b
32 changed files with 1468 additions and 405 deletions
+60 -21
View File
@@ -1,36 +1,75 @@
__kernel void prodContractnTensorLin_TYPE_FLOAT(long unsigned int dSubRank, long unsigned int dMRank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
__kernel void prodContractnTensor2dLin_TYPE_FLOAT(long unsigned int dSubRank, long unsigned int dMRank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
//Get the index of the current element to be processed
size_t i = get_global_id(0);
size_t k, a0_id, a1_id, n0_id, n1_id;
a0_id = i / dSubRank;
a1_id = i % dSubRank;
Mx[i] = 0;
//size_t i = get_global_id(0);
//size_t j = get_global_id(1);
size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0);
size_t j = get_group_id(1)*get_local_size(1) + get_local_id(1);
size_t k, n0_id, n1_id;
size_t ind = i * dSubRank + j;
Mx[ind] = 0;
for (k = 0; k < dMRank; k++) {
n0_id = a0_id * dMRank + k;
n1_id = a1_id + dSubRank * k;
Mx[i] += M0x[n0_id] * M1x[n1_id];
n0_id = i * dMRank + k;
n1_id = j + dSubRank * k;
Mx[ind] += M0x[n0_id] * M1x[n1_id];
}
}
__kernel void prodContractnTensorLin_TYPE_DOUBLE(long unsigned int dSubRank, long unsigned int dMRank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
__kernel void prodContractnTensor2dLin_TYPE_DOUBLE(long unsigned int dSubRank, long unsigned int dMRank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
//Get the index of the current element to be processed
size_t i = get_global_id(0);
size_t k, a0_id, a1_id, n0_id, n1_id;
a0_id = i / dSubRank;
a1_id = i % dSubRank;
Mx[i] = 0;
//size_t i = get_global_id(0);
//size_t j = get_global_id(1);
size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0);
size_t j = get_group_id(1)*get_local_size(1) + get_local_id(1);
size_t k, n0_id, n1_id;
size_t ind = i * dSubRank + j;
Mx[ind] = 0;
for (k = 0; k < dMRank; k++) {
n0_id = a0_id * dMRank + k;
n1_id = a1_id + dSubRank * k;
Mx[i] += M0x[n0_id] * M1x[n1_id];
n0_id = i * dMRank + k;
n1_id = j + dSubRank * k;
Mx[ind] += M0x[n0_id] * M1x[n1_id];
}
}
/*
 * 2D contraction-product kernel, "not endian" storage order, float version.
 *
 * Work-item (i, j) produces one output element:
 *
 *     Mx[i + dSubRank*j] = sum over k < dMRank of
 *                          M0x[i + dSubRank*k] * M1x[k + dMRank*j]
 *
 * dSubRank : extent of the i axis (stride between consecutive j / k slices
 *            of Mx and M0x).
 * dMRank   : length of the contracted axis.
 * M0x      : read as dSubRank x dMRank elements, i fastest.
 * M1x      : read as dMRank x (j extent) elements, k fastest.
 * Mx       : output, written as dSubRank x (j extent) elements, i fastest.
 *
 * The kernel name must be "prodContractnTensor2dLinNotEndian_TYPE_FLOAT":
 * that is the string the host wrapper builds when looking the kernel up
 * (the previous spelling "...LiniNotEndian..." could never be found).
 *
 * The previous body indexed M0x with stride dMRank and M1x with stride
 * dSubRank; those two strides were swapped with respect to the layouts
 * above, which gives wrong results (and out-of-range reads) whenever
 * dSubRank != dMRank.
 *
 * There is no bounds check: the launch is assumed to use a global size that
 * matches the output extents exactly — TODO confirm against the host-side
 * launch macro.
 */
__kernel void prodContractnTensor2dLinNotEndian_TYPE_FLOAT(long unsigned int dSubRank, long unsigned int dMRank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
    /* Global 2D position of this work-item, built from group and local ids. */
    size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0);
    size_t j = get_group_id(1)*get_local_size(1) + get_local_id(1);
    size_t k;
    size_t ind = i + dSubRank * j;

    /* Accumulate privately and store once instead of updating global memory
       on every iteration. */
    float acc = 0.0f;
    for (k = 0; k < dMRank; k++) {
        acc += M0x[i + dSubRank * k] * M1x[k + dMRank * j];
    }
    Mx[ind] = acc;
}
/*
 * 2D contraction-product kernel, "not endian" storage order, double version.
 *
 * Work-item (i, j) produces one output element:
 *
 *     Mx[i + dSubRank*j] = sum over k < dMRank of
 *                          M0x[i + dSubRank*k] * M1x[k + dMRank*j]
 *
 * dSubRank : extent of the i axis (stride between consecutive j / k slices
 *            of Mx and M0x).
 * dMRank   : length of the contracted axis.
 * M0x      : read as dSubRank x dMRank elements, i fastest.
 * M1x      : read as dMRank x (j extent) elements, k fastest.
 * Mx       : output, written as dSubRank x (j extent) elements, i fastest.
 *
 * The previous body indexed M0x with stride dMRank and M1x with stride
 * dSubRank; those two strides were swapped with respect to the layouts
 * above, which gives wrong results (and out-of-range reads) whenever
 * dSubRank != dMRank.
 *
 * There is no bounds check: the launch is assumed to use a global size that
 * matches the output extents exactly — TODO confirm against the host-side
 * launch macro.
 * NOTE(review): double support may need the cl_khr_fp64 extension on older
 * OpenCL versions — confirm for the target devices.
 */
__kernel void prodContractnTensor2dLinNotEndian_TYPE_DOUBLE(long unsigned int dSubRank, long unsigned int dMRank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
    /* Global 2D position of this work-item, built from group and local ids. */
    size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0);
    size_t j = get_group_id(1)*get_local_size(1) + get_local_id(1);
    size_t k;
    size_t ind = i + dSubRank * j;

    /* Accumulate privately and store once instead of updating global memory
       on every iteration. */
    double acc = 0.0;
    for (k = 0; k < dMRank; k++) {
        acc += M0x[i + dSubRank * k] * M1x[k + dMRank * j];
    }
    Mx[ind] = acc;
}
+1 -7
View File
@@ -11,7 +11,7 @@ __kernel void prodTensor2dLin_TYPE_FLOAT(long unsigned int M1rank, __global cons
Mx[k] = M0x[i] * M1x[j];
}
__kernel void prodTensori2dLin_TYPE_DOUBLE(long unsigned int M1rank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
__kernel void prodTensor2dLin_TYPE_DOUBLE(long unsigned int M1rank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
//Get the index of the current element to be processed
size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0);
@@ -51,9 +51,3 @@ __kernel void prodTensori2dLinNotEndian_TYPE_DOUBLE(long unsigned int M0rank, __
Mx[k] = M0x[i] * M1x[j];
}
@@ -1,6 +1,3 @@
__kernel void prodContractnTensorLin_TYPE_FLOAT(long unsigned int dSubRank, long unsigned int dMRank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
//Get the index of the current element to be processed
@@ -64,7 +61,3 @@ __kernel void prodContractnTensorLinNotEndian_TYPE_DOUBLE(long unsigned int dSub
Mx[i] += M0x[n0_id] * M1x[n1_id];
}
}
-5
View File
@@ -33,8 +33,3 @@ __kernel void prodTensorLinNotEndian_TYPE_DOUBLE(long unsigned int M0rank, __glo
size_t j = k / M0rank;
Mx[k] = M0x[i] * M1x[j];
}
+67 -3
View File
@@ -28,14 +28,15 @@
cl_uint ret_num_devices; \
cl_uint ret_num_platforms; \
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); \
checkError(ret,__func__,"Error: Failed to get platform ID ");\
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, \
&device_id, &ret_num_devices); \
size_t returned_size = 0;\
size_t max_workgroup_size = 0;\
ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_workgroup_size, &returned_size);\
/*ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_workgroup_size, &returned_size);\
checkError(ret,__func__,"Error: Failed to retrieve device info!");\
printf(" ===========================================================++> return size: %ld\n max group sz: %ld\n", returned_size, max_workgroup_size);\
\
*/\
/*int gpu = 1;\
ret = clGetDeviceIDs( platform_id, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, \
&device_id, &ret_num_devices); */\
@@ -136,6 +137,7 @@
ret |= clReleaseCommandQueue(command_queue); \
ret |= clReleaseContext(context); \
checkError(ret,__func__,"Error: Failed to clean up! ");\
free(source_str);
#define GEN_cl_FUNC_TENSOR(type)\
\
@@ -214,6 +216,7 @@ void cl_tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor
dSubRank = dSub0->rank;\
\
}\
printf("func_cl_name = %s ......... \n",func_cl_name);\
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
\
/*/ Set the arguments of the kernel */ \
@@ -228,7 +231,7 @@ void cl_tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor
READ_BUF_N_CLEANUP(type)\
\
} \
\
\
\
void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t div0Wsz, size_t div1Wsz) { \
dimension *dd; \
@@ -262,6 +265,67 @@ void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type
READ_BUF_N_CLEANUP(type)\
} \
\
/* Contraction product of M0 and M1 over `contractionNumber` dimensions, run on an OpenCL device with a 2D kernel. */\
/* MM: receives the newly created result tensor. div0Wsz / div1Wsz: forwarded unchanged to EXEC_cl_2d_KERNEL. */\
/* NOTE(review): tsub0, tsub1, tDk0, tDk1 and dSub0, dSub1, dM0, dM1, dM are never released in this body; */\
/* whether init_dim or READ_BUF_N_CLEANUP take ownership of them is not visible here - confirm with valgrind. */\
void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t div0Wsz, size_t div1Wsz) {\
\
/* Number of dimensions of each operand left over once the contracted ones are removed. */\
size_t len0 = M0->dim->size - contractionNumber;\
size_t len1 = M1->dim->size - contractionNumber;\
\
/* Scratch arrays filled by subArray from the operands' perm arrays: */\
/* tsub0/tsub1 feed the kept sub-dimensions, tDk0/tDk1 feed the contracted ones. */\
size_t* tsub0 = malloc(sizeof(size_t) *len0);\
size_t* tsub1 = malloc(sizeof(size_t) *len1);\
size_t* tDk1 = malloc(sizeof(size_t) *contractionNumber);\
size_t* tDk0 = malloc(sizeof(size_t) *contractionNumber);\
/* presumably the last argument is the start offset in perm (M0 keeps its head, M1 keeps its tail) - verify against subArray. */\
subArray(tsub0, M0->dim->perm, 0, len0, 0);\
subArray(tsub1, M1->dim->perm, 0, len1, contractionNumber);\
subArray(tDk1, M1->dim->perm, 0, contractionNumber, 0);\
subArray(tDk0, M0->dim->perm, 0, contractionNumber, len0);\
dimension *dSub0 = init_dim(tsub0, len0);\
dimension *dSub1 = init_dim(tsub1, len1);\
dimension *dM1 = init_dim(tDk1, contractionNumber);\
dimension *dM0 = init_dim(tDk0, contractionNumber);\
dimension *dM;\
/* NOTE(review): the CPU contraction routines in this change call min_copy_dimension here; this one still calls min_dimension - confirm which is intended. */\
min_dimension(&dM, dM0, dM1);\
\
/* Result dimension is built from dSub0 and dSub1; the result tensor is created from it and returned through MM. */\
dimension *dd;\
add_dimension(&dd, dSub0, dSub1);\
updateRankDim(dd);\
*MM = CREATE_TENSOR_##type(dd);\
tensor_##type *M= *MM;\
/* Kernel source path (relative path as written) and the two candidate kernel names. */\
char *file_cl_src = "../src/kernel_2d_ProdContractnTensor.cl"; \
char *func_cl_nameEndian = "prodContractnTensor2dLin_" #type; \
char *func_cl_nameNotEndian = "prodContractnTensor2dLinNotEndian_" #type; \
char *func_cl_name; \
size_t dSubRank;\
/* The global `endian` flag picks the kernel and which sub-dimension rank is passed as kernel argument 0. */\
if(endian){\
func_cl_name = func_cl_nameEndian;\
dSubRank = dSub1->rank;\
\
}else{\
func_cl_name = func_cl_nameNotEndian;\
dSubRank = dSub0->rank;\
\
}\
/* SETUP_cl_KERNEL_ is defined elsewhere; the names ret, kernel, M0_mem_obj, M1_mem_obj and M_mem_obj used below presumably come from it. */\
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
/*size_t cl_dev_max_w_sz,sz_val;\
ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &cl_dev_max_w_sz, &sz_val);\
printf("CL_DEVICE_MAX_WORK_GROUP_SIZE = : %ld, sz :%ld\n ",cl_dev_max_w_sz, sz_val);\
*/\
/*/ Set the arguments of the kernel */ \
/* Arguments 0 and 1 are passed with sizeof(size_t); assumes this matches the size of the kernels' `long unsigned int` parameters - TODO confirm on the target platform. */\
ret |= clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&dSubRank); \
ret |= clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \
ret |= clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&M_mem_obj); \
checkError(ret,__func__,"Error: Failed to set kernel arguments! ");\
\
/* Launch with the two sub-dimension ranks and the caller-supplied div0Wsz / div1Wsz, then hand over to READ_BUF_N_CLEANUP. */\
/*printf("EXEC_cl_2d_KERNEL(type,%ld,%ld,%ld,%ld)\n",dSub0->rank,dSub1->rank,div0Wsz,div1Wsz);\
*/EXEC_cl_2d_KERNEL(type,dSub0->rank,dSub1->rank,div0Wsz,div1Wsz);\
READ_BUF_N_CLEANUP(type)\
\
} \
\
void checkError(cl_int error, const char *func_name, char *msg) {
if (error != CL_SUCCESS) {
+1 -1
View File
@@ -19,7 +19,7 @@
void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
void cl_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \
void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t div0Wsz, size_t div1Wsz); \
/*void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t div0Wsz, size_t div1Wsz);*/ \
void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t div0Wsz, size_t div1Wsz); \
CL_GENERATE_TENSOR_TYPE(TYPE_FLOAT);
CL_GENERATE_TENSOR_TYPE(TYPE_DOUBLE);
+217 -76
View File
@@ -23,6 +23,27 @@ void printArraySzt(size_t *a, size_t sz,char *msg){
printf("\n");
}
/*
 * Cleanup helpers for the tensor product routines. Each macro expands to a
 * sequence of complete statements that release locals of the calling scope,
 * so the caller must have variables with exactly these names.
 *
 * The last line of every macro deliberately has NO trailing line
 * continuation: a trailing backslash would splice whatever line follows the
 * definition (for instance the next #define) into the macro body.
 */

/* Release the scratch coordinate arrays coord0, coord1 and coord. */
#define FREE_COORD_ \
    free(coord0);\
    free(coord1);\
    free(coord);

/* Release the scratch index arrays tsub0, tsub1, tDk1 and tDk0. */
#define FREE_t \
    free( tsub0 );\
    free( tsub1 );\
    free( tDk1 );\
    free( tDk0 );

/* Release the dimension objects dM0, dM1, dM, dSub0 and dSub1. */
#define FREE_dM_S_ \
    free_dimension(dM0);\
    free_dimension(dM1);\
    free_dimension(dM);\
    free_dimension(dSub0);\
    free_dimension(dSub1);
#define GEN_FUNC_TENSOR(type)\
tensor_##type* CREATE_TENSOR_##type(dimension *dim){\
tensor_##type *r_tens=malloc(sizeof(tensor_##type));\
@@ -32,24 +53,37 @@ void printArraySzt(size_t *a, size_t sz,char *msg){
return r_tens;\
}\
\
tensor_##type * sub_minus_tensor_head_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim){\
/* Create a tensor whose dimension is a copy made by init_copy_dim from dim->perm and dim->size, */\
/* so the caller keeps `dim`. The data buffer holds dim->rank elements and is left uninitialised. */\
tensor_##type* CREATE_TENSOR_FROM_CPY_DIM_##type(dimension *dim){\
    tensor_##type *fresh = malloc(sizeof *fresh);\
    dimension *ownDim = init_copy_dim(dim->perm, dim->size);\
    fresh->dim = ownDim;\
    fresh->x = malloc(dim->rank * sizeof(type));\
    return fresh;\
}\
\
/* Release a tensor: its dimension (via free_dimension), its data buffer, then the struct itself. */\
/* A NULL tensor is accepted and ignored. */\
void free_tensor_##type(tensor_##type * tens){\
    if(tens == NULL){\
        return;\
    }\
    free_dimension(tens->dim);\
    free(tens->x);\
    free(tens);\
}\
/* tensor_##type * sub_minus_tensor_head_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim){\
dimension *rdim= rootens->dim;\
dimension *dS_t = sub_minus_dim_tail(rdim,rdim->size - minuSubdim);\
if(rankInDim < dS_t->rank){\
dimension *dS_h = sub_minus_dim_head(rdim,minuSubdim);\
tensor_##type *ret_ens = malloc(sizeof(tensor_##type));\
ret_ens->dim = dS_h;\
/*ret_ens->x = (rootens->x)+rankInDim*dS_t->rank;*/\
ret_ens->x = malloc(sizeof(type)*dS_h->rank);\
if(endian){\
ret_ens->x = malloc(sizeof(type)*dS_h->rank);\
for(size_t i=0; i<dS_h->rank; ++i){\
ret_ens->x[i]=rootens->x[i*dS_t->rank + rankInDim];\
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
ret_ens->x[i]=rootens->x[i*dS_t->rank + rankInDim];\
\
}\
}else{\
ret_ens->x = (rootens->x)+rankInDim*dS_h->rank;\
\
for(size_t i=0; i<dS_h->rank; ++i){\
ret_ens->x[i]=rootens->x[i + dS_h->rank * rankInDim];\
}\
}\
return ret_ens;\
}\
@@ -63,15 +97,15 @@ void printArraySzt(size_t *a, size_t sz,char *msg){
dimension *dS_t = sub_minus_dim_tail(rdim,minuSubdim);\
tensor_##type *ret_ens = malloc(sizeof(tensor_##type));\
ret_ens->dim = dS_t;\
if(endian==false){\
ret_ens->x = malloc(sizeof(type)*dS_t->rank);\
if(endian==false){\
for(size_t i=0; i<dS_t->rank; ++i){\
ret_ens->x[i]=rootens->x[i*dS_h->rank + rankInDim];\
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
\
}\
}else{\
ret_ens->x = (rootens->x)+rankInDim*dS_t->rank;\
for(size_t i=0; i<dS_t->rank; ++i){\
ret_ens->x[i]=rootens->x[i + dS_t->rank * rankInDim];\
}\
\
}\
return ret_ens;\
@@ -80,23 +114,21 @@ void printArraySzt(size_t *a, size_t sz,char *msg){
}\
\
tensor_##type * sub_tensor_head_##type(tensor_##type *rootens, size_t subdim, size_t rankInDim){\
/*return sub_minus_tensor_head_##type(rootens,rootens->dim->size - subdim, rankInDim);*/\
dimension *rdim= rootens->dim;\
dimension *dS_t = sub_dim_tail(rdim,rdim->size - subdim);\
if(rankInDim < dS_t->rank){\
dimension *dS_h = sub_dim_head(rdim,subdim);\
tensor_##type *ret_ens = malloc(sizeof(tensor_##type));\
ret_ens->dim = dS_h;\
/*ret_ens->x = (rootens->x)+rankInDim*dS_t->rank;*/\
if(endian){\
ret_ens->x = malloc(sizeof(type)*dS_h->rank);\
if(endian){\
for(size_t i=0; i<dS_h->rank; ++i){\
ret_ens->x[i]=rootens->x[i*dS_t->rank + rankInDim];\
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
\
}\
}else{\
ret_ens->x = (rootens->x)+rankInDim*dS_h->rank;\
for(size_t i=0; i<dS_h->rank; ++i){\
ret_ens->x[i]=rootens->x[i + dS_h->rank * rankInDim];\
}\
\
}\
return ret_ens;\
@@ -104,28 +136,142 @@ void printArraySzt(size_t *a, size_t sz,char *msg){
return NULL;\
}\
tensor_##type * sub_tensor_tail_##type(tensor_##type *rootens, size_t subdim, size_t rankInDim){ \
/*return sub_minus_tensor_tail_##type(rootens,rootens->dim->size - subdim, rankInDim);*/\
dimension *rdim= rootens->dim;\
dimension *dS_h = sub_dim_head(rdim,rdim->size - subdim);\
if(rankInDim < dS_h->rank){\
dimension *dS_t = sub_dim_tail(rdim,subdim);\
tensor_##type *ret_ens = malloc(sizeof(tensor_##type));\
ret_ens->dim = dS_t;\
if(endian==false){\
ret_ens->x = malloc(sizeof(type)*dS_t->rank);\
if(endian==false){\
for(size_t i=0; i<dS_t->rank; ++i){\
ret_ens->x[i]=rootens->x[i*dS_h->rank + rankInDim];\
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
\
}\
}else{\
ret_ens->x = (rootens->x)+rankInDim*dS_t->rank;\
for(size_t i=0; i<dS_t->rank; ++i){\
ret_ens->x[i]=rootens->x[i + dS_t->rank * rankInDim];\
}\
\
}\
return ret_ens;\
}\
return NULL;\
}\
*/ \
/* Extract a newly allocated sub-tensor of `rootens` shaped by the "head" sub-dimension */\
/* (sub_copy_minus_dim_head(dim, minuSubdim)); rankInDim selects the position inside the complementary tail. */\
/* The elements are copied, so the result never aliases rootens->x. */\
/* Returns NULL when rankInDim is not smaller than the tail rank. */\
tensor_##type * sub_copy_minus_tensor_head_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim){\
    dimension *srcDim = rootens->dim;\
    dimension *tailDim = sub_copy_minus_dim_tail(srcDim, srcDim->size - minuSubdim);\
    tensor_##type *slice = NULL;\
    if(rankInDim < tailDim->rank){\
        dimension *headDim = sub_copy_minus_dim_head(srcDim, minuSubdim);\
        slice = CREATE_TENSOR_##type(headDim);\
        /* endian: source elements are tailDim->rank apart, starting at rankInDim; */\
        /* otherwise they are contiguous, starting at headDim->rank * rankInDim. */\
        const size_t step = endian ? tailDim->rank : 1;\
        const size_t base = endian ? rankInDim : headDim->rank * rankInDim;\
        for(size_t n = 0; n < headDim->rank; ++n){\
            slice->x[n] = rootens->x[base + n * step];\
        }\
    }\
    /* The tail dimension is only needed for the range check and the stride. */\
    free_dimension(tailDim);\
    return slice;\
}\
\
/* Extract a newly allocated sub-tensor of `rootens` shaped by the "tail" sub-dimension */\
/* (sub_copy_minus_dim_tail(dim, minuSubdim)); rankInDim selects the position inside the complementary head. */\
/* The elements are copied, so the result never aliases rootens->x. */\
/* Returns NULL when rankInDim is not smaller than the head rank. */\
tensor_##type * sub_copy_minus_tensor_tail_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim){\
    dimension *srcDim = rootens->dim;\
    dimension *headDim = sub_copy_minus_dim_head(srcDim, srcDim->size - minuSubdim);\
    tensor_##type *slice = NULL;\
    if(rankInDim < headDim->rank){\
        dimension *tailDim = sub_copy_minus_dim_tail(srcDim, minuSubdim);\
        slice = CREATE_TENSOR_##type(tailDim);\
        /* endian: source elements are contiguous, starting at tailDim->rank * rankInDim; */\
        /* otherwise they are headDim->rank apart, starting at rankInDim. */\
        const size_t step = endian ? 1 : headDim->rank;\
        const size_t base = endian ? tailDim->rank * rankInDim : rankInDim;\
        for(size_t n = 0; n < tailDim->rank; ++n){\
            slice->x[n] = rootens->x[base + n * step];\
        }\
    }\
    /* The head dimension is only needed for the range check and the stride. */\
    free_dimension(headDim);\
    return slice;\
}\
\
/* Extract a newly allocated sub-tensor of `rootens` shaped by sub_copy_dim_head(dim, sub_copydim); */\
/* rankInDim selects the position inside the complementary tail sub-dimension. */\
/* The elements are copied, so the result never aliases rootens->x. */\
/* Returns NULL when rankInDim is not smaller than the tail rank. */\
tensor_##type * sub_copy_tensor_head_##type(tensor_##type *rootens, size_t sub_copydim, size_t rankInDim){\
    dimension *srcDim = rootens->dim;\
    dimension *tailDim = sub_copy_dim_tail(srcDim, srcDim->size - sub_copydim);\
    tensor_##type *slice = NULL;\
    if(rankInDim < tailDim->rank){\
        dimension *headDim = sub_copy_dim_head(srcDim, sub_copydim);\
        slice = CREATE_TENSOR_##type(headDim);\
        /* endian: source elements are tailDim->rank apart, starting at rankInDim; */\
        /* otherwise they are contiguous, starting at headDim->rank * rankInDim. */\
        const size_t step = endian ? tailDim->rank : 1;\
        const size_t base = endian ? rankInDim : headDim->rank * rankInDim;\
        for(size_t n = 0; n < headDim->rank; ++n){\
            slice->x[n] = rootens->x[base + n * step];\
        }\
    }\
    /* The tail dimension is only needed for the range check and the stride. */\
    free_dimension(tailDim);\
    return slice;\
}\
/* Extract a newly allocated sub-tensor of `rootens` shaped by sub_copy_dim_tail(dim, sub_copydim); */\
/* rankInDim selects the position inside the complementary head sub-dimension. */\
/* The elements are copied, so the result never aliases rootens->x. */\
/* Returns NULL when rankInDim is not smaller than the head rank. */\
tensor_##type * sub_copy_tensor_tail_##type(tensor_##type *rootens, size_t sub_copydim, size_t rankInDim){ \
    dimension *srcDim = rootens->dim;\
    dimension *headDim = sub_copy_dim_head(srcDim, srcDim->size - sub_copydim);\
    tensor_##type *slice = NULL;\
    if(rankInDim < headDim->rank){\
        dimension *tailDim = sub_copy_dim_tail(srcDim, sub_copydim);\
        slice = CREATE_TENSOR_##type(tailDim);\
        /* endian: source elements are contiguous, starting at tailDim->rank * rankInDim; */\
        /* otherwise they are headDim->rank apart, starting at rankInDim. */\
        const size_t step = endian ? 1 : headDim->rank;\
        const size_t base = endian ? tailDim->rank * rankInDim : rankInDim;\
        for(size_t n = 0; n < tailDim->rank; ++n){\
            slice->x[n] = rootens->x[base + n * step];\
        }\
    }\
    /* The head dimension is only needed for the range check and the stride. */\
    free_dimension(headDim);\
    return slice;\
}\
\
void tensorProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1) { \
dimension *dd; \
@@ -149,6 +295,7 @@ void tensorProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##typ
M->x[i] = M0->x[lin0] * M1->x[lin1]; \
/*printf(" M->x[%ld] = M0->x[%ld] * M1->x[%ld] ::: %f = %f * %f \n",i,lin0,lin1, M->x[i] , M0->x[lin0] , M1->x[lin1]);*/\
} \
FREE_COORD_ ; \
} \
\
\
@@ -203,7 +350,7 @@ void tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor_##
printDebug_dimension(dM0,"dM0");\
printDebug_dimension(dM1,"dM1");*/\
dimension *dM;\
min_dimension(&dM, dM0, dM1);\
min_copy_dimension(&dM, dM0, dM1);\
/*printDebug_dimension(dM,"dM");*/\
\
dimension *dd;\
@@ -213,21 +360,7 @@ void tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor_##
*MM = CREATE_TENSOR_##type(dd);\
tensor_##type *M= *MM;\
\
/*size_t* coord;\
coord = malloc(sizeof(size_t)* M->dim->size);\
\
size_t* coord0 , lin0;\
coord0 = malloc(sizeof(size_t)* len0);\
size_t* coord1, lin1;\
coord1 = malloc(sizeof(size_t)* len1);\
\
size_t* coordM0 ;\
coordM0 = malloc(sizeof(size_t)* M0->dim->size);\
size_t* coordM1 ;\
coordM1 = malloc(sizeof(size_t)* M1->dim->size);\
\
size_t* Koord ;\
Koord = malloc(sizeof(size_t)* contractionNumber);*/\
\
\
size_t a0_id, a1_id, n0_id, n1_id;\
for (size_t i = 0; i < M->dim->rank; i++) {\
@@ -239,11 +372,6 @@ void tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor_##
a0_id=i%dSub0->rank;\
a1_id=i/dSub0->rank;\
}\
/*vCoordFromLin(coord, i, M->dim);\
subArray(coord0, coord, 0, len0, 0);\
subArray(coord1, coord, 0, len1, len0);\
printf("i:%ld=> c0: %ld vs %ld \n",i,LineFromCoord(coord0,dSub0),a0_id);\
printf("i:%ld=> c1: %ld vs %ld \n",i,LineFromCoord(coord1,dSub1),a1_id);*/\
M->x[i] = 0;\
for (size_t k = 0; k < dM->rank; k++) {\
if(endian){\
@@ -255,19 +383,10 @@ void tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor_##
n1_id= a1_id*dM->rank + k;\
}\
M->x[i] += M0->x[n0_id] * M1->x[n1_id];\
/*vCoordFromLin(Koord, k, dM);\
concatArray(coordM0, coord0, Koord, 0, 0, len0, 0, contractionNumber);\
concatArray(coordM1, Koord, coord1, 0, 0, contractionNumber, 0, len1);\
lin0 = LineFromCoord(coordM0, M0->dim);\
lin1 = LineFromCoord(coordM1, M1->dim);\
printf("k:%ld, lin0:%ld, vs n0: %ld\n",k,lin0,n0_id);\
printf("k:%ld, lin1:%ld, vs n1: %ld\n",k,lin1,n1_id);*/\
/*M->x[i] += M0->x[lin0] * M1->x[lin1];*/\
/*printf("M[%ld]:%f += M0[%ld]:%f * M1[%ld]:%f | \n",i,M->x[i],n0_id,M0->x[n0_id],n1_id,M1->x[n1_id]);*/\
/*printf("k:%ld |i:%ld |lin0:%ld | lin1:%ld | ",k,i,lin0,lin1);*/\
\
}\
/*printf("\n");*/\
}\
FREE_dM_S_ \
}\
struct arg_Prod_##type{\
type *M0x;\
@@ -309,9 +428,10 @@ void tensorProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##typ
arg_th[i]->M1x=M1->x;\
arg_th[i]->Mx=M->x;\
arg_th[i]->beginRange = i*(M->dim->rank)/nbthread ;\
if(i < nbthread - 1 ) arg_th[i]->endRange = (i+1)*(M->dim->rank)/nbthread ;\
arg_th[i]->endRange = (i+1)*(M->dim->rank)/nbthread ;\
/*if(i < nbthread - 1 ) arg_th[i]->endRange = (i+1)*(M->dim->rank)/nbthread ;\
else arg_th[i]->endRange = M->dim->rank ;\
if(endian){\
*/if(endian){\
arg_th[i]->MRank = M1->dim->rank;\
}\
else{\
@@ -322,27 +442,39 @@ void tensorProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##typ
\
for(size_t i=0; i< nbthread; ++i){\
pthread_join(thrd[i], NULL);\
free(arg_th[i]);\
}\
\
free(thrd);\
free(arg_th);\
} \
\
/* Per-thread work description handed to runProd_thread2d_##type through pthread_create. */\
struct arg_Pro2d_##type{\
/* Data of the first operand, indexed by i in [beginRange, endRange). */\
type *M0x;\
/* Data of the second operand, indexed by j in [0, M1Rank). */\
type *M1x;\
/* Output data; the worker stores M0x[i] * M1x[j] at the linear index derived from (i, j). */\
type *Mx;\
/* First M0x index handled by this thread (inclusive). */\
size_t beginRange;\
/* One past the last M0x index handled by this thread. */\
size_t endRange;\
/* Rank of M0; output stride of j when the global `endian` flag is false. */\
size_t M0Rank;\
/* Rank of M1; inner loop bound, and output stride of i when `endian` is true. */\
size_t M1Rank;\
};\
void* runProd_thread2d_##type(void *arg){\
struct arg_Prod_##type *arg_t = arg;\
struct arg_Pro2d_##type *arg_t = arg;\
size_t k;\
for (size_t i = arg_t->beginRange; i < arg_t->endRange; i++) {\
for (size_t j = 0; j < arg_t->MRank; j++) {\
for (size_t j = 0; j < arg_t->M1Rank; j++) {\
if(endian){\
k = i * arg_t->MRank + j;\
k = i * arg_t->M1Rank + j;\
}\
else{\
k =i + arg_t->MRank * j ;\
k =i + arg_t->M0Rank * j ;\
}\
arg_t->Mx[k] += arg_t->M0x[i] * arg_t->M1x[j];\
arg_t->Mx[k] = arg_t->M0x[i] * arg_t->M1x[j];\
}\
}\
}\
\
void tensorProdThread2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t nbthread) { \
void tensorProdThrea2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t nbthread) { \
dimension *dd; \
add_dimension(&dd, M0->dim, M1->dim); \
(*MM)=CREATE_TENSOR_##type(dd); \
@@ -350,29 +482,29 @@ void tensorProdThread2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##t
\
\
pthread_t *thrd = malloc(nbthread * sizeof(pthread_t));\
struct arg_Prod_##type **arg_th = malloc( nbthread * sizeof(struct arg_Prod_##type *));\
struct arg_Pro2d_##type **arg_th = malloc( nbthread * sizeof(struct arg_Pro2d_##type *));\
\
for(size_t i = 0; i < nbthread; ++i){\
arg_th[i]=malloc(sizeof(struct arg_Prod_##type));\
arg_th[i]=malloc(sizeof(struct arg_Pro2d_##type));\
arg_th[i]->M0x=M0->x;\
arg_th[i]->M1x=M1->x;\
arg_th[i]->Mx=M->x;\
arg_th[i]->beginRange = i*(M->dim->rank)/nbthread ;\
if(i < nbthread - 1 ) arg_th[i]->endRange = (i+1)*(M->dim->rank)/nbthread ;\
else arg_th[i]->endRange = M->dim->rank ;\
if(endian){\
arg_th[i]->MRank = M1->dim->rank;\
}\
else{\
arg_th[i]->MRank = M0->dim->rank;\
}\
pthread_create(&thrd[i], NULL, runProd_thread_##type, (void*)arg_th[i]);\
arg_th[i]->beginRange = i*(M0->dim->rank)/nbthread ;\
arg_th[i]->endRange = (i+1)*(M0->dim->rank)/nbthread ;\
/*if(i < nbthread - 1 ) arg_th[i]->endRange = (i+1)*(M0->dim->rank)/nbthread ;\
else arg_th[i]->endRange = M0->dim->rank ;\
*/arg_th[i]->M1Rank = M1->dim->rank;\
arg_th[i]->M0Rank = M0->dim->rank;\
pthread_create(&thrd[i], NULL, runProd_thread2d_##type, (void*)arg_th[i]);\
}\
\
for(size_t i=0; i< nbthread; ++i){\
pthread_join(thrd[i], NULL);\
free(arg_th[i]);\
}\
\
free(thrd);\
free(arg_th);\
} \
struct arg_ProdContract_##type{\
type *M0x;\
@@ -431,7 +563,7 @@ void tensorContractnProdThread_##type(tensor_##type** MM, tensor_##type *M0, ten
dimension *dM1 = init_dim(tDk1, contractionNumber);\
dimension *dM0 = init_dim(tDk0, contractionNumber);\
dimension *dM;\
min_dimension(&dM, dM0, dM1);\
min_copy_dimension(&dM, dM0, dM1);\
\
dimension *dd;\
add_dimension(&dd, dSub0, dSub1);\
@@ -464,8 +596,12 @@ void tensorContractnProdThread_##type(tensor_##type** MM, tensor_##type *M0, ten
\
for(size_t i=0; i< nbthread; ++i){\
pthread_join(thrd[i], NULL);\
free(arg_th[i]);\
}\
\
free(thrd);\
free(arg_th);\
FREE_dM_S_ ; \
}\
void tensorContractnProdNotOpt_##type(tensor_##type** MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber) {\
/* if (!checkMatchProdtensor(M0->dim, M1->dim, contractionNumber)) {\
@@ -496,7 +632,7 @@ void tensorContractnProdNotOpt_##type(tensor_##type** MM, tensor_##type *M0, ten
printDebug_dimension(dM0,"dM0");\
printDebug_dimension(dM1,"dM1");*/\
dimension *dM;\
min_dimension(&dM, dM0, dM1);\
min_copy_dimension(&dM, dM0, dM1);\
/*printDebug_dimension(dM,"dM");*/\
\
dimension *dd;\
@@ -539,6 +675,11 @@ void tensorContractnProdNotOpt_##type(tensor_##type** MM, tensor_##type *M0, ten
}\
/*printf("\n");*/\
}\
FREE_COORD_ ; \
free(coordM0);\
free(coordM1);\
free(Koord); \
FREE_dM_S_ ; \
}\
\
+7 -2
View File
@@ -14,15 +14,20 @@ struct tensor_##type{\
};\
typedef struct tensor_##type tensor_##type;\
tensor_##type * CREATE_TENSOR_##type(dimension *dim); \
tensor_##type * sub_minus_tensor_head_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim); \
void free_tensor_##type(tensor_##type * tens); \
/*tensor_##type * sub_minus_tensor_head_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim); \
tensor_##type * sub_minus_tensor_tail_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim); \
tensor_##type * sub_tensor_head_##type(tensor_##type *rootens, size_t subdim, size_t rankInDim); \
tensor_##type * sub_tensor_tail_##type(tensor_##type *rootens, size_t subdim, size_t rankInDim); \
*/tensor_##type * sub_copy_minus_tensor_head_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim); \
tensor_##type * sub_copy_minus_tensor_tail_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim); \
tensor_##type * sub_copy_tensor_head_##type(tensor_##type *rootens, size_t sub_copydim, size_t rankInDim); \
tensor_##type * sub_copy_tensor_tail_##type(tensor_##type *rootens, size_t sub_copydim, size_t rankInDim); \
void tensorProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
void tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
void tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \
void tensorProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1,size_t nbthread); \
void tensorProdThread2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1,size_t nbthread); \
void tensorProdThrea2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1,size_t nbthread); \
void tensorContractnProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t nbthread); \
void tensorContractnProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \