Debug all memory leaks with valgrind in ftest and tensor rep
This commit is contained in:
@@ -1,36 +1,75 @@
|
||||
|
||||
|
||||
|
||||
__kernel void prodContractnTensorLin_TYPE_FLOAT(long unsigned int dSubRank, long unsigned int dMRank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
|
||||
__kernel void prodContractnTensor2dLin_TYPE_FLOAT(long unsigned int dSubRank, long unsigned int dMRank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
|
||||
|
||||
//Get the index of the current element to be processed
|
||||
size_t i = get_global_id(0);
|
||||
size_t k, a0_id, a1_id, n0_id, n1_id;
|
||||
a0_id = i / dSubRank;
|
||||
a1_id = i % dSubRank;
|
||||
Mx[i] = 0;
|
||||
//size_t i = get_global_id(0);
|
||||
//size_t j = get_global_id(1);
|
||||
|
||||
size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0);
|
||||
size_t j = get_group_id(1)*get_local_size(1) + get_local_id(1);
|
||||
|
||||
size_t k, n0_id, n1_id;
|
||||
size_t ind = i * dSubRank + j;
|
||||
Mx[ind] = 0;
|
||||
for (k = 0; k < dMRank; k++) {
|
||||
n0_id = a0_id * dMRank + k;
|
||||
n1_id = a1_id + dSubRank * k;
|
||||
Mx[i] += M0x[n0_id] * M1x[n1_id];
|
||||
n0_id = i * dMRank + k;
|
||||
n1_id = j + dSubRank * k;
|
||||
Mx[ind] += M0x[n0_id] * M1x[n1_id];
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void prodContractnTensorLin_TYPE_DOUBLE(long unsigned int dSubRank, long unsigned int dMRank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
|
||||
__kernel void prodContractnTensor2dLin_TYPE_DOUBLE(long unsigned int dSubRank, long unsigned int dMRank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
|
||||
|
||||
//Get the index of the current element to be processed
|
||||
size_t i = get_global_id(0);
|
||||
size_t k, a0_id, a1_id, n0_id, n1_id;
|
||||
a0_id = i / dSubRank;
|
||||
a1_id = i % dSubRank;
|
||||
Mx[i] = 0;
|
||||
//size_t i = get_global_id(0);
|
||||
//size_t j = get_global_id(1);
|
||||
|
||||
size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0);
|
||||
size_t j = get_group_id(1)*get_local_size(1) + get_local_id(1);
|
||||
|
||||
size_t k, n0_id, n1_id;
|
||||
size_t ind = i * dSubRank + j;
|
||||
Mx[ind] = 0;
|
||||
for (k = 0; k < dMRank; k++) {
|
||||
n0_id = a0_id * dMRank + k;
|
||||
n1_id = a1_id + dSubRank * k;
|
||||
Mx[i] += M0x[n0_id] * M1x[n1_id];
|
||||
n0_id = i * dMRank + k;
|
||||
n1_id = j + dSubRank * k;
|
||||
Mx[ind] += M0x[n0_id] * M1x[n1_id];
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * 2-D contraction-product kernel, "not endian" (first index fastest) layout,
 * single precision.
 *
 * One work-item computes one output element, addressed by the pair (i, j)
 * taken from NDRange dimensions 0 and 1:
 *
 *     Mx[i + dSubRank*j] = sum_{k=0}^{dMRank-1}
 *                              M0x[i + dSubRank*k] * M1x[k + dMRank*j]
 *
 * Parameters:
 *   dSubRank  extent of the i axis, i.e. the stride between consecutive j in
 *             Mx and between consecutive k in M0x.
 *   dMRank    number of contracted elements, i.e. the stride between
 *             consecutive j in M1x.
 *   M0x, M1x  input buffers (read only).
 *   Mx        output buffer; every element is fully overwritten.
 *
 * The kernel is named "...LinNotEndian..." (not "...LiniNotEndian...") so
 * that a host lookup string built as "prodContractnTensor2dLinNotEndian_"
 * followed by the type name resolves for TYPE_FLOAT as it does for
 * TYPE_DOUBLE.
 *
 * NOTE(review): no bounds check is done on (i, j); the NDRange is assumed to
 * match the output extents exactly -- confirm against the launch code.
 */
__kernel void prodContractnTensor2dLinNotEndian_TYPE_FLOAT(long unsigned int dSubRank, long unsigned int dMRank, __global const float *M0x , __global const float *M1x, __global float *Mx ){

    /* Index of the element handled by this work-item. */
    const size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0);
    const size_t j = get_group_id(1)*get_local_size(1) + get_local_id(1);
    const size_t ind = i + dSubRank * j;

    /* Accumulate privately; a single store to global memory at the end. */
    float acc = 0;
    for (size_t k = 0; k < dMRank; k++) {
        /* M0x is addressed as (i, k) and M1x as (k, j), first index fastest. */
        const size_t n0_id = i + dSubRank * k;
        const size_t n1_id = k + dMRank * j;
        acc += M0x[n0_id] * M1x[n1_id];
    }
    Mx[ind] = acc;
}
|
||||
|
||||
/*
 * 2-D contraction-product kernel, "not endian" (first index fastest) layout,
 * double precision.
 *
 * One work-item computes one output element, addressed by the pair (i, j)
 * taken from NDRange dimensions 0 and 1:
 *
 *     Mx[i + dSubRank*j] = sum_{k=0}^{dMRank-1}
 *                              M0x[i + dSubRank*k] * M1x[k + dMRank*j]
 *
 * Parameters:
 *   dSubRank  extent of the i axis, i.e. the stride between consecutive j in
 *             Mx and between consecutive k in M0x.
 *   dMRank    number of contracted elements, i.e. the stride between
 *             consecutive j in M1x.
 *   M0x, M1x  input buffers (read only).
 *   Mx        output buffer; every element is fully overwritten.
 *
 * NOTE(review): no bounds check is done on (i, j); the NDRange is assumed to
 * match the output extents exactly -- confirm against the launch code.
 */
__kernel void prodContractnTensor2dLinNotEndian_TYPE_DOUBLE(long unsigned int dSubRank, long unsigned int dMRank, __global const double *M0x , __global const double *M1x, __global double *Mx ){

    /* Index of the element handled by this work-item. */
    const size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0);
    const size_t j = get_group_id(1)*get_local_size(1) + get_local_id(1);
    const size_t ind = i + dSubRank * j;

    /* Accumulate privately; a single store to global memory at the end. */
    double acc = 0;
    for (size_t k = 0; k < dMRank; k++) {
        /* M0x is addressed as (i, k) and M1x as (k, j), first index fastest. */
        const size_t n0_id = i + dSubRank * k;
        const size_t n1_id = k + dMRank * j;
        acc += M0x[n0_id] * M1x[n1_id];
    }
    Mx[ind] = acc;
}
|
||||
|
||||
@@ -11,7 +11,7 @@ __kernel void prodTensor2dLin_TYPE_FLOAT(long unsigned int M1rank, __global cons
|
||||
Mx[k] = M0x[i] * M1x[j];
|
||||
}
|
||||
|
||||
__kernel void prodTensori2dLin_TYPE_DOUBLE(long unsigned int M1rank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
|
||||
__kernel void prodTensor2dLin_TYPE_DOUBLE(long unsigned int M1rank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
|
||||
|
||||
//Get the index of the current element to be processed
|
||||
size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0);
|
||||
@@ -51,9 +51,3 @@ __kernel void prodTensori2dLinNotEndian_TYPE_DOUBLE(long unsigned int M0rank, __
|
||||
Mx[k] = M0x[i] * M1x[j];
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,3 @@
|
||||
|
||||
|
||||
|
||||
__kernel void prodContractnTensorLin_TYPE_FLOAT(long unsigned int dSubRank, long unsigned int dMRank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
|
||||
|
||||
//Get the index of the current element to be processed
|
||||
@@ -64,7 +61,3 @@ __kernel void prodContractnTensorLinNotEndian_TYPE_DOUBLE(long unsigned int dSub
|
||||
Mx[i] += M0x[n0_id] * M1x[n1_id];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -33,8 +33,3 @@ __kernel void prodTensorLinNotEndian_TYPE_DOUBLE(long unsigned int M0rank, __glo
|
||||
size_t j = k / M0rank;
|
||||
Mx[k] = M0x[i] * M1x[j];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -28,14 +28,15 @@
|
||||
cl_uint ret_num_devices; \
|
||||
cl_uint ret_num_platforms; \
|
||||
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); \
|
||||
checkError(ret,__func__,"Error: Failed to get platform ID ");\
|
||||
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, \
|
||||
&device_id, &ret_num_devices); \
|
||||
size_t returned_size = 0;\
|
||||
size_t max_workgroup_size = 0;\
|
||||
ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_workgroup_size, &returned_size);\
|
||||
/*ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_workgroup_size, &returned_size);\
|
||||
checkError(ret,__func__,"Error: Failed to retrieve device info!");\
|
||||
printf(" ===========================================================++> return size: %ld\n max group sz: %ld\n", returned_size, max_workgroup_size);\
|
||||
\
|
||||
*/\
|
||||
/*int gpu = 1;\
|
||||
ret = clGetDeviceIDs( platform_id, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, \
|
||||
&device_id, &ret_num_devices); */\
|
||||
@@ -136,6 +137,7 @@
|
||||
ret |= clReleaseCommandQueue(command_queue); \
|
||||
ret |= clReleaseContext(context); \
|
||||
checkError(ret,__func__,"Error: Failed to clean up! ");\
|
||||
free(source_str);
|
||||
|
||||
#define GEN_cl_FUNC_TENSOR(type)\
|
||||
\
|
||||
@@ -214,6 +216,7 @@ void cl_tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor
|
||||
dSubRank = dSub0->rank;\
|
||||
\
|
||||
}\
|
||||
printf("func_cl_name = %s ......... \n",func_cl_name);\
|
||||
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
|
||||
\
|
||||
/*/ Set the arguments of the kernel */ \
|
||||
@@ -228,7 +231,7 @@ void cl_tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor
|
||||
READ_BUF_N_CLEANUP(type)\
|
||||
\
|
||||
} \
|
||||
\
|
||||
\
|
||||
\
|
||||
void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t div0Wsz, size_t div1Wsz) { \
|
||||
dimension *dd; \
|
||||
@@ -262,6 +265,67 @@ void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type
|
||||
READ_BUF_N_CLEANUP(type)\
|
||||
} \
|
||||
\
|
||||
void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t div0Wsz, size_t div1Wsz) {\
|
||||
\
|
||||
size_t len0 = M0->dim->size - contractionNumber;\
|
||||
size_t len1 = M1->dim->size - contractionNumber;\
|
||||
\
|
||||
size_t* tsub0 = malloc(sizeof(size_t) *len0);\
|
||||
size_t* tsub1 = malloc(sizeof(size_t) *len1);\
|
||||
size_t* tDk1 = malloc(sizeof(size_t) *contractionNumber);\
|
||||
size_t* tDk0 = malloc(sizeof(size_t) *contractionNumber);\
|
||||
subArray(tsub0, M0->dim->perm, 0, len0, 0);\
|
||||
subArray(tsub1, M1->dim->perm, 0, len1, contractionNumber);\
|
||||
subArray(tDk1, M1->dim->perm, 0, contractionNumber, 0);\
|
||||
subArray(tDk0, M0->dim->perm, 0, contractionNumber, len0);\
|
||||
dimension *dSub0 = init_dim(tsub0, len0);\
|
||||
dimension *dSub1 = init_dim(tsub1, len1);\
|
||||
dimension *dM1 = init_dim(tDk1, contractionNumber);\
|
||||
dimension *dM0 = init_dim(tDk0, contractionNumber);\
|
||||
dimension *dM;\
|
||||
min_dimension(&dM, dM0, dM1);\
|
||||
\
|
||||
dimension *dd;\
|
||||
add_dimension(&dd, dSub0, dSub1);\
|
||||
updateRankDim(dd);\
|
||||
*MM = CREATE_TENSOR_##type(dd);\
|
||||
tensor_##type *M= *MM;\
|
||||
char *file_cl_src = "../src/kernel_2d_ProdContractnTensor.cl"; \
|
||||
char *func_cl_nameEndian = "prodContractnTensor2dLin_" #type; \
|
||||
char *func_cl_nameNotEndian = "prodContractnTensor2dLinNotEndian_" #type; \
|
||||
char *func_cl_name; \
|
||||
size_t dSubRank;\
|
||||
if(endian){\
|
||||
func_cl_name = func_cl_nameEndian;\
|
||||
dSubRank = dSub1->rank;\
|
||||
\
|
||||
}else{\
|
||||
func_cl_name = func_cl_nameNotEndian;\
|
||||
dSubRank = dSub0->rank;\
|
||||
\
|
||||
}\
|
||||
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
|
||||
/*size_t cl_dev_max_w_sz,sz_val;\
|
||||
ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &cl_dev_max_w_sz, &sz_val);\
|
||||
printf("CL_DEVICE_MAX_WORK_GROUP_SIZE = : %ld, sz :%ld\n ",cl_dev_max_w_sz, sz_val);\
|
||||
*/\
|
||||
/*/ Set the arguments of the kernel */ \
|
||||
ret |= clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&dSubRank); \
|
||||
ret |= clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \
|
||||
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \
|
||||
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \
|
||||
ret |= clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&M_mem_obj); \
|
||||
checkError(ret,__func__,"Error: Failed to set kernel arguments! ");\
|
||||
\
|
||||
/*printf("EXEC_cl_2d_KERNEL(type,%ld,%ld,%ld,%ld)\n",dSub0->rank,dSub1->rank,div0Wsz,div1Wsz);\
|
||||
*/EXEC_cl_2d_KERNEL(type,dSub0->rank,dSub1->rank,div0Wsz,div1Wsz);\
|
||||
READ_BUF_N_CLEANUP(type)\
|
||||
\
|
||||
} \
|
||||
\
|
||||
|
||||
|
||||
|
||||
|
||||
void checkError(cl_int error, const char *func_name, char *msg) {
|
||||
if (error != CL_SUCCESS) {
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
|
||||
void cl_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \
|
||||
void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t div0Wsz, size_t div1Wsz); \
|
||||
/*void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t div0Wsz, size_t div1Wsz);*/ \
|
||||
void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t div0Wsz, size_t div1Wsz); \
|
||||
|
||||
CL_GENERATE_TENSOR_TYPE(TYPE_FLOAT);
|
||||
CL_GENERATE_TENSOR_TYPE(TYPE_DOUBLE);
|
||||
|
||||
@@ -23,6 +23,27 @@ void printArraySzt(size_t *a, size_t sz,char *msg){
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
/*
 * Release the three coordinate scratch buffers used by the tensor product
 * routines.  Expands to plain statements, so the names coord0, coord1 and
 * coord must be in scope at the expansion site.
 *
 * The last line deliberately carries no continuation backslash, so the macro
 * can never absorb whatever line follows it.
 */
#define FREE_COORD_\
    free(coord0);\
    free(coord1);\
    free(coord);
|
||||
|
||||
/*
 * Release the four index arrays tsub0, tsub1, tDk1 and tDk0.
 * Expands to plain statements; all four names must be in scope where the
 * macro is used.
 */
#define FREE_t \
    free(tsub0); \
    free(tsub1); \
    free(tDk1);  \
    free(tDk0);
|
||||
|
||||
/*
 * Release the temporary dimension objects built during a contraction
 * product: dM0, dM1, dM, dSub0 and dSub1, each through free_dimension().
 * Expands to plain statements; all five names must be in scope where the
 * macro is used.
 *
 * NOTE(review): freeing dM together with dM0 and dM1 is only safe if dM owns
 * its own storage (e.g. it was built by a copying helper) -- confirm at each
 * call site.
 *
 * The last line deliberately carries no continuation backslash, so the macro
 * can never absorb whatever line follows it.
 */
#define FREE_dM_S_\
    free_dimension(dM0);\
    free_dimension(dM1);\
    free_dimension(dM);\
    free_dimension(dSub0);\
    free_dimension(dSub1);
|
||||
|
||||
|
||||
|
||||
#define GEN_FUNC_TENSOR(type)\
|
||||
tensor_##type* CREATE_TENSOR_##type(dimension *dim){\
|
||||
tensor_##type *r_tens=malloc(sizeof(tensor_##type));\
|
||||
@@ -32,24 +53,37 @@ void printArraySzt(size_t *a, size_t sz,char *msg){
|
||||
return r_tens;\
|
||||
}\
|
||||
\
|
||||
tensor_##type * sub_minus_tensor_head_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim){\
|
||||
tensor_##type* CREATE_TENSOR_FROM_CPY_DIM_##type(dimension *dim){\
|
||||
tensor_##type *r_tens=malloc(sizeof(tensor_##type));\
|
||||
r_tens->dim = init_copy_dim(dim->perm,dim->size);\
|
||||
r_tens->x = malloc(sizeof(type)*dim->rank);\
|
||||
return r_tens;\
|
||||
}\
|
||||
\
|
||||
void free_tensor_##type(tensor_##type * tens){\
|
||||
if(tens){\
|
||||
free_dimension(tens->dim);\
|
||||
free(tens->x);\
|
||||
free(tens);\
|
||||
}\
|
||||
}\
|
||||
/* tensor_##type * sub_minus_tensor_head_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim){\
|
||||
dimension *rdim= rootens->dim;\
|
||||
dimension *dS_t = sub_minus_dim_tail(rdim,rdim->size - minuSubdim);\
|
||||
if(rankInDim < dS_t->rank){\
|
||||
dimension *dS_h = sub_minus_dim_head(rdim,minuSubdim);\
|
||||
tensor_##type *ret_ens = malloc(sizeof(tensor_##type));\
|
||||
ret_ens->dim = dS_h;\
|
||||
/*ret_ens->x = (rootens->x)+rankInDim*dS_t->rank;*/\
|
||||
ret_ens->x = malloc(sizeof(type)*dS_h->rank);\
|
||||
if(endian){\
|
||||
ret_ens->x = malloc(sizeof(type)*dS_h->rank);\
|
||||
for(size_t i=0; i<dS_h->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i*dS_t->rank + rankInDim];\
|
||||
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
|
||||
ret_ens->x[i]=rootens->x[i*dS_t->rank + rankInDim];\
|
||||
\
|
||||
}\
|
||||
}else{\
|
||||
ret_ens->x = (rootens->x)+rankInDim*dS_h->rank;\
|
||||
\
|
||||
for(size_t i=0; i<dS_h->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i + dS_h->rank * rankInDim];\
|
||||
}\
|
||||
}\
|
||||
return ret_ens;\
|
||||
}\
|
||||
@@ -63,15 +97,15 @@ void printArraySzt(size_t *a, size_t sz,char *msg){
|
||||
dimension *dS_t = sub_minus_dim_tail(rdim,minuSubdim);\
|
||||
tensor_##type *ret_ens = malloc(sizeof(tensor_##type));\
|
||||
ret_ens->dim = dS_t;\
|
||||
if(endian==false){\
|
||||
ret_ens->x = malloc(sizeof(type)*dS_t->rank);\
|
||||
if(endian==false){\
|
||||
for(size_t i=0; i<dS_t->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i*dS_h->rank + rankInDim];\
|
||||
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
|
||||
\
|
||||
}\
|
||||
}else{\
|
||||
ret_ens->x = (rootens->x)+rankInDim*dS_t->rank;\
|
||||
for(size_t i=0; i<dS_t->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i + dS_t->rank * rankInDim];\
|
||||
}\
|
||||
\
|
||||
}\
|
||||
return ret_ens;\
|
||||
@@ -80,23 +114,21 @@ void printArraySzt(size_t *a, size_t sz,char *msg){
|
||||
}\
|
||||
\
|
||||
tensor_##type * sub_tensor_head_##type(tensor_##type *rootens, size_t subdim, size_t rankInDim){\
|
||||
/*return sub_minus_tensor_head_##type(rootens,rootens->dim->size - subdim, rankInDim);*/\
|
||||
dimension *rdim= rootens->dim;\
|
||||
dimension *dS_t = sub_dim_tail(rdim,rdim->size - subdim);\
|
||||
if(rankInDim < dS_t->rank){\
|
||||
dimension *dS_h = sub_dim_head(rdim,subdim);\
|
||||
tensor_##type *ret_ens = malloc(sizeof(tensor_##type));\
|
||||
ret_ens->dim = dS_h;\
|
||||
/*ret_ens->x = (rootens->x)+rankInDim*dS_t->rank;*/\
|
||||
if(endian){\
|
||||
ret_ens->x = malloc(sizeof(type)*dS_h->rank);\
|
||||
if(endian){\
|
||||
for(size_t i=0; i<dS_h->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i*dS_t->rank + rankInDim];\
|
||||
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
|
||||
\
|
||||
}\
|
||||
}else{\
|
||||
ret_ens->x = (rootens->x)+rankInDim*dS_h->rank;\
|
||||
for(size_t i=0; i<dS_h->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i + dS_h->rank * rankInDim];\
|
||||
}\
|
||||
\
|
||||
}\
|
||||
return ret_ens;\
|
||||
@@ -104,28 +136,142 @@ void printArraySzt(size_t *a, size_t sz,char *msg){
|
||||
return NULL;\
|
||||
}\
|
||||
tensor_##type * sub_tensor_tail_##type(tensor_##type *rootens, size_t subdim, size_t rankInDim){ \
|
||||
/*return sub_minus_tensor_tail_##type(rootens,rootens->dim->size - subdim, rankInDim);*/\
|
||||
dimension *rdim= rootens->dim;\
|
||||
dimension *dS_h = sub_dim_head(rdim,rdim->size - subdim);\
|
||||
if(rankInDim < dS_h->rank){\
|
||||
dimension *dS_t = sub_dim_tail(rdim,subdim);\
|
||||
tensor_##type *ret_ens = malloc(sizeof(tensor_##type));\
|
||||
ret_ens->dim = dS_t;\
|
||||
if(endian==false){\
|
||||
ret_ens->x = malloc(sizeof(type)*dS_t->rank);\
|
||||
if(endian==false){\
|
||||
for(size_t i=0; i<dS_t->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i*dS_h->rank + rankInDim];\
|
||||
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
|
||||
\
|
||||
}\
|
||||
}else{\
|
||||
ret_ens->x = (rootens->x)+rankInDim*dS_t->rank;\
|
||||
for(size_t i=0; i<dS_t->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i + dS_t->rank * rankInDim];\
|
||||
}\
|
||||
\
|
||||
}\
|
||||
return ret_ens;\
|
||||
}\
|
||||
return NULL;\
|
||||
}\
|
||||
*/ \
|
||||
tensor_##type * sub_copy_minus_tensor_head_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim){\
|
||||
dimension *rdim= rootens->dim;\
|
||||
dimension *dS_t = sub_copy_minus_dim_tail(rdim,rdim->size - minuSubdim);\
|
||||
if(rankInDim < dS_t->rank){\
|
||||
dimension *dS_h = sub_copy_minus_dim_head(rdim,minuSubdim);\
|
||||
tensor_##type *ret_ens = CREATE_TENSOR_##type(dS_h);\
|
||||
/*malloc(sizeof(tensor_##type));\
|
||||
ret_ens->dim = dS_h;\
|
||||
ret_ens->x = malloc(sizeof(type)*dS_h->rank);*/\
|
||||
if(endian){\
|
||||
/*ret_ens->x = malloc(sizeof(type)*dS_h->rank);\
|
||||
*/for(size_t i=0; i<dS_h->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i*dS_t->rank + rankInDim];\
|
||||
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
|
||||
\
|
||||
}\
|
||||
}else{\
|
||||
/*ret_ens->x = (rootens->x)+rankInDim*dS_h->rank;*/\
|
||||
for(size_t i=0; i<dS_h->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i + dS_h->rank * rankInDim];\
|
||||
}\
|
||||
}\
|
||||
free_dimension(dS_t);\
|
||||
return ret_ens;\
|
||||
}\
|
||||
free_dimension(dS_t);\
|
||||
return NULL;\
|
||||
}\
|
||||
\
|
||||
tensor_##type * sub_copy_minus_tensor_tail_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim){\
|
||||
dimension *rdim= rootens->dim;\
|
||||
dimension *dS_h = sub_copy_minus_dim_head(rdim,rdim->size - minuSubdim);\
|
||||
if(rankInDim < dS_h->rank){\
|
||||
dimension *dS_t = sub_copy_minus_dim_tail(rdim,minuSubdim);\
|
||||
tensor_##type *ret_ens = CREATE_TENSOR_##type(dS_t);\
|
||||
/*tensor_##type *ret_ens = malloc(sizeof(tensor_##type));\
|
||||
ret_ens->dim = dS_t;\
|
||||
ret_ens->x = malloc(sizeof(type)*dS_t->rank);\
|
||||
*/if(endian==false){\
|
||||
for(size_t i=0; i<dS_t->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i*dS_h->rank + rankInDim];\
|
||||
}\
|
||||
}else{\
|
||||
/*ret_ens->x = (rootens->x)+rankInDim*dS_t->rank;*/\
|
||||
for(size_t i=0; i<dS_t->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i + dS_t->rank * rankInDim];\
|
||||
}\
|
||||
\
|
||||
}\
|
||||
free_dimension(dS_h);\
|
||||
return ret_ens;\
|
||||
}\
|
||||
free_dimension(dS_h);\
|
||||
return NULL;\
|
||||
}\
|
||||
\
|
||||
tensor_##type * sub_copy_tensor_head_##type(tensor_##type *rootens, size_t sub_copydim, size_t rankInDim){\
|
||||
/*return sub_copy_minus_tensor_head_##type(rootens,rootens->dim->size - sub_copydim, rankInDim);*/\
|
||||
dimension *rdim= rootens->dim;\
|
||||
dimension *dS_t = sub_copy_dim_tail(rdim,rdim->size - sub_copydim);\
|
||||
if(rankInDim < dS_t->rank){\
|
||||
dimension *dS_h = sub_copy_dim_head(rdim,sub_copydim);\
|
||||
tensor_##type *ret_ens = CREATE_TENSOR_##type(dS_h);\
|
||||
/*tensor_##type *ret_ens = malloc(sizeof(tensor_##type));\
|
||||
ret_ens->dim = dS_h;\
|
||||
ret_ens->x = malloc(sizeof(type)*dS_h->rank);\
|
||||
*/if(endian){\
|
||||
for(size_t i=0; i<dS_h->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i*dS_t->rank + rankInDim];\
|
||||
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
|
||||
}\
|
||||
}else{\
|
||||
/*ret_ens->x = (rootens->x)+rankInDim*dS_h->rank;*/\
|
||||
for(size_t i=0; i<dS_h->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i + dS_h->rank * rankInDim];\
|
||||
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
|
||||
}\
|
||||
\
|
||||
}\
|
||||
free_dimension(dS_t);\
|
||||
return ret_ens;\
|
||||
}\
|
||||
free_dimension(dS_t);\
|
||||
return NULL;\
|
||||
}\
|
||||
tensor_##type * sub_copy_tensor_tail_##type(tensor_##type *rootens, size_t sub_copydim, size_t rankInDim){ \
|
||||
/*return sub_copy_minus_tensor_tail_##type(rootens,rootens->dim->size - sub_copydim, rankInDim);*/\
|
||||
dimension *rdim= rootens->dim;\
|
||||
dimension *dS_h = sub_copy_dim_head(rdim,rdim->size - sub_copydim);\
|
||||
if(rankInDim < dS_h->rank){\
|
||||
dimension *dS_t = sub_copy_dim_tail(rdim,sub_copydim);\
|
||||
tensor_##type *ret_ens = CREATE_TENSOR_##type(dS_t);\
|
||||
/*tensor_##type *ret_ens = malloc(sizeof(tensor_##type));\
|
||||
ret_ens->dim = dS_t;\
|
||||
ret_ens->x = malloc(sizeof(type)*dS_t->rank);\
|
||||
*/if(endian==false){\
|
||||
for(size_t i=0; i<dS_t->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i*dS_h->rank + rankInDim];\
|
||||
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
|
||||
}\
|
||||
}else{\
|
||||
/*ret_ens->x = (rootens->x)+rankInDim*dS_t->rank;*/\
|
||||
for(size_t i=0; i<dS_t->rank; ++i){\
|
||||
ret_ens->x[i]=rootens->x[i + dS_t->rank * rankInDim];\
|
||||
/*printf("%ld: [i:%ld] | %ld : [%ld ]\n",dS_t->rank, i,dS_h->rank,i*dS_h->rank + rankInDim);*/\
|
||||
}\
|
||||
\
|
||||
}\
|
||||
free_dimension(dS_h);\
|
||||
return ret_ens;\
|
||||
}\
|
||||
free_dimension(dS_h);\
|
||||
return NULL;\
|
||||
}\
|
||||
\
|
||||
void tensorProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1) { \
|
||||
dimension *dd; \
|
||||
@@ -149,6 +295,7 @@ void tensorProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##typ
|
||||
M->x[i] = M0->x[lin0] * M1->x[lin1]; \
|
||||
/*printf(" M->x[%ld] = M0->x[%ld] * M1->x[%ld] ::: %f = %f * %f \n",i,lin0,lin1, M->x[i] , M0->x[lin0] , M1->x[lin1]);*/\
|
||||
} \
|
||||
FREE_COORD_ ; \
|
||||
} \
|
||||
\
|
||||
\
|
||||
@@ -203,7 +350,7 @@ void tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor_##
|
||||
printDebug_dimension(dM0,"dM0");\
|
||||
printDebug_dimension(dM1,"dM1");*/\
|
||||
dimension *dM;\
|
||||
min_dimension(&dM, dM0, dM1);\
|
||||
min_copy_dimension(&dM, dM0, dM1);\
|
||||
/*printDebug_dimension(dM,"dM");*/\
|
||||
\
|
||||
dimension *dd;\
|
||||
@@ -213,21 +360,7 @@ void tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor_##
|
||||
*MM = CREATE_TENSOR_##type(dd);\
|
||||
tensor_##type *M= *MM;\
|
||||
\
|
||||
/*size_t* coord;\
|
||||
coord = malloc(sizeof(size_t)* M->dim->size);\
|
||||
\
|
||||
size_t* coord0 , lin0;\
|
||||
coord0 = malloc(sizeof(size_t)* len0);\
|
||||
size_t* coord1, lin1;\
|
||||
coord1 = malloc(sizeof(size_t)* len1);\
|
||||
\
|
||||
size_t* coordM0 ;\
|
||||
coordM0 = malloc(sizeof(size_t)* M0->dim->size);\
|
||||
size_t* coordM1 ;\
|
||||
coordM1 = malloc(sizeof(size_t)* M1->dim->size);\
|
||||
\
|
||||
size_t* Koord ;\
|
||||
Koord = malloc(sizeof(size_t)* contractionNumber);*/\
|
||||
\
|
||||
\
|
||||
size_t a0_id, a1_id, n0_id, n1_id;\
|
||||
for (size_t i = 0; i < M->dim->rank; i++) {\
|
||||
@@ -239,11 +372,6 @@ void tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor_##
|
||||
a0_id=i%dSub0->rank;\
|
||||
a1_id=i/dSub0->rank;\
|
||||
}\
|
||||
/*vCoordFromLin(coord, i, M->dim);\
|
||||
subArray(coord0, coord, 0, len0, 0);\
|
||||
subArray(coord1, coord, 0, len1, len0);\
|
||||
printf("i:%ld=> c0: %ld vs %ld \n",i,LineFromCoord(coord0,dSub0),a0_id);\
|
||||
printf("i:%ld=> c1: %ld vs %ld \n",i,LineFromCoord(coord1,dSub1),a1_id);*/\
|
||||
M->x[i] = 0;\
|
||||
for (size_t k = 0; k < dM->rank; k++) {\
|
||||
if(endian){\
|
||||
@@ -255,19 +383,10 @@ void tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor_##
|
||||
n1_id= a1_id*dM->rank + k;\
|
||||
}\
|
||||
M->x[i] += M0->x[n0_id] * M1->x[n1_id];\
|
||||
/*vCoordFromLin(Koord, k, dM);\
|
||||
concatArray(coordM0, coord0, Koord, 0, 0, len0, 0, contractionNumber);\
|
||||
concatArray(coordM1, Koord, coord1, 0, 0, contractionNumber, 0, len1);\
|
||||
lin0 = LineFromCoord(coordM0, M0->dim);\
|
||||
lin1 = LineFromCoord(coordM1, M1->dim);\
|
||||
printf("k:%ld, lin0:%ld, vs n0: %ld\n",k,lin0,n0_id);\
|
||||
printf("k:%ld, lin1:%ld, vs n1: %ld\n",k,lin1,n1_id);*/\
|
||||
/*M->x[i] += M0->x[lin0] * M1->x[lin1];*/\
|
||||
/*printf("M[%ld]:%f += M0[%ld]:%f * M1[%ld]:%f | \n",i,M->x[i],n0_id,M0->x[n0_id],n1_id,M1->x[n1_id]);*/\
|
||||
/*printf("k:%ld |i:%ld |lin0:%ld | lin1:%ld | ",k,i,lin0,lin1);*/\
|
||||
\
|
||||
}\
|
||||
/*printf("\n");*/\
|
||||
}\
|
||||
FREE_dM_S_ \
|
||||
}\
|
||||
struct arg_Prod_##type{\
|
||||
type *M0x;\
|
||||
@@ -309,9 +428,10 @@ void tensorProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##typ
|
||||
arg_th[i]->M1x=M1->x;\
|
||||
arg_th[i]->Mx=M->x;\
|
||||
arg_th[i]->beginRange = i*(M->dim->rank)/nbthread ;\
|
||||
if(i < nbthread - 1 ) arg_th[i]->endRange = (i+1)*(M->dim->rank)/nbthread ;\
|
||||
arg_th[i]->endRange = (i+1)*(M->dim->rank)/nbthread ;\
|
||||
/*if(i < nbthread - 1 ) arg_th[i]->endRange = (i+1)*(M->dim->rank)/nbthread ;\
|
||||
else arg_th[i]->endRange = M->dim->rank ;\
|
||||
if(endian){\
|
||||
*/if(endian){\
|
||||
arg_th[i]->MRank = M1->dim->rank;\
|
||||
}\
|
||||
else{\
|
||||
@@ -322,27 +442,39 @@ void tensorProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##typ
|
||||
\
|
||||
for(size_t i=0; i< nbthread; ++i){\
|
||||
pthread_join(thrd[i], NULL);\
|
||||
free(arg_th[i]);\
|
||||
}\
|
||||
\
|
||||
free(thrd);\
|
||||
free(arg_th);\
|
||||
} \
|
||||
\
|
||||
struct arg_Pro2d_##type{\
|
||||
type *M0x;\
|
||||
type *M1x;\
|
||||
type *Mx;\
|
||||
size_t beginRange;\
|
||||
size_t endRange;\
|
||||
size_t M0Rank;\
|
||||
size_t M1Rank;\
|
||||
};\
|
||||
void* runProd_thread2d_##type(void *arg){\
|
||||
struct arg_Prod_##type *arg_t = arg;\
|
||||
struct arg_Pro2d_##type *arg_t = arg;\
|
||||
size_t k;\
|
||||
for (size_t i = arg_t->beginRange; i < arg_t->endRange; i++) {\
|
||||
for (size_t j = 0; j < arg_t->MRank; j++) {\
|
||||
for (size_t j = 0; j < arg_t->M1Rank; j++) {\
|
||||
if(endian){\
|
||||
k = i * arg_t->MRank + j;\
|
||||
k = i * arg_t->M1Rank + j;\
|
||||
}\
|
||||
else{\
|
||||
k =i + arg_t->MRank * j ;\
|
||||
k =i + arg_t->M0Rank * j ;\
|
||||
}\
|
||||
arg_t->Mx[k] += arg_t->M0x[i] * arg_t->M1x[j];\
|
||||
arg_t->Mx[k] = arg_t->M0x[i] * arg_t->M1x[j];\
|
||||
}\
|
||||
}\
|
||||
}\
|
||||
\
|
||||
void tensorProdThread2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t nbthread) { \
|
||||
void tensorProdThrea2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t nbthread) { \
|
||||
dimension *dd; \
|
||||
add_dimension(&dd, M0->dim, M1->dim); \
|
||||
(*MM)=CREATE_TENSOR_##type(dd); \
|
||||
@@ -350,29 +482,29 @@ void tensorProdThread2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##t
|
||||
\
|
||||
\
|
||||
pthread_t *thrd = malloc(nbthread * sizeof(pthread_t));\
|
||||
struct arg_Prod_##type **arg_th = malloc( nbthread * sizeof(struct arg_Prod_##type *));\
|
||||
struct arg_Pro2d_##type **arg_th = malloc( nbthread * sizeof(struct arg_Pro2d_##type *));\
|
||||
\
|
||||
for(size_t i = 0; i < nbthread; ++i){\
|
||||
arg_th[i]=malloc(sizeof(struct arg_Prod_##type));\
|
||||
arg_th[i]=malloc(sizeof(struct arg_Pro2d_##type));\
|
||||
arg_th[i]->M0x=M0->x;\
|
||||
arg_th[i]->M1x=M1->x;\
|
||||
arg_th[i]->Mx=M->x;\
|
||||
arg_th[i]->beginRange = i*(M->dim->rank)/nbthread ;\
|
||||
if(i < nbthread - 1 ) arg_th[i]->endRange = (i+1)*(M->dim->rank)/nbthread ;\
|
||||
else arg_th[i]->endRange = M->dim->rank ;\
|
||||
if(endian){\
|
||||
arg_th[i]->MRank = M1->dim->rank;\
|
||||
}\
|
||||
else{\
|
||||
arg_th[i]->MRank = M0->dim->rank;\
|
||||
}\
|
||||
pthread_create(&thrd[i], NULL, runProd_thread_##type, (void*)arg_th[i]);\
|
||||
arg_th[i]->beginRange = i*(M0->dim->rank)/nbthread ;\
|
||||
arg_th[i]->endRange = (i+1)*(M0->dim->rank)/nbthread ;\
|
||||
/*if(i < nbthread - 1 ) arg_th[i]->endRange = (i+1)*(M0->dim->rank)/nbthread ;\
|
||||
else arg_th[i]->endRange = M0->dim->rank ;\
|
||||
*/arg_th[i]->M1Rank = M1->dim->rank;\
|
||||
arg_th[i]->M0Rank = M0->dim->rank;\
|
||||
pthread_create(&thrd[i], NULL, runProd_thread2d_##type, (void*)arg_th[i]);\
|
||||
}\
|
||||
\
|
||||
for(size_t i=0; i< nbthread; ++i){\
|
||||
pthread_join(thrd[i], NULL);\
|
||||
free(arg_th[i]);\
|
||||
}\
|
||||
\
|
||||
free(thrd);\
|
||||
free(arg_th);\
|
||||
} \
|
||||
struct arg_ProdContract_##type{\
|
||||
type *M0x;\
|
||||
@@ -431,7 +563,7 @@ void tensorContractnProdThread_##type(tensor_##type** MM, tensor_##type *M0, ten
|
||||
dimension *dM1 = init_dim(tDk1, contractionNumber);\
|
||||
dimension *dM0 = init_dim(tDk0, contractionNumber);\
|
||||
dimension *dM;\
|
||||
min_dimension(&dM, dM0, dM1);\
|
||||
min_copy_dimension(&dM, dM0, dM1);\
|
||||
\
|
||||
dimension *dd;\
|
||||
add_dimension(&dd, dSub0, dSub1);\
|
||||
@@ -464,8 +596,12 @@ void tensorContractnProdThread_##type(tensor_##type** MM, tensor_##type *M0, ten
|
||||
\
|
||||
for(size_t i=0; i< nbthread; ++i){\
|
||||
pthread_join(thrd[i], NULL);\
|
||||
free(arg_th[i]);\
|
||||
}\
|
||||
\
|
||||
free(thrd);\
|
||||
free(arg_th);\
|
||||
FREE_dM_S_ ; \
|
||||
}\
|
||||
void tensorContractnProdNotOpt_##type(tensor_##type** MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber) {\
|
||||
/* if (!checkMatchProdtensor(M0->dim, M1->dim, contractionNumber)) {\
|
||||
@@ -496,7 +632,7 @@ void tensorContractnProdNotOpt_##type(tensor_##type** MM, tensor_##type *M0, ten
|
||||
printDebug_dimension(dM0,"dM0");\
|
||||
printDebug_dimension(dM1,"dM1");*/\
|
||||
dimension *dM;\
|
||||
min_dimension(&dM, dM0, dM1);\
|
||||
min_copy_dimension(&dM, dM0, dM1);\
|
||||
/*printDebug_dimension(dM,"dM");*/\
|
||||
\
|
||||
dimension *dd;\
|
||||
@@ -539,6 +675,11 @@ void tensorContractnProdNotOpt_##type(tensor_##type** MM, tensor_##type *M0, ten
|
||||
}\
|
||||
/*printf("\n");*/\
|
||||
}\
|
||||
FREE_COORD_ ; \
|
||||
free(coordM0);\
|
||||
free(coordM1);\
|
||||
free(Koord); \
|
||||
FREE_dM_S_ ; \
|
||||
}\
|
||||
\
|
||||
|
||||
|
||||
@@ -14,15 +14,20 @@ struct tensor_##type{\
|
||||
};\
|
||||
typedef struct tensor_##type tensor_##type;\
|
||||
tensor_##type * CREATE_TENSOR_##type(dimension *dim); \
|
||||
tensor_##type * sub_minus_tensor_head_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim); \
|
||||
void free_tensor_##type(tensor_##type * tens); \
|
||||
/*tensor_##type * sub_minus_tensor_head_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim); \
|
||||
tensor_##type * sub_minus_tensor_tail_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim); \
|
||||
tensor_##type * sub_tensor_head_##type(tensor_##type *rootens, size_t subdim, size_t rankInDim); \
|
||||
tensor_##type * sub_tensor_tail_##type(tensor_##type *rootens, size_t subdim, size_t rankInDim); \
|
||||
*/tensor_##type * sub_copy_minus_tensor_head_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim); \
|
||||
tensor_##type * sub_copy_minus_tensor_tail_##type(tensor_##type *rootens, size_t minuSubdim, size_t rankInDim); \
|
||||
tensor_##type * sub_copy_tensor_head_##type(tensor_##type *rootens, size_t sub_copydim, size_t rankInDim); \
|
||||
tensor_##type * sub_copy_tensor_tail_##type(tensor_##type *rootens, size_t sub_copydim, size_t rankInDim); \
|
||||
void tensorProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
|
||||
void tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
|
||||
void tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \
|
||||
void tensorProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1,size_t nbthread); \
|
||||
void tensorProdThread2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1,size_t nbthread); \
|
||||
void tensorProdThrea2d_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1,size_t nbthread); \
|
||||
void tensorContractnProdThread_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t nbthread); \
|
||||
void tensorContractnProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \
|
||||
|
||||
|
||||
Reference in New Issue
Block a user