debug tensor prod contract 2d cl and pthread
This commit is contained in:
@@ -49,8 +49,8 @@ __kernel void prodContractnTensor2dLiniNotEndian_TYPE_FLOAT(long unsigned int dS
|
|||||||
size_t ind = i + dSubRank * j;
|
size_t ind = i + dSubRank * j;
|
||||||
Mx[ind] = 0;
|
Mx[ind] = 0;
|
||||||
for (k = 0; k < dMRank; k++) {
|
for (k = 0; k < dMRank; k++) {
|
||||||
n0_id = i + dMRank * k;
|
n0_id = i + dSubRank * k;
|
||||||
n1_id = j * dSubRank + k;
|
n1_id = j * dMRank + k;
|
||||||
Mx[ind] += M0x[n0_id] * M1x[n1_id];
|
Mx[ind] += M0x[n0_id] * M1x[n1_id];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -68,8 +68,8 @@ __kernel void prodContractnTensor2dLinNotEndian_TYPE_DOUBLE(long unsigned int dS
|
|||||||
size_t ind = i + dSubRank * j;
|
size_t ind = i + dSubRank * j;
|
||||||
Mx[ind] = 0;
|
Mx[ind] = 0;
|
||||||
for (k = 0; k < dMRank; k++) {
|
for (k = 0; k < dMRank; k++) {
|
||||||
n0_id = i + dMRank * k;
|
n0_id = i + dSubRank * k;
|
||||||
n1_id = j * dSubRank + k;
|
n1_id = j * dMRank + k;
|
||||||
Mx[ind] += M0x[n0_id] * M1x[n1_id];
|
Mx[ind] += M0x[n0_id] * M1x[n1_id];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -74,7 +74,7 @@
|
|||||||
ret |= clEnqueueWriteBuffer(command_queue, M1_mem_obj, CL_TRUE, 0, \
|
ret |= clEnqueueWriteBuffer(command_queue, M1_mem_obj, CL_TRUE, 0, \
|
||||||
M1->dim->rank * sizeof(type), M1->x, 0, NULL, NULL); \
|
M1->dim->rank * sizeof(type), M1->x, 0, NULL, NULL); \
|
||||||
\
|
\
|
||||||
checkError(ret,__func__,"Error: Failed to read buffers! ");\
|
checkError(ret,__func__,"Error: Failed to write buffers! ");\
|
||||||
/*/ Create a program from the kernel source */ \
|
/*/ Create a program from the kernel source */ \
|
||||||
cl_program program = clCreateProgramWithSource(context, 1, \
|
cl_program program = clCreateProgramWithSource(context, 1, \
|
||||||
(const char **)&source_str, (const size_t *)&source_size, &ret); \
|
(const char **)&source_str, (const size_t *)&source_size, &ret); \
|
||||||
@@ -97,6 +97,7 @@
|
|||||||
/*char func_cl_name[250]; sprintf(func_cl_name,"prodTensorLin_%s", #type);*/ \
|
/*char func_cl_name[250]; sprintf(func_cl_name,"prodTensorLin_%s", #type);*/ \
|
||||||
/*printf("cl_func_type = %s\n",func_cl_name); */ \
|
/*printf("cl_func_type = %s\n",func_cl_name); */ \
|
||||||
cl_kernel kernel = clCreateKernel(program, func_cl_name, &ret); \
|
cl_kernel kernel = clCreateKernel(program, func_cl_name, &ret); \
|
||||||
|
printf("func_cl_name = %s ......... \n",func_cl_name);\
|
||||||
|
|
||||||
|
|
||||||
/*/ Set the arguments of the kernel */ \
|
/*/ Set the arguments of the kernel */ \
|
||||||
@@ -223,7 +224,6 @@ void cl_tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor
|
|||||||
dSubRank = dSub0->rank;\
|
dSubRank = dSub0->rank;\
|
||||||
\
|
\
|
||||||
}\
|
}\
|
||||||
printf("func_cl_name = %s ......... \n",func_cl_name);\
|
|
||||||
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
|
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
|
||||||
\
|
\
|
||||||
/*/ Set the arguments of the kernel */ \
|
/*/ Set the arguments of the kernel */ \
|
||||||
@@ -277,7 +277,7 @@ void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tens
|
|||||||
\
|
\
|
||||||
size_t len0 = M0->dim->size - contractionNumber;\
|
size_t len0 = M0->dim->size - contractionNumber;\
|
||||||
size_t len1 = M1->dim->size - contractionNumber;\
|
size_t len1 = M1->dim->size - contractionNumber;\
|
||||||
\
|
\
|
||||||
size_t* tsub0 = malloc(sizeof(size_t) *len0);\
|
size_t* tsub0 = malloc(sizeof(size_t) *len0);\
|
||||||
size_t* tsub1 = malloc(sizeof(size_t) *len1);\
|
size_t* tsub1 = malloc(sizeof(size_t) *len1);\
|
||||||
size_t* tDk1 = malloc(sizeof(size_t) *contractionNumber);\
|
size_t* tDk1 = malloc(sizeof(size_t) *contractionNumber);\
|
||||||
@@ -303,22 +303,22 @@ void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tens
|
|||||||
char *func_cl_nameNotEndian = "prodContractnTensor2dLinNotEndian_" #type; \
|
char *func_cl_nameNotEndian = "prodContractnTensor2dLinNotEndian_" #type; \
|
||||||
char *func_cl_name; \
|
char *func_cl_name; \
|
||||||
size_t dSubRank;\
|
size_t dSubRank;\
|
||||||
if(endian){\
|
if(endian){\
|
||||||
func_cl_name = func_cl_nameEndian;\
|
func_cl_name = func_cl_nameEndian;\
|
||||||
dSubRank = dSub1->rank;\
|
dSubRank = dSub1->rank;\
|
||||||
\
|
\
|
||||||
}else{\
|
}else{\
|
||||||
func_cl_name = func_cl_nameNotEndian;\
|
func_cl_name = func_cl_nameNotEndian;\
|
||||||
dSubRank = dSub0->rank;\
|
dSubRank = dSub0->rank;\
|
||||||
\
|
\
|
||||||
}\
|
}\
|
||||||
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
|
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
|
||||||
/*size_t cl_dev_max_w_sz,sz_val;\
|
/*size_t cl_dev_max_w_sz,sz_val;\
|
||||||
ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &cl_dev_max_w_sz, &sz_val);\
|
ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &cl_dev_max_w_sz, &sz_val);\
|
||||||
printf("CL_DEVICE_MAX_WORK_GROUP_SIZE = : %ld, sz :%ld\n ",cl_dev_max_w_sz, sz_val);\
|
printf("CL_DEVICE_MAX_WORK_GROUP_SIZE = : %ld, sz :%ld\n ",cl_dev_max_w_sz, sz_val);\
|
||||||
*/\
|
*/\
|
||||||
/*/ Set the arguments of the kernel */ \
|
/*/ Set the arguments of the kernel */ \
|
||||||
ret |= clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&dSubRank); \
|
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&dSubRank); \
|
||||||
ret |= clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \
|
ret |= clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \
|
||||||
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \
|
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \
|
||||||
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \
|
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \
|
||||||
@@ -330,6 +330,7 @@ void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tens
|
|||||||
READ_BUF_N_CLEANUP(type)\
|
READ_BUF_N_CLEANUP(type)\
|
||||||
FREE_dM_S_ \
|
FREE_dM_S_ \
|
||||||
\
|
\
|
||||||
|
\
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
|
|
||||||
@@ -394,4 +395,3 @@ void checkError(cl_int error, const char *func_name, char *msg) {
|
|||||||
GEN_cl_FUNC_TENSOR(TYPE_FLOAT);
|
GEN_cl_FUNC_TENSOR(TYPE_FLOAT);
|
||||||
GEN_cl_FUNC_TENSOR(TYPE_DOUBLE);
|
GEN_cl_FUNC_TENSOR(TYPE_DOUBLE);
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -623,7 +623,7 @@ void* runPro2dContract_thread_##type(void *arg){\
|
|||||||
if(endian)\
|
if(endian)\
|
||||||
l = j + arg_t->dSub1Rank * i;\
|
l = j + arg_t->dSub1Rank * i;\
|
||||||
else\
|
else\
|
||||||
l = j * arg_t->dMRank + i;\
|
l = j * arg_t->dSub0Rank + i;\
|
||||||
arg_t->Mx[l] = 0;\
|
arg_t->Mx[l] = 0;\
|
||||||
for (size_t k = 0; k < arg_t->dMRank; k++) {\
|
for (size_t k = 0; k < arg_t->dMRank; k++) {\
|
||||||
if(endian){\
|
if(endian){\
|
||||||
|
|||||||
@@ -20,6 +20,7 @@
|
|||||||
#define VALGRIND_ 0
|
#define VALGRIND_ 0
|
||||||
|
|
||||||
TEST(rank){
|
TEST(rank){
|
||||||
|
endian =true;
|
||||||
dimension *D=create_dim(4);
|
dimension *D=create_dim(4);
|
||||||
D->perm[0]=2;
|
D->perm[0]=2;
|
||||||
D->perm[1]=3;
|
D->perm[1]=3;
|
||||||
@@ -556,6 +557,8 @@ TEST(VStensorContractnProd_TYPE_DOUBLE2 ){
|
|||||||
|
|
||||||
}
|
}
|
||||||
TEST(Pthread_tensorContractnPro2d_TYPE_DOUBLE2 ){
|
TEST(Pthread_tensorContractnPro2d_TYPE_DOUBLE2 ){
|
||||||
|
|
||||||
|
endian = false;
|
||||||
dimension *d0=create_dim(3);
|
dimension *d0=create_dim(3);
|
||||||
dimension *d1=create_dim(3);
|
dimension *d1=create_dim(3);
|
||||||
#if VALGRIND_
|
#if VALGRIND_
|
||||||
|
|||||||
+11
-12
@@ -22,6 +22,7 @@
|
|||||||
#define VALGRIND 1
|
#define VALGRIND 1
|
||||||
|
|
||||||
TEST(rank){
|
TEST(rank){
|
||||||
|
// endian=false;
|
||||||
dimension *D=create_dim(4);
|
dimension *D=create_dim(4);
|
||||||
D->perm[0]=2;
|
D->perm[0]=2;
|
||||||
D->perm[1]=3;
|
D->perm[1]=3;
|
||||||
@@ -700,17 +701,18 @@ TEST(VScltensorContractnProd_TYPE_DOUBLE2 ){
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(VScl2dtensorContractnProd_TYPE_DOUBLE2 ){
|
TEST(VScl2dtensorContractnProd_TYPE_DOUBLE2 ){
|
||||||
|
endian=false;
|
||||||
dimension *d0=create_dim(3);
|
dimension *d0=create_dim(3);
|
||||||
dimension *d1=create_dim(3);
|
dimension *d1=create_dim(3);
|
||||||
#if VALGRIND
|
#if VALGRIND
|
||||||
|
|
||||||
d0->perm[0]=12;
|
d0->perm[0]=8;
|
||||||
d0->perm[1]=4; //3;
|
d0->perm[1]=4; //3;
|
||||||
d0->perm[2]=6;
|
d0->perm[2]=6;
|
||||||
|
|
||||||
d1->perm[0]=4;
|
d1->perm[0]=4;
|
||||||
d1->perm[1]=6;//3;
|
d1->perm[1]=6;//3;
|
||||||
d1->perm[2]=16;
|
d1->perm[2]=8;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
@@ -724,14 +726,6 @@ TEST(VScl2dtensorContractnProd_TYPE_DOUBLE2 ){
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
d0->perm[0]=512;
|
|
||||||
d0->perm[1]=48; //3;
|
|
||||||
d0->perm[2]=64;
|
|
||||||
|
|
||||||
d1->perm[0]=48;
|
|
||||||
d1->perm[1]=64;//3;
|
|
||||||
d1->perm[2]=240;
|
|
||||||
|
|
||||||
updateRankDim(d0);
|
updateRankDim(d0);
|
||||||
updateRankDim(d1);
|
updateRankDim(d1);
|
||||||
|
|
||||||
@@ -752,10 +746,15 @@ TEST(VScl2dtensorContractnProd_TYPE_DOUBLE2 ){
|
|||||||
|
|
||||||
size_t nbth = 10;
|
size_t nbth = 10;
|
||||||
|
|
||||||
|
// tensorContractnPro2dThread_TYPE_DOUBLE(&M, M0,M1,2,nbth);
|
||||||
tensorContractnProdThread_TYPE_DOUBLE(&M, M0,M1,2,nbth);
|
tensorContractnProdThread_TYPE_DOUBLE(&M, M0,M1,2,nbth);
|
||||||
//print_tensor_double(M,"M");
|
//print_tensor_double(M,"M");
|
||||||
|
#if VALGRIND
|
||||||
|
//cl_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2);
|
||||||
|
cl2d_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2,8,8);
|
||||||
|
#else
|
||||||
cl2d_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2,16,16);
|
cl2d_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2,16,16);
|
||||||
//cl2d_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2,8,8);
|
#endif
|
||||||
//tensorContractnProdNotOpt_TYPE_DOUBLE(&MnO, M0,M1,2);
|
//tensorContractnProdNotOpt_TYPE_DOUBLE(&MnO, M0,M1,2);
|
||||||
|
|
||||||
//print_tensor_double(MnO,"MnO");
|
//print_tensor_double(MnO,"MnO");
|
||||||
@@ -870,7 +869,7 @@ TEST(tensorProd_vs2d ){
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(tensorProd_vs2d_Endian ){
|
TEST(tensorProd_vs2d_Endian ){
|
||||||
endian=false;
|
//endian=false;
|
||||||
dimension *d0=create_dim(3);
|
dimension *d0=create_dim(3);
|
||||||
dimension *d1=create_dim(2);
|
dimension *d1=create_dim(2);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user