From c07ed1b160f5c32036e8141034d74551843cdc94 Mon Sep 17 00:00:00 2001 From: fanasina Date: Mon, 12 Feb 2024 12:08:16 +0100 Subject: [PATCH] debug tensor prod contract 2d cl and pthread --- tensor_t/src/kernel_2d_ProdContractnTensor.cl | 8 ++-- tensor_t/src/tensor_t/cl_tensor_t.c | 38 +++++++++---------- tensor_t/src/tensor_t/tensor_t.c | 2 +- tensor_t/test/is_good.c | 3 ++ tensor_t/test_cl/is_good.c | 23 ++++++----- 5 files changed, 38 insertions(+), 36 deletions(-) diff --git a/tensor_t/src/kernel_2d_ProdContractnTensor.cl b/tensor_t/src/kernel_2d_ProdContractnTensor.cl index d577299..fe833e3 100644 --- a/tensor_t/src/kernel_2d_ProdContractnTensor.cl +++ b/tensor_t/src/kernel_2d_ProdContractnTensor.cl @@ -49,8 +49,8 @@ __kernel void prodContractnTensor2dLiniNotEndian_TYPE_FLOAT(long unsigned int dS size_t ind = i + dSubRank * j; Mx[ind] = 0; for (k = 0; k < dMRank; k++) { - n0_id = i + dMRank * k; - n1_id = j * dSubRank + k; + n0_id = i + dSubRank * k; + n1_id = j * dMRank + k; Mx[ind] += M0x[n0_id] * M1x[n1_id]; } } @@ -68,8 +68,8 @@ __kernel void prodContractnTensor2dLinNotEndian_TYPE_DOUBLE(long unsigned int dS size_t ind = i + dSubRank * j; Mx[ind] = 0; for (k = 0; k < dMRank; k++) { - n0_id = i + dMRank * k; - n1_id = j * dSubRank + k; + n0_id = i + dSubRank * k; + n1_id = j * dMRank + k; Mx[ind] += M0x[n0_id] * M1x[n1_id]; } } diff --git a/tensor_t/src/tensor_t/cl_tensor_t.c b/tensor_t/src/tensor_t/cl_tensor_t.c index 255b976..681c6ab 100644 --- a/tensor_t/src/tensor_t/cl_tensor_t.c +++ b/tensor_t/src/tensor_t/cl_tensor_t.c @@ -74,7 +74,7 @@ ret |= clEnqueueWriteBuffer(command_queue, M1_mem_obj, CL_TRUE, 0, \ M1->dim->rank * sizeof(type), M1->x, 0, NULL, NULL); \ \ - checkError(ret,__func__,"Error: Failed to read buffers! ");\ + checkError(ret,__func__,"Error: Failed to write buffers! ");\ /*/ Create a program from the kernel source */ \ cl_program program = clCreateProgramWithSource(context, 1, \ (const char **)&source_str, (const size_t *)&source_size, &ret); \ @@ -97,6 +97,7 @@ /*char func_cl_name[250]; sprintf(func_cl_name,"prodTensorLin_%s", #type);*/ \ /*printf("cl_func_type = %s\n",func_cl_name); */ \ cl_kernel kernel = clCreateKernel(program, func_cl_name, &ret); \ + printf("func_cl_name = %s ......... \n",func_cl_name);\ /*/ Set the arguments of the kernel */ \ @@ -223,7 +224,6 @@ void cl_tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor dSubRank = dSub0->rank;\ \ }\ - printf("func_cl_name = %s ......... \n",func_cl_name);\ SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\ \ /*/ Set the arguments of the kernel */ \ @@ -277,7 +277,7 @@ void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tens \ size_t len0 = M0->dim->size - contractionNumber;\ size_t len1 = M1->dim->size - contractionNumber;\ -\ + \ size_t* tsub0 = malloc(sizeof(size_t) *len0);\ size_t* tsub1 = malloc(sizeof(size_t) *len1);\ size_t* tDk1 = malloc(sizeof(size_t) *contractionNumber);\ @@ -303,22 +303,22 @@ void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tens char *func_cl_nameNotEndian = "prodContractnTensor2dLinNotEndian_" #type; \ char *func_cl_name; \ size_t dSubRank;\ - if(endian){\ - func_cl_name = func_cl_nameEndian;\ - dSubRank = dSub1->rank;\ - \ - }else{\ - func_cl_name = func_cl_nameNotEndian;\ - dSubRank = dSub0->rank;\ - \ - }\ - SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\ - /*size_t cl_dev_max_w_sz,sz_val;\ - ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &cl_dev_max_w_sz, &sz_val);\ - printf("CL_DEVICE_MAX_WORK_GROUP_SIZE = : %ld, sz :%ld\n ",cl_dev_max_w_sz, sz_val);\ - */\ + if(endian){\ + func_cl_name = func_cl_nameEndian;\ + dSubRank = dSub1->rank;\ + \ + }else{\ + func_cl_name = func_cl_nameNotEndian;\ + dSubRank = dSub0->rank;\ + \ + }\ + SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\ + /*size_t cl_dev_max_w_sz,sz_val;\ + ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &cl_dev_max_w_sz, &sz_val);\ + printf("CL_DEVICE_MAX_WORK_GROUP_SIZE = : %ld, sz :%ld\n ",cl_dev_max_w_sz, sz_val);\ + */\ /*/ Set the arguments of the kernel */ \ - ret |= clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&dSubRank); \ + ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&dSubRank); \ ret |= clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \ ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \ ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \ @@ -330,6 +330,7 @@ void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tens READ_BUF_N_CLEANUP(type)\ FREE_dM_S_ \ \ + \ } \ \ @@ -394,4 +395,3 @@ void checkError(cl_int error, const char *func_name, char *msg) { GEN_cl_FUNC_TENSOR(TYPE_FLOAT); GEN_cl_FUNC_TENSOR(TYPE_DOUBLE); - diff --git a/tensor_t/src/tensor_t/tensor_t.c b/tensor_t/src/tensor_t/tensor_t.c index 5fa242b..08ab0d4 100644 --- a/tensor_t/src/tensor_t/tensor_t.c +++ b/tensor_t/src/tensor_t/tensor_t.c @@ -623,7 +623,7 @@ void* runPro2dContract_thread_##type(void *arg){\ if(endian)\ l = j + arg_t->dSub1Rank * i;\ else\ - l = j * arg_t->dMRank + i;\ + l = j * arg_t->dSub0Rank + i;\ arg_t->Mx[l] = 0;\ for (size_t k = 0; k < arg_t->dMRank; k++) {\ if(endian){\ diff --git a/tensor_t/test/is_good.c b/tensor_t/test/is_good.c index a497046..96bb7ad 100644 --- a/tensor_t/test/is_good.c +++ b/tensor_t/test/is_good.c @@ -20,6 +20,7 @@ #define VALGRIND_ 0 TEST(rank){ + endian =true; dimension *D=create_dim(4); D->perm[0]=2; D->perm[1]=3; @@ -556,6 +557,8 @@ TEST(VStensorContractnProd_TYPE_DOUBLE2 ){ } TEST(Pthread_tensorContractnPro2d_TYPE_DOUBLE2 ){ + + endian = false; dimension *d0=create_dim(3); dimension *d1=create_dim(3); #if VALGRIND_ diff --git a/tensor_t/test_cl/is_good.c b/tensor_t/test_cl/is_good.c index d2cbb5f..0aa1575 100644 --- a/tensor_t/test_cl/is_good.c +++ b/tensor_t/test_cl/is_good.c @@ -22,6 +22,7 @@ #define VALGRIND 1 TEST(rank){ +// endian=false; dimension *D=create_dim(4); D->perm[0]=2; D->perm[1]=3; @@ -700,17 +701,18 @@ TEST(VScltensorContractnProd_TYPE_DOUBLE2 ){ } TEST(VScl2dtensorContractnProd_TYPE_DOUBLE2 ){ + endian=false; dimension *d0=create_dim(3); dimension *d1=create_dim(3); #if VALGRIND - d0->perm[0]=12; + d0->perm[0]=8; d0->perm[1]=4; //3; d0->perm[2]=6; d1->perm[0]=4; d1->perm[1]=6;//3; - d1->perm[2]=16; + d1->perm[2]=8; #else @@ -724,14 +726,6 @@ TEST(VScl2dtensorContractnProd_TYPE_DOUBLE2 ){ #endif - d0->perm[0]=512; - d0->perm[1]=48; //3; - d0->perm[2]=64; - - d1->perm[0]=48; - d1->perm[1]=64;//3; - d1->perm[2]=240; - updateRankDim(d0); updateRankDim(d1); @@ -752,10 +746,15 @@ TEST(VScl2dtensorContractnProd_TYPE_DOUBLE2 ){ size_t nbth = 10; + // tensorContractnPro2dThread_TYPE_DOUBLE(&M, M0,M1,2,nbth); tensorContractnProdThread_TYPE_DOUBLE(&M, M0,M1,2,nbth); //print_tensor_double(M,"M"); +#if VALGRIND + //cl_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2); + cl2d_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2,8,8); +#else cl2d_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2,16,16); - //cl2d_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2,8,8); +#endif //tensorContractnProdNotOpt_TYPE_DOUBLE(&MnO, M0,M1,2); //print_tensor_double(MnO,"MnO"); @@ -870,7 +869,7 @@ TEST(tensorProd_vs2d ){ } TEST(tensorProd_vs2d_Endian ){ - endian=false; + //endian=false; dimension *d0=create_dim(3); dimension *d1=create_dim(2);