diff --git a/tensor_t/Makefile b/tensor_t/Makefile index 78e30d4..cdca378 100644 --- a/tensor_t/Makefile +++ b/tensor_t/Makefile @@ -40,13 +40,13 @@ DEP=$(DIMDIR) $(PERMDIR) $(TOPTARGETS): $(DEP) -all: $(TENSRC_O) +all: $(TENSRC_O) $(clTENSRC_O) $(TENSRC_O) : $(TENSRC) $(DIMSRC_O) $(PERMSRC_O) $(CC) -o $@ -c $^ $(CFLAGS) $(clTENSRC_O) : $(clTENSRC) $(TENSRC_O) - $(CC) -o $@ -c $^ $(CFLAGS) + $(CC) -o $@ -c $< $(CFLAGS) #$(DIMSRC_O) : $(DIMSRC) $(PERMSRC_O) # $(CC) -o $@ -c $< $(CFLAGS) diff --git a/tensor_t/src/tensor_t/cl_tensor_t.c b/tensor_t/src/tensor_t/cl_tensor_t.c index 0234702..c3c08fa 100644 --- a/tensor_t/src/tensor_t/cl_tensor_t.c +++ b/tensor_t/src/tensor_t/cl_tensor_t.c @@ -3,7 +3,9 @@ #define MAX_SOURCE_SIZE (0x100000) -#define CL_GEN_SETUP_(type,file_cl_src,func_cl_name)\ + + +#define SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name)\ /* Load the kernel source code into the array source_str*/ \ FILE *fp; \ char *source_str; \ @@ -28,13 +30,24 @@ cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); \ ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, \ &device_id, &ret_num_devices); \ + size_t returned_size = 0;\ + size_t max_workgroup_size = 0;\ + ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_workgroup_size, &returned_size);\ + checkError(ret,__func__,"Error: Failed to retrieve device info!");\ + printf(" ===========================================================++> return size: %ld\n max group sz: %ld\n", returned_size, max_workgroup_size);\ +\ +/*int gpu = 1;\ + ret = clGetDeviceIDs( platform_id, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, \ + &device_id, &ret_num_devices); */\ \ /*/ Create an OpenCL context */ \ cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); \ \ /*/ Create a command queue */ \ - cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); \ + /*cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);*/ \ + cl_command_queue command_queue = clCreateCommandQueueWithProperties(context, device_id, NULL, &ret); /* NULL =default properties = in order */ \ \ + checkError(ret,__func__,"Error: Failed to create command queue with properties ");\ \ /*/ Create memory buffers on the device for each vector */ \ cl_mem M0_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, \ @@ -43,25 +56,35 @@ M1->dim->rank * sizeof(type), NULL, &ret); \ cl_mem M_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, \ M->dim->rank * sizeof(type), NULL, &ret); \ + if (!M0_mem_obj || !M1_mem_obj || !M_mem_obj) {\ + printf("Error: Failed to creat buffer! %d\n", ret);\ + exit(1);\ + }\ \ /*/ Copy the lists A and B to their respective memory buffers */ \ ret = clEnqueueWriteBuffer(command_queue, M0_mem_obj, CL_TRUE, 0, \ M0->dim->rank * sizeof(type), M0->x, 0, NULL, NULL); \ - ret = clEnqueueWriteBuffer(command_queue, M1_mem_obj, CL_TRUE, 0, \ + ret |= clEnqueueWriteBuffer(command_queue, M1_mem_obj, CL_TRUE, 0, \ M1->dim->rank * sizeof(type), M1->x, 0, NULL, NULL); \ \ + checkError(ret,__func__,"Error: Failed to read buffers! ");\ /*/ Create a program from the kernel source */ \ cl_program program = clCreateProgramWithSource(context, 1, \ (const char **)&source_str, (const size_t *)&source_size, &ret); \ \ - printf("log 0\n");\ + if (!program) {\ + printf("Error: Failed to create compute program!\n");\ + return ;\ + }\ /*/ Build the program */ \ ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); \ - size_t len; clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, NULL, NULL, &len);\ - char *log = malloc(sizeof(char)*len);\ - clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, len, log, NULL);\ - printf("log: %s \n",log);\ - \ + if(ret != CL_SUCCESS){\ + size_t len; \ + clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);\ + char *log = malloc(sizeof(char)*len);\ + clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, len, log, NULL);\ + printf("Error: Failed to build program executable!\n %s \n",log);\ + }\ /*/ Create the OpenCL kernel */ \ /*char func_cl_name[250]; sprintf(func_cl_name,"prodTensorLin_%s", #type);*/ \ printf("cl_func_type = %s\n",func_cl_name); \ @@ -75,29 +98,46 @@ ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \ */ -#define CL_EXEC_KERNEL(type)\ +#define EXEC_cl_KERNEL(type)\ /*/ Execute the OpenCL kernel on the list */ \ size_t global_item_size = M->dim->rank; /*/ Process the entire lists */ \ size_t local_item_size = 1; /*64;*/ /*/ Divide work items into groups of 64 */ \ ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, \ &global_item_size, &local_item_size, 0, NULL, NULL); \ \ - /*/ Read the memory buffer Mx on the device to the local variable M->x */ \ + checkError(ret,__func__,"Error: Failed to execute! ");\ + +#define EXEC_cl_2d_KERNEL(type, global_W_sz0, global_W_sz1, local_W_sz0, local_W_sz1)\ + /*/ Execute the OpenCL kernel on the list */ \ + size_t global_item_size[2]={global_W_sz0, global_W_sz1};/*/ Process the entire lists */ \ + size_t local_item_size[2]={local_W_sz0, local_W_sz1}; /* Divide work items into groups of 64 */ \ + ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, \ + global_item_size, local_item_size, 0, NULL, NULL); \ + \ + checkError(ret,__func__,"Error: Failed to execute! ");\ + + + + +#define READ_BUF_N_CLEANUP(type) \ + /*/ Read the memory buffer Mx on the device to the local variable M->x */ \ ret = clEnqueueReadBuffer(command_queue, M_mem_obj, CL_TRUE, 0, \ M->dim->rank * sizeof(type), M->x, 0, NULL, NULL); \ \ + checkError(ret,__func__,"Error: Failed to read buffer! ");\ /*/ Clean up */ \ ret = clFlush(command_queue); \ - ret = clFinish(command_queue); \ - ret = clReleaseKernel(kernel); \ - ret = clReleaseProgram(program); \ - ret = clReleaseMemObject(M0_mem_obj); \ - ret = clReleaseMemObject(M1_mem_obj); \ - ret = clReleaseMemObject(M_mem_obj); \ - ret = clReleaseCommandQueue(command_queue); \ - ret = clReleaseContext(context); \ + ret |= clFinish(command_queue); \ + ret |= clReleaseKernel(kernel); \ + ret |= clReleaseProgram(program); \ + ret |= clReleaseMemObject(M0_mem_obj); \ + ret |= clReleaseMemObject(M1_mem_obj); \ + ret |= clReleaseMemObject(M_mem_obj); \ + ret |= clReleaseCommandQueue(command_queue); \ + ret |= clReleaseContext(context); \ + checkError(ret,__func__,"Error: Failed to clean up! ");\ -#define CL_GEN_FUNC_TENSOR(type)\ +#define GEN_cl_FUNC_TENSOR(type)\ \ \ void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1) { \ @@ -107,13 +147,15 @@ void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type * tensor_##type *M = *MM; \ char *file_cl_src = "../src/kernel_ProdTensor.cl"; \ char *func_cl_name = "prodTensorLin_" #type; \ - CL_GEN_SETUP_(type,file_cl_src,func_cl_name);\ + SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\ /*/ Set the arguments of the kernel */ \ ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(M1->dim->rank)); \ - ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \ - ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \ - ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \ - CL_EXEC_KERNEL(type);\ + ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \ + ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \ + ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \ + checkError(ret,__func__,"Error: Failed to set kernel arguments! ");\ + EXEC_cl_KERNEL(type);\ + READ_BUF_N_CLEANUP(type)\ } \ \ /* M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l\ @@ -147,21 +189,98 @@ void cl_tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor tensor_##type *M= *MM;\ char *file_cl_src = "../src/kernel_ProdContractnTensor.cl"; \ char *func_cl_name = "prodContractnTensorLin_" #type; \ - CL_GEN_SETUP_(type,file_cl_src,func_cl_name);\ + SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\ \ /*/ Set the arguments of the kernel */ \ ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(dSub1->rank)); \ - ret = clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \ - ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \ - ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \ - ret = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&M_mem_obj); \ + ret |= clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \ + ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \ + ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \ + ret |= clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&M_mem_obj); \ + checkError(ret,__func__,"Error: Failed to set kernel arguments! ");\ \ - CL_EXEC_KERNEL(type);\ + EXEC_cl_KERNEL(type);\ + READ_BUF_N_CLEANUP(type)\ \ } \ - - -CL_GEN_FUNC_TENSOR(TYPE_FLOAT); -CL_GEN_FUNC_TENSOR(TYPE_DOUBLE); +\ +\ +void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t div0Wsz, size_t div1Wsz) { \ + dimension *dd; \ + add_dimension(&dd, M0->dim, M1->dim); \ + (*MM)=CREATE_TENSOR_##type(dd); \ + tensor_##type *M = *MM; \ + char *file_cl_src = "../src/kernel_2d_ProdTensor.cl"; \ + char *func_cl_name = "prodTensor2dLin_" #type; \ + SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\ + /*/ Set the arguments of the kernel */ \ + ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(M1->dim->rank)); \ + ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \ + ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \ + ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \ + checkError(ret,__func__,"Error: Failed to set kernel arguments! ");\ + EXEC_cl_2d_KERNEL(type,M0->dim->rank,M1->dim->rank,div0Wsz,div1Wsz);\ + \ + READ_BUF_N_CLEANUP(type)\ +} \ +\ + +void checkError(cl_int error, const char *func_name, char *msg) { + if (error != CL_SUCCESS) { + printf("%s\n",msg); + switch (error) { + case CL_DEVICE_NOT_FOUND: printf(" Error in %s: Device not found.\n",func_name); break; + case CL_DEVICE_NOT_AVAILABLE: printf(" Error in %s: Device not available\n",func_name); break; + case CL_COMPILER_NOT_AVAILABLE: printf(" Error in %s: Compiler not available\n",func_name); break; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: printf(" Error in %s: Memory object allocation failure\n",func_name); break; + case CL_OUT_OF_RESOURCES: printf(" Error in %s: Out of resources\n",func_name); break; + case CL_OUT_OF_HOST_MEMORY: printf(" Error in %s: Out of host memory\n",func_name); break; + case CL_PROFILING_INFO_NOT_AVAILABLE: printf(" Error in %s: Profiling information not available\n",func_name); break; + case CL_MEM_COPY_OVERLAP: printf(" Error in %s: Memory copy overlap\n",func_name); break; + case CL_IMAGE_FORMAT_MISMATCH: printf(" Error in %s: Image format mismatch\n",func_name); break; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: printf(" Error in %s: Image format not supported\n",func_name); break; + case CL_BUILD_PROGRAM_FAILURE: printf(" Error in %s: Program build failure\n",func_name); break; + case CL_MAP_FAILURE: printf(" Error in %s: Map failure\n",func_name); break; + case CL_INVALID_VALUE: printf(" Error in %s: Invalid value\n",func_name); break; + case CL_INVALID_DEVICE_TYPE: printf(" Error in %s: Invalid device type\n",func_name); break; + case CL_INVALID_PLATFORM: printf(" Error in %s: Invalid platform\n",func_name); break; + case CL_INVALID_DEVICE: printf(" Error in %s: Invalid device\n",func_name); break; + case CL_INVALID_CONTEXT: printf(" Error in %s: Invalid context\n",func_name); break; + case CL_INVALID_QUEUE_PROPERTIES: printf(" Error in %s: Invalid queue properties\n",func_name); break; + case CL_INVALID_COMMAND_QUEUE: printf(" Error in %s: Invalid command queue\n",func_name); break; + case CL_INVALID_HOST_PTR: printf(" Error in %s: Invalid host pointer\n",func_name); break; + case CL_INVALID_MEM_OBJECT: printf(" Error in %s: Invalid memory object\n",func_name); break; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: printf(" Error in %s: Invalid image format descriptor\n",func_name); break; + case CL_INVALID_IMAGE_SIZE: printf(" Error in %s: Invalid image size\n",func_name); break; + case CL_INVALID_SAMPLER: printf(" Error in %s: Invalid sampler\n",func_name); break; + case CL_INVALID_BINARY: printf(" Error in %s: Invalid binary\n",func_name); break; + case CL_INVALID_BUILD_OPTIONS: printf(" Error in %s: Invalid build options\n",func_name); break; + case CL_INVALID_PROGRAM: printf(" Error in %s: Invalid program\n",func_name); break; + case CL_INVALID_PROGRAM_EXECUTABLE: printf(" Error in %s: Invalid program executable\n",func_name); break; + case CL_INVALID_KERNEL_NAME: printf(" Error in %s: Invalid kernel name\n",func_name); break; + case CL_INVALID_KERNEL_DEFINITION: printf(" Error in %s: Invalid kernel definition\n",func_name); break; + case CL_INVALID_KERNEL: printf(" Error in %s: Invalid kernel\n",func_name); break; + case CL_INVALID_ARG_INDEX: printf(" Error in %s: Invalid argument index\n",func_name); break; + case CL_INVALID_ARG_VALUE: printf(" Error in %s: Invalid argument value\n",func_name); break; + case CL_INVALID_ARG_SIZE: printf(" Error in %s: Invalid argument size\n",func_name); break; + case CL_INVALID_KERNEL_ARGS: printf(" Error in %s: Invalid kernel arguments\n",func_name); break; + case CL_INVALID_WORK_DIMENSION: printf(" Error in %s: Invalid work dimensionsension\n",func_name); break; + case CL_INVALID_WORK_GROUP_SIZE: printf(" Error in %s: Invalid work group size\n",func_name); break; + case CL_INVALID_WORK_ITEM_SIZE: printf(" Error in %s: Invalid work item size\n",func_name); break; + case CL_INVALID_GLOBAL_OFFSET: printf(" Error in %s: Invalid global offset\n",func_name); break; + case CL_INVALID_EVENT_WAIT_LIST: printf(" Error in %s: Invalid event wait list\n",func_name); break; + case CL_INVALID_EVENT: printf(" Error in %s: Invalid event\n",func_name); break; + case CL_INVALID_OPERATION: printf(" Error in %s: Invalid operation\n",func_name); break; + case CL_INVALID_GL_OBJECT: printf(" Error in %s: Invalid OpenGL object\n",func_name); break; + case CL_INVALID_BUFFER_SIZE: printf(" Error in %s: Invalid buffer size\n",func_name); break; + case CL_INVALID_MIP_LEVEL: printf(" Error in %s: Invalid mip-map level\n",func_name); break; + default: printf(" Error in %s: Unknown with code %d\n",func_name, error); + } + exit(1); + } +} + +GEN_cl_FUNC_TENSOR(TYPE_FLOAT); +GEN_cl_FUNC_TENSOR(TYPE_DOUBLE); diff --git a/tensor_t/src/tensor_t/cl_tensor_t.h b/tensor_t/src/tensor_t/cl_tensor_t.h index f3ecbae..9b300ef 100644 --- a/tensor_t/src/tensor_t/cl_tensor_t.h +++ b/tensor_t/src/tensor_t/cl_tensor_t.h @@ -4,6 +4,8 @@ #include #include +#define CL_TARGET_OPENCL_VERSION 300 + #ifdef __APPLE__ #include #else @@ -12,10 +14,12 @@ #include "tensor_t/tensor_t.h" + #define CL_GENERATE_TENSOR_TYPE(type) \ void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \ -void cl_tensorContractnProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \ - +void cl_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \ +void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t div0Wsz, size_t div1Wsz); \ +/*void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t div0Wsz, size_t div1Wsz);*/ \ CL_GENERATE_TENSOR_TYPE(TYPE_FLOAT); CL_GENERATE_TENSOR_TYPE(TYPE_DOUBLE); diff --git a/tensor_t/test/Makefile b/tensor_t/test/Makefile index 32b85e0..56ae28e 100644 --- a/tensor_t/test/Makefile +++ b/tensor_t/test/Makefile @@ -28,8 +28,6 @@ EXEC=launch_$(NAME_TEST)_m TENSRC=$(TENSDIR)/src/tensor_t/tensor_t.c TENSRC_O=$(TENSRC:.c=.o) -clTENSRC=$(TENSDIR)/src/tensor_t/cl_tensor_t.c -clTENSRC_O=$(clTENSRC:.c=.o) PERMSRC_O=$(YPERMDIR)/src/permutation_t/permutation_t.o @@ -40,7 +38,7 @@ TOPTARGETS := all clean DEPS=$(DIMDIR) $(YPERMDIR) $(YTESTDIR) $(TENSDIR) -OBJ=$(DIMSRC_O) $(PERMSRC_O) $(TENSRC_O) $(clTENSRC_O) +OBJ=$(DIMSRC_O) $(PERMSRC_O) $(TENSRC_O) LIB_YTEST=$(YTESTDIR)/libytest.so diff --git a/tensor_t/test/is_good.c b/tensor_t/test/is_good.c index 5f82ae3..b560424 100644 --- a/tensor_t/test/is_good.c +++ b/tensor_t/test/is_good.c @@ -16,7 +16,6 @@ //#include "permutation_t/permutation_t.h" #include "tensor_t/tensor_t.h" -#include "tensor_t/cl_tensor_t.h" TEST(rank){ dimension *D=create_dim(4); @@ -179,98 +178,6 @@ TEST(tensorContractnProd_TYPE_FLOAT2 ){ } -TEST(cl_tensorContractnProd_TYPE_FLOAT2 ){ - dimension *d0=create_dim(3); - dimension *d1=create_dim(3); - - d0->perm[0]=35; - d0->perm[1]=32; //3; - d0->perm[2]=23; - - d1->perm[0]=32; - d1->perm[1]=23;//3; - d1->perm[2]=44; - - updateRankDim(d0); - updateRankDim(d1); - - - tensor_TYPE_FLOAT *M0 = CREATE_TENSOR_TYPE_FLOAT(d0); - tensor_TYPE_FLOAT *M1 = CREATE_TENSOR_TYPE_FLOAT(d1); - - LOG("M0->dim->rank = %ld\n",M0->dim->rank); - LOG("M1->dim->rank = %ld\n",M1->dim->rank); - for(size_t i=0; idim->rank;++i) M0->x[i]=i*0.1 +1; - for(size_t i=0; idim->rank;++i) M1->x[i]=i*0.003 + 2; - -// print_tensor_float(M0,"M0"); -// print_tensor_float(M1,"M1"); - - tensor_TYPE_FLOAT *M; - tensor_TYPE_FLOAT *MnO; - - tensorContractnProdNotOpt_TYPE_FLOAT(&M, M0,M1,2); -// print_tensor_float(M,"M"); - cl_tensorContractnProd_TYPE_FLOAT(&MnO, M0,M1,2); - - -// print_tensor_float(MnO,"MnO"); - - // for(size_t i=0;idim->rank;++i) - // EXPECT_EQ_TYPE_FLOAT(M->x[i],MnO->x[i]); - - //EXPECT_ARRAY_EQ_TYPE_FLOAT(M->x,M->dim->rank,MnO->x,MnO->dim->rank); - - -} - -TEST(cl_tensorContractnProd_TYPE_DOUBLE2 ){ - dimension *d0=create_dim(3); - dimension *d1=create_dim(3); - - d0->perm[0]=125; - d0->perm[1]=52; //3; - d0->perm[2]=63; - - d1->perm[0]=52; - d1->perm[1]=63;//3; - d1->perm[2]=54; - - updateRankDim(d0); - updateRankDim(d1); - - - tensor_TYPE_DOUBLE *M0 = CREATE_TENSOR_TYPE_DOUBLE(d0); - tensor_TYPE_DOUBLE *M1 = CREATE_TENSOR_TYPE_DOUBLE(d1); - - LOG("M0->dim->rank = %ld\n",M0->dim->rank); - LOG("M1->dim->rank = %ld\n",M1->dim->rank); - for(size_t i=0; idim->rank;++i) M0->x[i]=i*0.1 +1; - for(size_t i=0; idim->rank;++i) M1->x[i]=i*0.003 + 2; - - //print_tensor_double(M0,"M0"); - //print_tensor_double(M1,"M1"); - - tensor_TYPE_DOUBLE *M; - tensor_TYPE_DOUBLE *MnO; - - tensorContractnProdNotOpt_TYPE_DOUBLE(&M, M0,M1,2); - //tensorContractnProd_TYPE_DOUBLE(&M, M0,M1,2); - //print_tensor_double(M,"M"); - cl_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2); - - - //print_tensor_double(MnO,"MnO"); - - // for(size_t i=0;idim->rank;++i) - // EXPECT_EQ_TYPE_DOUBLE(M->x[i],MnO->x[i]); - - EXPECT_ARRAY_EQ_TYPE_DOUBLE(M->x,M->dim->rank,MnO->x,MnO->dim->rank); - - -} - - TEST(tensorContractnProd_TYPE_DOUBLE2 ){ dimension *d0=create_dim(3); dimension *d1=create_dim(3); @@ -314,94 +221,6 @@ TEST(tensorContractnProd_TYPE_DOUBLE2 ){ EXPECT_ARRAY_EQ_TYPE_DOUBLE(M->x,M->dim->rank,MnO->x,MnO->dim->rank); -} - -TEST(TensorProdCL){ - dimension *d0=create_dim(3); - dimension *d1=create_dim(2); - - d0->perm[0]=2; - d0->perm[1]=3; - d0->perm[2]=2; - - d1->perm[0]=2; - d1->perm[1]=3; - - updateRankDim(d0); - updateRankDim(d1); - - - tensor_TYPE_FLOAT *M0 = CREATE_TENSOR_TYPE_FLOAT(d0); - tensor_TYPE_FLOAT *M1 = CREATE_TENSOR_TYPE_FLOAT(d1); - - LOG("M0->dim->rank = %ld\n",M0->dim->rank); - LOG("M1->dim->rank = %ld\n",M1->dim->rank); - for(size_t i=0; idim->rank;++i) M0->x[i]=i*0.1 +1; - for(size_t i=0; idim->rank;++i) M1->x[i]=i*0.003 + 2; - - print_tensor_float(M0,"M0"); - print_tensor_float(M1,"M1"); - - - tensor_TYPE_FLOAT *M; - tensor_TYPE_FLOAT *Mn; - - tensorProd_TYPE_FLOAT(&M,M0,M1); - cl_tensorProd_TYPE_FLOAT(&Mn,M0,M1); - LOG("M->dim->rank = %ld\n",M->dim->rank); - - print_tensor_float(M,"M"); - - EXPECT_ARRAY_EQ_TYPE_FLOAT(M->x,M->dim->rank,Mn->x,Mn->dim->rank); - - - print_tensor_float(Mn,"Mn"); -} - -TEST(VS_cl_tensorContractnProd_TYPE_DOUBLE2 ){ - dimension *d0=create_dim(3); - dimension *d1=create_dim(3); - - d0->perm[0]=125; - d0->perm[1]=52; //3; - d0->perm[2]=63; - - d1->perm[0]=52; - d1->perm[1]=63;//3; - d1->perm[2]=154; - - updateRankDim(d0); - updateRankDim(d1); - - - tensor_TYPE_DOUBLE *M0 = CREATE_TENSOR_TYPE_DOUBLE(d0); - tensor_TYPE_DOUBLE *M1 = CREATE_TENSOR_TYPE_DOUBLE(d1); - - LOG("M0->dim->rank = %ld\n",M0->dim->rank); - LOG("M1->dim->rank = %ld\n",M1->dim->rank); - for(size_t i=0; idim->rank;++i) M0->x[i]=i*0.1 +1; - for(size_t i=0; idim->rank;++i) M1->x[i]=i*0.003 + 2; - - //print_tensor_double(M0,"M0"); - //print_tensor_double(M1,"M1"); - - //tensor_TYPE_DOUBLE *M; - tensor_TYPE_DOUBLE *MnO; - - //tensorContractnProdNotOpt_TYPE_DOUBLE(&M, M0,M1,2); - //tensorContractnProd_TYPE_DOUBLE(&M, M0,M1,2); - //print_tensor_double(M,"M"); - cl_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2); - - - //print_tensor_double(MnO,"MnO"); - - // for(size_t i=0;idim->rank;++i) - // EXPECT_EQ_TYPE_DOUBLE(M->x[i],MnO->x[i]); - - //EXPECT_ARRAY_EQ_TYPE_DOUBLE(M->x,M->dim->rank,MnO->x,MnO->dim->rank); - - } @@ -450,8 +269,6 @@ TEST(VStensorContractnProd_TYPE_DOUBLE2 ){ } - - int main(int argc, char **argv){