split cl test
This commit is contained in:
+2
-2
@@ -40,13 +40,13 @@ DEP=$(DIMDIR) $(PERMDIR)
|
|||||||
|
|
||||||
$(TOPTARGETS): $(DEP)
|
$(TOPTARGETS): $(DEP)
|
||||||
|
|
||||||
all: $(TENSRC_O)
|
all: $(TENSRC_O) $(clTENSRC_O)
|
||||||
|
|
||||||
$(TENSRC_O) : $(TENSRC) $(DIMSRC_O) $(PERMSRC_O)
|
$(TENSRC_O) : $(TENSRC) $(DIMSRC_O) $(PERMSRC_O)
|
||||||
$(CC) -o $@ -c $^ $(CFLAGS)
|
$(CC) -o $@ -c $^ $(CFLAGS)
|
||||||
|
|
||||||
$(clTENSRC_O) : $(clTENSRC) $(TENSRC_O)
|
$(clTENSRC_O) : $(clTENSRC) $(TENSRC_O)
|
||||||
$(CC) -o $@ -c $^ $(CFLAGS)
|
$(CC) -o $@ -c $< $(CFLAGS)
|
||||||
|
|
||||||
#$(DIMSRC_O) : $(DIMSRC) $(PERMSRC_O)
|
#$(DIMSRC_O) : $(DIMSRC) $(PERMSRC_O)
|
||||||
# $(CC) -o $@ -c $< $(CFLAGS)
|
# $(CC) -o $@ -c $< $(CFLAGS)
|
||||||
|
|||||||
@@ -3,7 +3,9 @@
|
|||||||
|
|
||||||
#define MAX_SOURCE_SIZE (0x100000)
|
#define MAX_SOURCE_SIZE (0x100000)
|
||||||
|
|
||||||
#define CL_GEN_SETUP_(type,file_cl_src,func_cl_name)\
|
|
||||||
|
|
||||||
|
#define SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name)\
|
||||||
/* Load the kernel source code into the array source_str*/ \
|
/* Load the kernel source code into the array source_str*/ \
|
||||||
FILE *fp; \
|
FILE *fp; \
|
||||||
char *source_str; \
|
char *source_str; \
|
||||||
@@ -28,13 +30,24 @@
|
|||||||
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); \
|
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); \
|
||||||
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, \
|
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, \
|
||||||
&device_id, &ret_num_devices); \
|
&device_id, &ret_num_devices); \
|
||||||
|
size_t returned_size = 0;\
|
||||||
|
size_t max_workgroup_size = 0;\
|
||||||
|
ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_workgroup_size, &returned_size);\
|
||||||
|
checkError(ret,__func__,"Error: Failed to retrieve device info!");\
|
||||||
|
printf(" ===========================================================++> return size: %ld\n max group sz: %ld\n", returned_size, max_workgroup_size);\
|
||||||
|
\
|
||||||
|
/*int gpu = 1;\
|
||||||
|
ret = clGetDeviceIDs( platform_id, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, \
|
||||||
|
&device_id, &ret_num_devices); */\
|
||||||
\
|
\
|
||||||
/*/ Create an OpenCL context */ \
|
/*/ Create an OpenCL context */ \
|
||||||
cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); \
|
cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); \
|
||||||
\
|
\
|
||||||
/*/ Create a command queue */ \
|
/*/ Create a command queue */ \
|
||||||
cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); \
|
/*cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);*/ \
|
||||||
|
cl_command_queue command_queue = clCreateCommandQueueWithProperties(context, device_id, NULL, &ret); /* NULL =default properties = in order */ \
|
||||||
\
|
\
|
||||||
|
checkError(ret,__func__,"Error: Failed to create command queue with properties ");\
|
||||||
\
|
\
|
||||||
/*/ Create memory buffers on the device for each vector */ \
|
/*/ Create memory buffers on the device for each vector */ \
|
||||||
cl_mem M0_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, \
|
cl_mem M0_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, \
|
||||||
@@ -43,25 +56,35 @@
|
|||||||
M1->dim->rank * sizeof(type), NULL, &ret); \
|
M1->dim->rank * sizeof(type), NULL, &ret); \
|
||||||
cl_mem M_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, \
|
cl_mem M_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, \
|
||||||
M->dim->rank * sizeof(type), NULL, &ret); \
|
M->dim->rank * sizeof(type), NULL, &ret); \
|
||||||
|
if (!M0_mem_obj || !M1_mem_obj || !M_mem_obj) {\
|
||||||
|
printf("Error: Failed to creat buffer! %d\n", ret);\
|
||||||
|
exit(1);\
|
||||||
|
}\
|
||||||
\
|
\
|
||||||
/*/ Copy the lists A and B to their respective memory buffers */ \
|
/*/ Copy the lists A and B to their respective memory buffers */ \
|
||||||
ret = clEnqueueWriteBuffer(command_queue, M0_mem_obj, CL_TRUE, 0, \
|
ret = clEnqueueWriteBuffer(command_queue, M0_mem_obj, CL_TRUE, 0, \
|
||||||
M0->dim->rank * sizeof(type), M0->x, 0, NULL, NULL); \
|
M0->dim->rank * sizeof(type), M0->x, 0, NULL, NULL); \
|
||||||
ret = clEnqueueWriteBuffer(command_queue, M1_mem_obj, CL_TRUE, 0, \
|
ret |= clEnqueueWriteBuffer(command_queue, M1_mem_obj, CL_TRUE, 0, \
|
||||||
M1->dim->rank * sizeof(type), M1->x, 0, NULL, NULL); \
|
M1->dim->rank * sizeof(type), M1->x, 0, NULL, NULL); \
|
||||||
\
|
\
|
||||||
|
checkError(ret,__func__,"Error: Failed to read buffers! ");\
|
||||||
/*/ Create a program from the kernel source */ \
|
/*/ Create a program from the kernel source */ \
|
||||||
cl_program program = clCreateProgramWithSource(context, 1, \
|
cl_program program = clCreateProgramWithSource(context, 1, \
|
||||||
(const char **)&source_str, (const size_t *)&source_size, &ret); \
|
(const char **)&source_str, (const size_t *)&source_size, &ret); \
|
||||||
\
|
\
|
||||||
printf("log 0\n");\
|
if (!program) {\
|
||||||
|
printf("Error: Failed to create compute program!\n");\
|
||||||
|
return ;\
|
||||||
|
}\
|
||||||
/*/ Build the program */ \
|
/*/ Build the program */ \
|
||||||
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); \
|
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); \
|
||||||
size_t len; clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, NULL, NULL, &len);\
|
if(ret != CL_SUCCESS){\
|
||||||
char *log = malloc(sizeof(char)*len);\
|
size_t len; \
|
||||||
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, len, log, NULL);\
|
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);\
|
||||||
printf("log: %s \n",log);\
|
char *log = malloc(sizeof(char)*len);\
|
||||||
\
|
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, len, log, NULL);\
|
||||||
|
printf("Error: Failed to build program executable!\n %s \n",log);\
|
||||||
|
}\
|
||||||
/*/ Create the OpenCL kernel */ \
|
/*/ Create the OpenCL kernel */ \
|
||||||
/*char func_cl_name[250]; sprintf(func_cl_name,"prodTensorLin_%s", #type);*/ \
|
/*char func_cl_name[250]; sprintf(func_cl_name,"prodTensorLin_%s", #type);*/ \
|
||||||
printf("cl_func_type = %s\n",func_cl_name); \
|
printf("cl_func_type = %s\n",func_cl_name); \
|
||||||
@@ -75,29 +98,46 @@
|
|||||||
ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \
|
ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define CL_EXEC_KERNEL(type)\
|
#define EXEC_cl_KERNEL(type)\
|
||||||
/*/ Execute the OpenCL kernel on the list */ \
|
/*/ Execute the OpenCL kernel on the list */ \
|
||||||
size_t global_item_size = M->dim->rank; /*/ Process the entire lists */ \
|
size_t global_item_size = M->dim->rank; /*/ Process the entire lists */ \
|
||||||
size_t local_item_size = 1; /*64;*/ /*/ Divide work items into groups of 64 */ \
|
size_t local_item_size = 1; /*64;*/ /*/ Divide work items into groups of 64 */ \
|
||||||
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, \
|
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, \
|
||||||
&global_item_size, &local_item_size, 0, NULL, NULL); \
|
&global_item_size, &local_item_size, 0, NULL, NULL); \
|
||||||
\
|
\
|
||||||
/*/ Read the memory buffer Mx on the device to the local variable M->x */ \
|
checkError(ret,__func__,"Error: Failed to execute! ");\
|
||||||
|
|
||||||
|
#define EXEC_cl_2d_KERNEL(type, global_W_sz0, global_W_sz1, local_W_sz0, local_W_sz1)\
|
||||||
|
/*/ Execute the OpenCL kernel on the list */ \
|
||||||
|
size_t global_item_size[2]={global_W_sz0, global_W_sz1};/*/ Process the entire lists */ \
|
||||||
|
size_t local_item_size[2]={local_W_sz0, local_W_sz1}; /* Divide work items into groups of 64 */ \
|
||||||
|
ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, \
|
||||||
|
global_item_size, local_item_size, 0, NULL, NULL); \
|
||||||
|
\
|
||||||
|
checkError(ret,__func__,"Error: Failed to execute! ");\
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define READ_BUF_N_CLEANUP(type) \
|
||||||
|
/*/ Read the memory buffer Mx on the device to the local variable M->x */ \
|
||||||
ret = clEnqueueReadBuffer(command_queue, M_mem_obj, CL_TRUE, 0, \
|
ret = clEnqueueReadBuffer(command_queue, M_mem_obj, CL_TRUE, 0, \
|
||||||
M->dim->rank * sizeof(type), M->x, 0, NULL, NULL); \
|
M->dim->rank * sizeof(type), M->x, 0, NULL, NULL); \
|
||||||
\
|
\
|
||||||
|
checkError(ret,__func__,"Error: Failed to read buffer! ");\
|
||||||
/*/ Clean up */ \
|
/*/ Clean up */ \
|
||||||
ret = clFlush(command_queue); \
|
ret = clFlush(command_queue); \
|
||||||
ret = clFinish(command_queue); \
|
ret |= clFinish(command_queue); \
|
||||||
ret = clReleaseKernel(kernel); \
|
ret |= clReleaseKernel(kernel); \
|
||||||
ret = clReleaseProgram(program); \
|
ret |= clReleaseProgram(program); \
|
||||||
ret = clReleaseMemObject(M0_mem_obj); \
|
ret |= clReleaseMemObject(M0_mem_obj); \
|
||||||
ret = clReleaseMemObject(M1_mem_obj); \
|
ret |= clReleaseMemObject(M1_mem_obj); \
|
||||||
ret = clReleaseMemObject(M_mem_obj); \
|
ret |= clReleaseMemObject(M_mem_obj); \
|
||||||
ret = clReleaseCommandQueue(command_queue); \
|
ret |= clReleaseCommandQueue(command_queue); \
|
||||||
ret = clReleaseContext(context); \
|
ret |= clReleaseContext(context); \
|
||||||
|
checkError(ret,__func__,"Error: Failed to clean up! ");\
|
||||||
|
|
||||||
#define CL_GEN_FUNC_TENSOR(type)\
|
#define GEN_cl_FUNC_TENSOR(type)\
|
||||||
\
|
\
|
||||||
\
|
\
|
||||||
void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1) { \
|
void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1) { \
|
||||||
@@ -107,13 +147,15 @@ void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *
|
|||||||
tensor_##type *M = *MM; \
|
tensor_##type *M = *MM; \
|
||||||
char *file_cl_src = "../src/kernel_ProdTensor.cl"; \
|
char *file_cl_src = "../src/kernel_ProdTensor.cl"; \
|
||||||
char *func_cl_name = "prodTensorLin_" #type; \
|
char *func_cl_name = "prodTensorLin_" #type; \
|
||||||
CL_GEN_SETUP_(type,file_cl_src,func_cl_name);\
|
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
|
||||||
/*/ Set the arguments of the kernel */ \
|
/*/ Set the arguments of the kernel */ \
|
||||||
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(M1->dim->rank)); \
|
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(M1->dim->rank)); \
|
||||||
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \
|
ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \
|
||||||
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \
|
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \
|
||||||
ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \
|
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \
|
||||||
CL_EXEC_KERNEL(type);\
|
checkError(ret,__func__,"Error: Failed to set kernel arguments! ");\
|
||||||
|
EXEC_cl_KERNEL(type);\
|
||||||
|
READ_BUF_N_CLEANUP(type)\
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
/* M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...<l=n / xl = y0,x{l+1}=y1, x{n}=yl et zi=xi i<n-l et zj=y{j-(n-l)} j>=n-l alor p=n+m-2l\
|
/* M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...<l=n / xl = y0,x{l+1}=y1, x{n}=yl et zi=xi i<n-l et zj=y{j-(n-l)} j>=n-l alor p=n+m-2l\
|
||||||
@@ -147,21 +189,98 @@ void cl_tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor
|
|||||||
tensor_##type *M= *MM;\
|
tensor_##type *M= *MM;\
|
||||||
char *file_cl_src = "../src/kernel_ProdContractnTensor.cl"; \
|
char *file_cl_src = "../src/kernel_ProdContractnTensor.cl"; \
|
||||||
char *func_cl_name = "prodContractnTensorLin_" #type; \
|
char *func_cl_name = "prodContractnTensorLin_" #type; \
|
||||||
CL_GEN_SETUP_(type,file_cl_src,func_cl_name);\
|
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
|
||||||
\
|
\
|
||||||
/*/ Set the arguments of the kernel */ \
|
/*/ Set the arguments of the kernel */ \
|
||||||
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(dSub1->rank)); \
|
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(dSub1->rank)); \
|
||||||
ret = clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \
|
ret |= clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \
|
||||||
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \
|
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \
|
||||||
ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \
|
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \
|
||||||
ret = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&M_mem_obj); \
|
ret |= clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&M_mem_obj); \
|
||||||
|
checkError(ret,__func__,"Error: Failed to set kernel arguments! ");\
|
||||||
\
|
\
|
||||||
CL_EXEC_KERNEL(type);\
|
EXEC_cl_KERNEL(type);\
|
||||||
|
READ_BUF_N_CLEANUP(type)\
|
||||||
\
|
\
|
||||||
} \
|
} \
|
||||||
|
\
|
||||||
|
\
|
||||||
CL_GEN_FUNC_TENSOR(TYPE_FLOAT);
|
void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t div0Wsz, size_t div1Wsz) { \
|
||||||
CL_GEN_FUNC_TENSOR(TYPE_DOUBLE);
|
dimension *dd; \
|
||||||
|
add_dimension(&dd, M0->dim, M1->dim); \
|
||||||
|
(*MM)=CREATE_TENSOR_##type(dd); \
|
||||||
|
tensor_##type *M = *MM; \
|
||||||
|
char *file_cl_src = "../src/kernel_2d_ProdTensor.cl"; \
|
||||||
|
char *func_cl_name = "prodTensor2dLin_" #type; \
|
||||||
|
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
|
||||||
|
/*/ Set the arguments of the kernel */ \
|
||||||
|
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(M1->dim->rank)); \
|
||||||
|
ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \
|
||||||
|
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \
|
||||||
|
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \
|
||||||
|
checkError(ret,__func__,"Error: Failed to set kernel arguments! ");\
|
||||||
|
EXEC_cl_2d_KERNEL(type,M0->dim->rank,M1->dim->rank,div0Wsz,div1Wsz);\
|
||||||
|
\
|
||||||
|
READ_BUF_N_CLEANUP(type)\
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
|
||||||
|
void checkError(cl_int error, const char *func_name, char *msg) {
|
||||||
|
if (error != CL_SUCCESS) {
|
||||||
|
printf("%s\n",msg);
|
||||||
|
switch (error) {
|
||||||
|
case CL_DEVICE_NOT_FOUND: printf(" Error in %s: Device not found.\n",func_name); break;
|
||||||
|
case CL_DEVICE_NOT_AVAILABLE: printf(" Error in %s: Device not available\n",func_name); break;
|
||||||
|
case CL_COMPILER_NOT_AVAILABLE: printf(" Error in %s: Compiler not available\n",func_name); break;
|
||||||
|
case CL_MEM_OBJECT_ALLOCATION_FAILURE: printf(" Error in %s: Memory object allocation failure\n",func_name); break;
|
||||||
|
case CL_OUT_OF_RESOURCES: printf(" Error in %s: Out of resources\n",func_name); break;
|
||||||
|
case CL_OUT_OF_HOST_MEMORY: printf(" Error in %s: Out of host memory\n",func_name); break;
|
||||||
|
case CL_PROFILING_INFO_NOT_AVAILABLE: printf(" Error in %s: Profiling information not available\n",func_name); break;
|
||||||
|
case CL_MEM_COPY_OVERLAP: printf(" Error in %s: Memory copy overlap\n",func_name); break;
|
||||||
|
case CL_IMAGE_FORMAT_MISMATCH: printf(" Error in %s: Image format mismatch\n",func_name); break;
|
||||||
|
case CL_IMAGE_FORMAT_NOT_SUPPORTED: printf(" Error in %s: Image format not supported\n",func_name); break;
|
||||||
|
case CL_BUILD_PROGRAM_FAILURE: printf(" Error in %s: Program build failure\n",func_name); break;
|
||||||
|
case CL_MAP_FAILURE: printf(" Error in %s: Map failure\n",func_name); break;
|
||||||
|
case CL_INVALID_VALUE: printf(" Error in %s: Invalid value\n",func_name); break;
|
||||||
|
case CL_INVALID_DEVICE_TYPE: printf(" Error in %s: Invalid device type\n",func_name); break;
|
||||||
|
case CL_INVALID_PLATFORM: printf(" Error in %s: Invalid platform\n",func_name); break;
|
||||||
|
case CL_INVALID_DEVICE: printf(" Error in %s: Invalid device\n",func_name); break;
|
||||||
|
case CL_INVALID_CONTEXT: printf(" Error in %s: Invalid context\n",func_name); break;
|
||||||
|
case CL_INVALID_QUEUE_PROPERTIES: printf(" Error in %s: Invalid queue properties\n",func_name); break;
|
||||||
|
case CL_INVALID_COMMAND_QUEUE: printf(" Error in %s: Invalid command queue\n",func_name); break;
|
||||||
|
case CL_INVALID_HOST_PTR: printf(" Error in %s: Invalid host pointer\n",func_name); break;
|
||||||
|
case CL_INVALID_MEM_OBJECT: printf(" Error in %s: Invalid memory object\n",func_name); break;
|
||||||
|
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: printf(" Error in %s: Invalid image format descriptor\n",func_name); break;
|
||||||
|
case CL_INVALID_IMAGE_SIZE: printf(" Error in %s: Invalid image size\n",func_name); break;
|
||||||
|
case CL_INVALID_SAMPLER: printf(" Error in %s: Invalid sampler\n",func_name); break;
|
||||||
|
case CL_INVALID_BINARY: printf(" Error in %s: Invalid binary\n",func_name); break;
|
||||||
|
case CL_INVALID_BUILD_OPTIONS: printf(" Error in %s: Invalid build options\n",func_name); break;
|
||||||
|
case CL_INVALID_PROGRAM: printf(" Error in %s: Invalid program\n",func_name); break;
|
||||||
|
case CL_INVALID_PROGRAM_EXECUTABLE: printf(" Error in %s: Invalid program executable\n",func_name); break;
|
||||||
|
case CL_INVALID_KERNEL_NAME: printf(" Error in %s: Invalid kernel name\n",func_name); break;
|
||||||
|
case CL_INVALID_KERNEL_DEFINITION: printf(" Error in %s: Invalid kernel definition\n",func_name); break;
|
||||||
|
case CL_INVALID_KERNEL: printf(" Error in %s: Invalid kernel\n",func_name); break;
|
||||||
|
case CL_INVALID_ARG_INDEX: printf(" Error in %s: Invalid argument index\n",func_name); break;
|
||||||
|
case CL_INVALID_ARG_VALUE: printf(" Error in %s: Invalid argument value\n",func_name); break;
|
||||||
|
case CL_INVALID_ARG_SIZE: printf(" Error in %s: Invalid argument size\n",func_name); break;
|
||||||
|
case CL_INVALID_KERNEL_ARGS: printf(" Error in %s: Invalid kernel arguments\n",func_name); break;
|
||||||
|
case CL_INVALID_WORK_DIMENSION: printf(" Error in %s: Invalid work dimensionsension\n",func_name); break;
|
||||||
|
case CL_INVALID_WORK_GROUP_SIZE: printf(" Error in %s: Invalid work group size\n",func_name); break;
|
||||||
|
case CL_INVALID_WORK_ITEM_SIZE: printf(" Error in %s: Invalid work item size\n",func_name); break;
|
||||||
|
case CL_INVALID_GLOBAL_OFFSET: printf(" Error in %s: Invalid global offset\n",func_name); break;
|
||||||
|
case CL_INVALID_EVENT_WAIT_LIST: printf(" Error in %s: Invalid event wait list\n",func_name); break;
|
||||||
|
case CL_INVALID_EVENT: printf(" Error in %s: Invalid event\n",func_name); break;
|
||||||
|
case CL_INVALID_OPERATION: printf(" Error in %s: Invalid operation\n",func_name); break;
|
||||||
|
case CL_INVALID_GL_OBJECT: printf(" Error in %s: Invalid OpenGL object\n",func_name); break;
|
||||||
|
case CL_INVALID_BUFFER_SIZE: printf(" Error in %s: Invalid buffer size\n",func_name); break;
|
||||||
|
case CL_INVALID_MIP_LEVEL: printf(" Error in %s: Invalid mip-map level\n",func_name); break;
|
||||||
|
default: printf(" Error in %s: Unknown with code %d\n",func_name, error);
|
||||||
|
}
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
GEN_cl_FUNC_TENSOR(TYPE_FLOAT);
|
||||||
|
GEN_cl_FUNC_TENSOR(TYPE_DOUBLE);
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,8 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#define CL_TARGET_OPENCL_VERSION 300
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
#include <OpenCL/opencl.h>
|
#include <OpenCL/opencl.h>
|
||||||
#else
|
#else
|
||||||
@@ -12,10 +14,12 @@
|
|||||||
|
|
||||||
#include "tensor_t/tensor_t.h"
|
#include "tensor_t/tensor_t.h"
|
||||||
|
|
||||||
|
|
||||||
#define CL_GENERATE_TENSOR_TYPE(type) \
|
#define CL_GENERATE_TENSOR_TYPE(type) \
|
||||||
void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
|
void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
|
||||||
void cl_tensorContractnProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \
|
void cl_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \
|
||||||
|
void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t div0Wsz, size_t div1Wsz); \
|
||||||
|
/*void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t div0Wsz, size_t div1Wsz);*/ \
|
||||||
|
|
||||||
CL_GENERATE_TENSOR_TYPE(TYPE_FLOAT);
|
CL_GENERATE_TENSOR_TYPE(TYPE_FLOAT);
|
||||||
CL_GENERATE_TENSOR_TYPE(TYPE_DOUBLE);
|
CL_GENERATE_TENSOR_TYPE(TYPE_DOUBLE);
|
||||||
|
|||||||
@@ -28,8 +28,6 @@ EXEC=launch_$(NAME_TEST)_m
|
|||||||
TENSRC=$(TENSDIR)/src/tensor_t/tensor_t.c
|
TENSRC=$(TENSDIR)/src/tensor_t/tensor_t.c
|
||||||
TENSRC_O=$(TENSRC:.c=.o)
|
TENSRC_O=$(TENSRC:.c=.o)
|
||||||
|
|
||||||
clTENSRC=$(TENSDIR)/src/tensor_t/cl_tensor_t.c
|
|
||||||
clTENSRC_O=$(clTENSRC:.c=.o)
|
|
||||||
|
|
||||||
|
|
||||||
PERMSRC_O=$(YPERMDIR)/src/permutation_t/permutation_t.o
|
PERMSRC_O=$(YPERMDIR)/src/permutation_t/permutation_t.o
|
||||||
@@ -40,7 +38,7 @@ TOPTARGETS := all clean
|
|||||||
|
|
||||||
DEPS=$(DIMDIR) $(YPERMDIR) $(YTESTDIR) $(TENSDIR)
|
DEPS=$(DIMDIR) $(YPERMDIR) $(YTESTDIR) $(TENSDIR)
|
||||||
|
|
||||||
OBJ=$(DIMSRC_O) $(PERMSRC_O) $(TENSRC_O) $(clTENSRC_O)
|
OBJ=$(DIMSRC_O) $(PERMSRC_O) $(TENSRC_O)
|
||||||
|
|
||||||
LIB_YTEST=$(YTESTDIR)/libytest.so
|
LIB_YTEST=$(YTESTDIR)/libytest.so
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,6 @@
|
|||||||
|
|
||||||
//#include "permutation_t/permutation_t.h"
|
//#include "permutation_t/permutation_t.h"
|
||||||
#include "tensor_t/tensor_t.h"
|
#include "tensor_t/tensor_t.h"
|
||||||
#include "tensor_t/cl_tensor_t.h"
|
|
||||||
|
|
||||||
TEST(rank){
|
TEST(rank){
|
||||||
dimension *D=create_dim(4);
|
dimension *D=create_dim(4);
|
||||||
@@ -179,98 +178,6 @@ TEST(tensorContractnProd_TYPE_FLOAT2 ){
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(cl_tensorContractnProd_TYPE_FLOAT2 ){
|
|
||||||
dimension *d0=create_dim(3);
|
|
||||||
dimension *d1=create_dim(3);
|
|
||||||
|
|
||||||
d0->perm[0]=35;
|
|
||||||
d0->perm[1]=32; //3;
|
|
||||||
d0->perm[2]=23;
|
|
||||||
|
|
||||||
d1->perm[0]=32;
|
|
||||||
d1->perm[1]=23;//3;
|
|
||||||
d1->perm[2]=44;
|
|
||||||
|
|
||||||
updateRankDim(d0);
|
|
||||||
updateRankDim(d1);
|
|
||||||
|
|
||||||
|
|
||||||
tensor_TYPE_FLOAT *M0 = CREATE_TENSOR_TYPE_FLOAT(d0);
|
|
||||||
tensor_TYPE_FLOAT *M1 = CREATE_TENSOR_TYPE_FLOAT(d1);
|
|
||||||
|
|
||||||
LOG("M0->dim->rank = %ld\n",M0->dim->rank);
|
|
||||||
LOG("M1->dim->rank = %ld\n",M1->dim->rank);
|
|
||||||
for(size_t i=0; i<M0->dim->rank;++i) M0->x[i]=i*0.1 +1;
|
|
||||||
for(size_t i=0; i<M1->dim->rank;++i) M1->x[i]=i*0.003 + 2;
|
|
||||||
|
|
||||||
// print_tensor_float(M0,"M0");
|
|
||||||
// print_tensor_float(M1,"M1");
|
|
||||||
|
|
||||||
tensor_TYPE_FLOAT *M;
|
|
||||||
tensor_TYPE_FLOAT *MnO;
|
|
||||||
|
|
||||||
tensorContractnProdNotOpt_TYPE_FLOAT(&M, M0,M1,2);
|
|
||||||
// print_tensor_float(M,"M");
|
|
||||||
cl_tensorContractnProd_TYPE_FLOAT(&MnO, M0,M1,2);
|
|
||||||
|
|
||||||
|
|
||||||
// print_tensor_float(MnO,"MnO");
|
|
||||||
|
|
||||||
// for(size_t i=0;i<M->dim->rank;++i)
|
|
||||||
// EXPECT_EQ_TYPE_FLOAT(M->x[i],MnO->x[i]);
|
|
||||||
|
|
||||||
//EXPECT_ARRAY_EQ_TYPE_FLOAT(M->x,M->dim->rank,MnO->x,MnO->dim->rank);
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(cl_tensorContractnProd_TYPE_DOUBLE2 ){
|
|
||||||
dimension *d0=create_dim(3);
|
|
||||||
dimension *d1=create_dim(3);
|
|
||||||
|
|
||||||
d0->perm[0]=125;
|
|
||||||
d0->perm[1]=52; //3;
|
|
||||||
d0->perm[2]=63;
|
|
||||||
|
|
||||||
d1->perm[0]=52;
|
|
||||||
d1->perm[1]=63;//3;
|
|
||||||
d1->perm[2]=54;
|
|
||||||
|
|
||||||
updateRankDim(d0);
|
|
||||||
updateRankDim(d1);
|
|
||||||
|
|
||||||
|
|
||||||
tensor_TYPE_DOUBLE *M0 = CREATE_TENSOR_TYPE_DOUBLE(d0);
|
|
||||||
tensor_TYPE_DOUBLE *M1 = CREATE_TENSOR_TYPE_DOUBLE(d1);
|
|
||||||
|
|
||||||
LOG("M0->dim->rank = %ld\n",M0->dim->rank);
|
|
||||||
LOG("M1->dim->rank = %ld\n",M1->dim->rank);
|
|
||||||
for(size_t i=0; i<M0->dim->rank;++i) M0->x[i]=i*0.1 +1;
|
|
||||||
for(size_t i=0; i<M1->dim->rank;++i) M1->x[i]=i*0.003 + 2;
|
|
||||||
|
|
||||||
//print_tensor_double(M0,"M0");
|
|
||||||
//print_tensor_double(M1,"M1");
|
|
||||||
|
|
||||||
tensor_TYPE_DOUBLE *M;
|
|
||||||
tensor_TYPE_DOUBLE *MnO;
|
|
||||||
|
|
||||||
tensorContractnProdNotOpt_TYPE_DOUBLE(&M, M0,M1,2);
|
|
||||||
//tensorContractnProd_TYPE_DOUBLE(&M, M0,M1,2);
|
|
||||||
//print_tensor_double(M,"M");
|
|
||||||
cl_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2);
|
|
||||||
|
|
||||||
|
|
||||||
//print_tensor_double(MnO,"MnO");
|
|
||||||
|
|
||||||
// for(size_t i=0;i<M->dim->rank;++i)
|
|
||||||
// EXPECT_EQ_TYPE_DOUBLE(M->x[i],MnO->x[i]);
|
|
||||||
|
|
||||||
EXPECT_ARRAY_EQ_TYPE_DOUBLE(M->x,M->dim->rank,MnO->x,MnO->dim->rank);
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
TEST(tensorContractnProd_TYPE_DOUBLE2 ){
|
TEST(tensorContractnProd_TYPE_DOUBLE2 ){
|
||||||
dimension *d0=create_dim(3);
|
dimension *d0=create_dim(3);
|
||||||
dimension *d1=create_dim(3);
|
dimension *d1=create_dim(3);
|
||||||
@@ -314,94 +221,6 @@ TEST(tensorContractnProd_TYPE_DOUBLE2 ){
|
|||||||
EXPECT_ARRAY_EQ_TYPE_DOUBLE(M->x,M->dim->rank,MnO->x,MnO->dim->rank);
|
EXPECT_ARRAY_EQ_TYPE_DOUBLE(M->x,M->dim->rank,MnO->x,MnO->dim->rank);
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(TensorProdCL){
|
|
||||||
dimension *d0=create_dim(3);
|
|
||||||
dimension *d1=create_dim(2);
|
|
||||||
|
|
||||||
d0->perm[0]=2;
|
|
||||||
d0->perm[1]=3;
|
|
||||||
d0->perm[2]=2;
|
|
||||||
|
|
||||||
d1->perm[0]=2;
|
|
||||||
d1->perm[1]=3;
|
|
||||||
|
|
||||||
updateRankDim(d0);
|
|
||||||
updateRankDim(d1);
|
|
||||||
|
|
||||||
|
|
||||||
tensor_TYPE_FLOAT *M0 = CREATE_TENSOR_TYPE_FLOAT(d0);
|
|
||||||
tensor_TYPE_FLOAT *M1 = CREATE_TENSOR_TYPE_FLOAT(d1);
|
|
||||||
|
|
||||||
LOG("M0->dim->rank = %ld\n",M0->dim->rank);
|
|
||||||
LOG("M1->dim->rank = %ld\n",M1->dim->rank);
|
|
||||||
for(size_t i=0; i<M0->dim->rank;++i) M0->x[i]=i*0.1 +1;
|
|
||||||
for(size_t i=0; i<M1->dim->rank;++i) M1->x[i]=i*0.003 + 2;
|
|
||||||
|
|
||||||
print_tensor_float(M0,"M0");
|
|
||||||
print_tensor_float(M1,"M1");
|
|
||||||
|
|
||||||
|
|
||||||
tensor_TYPE_FLOAT *M;
|
|
||||||
tensor_TYPE_FLOAT *Mn;
|
|
||||||
|
|
||||||
tensorProd_TYPE_FLOAT(&M,M0,M1);
|
|
||||||
cl_tensorProd_TYPE_FLOAT(&Mn,M0,M1);
|
|
||||||
LOG("M->dim->rank = %ld\n",M->dim->rank);
|
|
||||||
|
|
||||||
print_tensor_float(M,"M");
|
|
||||||
|
|
||||||
EXPECT_ARRAY_EQ_TYPE_FLOAT(M->x,M->dim->rank,Mn->x,Mn->dim->rank);
|
|
||||||
|
|
||||||
|
|
||||||
print_tensor_float(Mn,"Mn");
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(VS_cl_tensorContractnProd_TYPE_DOUBLE2 ){
|
|
||||||
dimension *d0=create_dim(3);
|
|
||||||
dimension *d1=create_dim(3);
|
|
||||||
|
|
||||||
d0->perm[0]=125;
|
|
||||||
d0->perm[1]=52; //3;
|
|
||||||
d0->perm[2]=63;
|
|
||||||
|
|
||||||
d1->perm[0]=52;
|
|
||||||
d1->perm[1]=63;//3;
|
|
||||||
d1->perm[2]=154;
|
|
||||||
|
|
||||||
updateRankDim(d0);
|
|
||||||
updateRankDim(d1);
|
|
||||||
|
|
||||||
|
|
||||||
tensor_TYPE_DOUBLE *M0 = CREATE_TENSOR_TYPE_DOUBLE(d0);
|
|
||||||
tensor_TYPE_DOUBLE *M1 = CREATE_TENSOR_TYPE_DOUBLE(d1);
|
|
||||||
|
|
||||||
LOG("M0->dim->rank = %ld\n",M0->dim->rank);
|
|
||||||
LOG("M1->dim->rank = %ld\n",M1->dim->rank);
|
|
||||||
for(size_t i=0; i<M0->dim->rank;++i) M0->x[i]=i*0.1 +1;
|
|
||||||
for(size_t i=0; i<M1->dim->rank;++i) M1->x[i]=i*0.003 + 2;
|
|
||||||
|
|
||||||
//print_tensor_double(M0,"M0");
|
|
||||||
//print_tensor_double(M1,"M1");
|
|
||||||
|
|
||||||
//tensor_TYPE_DOUBLE *M;
|
|
||||||
tensor_TYPE_DOUBLE *MnO;
|
|
||||||
|
|
||||||
//tensorContractnProdNotOpt_TYPE_DOUBLE(&M, M0,M1,2);
|
|
||||||
//tensorContractnProd_TYPE_DOUBLE(&M, M0,M1,2);
|
|
||||||
//print_tensor_double(M,"M");
|
|
||||||
cl_tensorContractnProd_TYPE_DOUBLE(&MnO, M0,M1,2);
|
|
||||||
|
|
||||||
|
|
||||||
//print_tensor_double(MnO,"MnO");
|
|
||||||
|
|
||||||
// for(size_t i=0;i<M->dim->rank;++i)
|
|
||||||
// EXPECT_EQ_TYPE_DOUBLE(M->x[i],MnO->x[i]);
|
|
||||||
|
|
||||||
//EXPECT_ARRAY_EQ_TYPE_DOUBLE(M->x,M->dim->rank,MnO->x,MnO->dim->rank);
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -450,8 +269,6 @@ TEST(VStensorContractnProd_TYPE_DOUBLE2 ){
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char **argv){
|
int main(int argc, char **argv){
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user