split cl test

This commit is contained in:
2024-02-01 17:14:00 +01:00
parent b83a125bac
commit 5939360b29
5 changed files with 163 additions and 225 deletions
+154 -35
View File
@@ -3,7 +3,9 @@
#define MAX_SOURCE_SIZE (0x100000)
#define CL_GEN_SETUP_(type,file_cl_src,func_cl_name)\
#define SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name)\
/* Load the kernel source code into the array source_str*/ \
FILE *fp; \
char *source_str; \
@@ -28,13 +30,24 @@
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); \
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, \
&device_id, &ret_num_devices); \
size_t returned_size = 0;\
size_t max_workgroup_size = 0;\
ret = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_workgroup_size, &returned_size);\
checkError(ret,__func__,"Error: Failed to retrieve device info!");\
printf(" ===========================================================++> return size: %ld\n max group sz: %ld\n", returned_size, max_workgroup_size);\
\
/*int gpu = 1;\
ret = clGetDeviceIDs( platform_id, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, \
&device_id, &ret_num_devices); */\
\
/*/ Create an OpenCL context */ \
cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); \
\
/*/ Create a command queue */ \
cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); \
/*cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);*/ \
cl_command_queue command_queue = clCreateCommandQueueWithProperties(context, device_id, NULL, &ret); /* NULL =default properties = in order */ \
\
checkError(ret,__func__,"Error: Failed to create command queue with properties ");\
\
/*/ Create memory buffers on the device for each vector */ \
cl_mem M0_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, \
@@ -43,25 +56,35 @@
M1->dim->rank * sizeof(type), NULL, &ret); \
cl_mem M_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, \
M->dim->rank * sizeof(type), NULL, &ret); \
if (!M0_mem_obj || !M1_mem_obj || !M_mem_obj) {\
printf("Error: Failed to creat buffer! %d\n", ret);\
exit(1);\
}\
\
/*/ Copy the lists A and B to their respective memory buffers */ \
ret = clEnqueueWriteBuffer(command_queue, M0_mem_obj, CL_TRUE, 0, \
M0->dim->rank * sizeof(type), M0->x, 0, NULL, NULL); \
ret = clEnqueueWriteBuffer(command_queue, M1_mem_obj, CL_TRUE, 0, \
ret |= clEnqueueWriteBuffer(command_queue, M1_mem_obj, CL_TRUE, 0, \
M1->dim->rank * sizeof(type), M1->x, 0, NULL, NULL); \
\
checkError(ret,__func__,"Error: Failed to read buffers! ");\
/*/ Create a program from the kernel source */ \
cl_program program = clCreateProgramWithSource(context, 1, \
(const char **)&source_str, (const size_t *)&source_size, &ret); \
\
printf("log 0\n");\
if (!program) {\
printf("Error: Failed to create compute program!\n");\
return ;\
}\
/*/ Build the program */ \
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); \
size_t len; clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, NULL, NULL, &len);\
char *log = malloc(sizeof(char)*len);\
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, len, log, NULL);\
printf("log: %s \n",log);\
\
if(ret != CL_SUCCESS){\
size_t len; \
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);\
char *log = malloc(sizeof(char)*len);\
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, len, log, NULL);\
printf("Error: Failed to build program executable!\n %s \n",log);\
}\
/*/ Create the OpenCL kernel */ \
/*char func_cl_name[250]; sprintf(func_cl_name,"prodTensorLin_%s", #type);*/ \
printf("cl_func_type = %s\n",func_cl_name); \
@@ -75,29 +98,46 @@
ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \
*/
#define CL_EXEC_KERNEL(type)\
#define EXEC_cl_KERNEL(type)\
/*/ Execute the OpenCL kernel on the list */ \
size_t global_item_size = M->dim->rank; /*/ Process the entire lists */ \
size_t local_item_size = 1; /*64;*/ /*/ Divide work items into groups of 64 */ \
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, \
&global_item_size, &local_item_size, 0, NULL, NULL); \
\
/*/ Read the memory buffer Mx on the device to the local variable M->x */ \
checkError(ret,__func__,"Error: Failed to execute! ");\
#define EXEC_cl_2d_KERNEL(type, global_W_sz0, global_W_sz1, local_W_sz0, local_W_sz1)\
/*/ Execute the OpenCL kernel on the list */ \
size_t global_item_size[2]={global_W_sz0, global_W_sz1};/*/ Process the entire lists */ \
size_t local_item_size[2]={local_W_sz0, local_W_sz1}; /* Divide work items into groups of 64 */ \
ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, \
global_item_size, local_item_size, 0, NULL, NULL); \
\
checkError(ret,__func__,"Error: Failed to execute! ");\
#define READ_BUF_N_CLEANUP(type) \
/*/ Read the memory buffer Mx on the device to the local variable M->x */ \
ret = clEnqueueReadBuffer(command_queue, M_mem_obj, CL_TRUE, 0, \
M->dim->rank * sizeof(type), M->x, 0, NULL, NULL); \
\
checkError(ret,__func__,"Error: Failed to read buffer! ");\
/*/ Clean up */ \
ret = clFlush(command_queue); \
ret = clFinish(command_queue); \
ret = clReleaseKernel(kernel); \
ret = clReleaseProgram(program); \
ret = clReleaseMemObject(M0_mem_obj); \
ret = clReleaseMemObject(M1_mem_obj); \
ret = clReleaseMemObject(M_mem_obj); \
ret = clReleaseCommandQueue(command_queue); \
ret = clReleaseContext(context); \
ret |= clFinish(command_queue); \
ret |= clReleaseKernel(kernel); \
ret |= clReleaseProgram(program); \
ret |= clReleaseMemObject(M0_mem_obj); \
ret |= clReleaseMemObject(M1_mem_obj); \
ret |= clReleaseMemObject(M_mem_obj); \
ret |= clReleaseCommandQueue(command_queue); \
ret |= clReleaseContext(context); \
checkError(ret,__func__,"Error: Failed to clean up! ");\
#define CL_GEN_FUNC_TENSOR(type)\
#define GEN_cl_FUNC_TENSOR(type)\
\
\
void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1) { \
@@ -107,13 +147,15 @@ void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *
tensor_##type *M = *MM; \
char *file_cl_src = "../src/kernel_ProdTensor.cl"; \
char *func_cl_name = "prodTensorLin_" #type; \
CL_GEN_SETUP_(type,file_cl_src,func_cl_name);\
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
/*/ Set the arguments of the kernel */ \
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(M1->dim->rank)); \
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \
ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \
CL_EXEC_KERNEL(type);\
ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \
checkError(ret,__func__,"Error: Failed to set kernel arguments! ");\
EXEC_cl_KERNEL(type);\
READ_BUF_N_CLEANUP(type)\
} \
\
/* M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...<l=n / xl = y0,x{l+1}=y1, x{n}=yl et zi=xi i<n-l et zj=y{j-(n-l)} j>=n-l alor p=n+m-2l\
@@ -147,21 +189,98 @@ void cl_tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor
tensor_##type *M= *MM;\
char *file_cl_src = "../src/kernel_ProdContractnTensor.cl"; \
char *func_cl_name = "prodContractnTensorLin_" #type; \
CL_GEN_SETUP_(type,file_cl_src,func_cl_name);\
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
\
/*/ Set the arguments of the kernel */ \
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(dSub1->rank)); \
ret = clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \
ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \
ret = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&M_mem_obj); \
ret |= clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \
ret |= clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&M_mem_obj); \
checkError(ret,__func__,"Error: Failed to set kernel arguments! ");\
\
CL_EXEC_KERNEL(type);\
EXEC_cl_KERNEL(type);\
READ_BUF_N_CLEANUP(type)\
\
} \
CL_GEN_FUNC_TENSOR(TYPE_FLOAT);
CL_GEN_FUNC_TENSOR(TYPE_DOUBLE);
\
\
void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t div0Wsz, size_t div1Wsz) { \
dimension *dd; \
add_dimension(&dd, M0->dim, M1->dim); \
(*MM)=CREATE_TENSOR_##type(dd); \
tensor_##type *M = *MM; \
char *file_cl_src = "../src/kernel_2d_ProdTensor.cl"; \
char *func_cl_name = "prodTensor2dLin_" #type; \
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
/*/ Set the arguments of the kernel */ \
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(M1->dim->rank)); \
ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \
checkError(ret,__func__,"Error: Failed to set kernel arguments! ");\
EXEC_cl_2d_KERNEL(type,M0->dim->rank,M1->dim->rank,div0Wsz,div1Wsz);\
\
READ_BUF_N_CLEANUP(type)\
} \
\
void checkError(cl_int error, const char *func_name, char *msg) {
if (error != CL_SUCCESS) {
printf("%s\n",msg);
switch (error) {
case CL_DEVICE_NOT_FOUND: printf(" Error in %s: Device not found.\n",func_name); break;
case CL_DEVICE_NOT_AVAILABLE: printf(" Error in %s: Device not available\n",func_name); break;
case CL_COMPILER_NOT_AVAILABLE: printf(" Error in %s: Compiler not available\n",func_name); break;
case CL_MEM_OBJECT_ALLOCATION_FAILURE: printf(" Error in %s: Memory object allocation failure\n",func_name); break;
case CL_OUT_OF_RESOURCES: printf(" Error in %s: Out of resources\n",func_name); break;
case CL_OUT_OF_HOST_MEMORY: printf(" Error in %s: Out of host memory\n",func_name); break;
case CL_PROFILING_INFO_NOT_AVAILABLE: printf(" Error in %s: Profiling information not available\n",func_name); break;
case CL_MEM_COPY_OVERLAP: printf(" Error in %s: Memory copy overlap\n",func_name); break;
case CL_IMAGE_FORMAT_MISMATCH: printf(" Error in %s: Image format mismatch\n",func_name); break;
case CL_IMAGE_FORMAT_NOT_SUPPORTED: printf(" Error in %s: Image format not supported\n",func_name); break;
case CL_BUILD_PROGRAM_FAILURE: printf(" Error in %s: Program build failure\n",func_name); break;
case CL_MAP_FAILURE: printf(" Error in %s: Map failure\n",func_name); break;
case CL_INVALID_VALUE: printf(" Error in %s: Invalid value\n",func_name); break;
case CL_INVALID_DEVICE_TYPE: printf(" Error in %s: Invalid device type\n",func_name); break;
case CL_INVALID_PLATFORM: printf(" Error in %s: Invalid platform\n",func_name); break;
case CL_INVALID_DEVICE: printf(" Error in %s: Invalid device\n",func_name); break;
case CL_INVALID_CONTEXT: printf(" Error in %s: Invalid context\n",func_name); break;
case CL_INVALID_QUEUE_PROPERTIES: printf(" Error in %s: Invalid queue properties\n",func_name); break;
case CL_INVALID_COMMAND_QUEUE: printf(" Error in %s: Invalid command queue\n",func_name); break;
case CL_INVALID_HOST_PTR: printf(" Error in %s: Invalid host pointer\n",func_name); break;
case CL_INVALID_MEM_OBJECT: printf(" Error in %s: Invalid memory object\n",func_name); break;
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: printf(" Error in %s: Invalid image format descriptor\n",func_name); break;
case CL_INVALID_IMAGE_SIZE: printf(" Error in %s: Invalid image size\n",func_name); break;
case CL_INVALID_SAMPLER: printf(" Error in %s: Invalid sampler\n",func_name); break;
case CL_INVALID_BINARY: printf(" Error in %s: Invalid binary\n",func_name); break;
case CL_INVALID_BUILD_OPTIONS: printf(" Error in %s: Invalid build options\n",func_name); break;
case CL_INVALID_PROGRAM: printf(" Error in %s: Invalid program\n",func_name); break;
case CL_INVALID_PROGRAM_EXECUTABLE: printf(" Error in %s: Invalid program executable\n",func_name); break;
case CL_INVALID_KERNEL_NAME: printf(" Error in %s: Invalid kernel name\n",func_name); break;
case CL_INVALID_KERNEL_DEFINITION: printf(" Error in %s: Invalid kernel definition\n",func_name); break;
case CL_INVALID_KERNEL: printf(" Error in %s: Invalid kernel\n",func_name); break;
case CL_INVALID_ARG_INDEX: printf(" Error in %s: Invalid argument index\n",func_name); break;
case CL_INVALID_ARG_VALUE: printf(" Error in %s: Invalid argument value\n",func_name); break;
case CL_INVALID_ARG_SIZE: printf(" Error in %s: Invalid argument size\n",func_name); break;
case CL_INVALID_KERNEL_ARGS: printf(" Error in %s: Invalid kernel arguments\n",func_name); break;
case CL_INVALID_WORK_DIMENSION: printf(" Error in %s: Invalid work dimensionsension\n",func_name); break;
case CL_INVALID_WORK_GROUP_SIZE: printf(" Error in %s: Invalid work group size\n",func_name); break;
case CL_INVALID_WORK_ITEM_SIZE: printf(" Error in %s: Invalid work item size\n",func_name); break;
case CL_INVALID_GLOBAL_OFFSET: printf(" Error in %s: Invalid global offset\n",func_name); break;
case CL_INVALID_EVENT_WAIT_LIST: printf(" Error in %s: Invalid event wait list\n",func_name); break;
case CL_INVALID_EVENT: printf(" Error in %s: Invalid event\n",func_name); break;
case CL_INVALID_OPERATION: printf(" Error in %s: Invalid operation\n",func_name); break;
case CL_INVALID_GL_OBJECT: printf(" Error in %s: Invalid OpenGL object\n",func_name); break;
case CL_INVALID_BUFFER_SIZE: printf(" Error in %s: Invalid buffer size\n",func_name); break;
case CL_INVALID_MIP_LEVEL: printf(" Error in %s: Invalid mip-map level\n",func_name); break;
default: printf(" Error in %s: Unknown with code %d\n",func_name, error);
}
exit(1);
}
}
GEN_cl_FUNC_TENSOR(TYPE_FLOAT);
GEN_cl_FUNC_TENSOR(TYPE_DOUBLE);
+6 -2
View File
@@ -4,6 +4,8 @@
#include <stdio.h>
#include <stdlib.h>
#define CL_TARGET_OPENCL_VERSION 300
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
@@ -12,10 +14,12 @@
#include "tensor_t/tensor_t.h"
#define CL_GENERATE_TENSOR_TYPE(type) \
void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1); \
void cl_tensorContractnProdNotOpt_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \
void cl_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber); \
void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t div0Wsz, size_t div1Wsz); \
/*void cl2d_tensorContractnProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *M1, size_t contractionNumber, size_t div0Wsz, size_t div1Wsz);*/ \
CL_GENERATE_TENSOR_TYPE(TYPE_FLOAT);
CL_GENERATE_TENSOR_TYPE(TYPE_DOUBLE);