diff --git a/yopenCL/OneFileopencl.c b/yopenCL/OneFileopencl.c new file mode 100644 index 0000000..f4948ff --- /dev/null +++ b/yopenCL/OneFileopencl.c @@ -0,0 +1,138 @@ +#include +#include + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#include "ftest/ftest.h" + + +#define VECTOR_SIZE 1024 + +const char *saxpy_kernel = +"__kernel \n" +"void saxpy_kernel(float alpha, \n" +" __global float *A, \n" +" __global float *B, \n" +" __global float *C) \n" +"{ \n" +" // Get the index of the work-item \n" +" int index = get_global_id(0); \n" +" C[index] = alpha * A[index] + B[index]; \n" +"} \n"; + +TEST(openCL_one){ + int i; + // Alocate space for vectors A, B, C + float alpha = 3.0; + float *A = (float*)malloc(sizeof(float)*VECTOR_SIZE); + float *B = (float*)malloc(sizeof(float)*VECTOR_SIZE); + float *C = (float*)malloc(sizeof(float)*VECTOR_SIZE); + + for(i = 0; i&2 + echo "for example to compile: is_good.c" >&2 + exit 1 +fi + +if [ "$#" -le 1 ] ; then + echo "Usage: $0 $1" >&2 + echo "we can add more option for example '-D DEBUG=1' to have debug print of PRINT_DEBUG_ (tools_t macro), notice that PRINT_DEBUG is provide by ytest and can be activate with --debug option on runtime." + echo "The other compile option is '-g' to have gbd, and so on..." 
+    echo "for example: $0 $1 \"-D DEBUG=1 -g\""
+fi
+
+YTESTDIR=$PWD/../ytest_t
+
+gcc -o launch_is_good_c $1 -L$YTESTDIR $2 -lytest -lOpenCL -I$YTESTDIR/include_ytest/include
+echo "gcc -o launch_is_good_c $1 -L$YTESTDIR $2 -lytest -lOpenCL -I$YTESTDIR/include_ytest/include"
+
+export LD_LIBRARY_PATH=$YTESTDIR:$LD_LIBRARY_PATH
+
+
diff --git a/yopenCL/openF.c b/yopenCL/openF.c
new file mode 100644
index 0000000..810b583
--- /dev/null
+++ b/yopenCL/openF.c
@@ -0,0 +1,171 @@
+#include "ftest/ftest.h"
+#include "fmock/fmock.h"
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// #define CL_USE_DEPRECATED_OPENCL_1_2_APIS //
+
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#define MAX_SOURCE_SIZE (0x100000)
+
+// Stop the run with a message naming the failed call when an OpenCL
+// status code is anything other than CL_SUCCESS.
+static void check_cl(cl_int status, const char *what) {
+    if (status != CL_SUCCESS) {
+        fprintf(stderr, "%s failed (OpenCL error %d).\n", what, (int)status);
+        exit(1);
+    }
+}
+
+// malloc() wrapper that stops the run instead of returning NULL.
+static void *alloc_or_die(size_t size) {
+    void *p = malloc(size);
+    if (!p) {
+        fprintf(stderr, "Out of memory.\n");
+        exit(1);
+    }
+    return p;
+}
+
+// Runs the vector_add kernel from vector_add_kernel.cl on the default OpenCL
+// device and checks that C[i] == alpha * A[i] + B[i] for every element.
+TEST(openCL) {
+    // Create the two input vectors
+    int i;
+    float alpha = 2.0;
+    const int LIST_SIZE = 1024;
+    const size_t list_bytes = LIST_SIZE * sizeof(int);
+    int *A = alloc_or_die(list_bytes);
+    int *B = alloc_or_die(list_bytes);
+    for (i = 0; i < LIST_SIZE; i++) {
+        A[i] = i;
+        B[i] = LIST_SIZE - i;
+    }
+
+    // Load the kernel source code into the array source_str
+    FILE *fp = fopen("vector_add_kernel.cl", "r");
+    if (!fp) {
+        fprintf(stderr, "Failed to load kernel.\n");
+        exit(1);
+    }
+    char *source_str = alloc_or_die(MAX_SOURCE_SIZE);
+    size_t source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
+    fclose(fp);
+    if (source_size == 0) {
+        fprintf(stderr, "Failed to load kernel.\n");
+        exit(1);
+    }
+
+    // Get platform and device information
+    cl_platform_id platform_id = NULL;
+    cl_device_id device_id = NULL;
+    cl_uint ret_num_devices;
+    cl_uint ret_num_platforms;
+    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
+    check_cl(ret, "clGetPlatformIDs");
+    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1,
+            &device_id, &ret_num_devices);
+    check_cl(ret, "clGetDeviceIDs");
+
+    // Create an OpenCL context
+    cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
+    check_cl(ret, "clCreateContext");
+
+    // Create a command queue
+    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
+    check_cl(ret, "clCreateCommandQueue");
+
+
+    // Create memory buffers on the device for each vector
+    cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
+            list_bytes, NULL, &ret);
+    check_cl(ret, "clCreateBuffer(A)");
+    cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
+            list_bytes, NULL, &ret);
+    check_cl(ret, "clCreateBuffer(B)");
+    cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+            list_bytes, NULL, &ret);
+    check_cl(ret, "clCreateBuffer(C)");
+
+    // Copy the lists A and B to their respective memory buffers
+    ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
+            list_bytes, A, 0, NULL, NULL);
+    check_cl(ret, "clEnqueueWriteBuffer(A)");
+    ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0,
+            list_bytes, B, 0, NULL, NULL);
+    check_cl(ret, "clEnqueueWriteBuffer(B)");
+
+    // Create a program from the kernel source
+    cl_program program = clCreateProgramWithSource(context, 1,
+            (const char **)&source_str, (const size_t *)&source_size, &ret);
+    check_cl(ret, "clCreateProgramWithSource");
+
+    // Build the program
+    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
+    check_cl(ret, "clBuildProgram");
+
+    // Create the OpenCL kernel
+    cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);
+    check_cl(ret, "clCreateKernel");
+
+    // Set the arguments of the kernel
+    ret = clSetKernelArg(kernel, 0, sizeof(float), (void *)&alpha);
+    check_cl(ret, "clSetKernelArg(0)");
+    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&a_mem_obj);
+    check_cl(ret, "clSetKernelArg(1)");
+    ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&b_mem_obj);
+    check_cl(ret, "clSetKernelArg(2)");
+    ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&c_mem_obj);
+    check_cl(ret, "clSetKernelArg(3)");
+
+    // Execute the OpenCL kernel on the list
+    size_t global_item_size = LIST_SIZE; // Process the entire lists
+    size_t local_item_size = 64; // Divide work items into groups of 64
+    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
+            &global_item_size, &local_item_size, 0, NULL, NULL);
+    check_cl(ret, "clEnqueueNDRangeKernel");
+
+    // Read the memory buffer C on the device to the local variable C
+    int *C = alloc_or_die(list_bytes);
+    ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0,
+            list_bytes, C, 0, NULL, NULL);
+    check_cl(ret, "clEnqueueReadBuffer");
+
+    // Compare every element with the value computed on the host. The kernel
+    // stores its float result into an int buffer, so convert the same way.
+    for (i = 0; i < LIST_SIZE; i++) {
+        int expected = (int)(alpha * A[i] + B[i]);
+        EXPECT_EQ(expected, C[i]);
+    }
+
+    // Clean up
+    check_cl(clFlush(command_queue), "clFlush");
+    check_cl(clFinish(command_queue), "clFinish");
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    clReleaseMemObject(a_mem_obj);
+    clReleaseMemObject(b_mem_obj);
+    clReleaseMemObject(c_mem_obj);
+    clReleaseCommandQueue(command_queue);
+    clReleaseContext(context);
+    free(source_str);
+    free(A);
+    free(B);
+    free(C);
+}
+
+int main(int argc, char **argv){
+
+
+    run_all_tests_args(argc, argv);
+
+    return 0;
+}
+
diff --git a/yopenCL/vector_add_kernel.cl b/yopenCL/vector_add_kernel.cl
new file mode 100644
index 0000000..68e9940
--- /dev/null
+++ b/yopenCL/vector_add_kernel.cl
@@ -0,0 +1,8 @@
+__kernel void vector_add(float alpha, __global const int *A, __global const int *B, __global int *C) {
+
+    // Get the index of the current element to be processed
+    int i = get_global_id(0);
+
+    // Do the operation
+    C[i] = alpha * A[i] + B[i];
+}