From 7dd749f85e30587ac05c359b575fa78d6559fe95 Mon Sep 17 00:00:00 2001 From: fanasina Date: Fri, 2 Feb 2024 23:13:22 +0100 Subject: [PATCH] add endian option in cl tensor functions --- tensor_t/src/kernel_2d_ProdTensor.cl | 29 +++++++++++++ tensor_t/src/kernel_ProdContractnTensor.cl | 34 +++++++++++++++ tensor_t/src/kernel_ProdTensor.cl | 19 ++++++++ tensor_t/src/tensor_t/cl_tensor_t.c | 50 +++++++++++++++++++--- tensor_t/test_cl/is_good.c | 1 + 5 files changed, 127 insertions(+), 6 deletions(-) diff --git a/tensor_t/src/kernel_2d_ProdTensor.cl b/tensor_t/src/kernel_2d_ProdTensor.cl index e939e29..7e51094 100644 --- a/tensor_t/src/kernel_2d_ProdTensor.cl +++ b/tensor_t/src/kernel_2d_ProdTensor.cl @@ -25,6 +25,35 @@ __kernel void prodTensori2dLin_TYPE_DOUBLE(long unsigned int M1rank, __global co } +__kernel void prodTensor2dLinNotEndian_TYPE_FLOAT(long unsigned int M0rank, __global const float *M0x , __global const float *M1x, __global float *Mx ){ + + //Rebuild this work-item's 2D index (i,j) from its group and local ids; it writes Mx[k]=M0x[i]*M1x[j] with k=i+M0rank*j + size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0); + size_t j = get_group_id(1)*get_local_size(1) + get_local_id(1); + + //size_t i = get_global_id(0); + //size_t j = get_global_id(1); + + size_t k = i + M0rank * j; + Mx[k] = M0x[i] * M1x[j]; +} + +__kernel void prodTensor2dLinNotEndian_TYPE_DOUBLE(long unsigned int M0rank, __global const double *M0x , __global const double *M1x, __global double *Mx ){ + + //Rebuild this work-item's 2D index (i,j) from its group and local ids; it writes Mx[k]=M0x[i]*M1x[j] with k=i+M0rank*j + size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0); + size_t j = get_group_id(1)*get_local_size(1) + get_local_id(1); + + //size_t i = get_global_id(0); + //size_t j = get_global_id(1); + + size_t k = i + M0rank * j; + Mx[k] = M0x[i] * M1x[j]; + +} + + + diff --git a/tensor_t/src/kernel_ProdContractnTensor.cl b/tensor_t/src/kernel_ProdContractnTensor.cl index b6af932..087521d 100644 --- a/tensor_t/src/kernel_ProdContractnTensor.cl +++ b/tensor_t/src/kernel_ProdContractnTensor.cl 
@@ -34,3 +34,37 @@ __kernel void prodContractnTensorLin_TYPE_DOUBLE(long unsigned int dSubRank, lon + +__kernel void prodContractnTensorLinNotEndian_TYPE_FLOAT(long unsigned int dSubRank, long unsigned int dMRank, __global const float *M0x , __global const float *M1x, __global float *Mx ){ + + //Work-item i zeroes Mx[i], then adds M0x[a0_id+dSubRank*k]*M1x[a1_id*dMRank+k] for every k below dMRank, where a0_id=i%dSubRank and a1_id=i/dSubRank + size_t i = get_global_id(0); + size_t k, a0_id, a1_id, n0_id, n1_id; + a0_id = i % dSubRank; + a1_id = i / dSubRank; + Mx[i] = 0; + for (k = 0; k < dMRank; k++) { + n0_id = a0_id + dSubRank * k; + n1_id = a1_id * dMRank + k; + Mx[i] += M0x[n0_id] * M1x[n1_id]; + } +} + +__kernel void prodContractnTensorLinNotEndian_TYPE_DOUBLE(long unsigned int dSubRank, long unsigned int dMRank, __global const double *M0x , __global const double *M1x, __global double *Mx ){ + + //Work-item i zeroes Mx[i], then adds M0x[a0_id+dSubRank*k]*M1x[a1_id*dMRank+k] for every k below dMRank, where a0_id=i%dSubRank and a1_id=i/dSubRank + size_t i = get_global_id(0); + size_t k, a0_id, a1_id, n0_id, n1_id; + a0_id = i % dSubRank; + a1_id = i / dSubRank; + Mx[i] = 0; + for (k = 0; k < dMRank; k++) { + n0_id = a0_id + dSubRank * k; + n1_id = a1_id * dMRank + k; + Mx[i] += M0x[n0_id] * M1x[n1_id]; + } +} + + + + diff --git a/tensor_t/src/kernel_ProdTensor.cl b/tensor_t/src/kernel_ProdTensor.cl index 8363e44..a0a785d 100644 --- a/tensor_t/src/kernel_ProdTensor.cl +++ b/tensor_t/src/kernel_ProdTensor.cl @@ -16,6 +16,25 @@ __kernel void prodTensorLin_TYPE_DOUBLE(long unsigned int M1rank, __global const Mx[k] = M0x[i] * M1x[j]; } +__kernel void prodTensorLinNotEndian_TYPE_FLOAT(long unsigned int M0rank, __global const float *M0x , __global const float *M1x, __global float *Mx ){ + + //Work-item k splits its global id into i=k%M0rank and j=k/M0rank, then stores M0x[i]*M1x[j] in Mx[k] + size_t k = get_global_id(0); + size_t i = k % M0rank; + size_t j = k / M0rank; + Mx[k] = M0x[i] * M1x[j]; +} + +__kernel void prodTensorLinNotEndian_TYPE_DOUBLE(long unsigned int M0rank, __global const double *M0x , __global const double *M1x, __global double *Mx ){ + + //Get the index of the current element to 
be processed + size_t k = get_global_id(0); + size_t i = k % M0rank; + size_t j = k / M0rank; + Mx[k] = M0x[i] * M1x[j]; +} + + diff --git a/tensor_t/src/tensor_t/cl_tensor_t.c b/tensor_t/src/tensor_t/cl_tensor_t.c index c3c08fa..50079a1 100644 --- a/tensor_t/src/tensor_t/cl_tensor_t.c +++ b/tensor_t/src/tensor_t/cl_tensor_t.c @@ -146,10 +146,22 @@ void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type * (*MM)=CREATE_TENSOR_##type(dd); \ tensor_##type *M = *MM; \ char *file_cl_src = "../src/kernel_ProdTensor.cl"; \ - char *func_cl_name = "prodTensorLin_" #type; \ + char *func_cl_nameEndian = "prodTensorLin_" #type; \ + char *func_cl_nameNotEndian = "prodTensorLinNotEndian_" #type; \ + char *func_cl_name; \ + size_t MeDimRank;\ + if(endian){\ + func_cl_name = func_cl_nameEndian;\ + MeDimRank = M1->dim->rank;\ + \ + }else{\ + func_cl_name = func_cl_nameNotEndian;\ + MeDimRank = M0->dim->rank;\ + \ + }\ SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\ /*/ Set the arguments of the kernel */ \ - ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(M1->dim->rank)); \ + ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&MeDimRank); \ ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \ ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \ ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \ @@ -188,11 +200,24 @@ void cl_tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor *MM = CREATE_TENSOR_##type(dd);\ tensor_##type *M= *MM;\ char *file_cl_src = "../src/kernel_ProdContractnTensor.cl"; \ - char *func_cl_name = "prodContractnTensorLin_" #type; \ + /*char *func_cl_name = "prodContractnTensorLin_" #type;*/ \ + char *func_cl_nameEndian = "prodContractnTensorLin_" #type; \ + char *func_cl_nameNotEndian = "prodContractnTensorLinNotEndian_" #type; \ + char *func_cl_name; \ + size_t dSubRank;\ + if(endian){\ + func_cl_name = func_cl_nameEndian;\ + dSubRank = 
dSub1->rank;\ + \ + }else{\ + func_cl_name = func_cl_nameNotEndian;\ + dSubRank = dSub0->rank;\ + \ + }\ SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\ \ /*/ Set the arguments of the kernel */ \ - ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(dSub1->rank)); \ + ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&dSubRank); \ ret |= clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \ ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \ ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \ @@ -211,10 +236,23 @@ void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type (*MM)=CREATE_TENSOR_##type(dd); \ tensor_##type *M = *MM; \ char *file_cl_src = "../src/kernel_2d_ProdTensor.cl"; \ - char *func_cl_name = "prodTensor2dLin_" #type; \ + /*char *func_cl_name = "prodTensor2dLin_" #type;*/ \ + char *func_cl_nameEndian = "prodTensor2dLin_" #type; \ + char *func_cl_nameNotEndian = "prodTensor2dLinNotEndian_" #type; \ + char *func_cl_name; \ + size_t MeDimRank;\ + if(endian){\ + func_cl_name = func_cl_nameEndian;\ + MeDimRank = M1->dim->rank;\ + \ + }else{\ + func_cl_name = func_cl_nameNotEndian;\ + MeDimRank = M0->dim->rank;\ + \ + }\ SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\ /*/ Set the arguments of the kernel */ \ - ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(M1->dim->rank)); \ + ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(MeDimRank)); \ ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \ ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \ ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \ diff --git a/tensor_t/test_cl/is_good.c b/tensor_t/test_cl/is_good.c index eee207a..05b0755 100644 --- a/tensor_t/test_cl/is_good.c +++ b/tensor_t/test_cl/is_good.c @@ -19,6 +19,7 @@ #include "tensor_t/cl_tensor_t.h" TEST(rank){ + endian=false; dimension *D=create_dim(4); D->perm[0]=2; 
D->perm[1]=3;