From c417c7aece0f1c914bffbcc52d158103d80df34e Mon Sep 17 00:00:00 2001
From: fanasina
Date: Thu, 1 Feb 2024 17:15:07 +0100
Subject: [PATCH] add 2d kernels cl

---
 tensor_t/src/kernel_2d_ProdContractnTensor.cl | 48 +++++++++++++++++++
 tensor_t/src/kernel_2d_ProdTensor.cl          | 39 +++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 tensor_t/src/kernel_2d_ProdContractnTensor.cl
 create mode 100644 tensor_t/src/kernel_2d_ProdTensor.cl

diff --git a/tensor_t/src/kernel_2d_ProdContractnTensor.cl b/tensor_t/src/kernel_2d_ProdContractnTensor.cl
new file mode 100644
index 0000000..b6af932
--- /dev/null
+++ b/tensor_t/src/kernel_2d_ProdContractnTensor.cl
@@ -0,0 +1,48 @@
+/*
+ * Contracted product of two row-major operands: each work-item of a 1-D
+ * NDRange computes one element of Mx.
+ *
+ *   dSubRank  row length of M1x and of Mx
+ *   dMRank    length of the contracted dimension (row length of M0x)
+ *   M0x       left operand, read at  (i / dSubRank) * dMRank + k
+ *   M1x       right operand, read at (i % dSubRank) + dSubRank * k
+ *   Mx        output, Mx[i] = sum over k in [0, dMRank) of the products
+ *
+ * Every work-item stores to Mx[i] unconditionally, so the global size must
+ * not exceed the number of elements of Mx.
+ */
+__kernel void prodContractnTensorLin_TYPE_FLOAT(long unsigned int dSubRank, long unsigned int dMRank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
+  const size_t i = get_global_id(0);
+  const size_t row = i / dSubRank;
+  const size_t col = i % dSubRank;
+
+  /* Accumulate in a private variable and store once, rather than
+     read-modify-writing global memory on every iteration. */
+  float acc = 0.0f;
+  for (size_t k = 0; k < dMRank; k++) {
+    acc += M0x[row * dMRank + k] * M1x[col + dSubRank * k];
+  }
+  Mx[i] = acc;
+}
+
+/* Double precision is an optional extension that must be enabled explicitly
+   on OpenCL 1.0 and 1.1 compilers; the guard skips the pragma where the
+   extension macro is not defined. */
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+/* Double-precision variant of prodContractnTensorLin_TYPE_FLOAT; same
+   indexing and the same global-size requirement. */
+__kernel void prodContractnTensorLin_TYPE_DOUBLE(long unsigned int dSubRank, long unsigned int dMRank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
+  const size_t i = get_global_id(0);
+  const size_t row = i / dSubRank;
+  const size_t col = i % dSubRank;
+
+  /* Private accumulator: one global store instead of one per iteration. */
+  double acc = 0.0;
+  for (size_t k = 0; k < dMRank; k++) {
+    acc += M0x[row * dMRank + k] * M1x[col + dSubRank * k];
+  }
+  Mx[i] = acc;
+}
diff --git a/tensor_t/src/kernel_2d_ProdTensor.cl b/tensor_t/src/kernel_2d_ProdTensor.cl
new file mode 100644
index 0000000..e939e29
--- /dev/null
+++ b/tensor_t/src/kernel_2d_ProdTensor.cl
@@ -0,0 +1,39 @@
+/*
+ * Outer product on a 2-D NDRange: the work-item at (i, j) computes
+ * Mx[i * M1rank + j] = M0x[i] * M1x[j].
+ *
+ *   M1rank  row stride of Mx (presumably the element count of M1x;
+ *           confirm against the host code)
+ *   M0x     operand indexed by dimension 0
+ *   M1x     operand indexed by dimension 1
+ *   Mx      output
+ *
+ * No bounds check is made, so the global size in each dimension must not
+ * exceed the extent of the corresponding operand.
+ */
+__kernel void prodTensor2dLin_TYPE_FLOAT(long unsigned int M1rank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
+  /* Built from group and local ids, so a global work offset is ignored;
+     matches get_global_id() when the kernel is enqueued without a global
+     offset and with uniform work-groups. */
+  const size_t i = get_group_id(0) * get_local_size(0) + get_local_id(0);
+  const size_t j = get_group_id(1) * get_local_size(1) + get_local_id(1);
+
+  Mx[i * M1rank + j] = M0x[i] * M1x[j];
+}
+
+/* Double precision is an optional extension that must be enabled explicitly
+   on OpenCL 1.0 and 1.1 compilers; the guard skips the pragma where the
+   extension macro is not defined. */
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+/* Double-precision variant of prodTensor2dLin_TYPE_FLOAT.
+   NOTE(review): the name is spelled "prodTensori2dLin" (extra 'i'), unlike
+   the float kernel. It is kept as is because renaming a kernel changes the
+   name host code must pass to clCreateKernel; confirm which spelling the
+   host uses. */
+__kernel void prodTensori2dLin_TYPE_DOUBLE(long unsigned int M1rank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
+  const size_t i = get_group_id(0) * get_local_size(0) + get_local_id(0);
+  const size_t j = get_group_id(1) * get_local_size(1) + get_local_id(1);
+
+  Mx[i * M1rank + j] = M0x[i] * M1x[j];
+}