From b83a125bac30fd8c9c9ca997fb187383a8fe957c Mon Sep 17 00:00:00 2001
From: fanasina
Date: Wed, 24 Jan 2024 19:02:16 +0100
Subject: [PATCH] put all kernel opencl funcs in src

---
 tensor_t/src/kernel_ProdContractnTensor.cl | 45 ++++++++++++++++++++++
 tensor_t/src/kernel_ProdTensor.cl          | 34 +++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 tensor_t/src/kernel_ProdContractnTensor.cl
 create mode 100644 tensor_t/src/kernel_ProdTensor.cl

diff --git a/tensor_t/src/kernel_ProdContractnTensor.cl b/tensor_t/src/kernel_ProdContractnTensor.cl
new file mode 100644
index 0000000..b6af932
--- /dev/null
+++ b/tensor_t/src/kernel_ProdContractnTensor.cl
@@ -0,0 +1,45 @@
+/*
+ * Kernels computing Mx from M0x and M1x, one output element per work-item.
+ *
+ * With i = get_global_id(0), a0 = i / dSubRank and a1 = i % dSubRank:
+ *   Mx[i] = sum over k in [0, dMRank) of
+ *           M0x[a0 * dMRank + k] * M1x[a1 + dSubRank * k]
+ *
+ * No bounds check is done on i; this presumably relies on the host
+ * enqueueing exactly one work-item per element of Mx (TODO confirm).
+ */
+
+/* On OpenCL 1.0/1.1 compilers, double is only usable once cl_khr_fp64 is enabled. */
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+/* Single-precision variant. */
+__kernel void prodContractnTensorLin_TYPE_FLOAT(long unsigned int dSubRank, long unsigned int dMRank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
+
+  //Get the index of the current element to be processed
+  const size_t i = get_global_id(0);
+  const size_t a0_id = i / dSubRank;
+  const size_t a1_id = i % dSubRank;
+  //Sum in a private variable so that Mx is written only once
+  float acc = 0;
+  for (size_t k = 0; k < dMRank; k++) {
+    acc += M0x[a0_id * dMRank + k] * M1x[a1_id + dSubRank * k];
+  }
+  Mx[i] = acc;
+}
+
+/* Double-precision variant. */
+__kernel void prodContractnTensorLin_TYPE_DOUBLE(long unsigned int dSubRank, long unsigned int dMRank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
+
+  //Get the index of the current element to be processed
+  const size_t i = get_global_id(0);
+  const size_t a0_id = i / dSubRank;
+  const size_t a1_id = i % dSubRank;
+  //Sum in a private variable so that Mx is written only once
+  double acc = 0;
+  for (size_t k = 0; k < dMRank; k++) {
+    acc += M0x[a0_id * dMRank + k] * M1x[a1_id + dSubRank * k];
+  }
+  Mx[i] = acc;
+}
diff --git a/tensor_t/src/kernel_ProdTensor.cl b/tensor_t/src/kernel_ProdTensor.cl
new file mode 100644
index 0000000..8363e44
--- /dev/null
+++ b/tensor_t/src/kernel_ProdTensor.cl
@@ -0,0 +1,34 @@
+/*
+ * Kernels computing Mx from M0x and M1x, one output element per work-item.
+ *
+ * With k = get_global_id(0), i = k / M1rank and j = k % M1rank:
+ *   Mx[k] = M0x[i] * M1x[j]
+ *
+ * No bounds check is done on k; this presumably relies on the host
+ * enqueueing exactly one work-item per element of Mx (TODO confirm).
+ */
+
+/* On OpenCL 1.0/1.1 compilers, double is only usable once cl_khr_fp64 is enabled. */
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+/* Single-precision variant. */
+__kernel void prodTensorLin_TYPE_FLOAT(long unsigned int M1rank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
+
+  //Get the index of the current element to be processed
+  const size_t k = get_global_id(0);
+  const size_t i = k / M1rank;
+  const size_t j = k % M1rank;
+  Mx[k] = M0x[i] * M1x[j];
+}
+
+/* Double-precision variant. */
+__kernel void prodTensorLin_TYPE_DOUBLE(long unsigned int M1rank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
+
+  //Get the index of the current element to be processed
+  const size_t k = get_global_id(0);
+  const size_t i = k / M1rank;
+  const size_t j = k % M1rank;
+  Mx[k] = M0x[i] * M1x[j];
+}