add endian option in cl tensor functions
This commit is contained in:
@@ -25,6 +25,35 @@ __kernel void prodTensori2dLin_TYPE_DOUBLE(long unsigned int M1rank, __global co
|
||||
|
||||
}
|
||||
|
||||
/*
 * Element-wise outer product of two flattened float buffers, 2-D launch,
 * "not endian" output layout.
 *
 * Each work-item handles one (row, col) pair, where row comes from launch
 * dimension 0 and col from launch dimension 1, and writes
 *     Mx[row + M0rank * col] = M0x[row] * M1x[col]
 *
 * M0rank : stride applied to the dimension-1 index in the output
 *          (presumably the element count of M0x -- confirm against the caller).
 * M0x    : first input buffer, indexed by the dimension-0 position.
 * M1x    : second input buffer, indexed by the dimension-1 position.
 * Mx     : output buffer.
 *
 * The kernel performs no bounds check on either index.
 */
__kernel void prodTensor2dLinNotEndian_TYPE_FLOAT(long unsigned int M0rank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
    // Position is rebuilt from group/local ids, so any global work offset
    // passed at enqueue time is deliberately not part of the index.
    const size_t row = get_group_id(0) * get_local_size(0) + get_local_id(0);
    const size_t col = get_group_id(1) * get_local_size(1) + get_local_id(1);

    // Dimension-0 index varies fastest in the flattened output.
    const size_t out = row + M0rank * col;

    Mx[out] = M0x[row] * M1x[col];
}
|
||||
|
||||
/*
 * Element-wise outer product of two flattened double buffers, 2-D launch,
 * "not endian" output layout.
 *
 * Each work-item handles one (i, j) pair, where i comes from launch
 * dimension 0 and j from launch dimension 1, and writes
 *     Mx[i + M0rank * j] = M0x[i] * M1x[j]
 *
 * M0rank : stride applied to the dimension-1 index in the output
 *          (presumably the element count of M0x -- confirm against the caller).
 * M0x    : first input buffer, indexed by i.
 * M1x    : second input buffer, indexed by j.
 * Mx     : output buffer.
 *
 * The kernel performs no bounds check on either index.
 *
 * This is the correctly spelled entry point: the host builds the kernel name
 * as "prodTensor2dLinNotEndian_" followed by the type token, which matches the
 * FLOAT variant's spelling.
 */
__kernel void prodTensor2dLinNotEndian_TYPE_DOUBLE(long unsigned int M0rank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
    // Position is rebuilt from group/local ids, so any global work offset
    // passed at enqueue time is deliberately not part of the index.
    size_t i = get_group_id(0)*get_local_size(0) + get_local_id(0);
    size_t j = get_group_id(1)*get_local_size(1) + get_local_id(1);

    // Dimension-0 index varies fastest in the flattened output.
    size_t k = i + M0rank * j;
    Mx[k] = M0x[i] * M1x[j];
}

/*
 * Legacy entry point with the original misspelled name ("Tensori2d").
 * Kept so that any existing lookup of this exact name keeps working; it simply
 * forwards to the correctly spelled kernel (a kernel called from another
 * kernel behaves as an ordinary function call).
 */
__kernel void prodTensori2dLinNotEndian_TYPE_DOUBLE(long unsigned int M0rank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
    prodTensor2dLinNotEndian_TYPE_DOUBLE(M0rank, M0x, M1x, Mx);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -34,3 +34,37 @@ __kernel void prodContractnTensorLin_TYPE_DOUBLE(long unsigned int dSubRank, lon
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Contracted product of two flattened float buffers, "not endian" index layout.
 *
 * One work-item per output element i = get_global_id(0). With
 *     a0 = i % dSubRank
 *     a1 = i / dSubRank
 * the work-item computes
 *     Mx[i] = sum over k in [0, dMRank) of M0x[a0 + dSubRank * k] * M1x[a1 * dMRank + k]
 *
 * dSubRank : stride between consecutive k terms in M0x; also the divisor that
 *            splits i into (a0, a1), so it must be non-zero.
 * dMRank   : number of terms in the sum and the stride of a1 in M1x.
 * M0x, M1x : input buffers.
 * Mx       : output buffer; Mx[i] is set to 0 when dMRank is 0.
 *
 * The kernel performs no bounds check on i.
 */
__kernel void prodContractnTensorLinNotEndian_TYPE_FLOAT(long unsigned int dSubRank, long unsigned int dMRank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
    const size_t i = get_global_id(0);
    const size_t a0_id = i % dSubRank;
    const size_t a1_id = i / dSubRank;

    // Accumulate in a private variable and store once, instead of doing a
    // read-modify-write on global memory for every term of the sum.
    float acc = 0;
    for (size_t k = 0; k < dMRank; k++) {
        const size_t n0_id = a0_id + dSubRank * k;
        const size_t n1_id = a1_id * dMRank + k;
        acc += M0x[n0_id] * M1x[n1_id];
    }
    Mx[i] = acc;
}
|
||||
|
||||
/*
 * Contracted product of two flattened double buffers, "not endian" index layout.
 *
 * One work-item per output element i = get_global_id(0). With
 *     a0 = i % dSubRank
 *     a1 = i / dSubRank
 * the work-item computes
 *     Mx[i] = sum over k in [0, dMRank) of M0x[a0 + dSubRank * k] * M1x[a1 * dMRank + k]
 *
 * dSubRank : stride between consecutive k terms in M0x; also the divisor that
 *            splits i into (a0, a1), so it must be non-zero.
 * dMRank   : number of terms in the sum and the stride of a1 in M1x.
 * M0x, M1x : input buffers.
 * Mx       : output buffer; Mx[i] is set to 0 when dMRank is 0.
 *
 * The kernel performs no bounds check on i.
 */
__kernel void prodContractnTensorLinNotEndian_TYPE_DOUBLE(long unsigned int dSubRank, long unsigned int dMRank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
    const size_t i = get_global_id(0);
    const size_t a0_id = i % dSubRank;
    const size_t a1_id = i / dSubRank;

    // Accumulate in a private variable and store once, instead of doing a
    // read-modify-write on global memory for every term of the sum.
    double acc = 0;
    for (size_t k = 0; k < dMRank; k++) {
        const size_t n0_id = a0_id + dSubRank * k;
        const size_t n1_id = a1_id * dMRank + k;
        acc += M0x[n0_id] * M1x[n1_id];
    }
    Mx[i] = acc;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -16,6 +16,25 @@ __kernel void prodTensorLin_TYPE_DOUBLE(long unsigned int M1rank, __global const
|
||||
Mx[k] = M0x[i] * M1x[j];
|
||||
}
|
||||
|
||||
/*
 * Element-wise outer product of two flattened float buffers, 1-D launch,
 * "not endian" output layout.
 *
 * One work-item per output element. The flat output index
 * flat = get_global_id(0) is split as
 *     m0_idx = flat % M0rank
 *     m1_idx = flat / M0rank
 * and the work-item writes Mx[flat] = M0x[m0_idx] * M1x[m1_idx].
 *
 * M0rank : divisor used to split the flat index; must be non-zero
 *          (presumably the element count of M0x -- confirm against the caller).
 * M0x    : first input buffer, indexed by the remainder.
 * M1x    : second input buffer, indexed by the quotient.
 * Mx     : output buffer.
 *
 * The kernel performs no bounds check on the flat index.
 */
__kernel void prodTensorLinNotEndian_TYPE_FLOAT(long unsigned int M0rank, __global const float *M0x , __global const float *M1x, __global float *Mx ){
    const size_t flat = get_global_id(0);

    // The M0x index varies fastest along the flattened output.
    const size_t m1_idx = flat / M0rank;
    const size_t m0_idx = flat % M0rank;

    Mx[flat] = M0x[m0_idx] * M1x[m1_idx];
}
|
||||
|
||||
/*
 * Element-wise outer product of two flattened double buffers, 1-D launch,
 * "not endian" output layout.
 *
 * One work-item per output element. The flat output index
 * flat = get_global_id(0) is split as
 *     m0_idx = flat % M0rank
 *     m1_idx = flat / M0rank
 * and the work-item writes Mx[flat] = M0x[m0_idx] * M1x[m1_idx].
 *
 * M0rank : divisor used to split the flat index; must be non-zero
 *          (presumably the element count of M0x -- confirm against the caller).
 * M0x    : first input buffer, indexed by the remainder.
 * M1x    : second input buffer, indexed by the quotient.
 * Mx     : output buffer.
 *
 * The kernel performs no bounds check on the flat index.
 */
__kernel void prodTensorLinNotEndian_TYPE_DOUBLE(long unsigned int M0rank, __global const double *M0x , __global const double *M1x, __global double *Mx ){
    const size_t flat = get_global_id(0);

    // The M0x index varies fastest along the flattened output.
    const size_t m1_idx = flat / M0rank;
    const size_t m0_idx = flat % M0rank;

    Mx[flat] = M0x[m0_idx] * M1x[m1_idx];
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -146,10 +146,22 @@ void cl_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type *
|
||||
(*MM)=CREATE_TENSOR_##type(dd); \
|
||||
tensor_##type *M = *MM; \
|
||||
char *file_cl_src = "../src/kernel_ProdTensor.cl"; \
|
||||
char *func_cl_name = "prodTensorLin_" #type; \
|
||||
char *func_cl_nameEndian = "prodTensorLin_" #type; \
|
||||
char *func_cl_nameNotEndian = "prodTensorLinNotEndian_" #type; \
|
||||
char *func_cl_name; \
|
||||
size_t MeDimRank;\
|
||||
if(endian){\
|
||||
func_cl_name = func_cl_nameEndian;\
|
||||
MeDimRank = M1->dim->rank;\
|
||||
\
|
||||
}else{\
|
||||
func_cl_name = func_cl_nameNotEndian;\
|
||||
MeDimRank = M0->dim->rank;\
|
||||
\
|
||||
}\
|
||||
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
|
||||
/*/ Set the arguments of the kernel */ \
|
||||
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(M1->dim->rank)); \
|
||||
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&MeDimRank); \
|
||||
ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \
|
||||
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \
|
||||
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \
|
||||
@@ -188,11 +200,24 @@ void cl_tensorContractnProd_##type(tensor_##type** MM, tensor_##type *M0, tensor
|
||||
*MM = CREATE_TENSOR_##type(dd);\
|
||||
tensor_##type *M= *MM;\
|
||||
char *file_cl_src = "../src/kernel_ProdContractnTensor.cl"; \
|
||||
char *func_cl_name = "prodContractnTensorLin_" #type; \
|
||||
/*char *func_cl_name = "prodContractnTensorLin_" #type;*/ \
|
||||
char *func_cl_nameEndian = "prodContractnTensorLin_" #type; \
|
||||
char *func_cl_nameNotEndian = "prodContractnTensorLinNotEndian_" #type; \
|
||||
char *func_cl_name; \
|
||||
size_t dSubRank;\
|
||||
if(endian){\
|
||||
func_cl_name = func_cl_nameEndian;\
|
||||
dSubRank = dSub1->rank;\
|
||||
\
|
||||
}else{\
|
||||
func_cl_name = func_cl_nameNotEndian;\
|
||||
dSubRank = dSub0->rank;\
|
||||
\
|
||||
}\
|
||||
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
|
||||
\
|
||||
/*/ Set the arguments of the kernel */ \
|
||||
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(dSub1->rank)); \
|
||||
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&dSubRank); \
|
||||
ret |= clSetKernelArg(kernel, 1, sizeof(size_t), (void *)&(dM->rank)); \
|
||||
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M0_mem_obj); \
|
||||
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M1_mem_obj); \
|
||||
@@ -211,10 +236,23 @@ void cl2d_tensorProd_##type(tensor_##type **MM, tensor_##type *M0, tensor_##type
|
||||
(*MM)=CREATE_TENSOR_##type(dd); \
|
||||
tensor_##type *M = *MM; \
|
||||
char *file_cl_src = "../src/kernel_2d_ProdTensor.cl"; \
|
||||
char *func_cl_name = "prodTensor2dLin_" #type; \
|
||||
/*char *func_cl_name = "prodTensor2dLin_" #type;*/ \
|
||||
char *func_cl_nameEndian = "prodTensor2dLin_" #type; \
|
||||
char *func_cl_nameNotEndian = "prodTensor2dLinNotEndian_" #type; \
|
||||
char *func_cl_name; \
|
||||
size_t MeDimRank;\
|
||||
if(endian){\
|
||||
func_cl_name = func_cl_nameEndian;\
|
||||
MeDimRank = M1->dim->rank;\
|
||||
\
|
||||
}else{\
|
||||
func_cl_name = func_cl_nameNotEndian;\
|
||||
MeDimRank = M0->dim->rank;\
|
||||
\
|
||||
}\
|
||||
SETUP_cl_KERNEL_(type,file_cl_src,func_cl_name);\
|
||||
/*/ Set the arguments of the kernel */ \
|
||||
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(M1->dim->rank)); \
|
||||
ret = clSetKernelArg(kernel, 0, sizeof(size_t), (void *)&(MeDimRank)); \
|
||||
ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&M0_mem_obj); \
|
||||
ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&M1_mem_obj); \
|
||||
ret |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&M_mem_obj); \
|
||||
|
||||
Reference in New Issue
Block a user