#ifndef __D_CUDA_TENSOR_H__ #define __D_CUDA_TENSOR_H__ #include "cuda.h" #include "cuda_runtime.h" //#include "cuda_device_runtime_api.h" //#include "/home/fanasina/progr_/ptens0neD/tensor/tensCuda/d_tensCuda.h" #include "tensor/tensCuda/d_tensCuda.h" //#1D grid of 1D blocks __device__ int d_getGlobalIdx_1D_1D(); //#1D grid of 2D blocks __device__ int d_getGlobalIdx_1D_2D(); //#1D grid of 3D blocks __device__ int d_getGlobalIdx_1D_3D(); //#1D grid of 1D blocks __device__ int d_getGlobalIdx_2D_1D(); //#1D grid of 2D blocks __device__ int d_getGlobalIdx_2D_2D(); //2D grid of 3D blocks __device__ int d_getGlobalIdx_2D_3D(); //#1D grid of 1D blocks __device__ int d_getGlobalIdx_3D_1D(); //#1D grid of 2D blocks __device__ int d_getGlobalIdx_3D_2D(); //#1D grid of 3D blocks __device__ int d_getGlobalIdx_3D_3D(); extern cudaError_t cudaDeviceGetLimit(size_t* pValue, enum cudaLimit limit); __device__ void d_LinearToCoordEnd(int* ret, size_t lin, int* dim, int rank, size_t size); __device__ size_t d_CoordToLinearEnd(int* coo, int* dim, int rank); __device__ size_t d_CoordToLinear(int* coo, int* dim, int rank); __device__ void d_LinearToCoord(int* ret, size_t lin, int* dim, int rank, size_t size); __device__ void d_subArray(int* dst, int* src, int debDst, int finDst, int debSrc); __device__ void d_minReverse(int* dim, int& rank, const int* dim0, int rank0, const int* dim1, int rank1, bool& rev); __device__ void d_reverseArray(int* arr, int sz); __device__ int d_min(int a, int b); __device__ void d_concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1); template __global__ void d_prodTensor(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB); template __global__ void d_prodTensorEnd(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB); template __global__ void d_TensorContractnReverseProd(T* C, int* dimC, int rankC, size_t size, T* A, int rankA, size_t sizeA, T* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize); template __global__ void d_PermLinearTransformCoord(T* C, int* dimC, int rankC, size_t sizeC, T* A, int* dimA, int rankA, size_t sizeA, int* invPerm); #endif