From e050e7553396d5d8724a6bd925851807b2298a0c Mon Sep 17 00:00:00 2001 From: fanasina Date: Fri, 5 Jan 2024 23:26:01 +0100 Subject: [PATCH] remove duplicate dir --- ytest_t/test/src/coordinate/coordinate.h | 21 - ytest_t/test/src/dimension/dimension.cpp | 181 ------ ytest_t/test/src/dimension/dimension.h | 31 - ytest_t/test/src/dimension/dimension.hpp | 90 --- .../test/src/permutation_t/permutation_t.c | 125 ---- .../test/src/permutation_t/permutation_t.h | 45 -- .../test/src/set_theoric_t/set_theoric_t.c | 25 - .../test/src/set_theoric_t/set_theoric_t.h | 24 - ytest_t/test/src/tensor/tens0neD/tens0neD.cpp | 500 --------------- ytest_t/test/src/tensor/tens0neD/tens0neD.h | 114 ---- .../test/src/tensor/tensCuda/d_tensCuda.cu | 493 --------------- ytest_t/test/src/tensor/tensCuda/d_tensCuda.h | 69 --- ytest_t/test/src/tensor/tensCuda/tensCuda.cu | 574 ------------------ ytest_t/test/src/tensor/tensCuda/tensCuda.h | 31 - 14 files changed, 2323 deletions(-) delete mode 100644 ytest_t/test/src/coordinate/coordinate.h delete mode 100644 ytest_t/test/src/dimension/dimension.cpp delete mode 100644 ytest_t/test/src/dimension/dimension.h delete mode 100644 ytest_t/test/src/dimension/dimension.hpp delete mode 100644 ytest_t/test/src/permutation_t/permutation_t.c delete mode 100644 ytest_t/test/src/permutation_t/permutation_t.h delete mode 100644 ytest_t/test/src/set_theoric_t/set_theoric_t.c delete mode 100644 ytest_t/test/src/set_theoric_t/set_theoric_t.h delete mode 100644 ytest_t/test/src/tensor/tens0neD/tens0neD.cpp delete mode 100644 ytest_t/test/src/tensor/tens0neD/tens0neD.h delete mode 100644 ytest_t/test/src/tensor/tensCuda/d_tensCuda.cu delete mode 100644 ytest_t/test/src/tensor/tensCuda/d_tensCuda.h delete mode 100644 ytest_t/test/src/tensor/tensCuda/tensCuda.cu delete mode 100644 ytest_t/test/src/tensor/tensCuda/tensCuda.h diff --git a/ytest_t/test/src/coordinate/coordinate.h b/ytest_t/test/src/coordinate/coordinate.h deleted file mode 100644 index 2850d0a..0000000 --- a/ytest_t/test/src/coordinate/coordinate.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef __COORDINATE_C__H__ -#define __COORDINATE_C__H__ - -#include "dimension/dimension.h" - - -struct coordinate -{ - size_t lin_coo; - unsigned int *coord; - struct dimension *dimension; -}; - -typedef coordinate coordinate; - -void LinearToCoord(struct coordinate *coor); -void CoordToLinear(struct coordinate *coor); - - - -#endif diff --git a/ytest_t/test/src/dimension/dimension.cpp b/ytest_t/test/src/dimension/dimension.cpp deleted file mode 100644 index aba7d8a..0000000 --- a/ytest_t/test/src/dimension/dimension.cpp +++ /dev/null @@ -1,181 +0,0 @@ -#include -#include - -#include - -#include -#include - - - -//#include "/home/fanasina/progr_/ptens0neD/dimension/dimension.h" - -//#include "/home/fanasina/progr_/ptens0neD/permutation/permutation.h" - - -#include "dimension/dimension.hpp" - -#include "permutation/permutation.hpp" -//#include "permutation.h" - -/*void dimension::initDim(int* arr, bool end = true) { - endian = end; - delete[]dim; - dim = new int[rank]; - size = 1; - for (int i = 0; i < rank; ++i) { - dim[i] = arr[i]; - size *= dim[i]; - } -}*/ - -dimension& dimension::operator=(const dimension& d) { - int oldRank = rank; - rank = d.rank; - size = d.size; - initDim(d.dim, oldRank); - //for (int i = 0; i < rank; i++) dim[i] = d.dim[i]; - return *this; -} - -dimension& dimension::operator+=(const dimension& d) { - int oldRank = rank; - int* t = new int[rank + d.rank]; - for (int i = 0; i < rank; i++) t[i] = dim[i]; - for (int i = 0; i < d.rank; i++) t[rank + i] = d.dim[i]; - size *= d.size; - rank += d.rank; - initDim(t, oldRank); - return *this; -} - -void dimension::LinearToCoord(int* ret, int lin) const { - int begin = 0, end = rank - 1; - int (*iter)(int) = incr; - bool (*cond)(int, int) = isLessThan; - if (endian == false) { - //if (endian) { - begin = rank - 1; end = 0; - iter = decr; cond = isGreatThan; - } - //printf("to coor begin = %d end = %d \n", begin, end); - - int sm = lin; - int pp = size; - for (int i = begin; cond(i, end); i = iter(i)) { - //printf(" i: %d ", i); - pp /= dim[i]; - ret[i] = sm / pp; - sm %= pp; - //printf("sm[%d] = %d , pp=%d ; ", i, sm, pp); - } - ret[end] = sm; -} - -int dimension::CoordToLinear(int* coo) const { - int begin = 0; - int end = rank - 1; - int (*iter)(int); iter = &incr; - bool (*cond)(int, int); cond = &isLessEqThan; - - if (endian) { - begin = rank - 1; end = 0; - iter = &decr; cond = &isGreatEqThan; - } - - int pp = 1; - int sm = 0; - for (int i = begin; cond(i, end); i = iter(i)) { - sm += (coo[i] * pp); - pp *= dim[i]; - } - return sm; -} - -bool isLessEqThan(int a, int b) { return a <= b; } -bool isLessThan(int a, int b) { return a < b; } -bool isGreatEqThan(int a, int b) { return a >= b; } -bool isGreatThan(int a, int b) { return a > b; } -int incr(int i) { return i + 1; } -int decr(int i) { return i - 1; } - - -void add(dimension& d, const dimension& d0, const dimension& d1) { - int oldRank = d.rank; - int* t = new int[d0.rank + d1.rank]; - for (int i = 0; i < d0.rank; i++) t[i] = d0.dim[i]; - for (int i = 0; i < d1.rank; i++) t[d0.rank + i] = d1.dim[i]; - d.rank = d0.rank + d1.rank; - d.initDim(t, oldRank); -} - -void max(dimension& d, const dimension& d0, const dimension& d1) { - if (d0.rank > d1.rank) { - d = d0; - } - else if (d0.rank < d1.rank) { - d = d1; - } - else {// d0.rank = d1.rank - d = d0; - for (int i = 0; i < d.rank; i++) { - if (d.dim[i] < d1.dim[i]) d.dim[i] = d1.dim[i]; - } - } -} - -void min(dimension& d, const dimension& d0, const dimension& d1) { - if (d0.rank > d1.rank) { - d = d1; - } - else if (d0.rank < d1.rank) { - d = d0; - } - else {// d0.rank = d1.rank - d = d0; - for (int i = 0; i < d.rank; i++) { - if (d.dim[i] > d1.dim[i]) d.dim[i] = d1.dim[i]; - } - } -} - -void minReverse(dimension& d, const dimension& d0, const dimension& d1, bool& rev) { - if (d0.rank > d1.rank) { - d = d1; - rev = true; - } - else if (d0.rank < d1.rank) { - d = d0; - rev = false; - } - else {// d0.rank = d1.rank - d = d0; - for (int i = 0; i < d.rank; i++) { - if (d.dim[i] > d1.dim[d.rank - 1 - i]) d.dim[i] = d1.dim[d.rank - 1 - i]; - } - rev = false; - } -} - -void reverseArray(int* arr, int sz) { - int tmp[sz], i = 0; - for (; i < sz / 2; i++) { - tmp[i] = arr[i]; - arr[i] = arr[sz - 1 - i]; - } - for (; i < sz; i++) { - arr[i] = tmp[sz - 1 - i]; - } -} - -void transform(dimension& dDst, const dimension& dSrc, int* perm, int sz) { - dDst = dSrc; - setInit setIn(sz); - if (sz == dSrc.rank) { - if (isPermutation(perm, setIn, sz)) { - for (int i = 0; i < sz; i++) dDst.dim[i] = dSrc.dim[perm[i]]; - } - } -} - - diff --git a/ytest_t/test/src/dimension/dimension.h b/ytest_t/test/src/dimension/dimension.h deleted file mode 100644 index d522d2d..0000000 --- a/ytest_t/test/src/dimension/dimension.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef __DIM__ -#define __DIM__ - -#include -#include - -struct dimension -{ - unsigned int rank; - unsigned int* dim; - size_t size; -}; -typedef dimension dimension; - - -void print_dimension(dimension d); - - -void add(dimension* d, const dimension* d0, const dimension* d1); - -void max(dimension* d, const dimension* d0, const dimension* d1); - -void min(dimension* d, const dimension* d0, const dimension* d1); - -bool minReverse(dimension* d, const dimension* d0, const dimension* d1); - -void transform(dimension* dDst, const dimension* dSrc, int* perm); - - -#endif - diff --git a/ytest_t/test/src/dimension/dimension.hpp b/ytest_t/test/src/dimension/dimension.hpp deleted file mode 100644 index cf8bf66..0000000 --- a/ytest_t/test/src/dimension/dimension.hpp +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef __DIMENSION__ -#define __DIMENSION__ - -#include -#include - -#include - -//#include "tensor.h" - -//#include "dimension.h" - -static int iArray1[1] = { 1 }; - - - -struct dimension { - //friend dimension& operator+(const dimension& d, const dimension& d1); - friend void add(dimension& d, const dimension& d0, const dimension& d1); - friend void max(dimension& d, const dimension& d0, const dimension& d1); - friend void min(dimension& d, const dimension& d0, const dimension& d1); - friend void minReverse(dimension& d, const dimension& d0, const dimension& d1, bool& Rev); - friend bool checkMatchProdTensor(dimension& d0, const dimension& d1, int nestingDepth); - friend bool checkMatchProdTensorReverse(dimension& d0, const dimension& d1, int nestingDepth); - friend void extractDimNestingDepth(dimension& dM, const dimension& d0, const dimension& d1, int nestingDepth); - - - int rank; - int* dim; - size_t size; - bool endian; //LitleEndian : true, BigEndian : false, - void initDim(int* arr, int oldRank) { - - //delete[]dim; - //dim = new int[rank]; - if (rank > oldRank) { - free(dim); - dim = (int*)malloc(rank * sizeof(int)); - } - size = 1; - for (int i = 0; i < rank; ++i) { - dim[i] = arr[i]; - size *= dim[i]; - } - } - void initDim(bool end = true) { - endian = end; - //delete[]dim; - //dim = new int[rank]; - - if (dim != NULL) free(dim); - dim = (int*)malloc(rank * sizeof(int)); - } - dimension& operator=(const dimension& d); - dimension& operator+=(const dimension& d); - //dimension& operator*=(const dimension& d); - dimension(int d = 1, int* arr = iArray1, bool end = true) { - endian = end; - rank = d; - //dim = new int[d]; - dim = (int*)malloc(d * sizeof(int)); - initDim(arr, rank); - } - void print() const { printf(" rank: %d\n", rank);for (int i = 0; i < rank; i++) printf(" %d ", dim[i]);printf("\nsize:%ld\n", size); } - void LinearToCoord(int* ret, int lin) const; - int CoordToLinear(int* coo) const; -}; - -bool isLessEqThan(int a, int b); // { return a <= b; } -bool isLessThan(int a, int b); // { return a < b; } -bool isGreatEqThan(int a, int b); // { return a >= b; } -bool isGreatThan(int a, int b); // { return a > b; } -int incr(int i); // { return i + 1; } -int decr(int i); // { return i - 1; } - - - -void add(dimension& d, const dimension& d0, const dimension& d1); - -void max(dimension& d, const dimension& d0, const dimension& d1); - -void min(dimension& d, const dimension& d0, const dimension& d1); - -void minReverse(dimension& d, const dimension& d0, const dimension& d1, bool& rev); - -void transform(dimension& dDst, const dimension& dSrc, int* perm, int sz); - - -#endif - diff --git a/ytest_t/test/src/permutation_t/permutation_t.c b/ytest_t/test/src/permutation_t/permutation_t.c deleted file mode 100644 index 4caf54b..0000000 --- a/ytest_t/test/src/permutation_t/permutation_t.c +++ /dev/null @@ -1,125 +0,0 @@ -#include "permutation_t/permutation_t.h" - -#define GENERATE_PERMUTATION_FUNCTIONS_UNSIGNED(type)\ - bool IS_PERMUTATION_SET_THEORIC_##type(const PERMUTATION_##type *p){\ - if(p == NULL) return false;\ - size_t size = p->size;\ - type j;\ - size_t *count_array_i = calloc(size, sizeof(size_t));\ - if(count_array_i == NULL){\ - printf("can't alloc count_array_i\n"); return false;}\ - for(size_t i = 0; i < size; ++i){\ - j = p->perm[i];\ - if((COMPARE_N_##type(&j, (type*)&size) >= 0) || count_array_i[j]){\ - free(count_array_i); return false; }\ - ++count_array_i[j];}\ - free(count_array_i);\ - return true; }\ - -GENERATE_PERMUTATION_FUNCTIONS_UNSIGNED(TYPE_U_CHAR) -GENERATE_PERMUTATION_FUNCTIONS_UNSIGNED(TYPE_U_INT) -GENERATE_PERMUTATION_FUNCTIONS_UNSIGNED(TYPE_U_L_INT) -GENERATE_PERMUTATION_FUNCTIONS_UNSIGNED(TYPE_SIZE_T) - - -#define GENERATE_PERMUTATION_FUNCTIONS(type)\ - PERMUTATION_##type * CREATE_PERMUTATION_##type(size_t size){\ - if (size == 0) return NULL;\ - PERMUTATION_##type *p = malloc(sizeof(PERMUTATION_##type));\ - p->size = size;\ - p->perm = malloc(size * sizeof(type));\ - return p; }\ -\ - PERMUTATION_TYPE_SIZE_T * TRANSLATE_TO_SET_THEORIC_SIZE_T_##type(const PERMUTATION_##type *p ){\ - if (p == NULL) return NULL;\ - PERMUTATION_TYPE_SIZE_T *t_p = malloc(sizeof(PERMUTATION_TYPE_SIZE_T));\ - size_t size = p->size;\ - t_p->size = size;\ - t_p->perm = malloc(size * sizeof(TYPE_SIZE_T));\ - type *sorted_perm = malloc(size * sizeof(type));\ - COPY_ARRAY_##type(sorted_perm,(const type*)p->perm, size);\ - qsort(sorted_perm, size, sizeof(type), COMPARE_N_##type);\ - size_t *rec_index_visited = malloc(size * sizeof(size_t));\ - size_t cur_rec = 0; bool found_rec;\ - for(size_t i = 0; i < size; ++i){\ - for(size_t j = 0; j < size; ++j){\ - if(COMPARE_N_##type(&(p->perm[j]), &(sorted_perm[i])) == 0){\ - found_rec = false;\ - for(size_t k = 0; k < cur_rec; ++k){\ - if(rec_index_visited[k] == j){\ - found_rec == true; break; } } \ - if(found_rec == false){\ - /*t_p->perm[i] = j;*/\ - t_p->perm[j] = i;\ - rec_index_visited[cur_rec++] = j; \ - break; }\ - }\ - }\ - }\ - free(rec_index_visited);\ - free(sorted_perm);\ - return t_p; \ - }\ -\ - bool IS_PERMUTATION_##type(const PERMUTATION_##type *p){\ - if(p == NULL) return false;\ - PERMUTATION_TYPE_SIZE_T *t_p = TRANSLATE_TO_SET_THEORIC_SIZE_T_##type(p);\ - bool ret = IS_PERMUTATION_SET_THEORIC_TYPE_SIZE_T(t_p);\ - free(t_p);\ - return ret; }\ - - - -GENERATE_PERMUTATION_FUNCTIONS(TYPE_CHAR) -GENERATE_PERMUTATION_FUNCTIONS(TYPE_U_CHAR) -GENERATE_PERMUTATION_FUNCTIONS(TYPE_INT) -GENERATE_PERMUTATION_FUNCTIONS(TYPE_U_INT) -GENERATE_PERMUTATION_FUNCTIONS(TYPE_L_INT) -GENERATE_PERMUTATION_FUNCTIONS(TYPE_U_L_INT) -GENERATE_PERMUTATION_FUNCTIONS(TYPE_SIZE_T) -GENERATE_PERMUTATION_FUNCTIONS(TYPE_FLOAT) -GENERATE_PERMUTATION_FUNCTIONS(TYPE_DOUBLE) -GENERATE_PERMUTATION_FUNCTIONS(TYPE_L_DOUBLE) -GENERATE_PERMUTATION_FUNCTIONS(TYPE_STRING) - - - - - - -/* why TRANSLATE ? - * 2,7,4,1 is a permutation of 1,2,4,7 - *it is equivalent of 1,3,2,0 in set_theoric(4)=0,1,2,3 - this function calculate the permutation equivalent in set_theoric - 2,4,2,5 is translate to 0,1,0,2 - * */ - - -/* if need optimization in translate -#define GENERATE_UNSIGNED_SIZE_WITH_TYPED(type_unsigned, type)\ - PERMUTATION_##type_unsigned * TRANSLATE_TO_SET_THEORIC_##type_unsigned_##type(PERMUTATION_##type *p ){\ - if (p == NULL) return NULL;\ - PERMUTATION_##type_unsigned *t_p = malloc(sizeof(PERMUTATION_##type_unsigned));\ - type_unsigned size = p->size;\ - t_p->perm = malloc(size * sizeof(type_unsigned));\ - type *sorted_perm = malloc(size * sizeof(type));\ - COPY_ARRAY_##type(sorted_perm, p->perm, size);\ - qsort(sorted_perm, size, sizeof(type), COMPARE_N_##type);\ - type_unsigned *rec_index_visited = malloc(size * sizeof(type_unsigned));\ - type_unsigned cur_rec = 0; bool found_rec;\ - for(type_unsigned i = 0; i < size; ++i){\ - for(type_unsigned j = 0; j < size; ++j){\ - if(COMPARE_N_##type(&(p->perm[j]), &(sorted_perm[i])) == 0){\ - found_rec = false;\ - for(type_unsigned k = 0; k < cur_rec; ++k){\ - if(rec_index_visited[k] == j){\ - found_rec == true; break; } } \ - if(found_rec == false){\ - t_p->perm[i] = j;\ - rec_index_visited[cur++] = j; \ - break; } } } } \ - free(rec_index_visited);\ - free(sorted_perm);\ - return t_p; }\ - -*/ diff --git a/ytest_t/test/src/permutation_t/permutation_t.h b/ytest_t/test/src/permutation_t/permutation_t.h deleted file mode 100644 index 49f03de..0000000 --- a/ytest_t/test/src/permutation_t/permutation_t.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef __PERMUTATION_T_C_H__ -#define __PERMUTATION_T_C_H__ - -#include "tools_t/tools_t.h" -#include "set_theoric_t/set_theoric_t.h" - -/* struct of permutation, not necessarly set_theoric - * - * */ - - - -#define GENERATE_PERMUTATION(type)\ - struct PERMUTATION_##type{\ - size_t size;\ - type * perm; };\ -\ - typedef struct PERMUTATION_##type PERMUTATION_##type;\ - PERMUTATION_##type * CREATE_PERMUTATION_##type(size_t size);\ -PERMUTATION_TYPE_SIZE_T * TRANSLATE_TO_SET_THEORIC_SIZE_T_##type(const PERMUTATION_##type *p );\ - - -GENERATE_PERMUTATION(TYPE_SIZE_T) -GENERATE_PERMUTATION(TYPE_CHAR) -GENERATE_PERMUTATION(TYPE_U_CHAR) -GENERATE_PERMUTATION(TYPE_INT) -GENERATE_PERMUTATION(TYPE_U_INT) -GENERATE_PERMUTATION(TYPE_L_INT) -GENERATE_PERMUTATION(TYPE_U_L_INT) -GENERATE_PERMUTATION(TYPE_FLOAT) -GENERATE_PERMUTATION(TYPE_DOUBLE) -GENERATE_PERMUTATION(TYPE_L_DOUBLE) -GENERATE_PERMUTATION(TYPE_STRING) - -#define GENERATE_FUNCTIONS_UNSIGNED(type)\ - bool IS_PERMUTATION_SET_THEORIC_##type(const PERMUTATION_##type *p);\ - -GENERATE_FUNCTIONS_UNSIGNED(TYPE_U_CHAR) -GENERATE_FUNCTIONS_UNSIGNED(TYPE_U_INT) -GENERATE_FUNCTIONS_UNSIGNED(TYPE_U_L_INT) -GENERATE_FUNCTIONS_UNSIGNED(TYPE_SIZE_T) - - - -#endif /*__PERMUTATION_T_C_H__*/ diff --git a/ytest_t/test/src/set_theoric_t/set_theoric_t.c b/ytest_t/test/src/set_theoric_t/set_theoric_t.c deleted file mode 100644 index ab589cd..0000000 --- a/ytest_t/test/src/set_theoric_t/set_theoric_t.c +++ /dev/null @@ -1,25 +0,0 @@ - -#include "set_theoric_t/set_theoric_t.h" - -#define GENERATE_SET_THEORIC(type) \ - SET_THEORIC_##type * CREATE_SET_THEORIC_##type(size_t id){ \ - if(id == 0) return NULL; \ - SET_THEORIC_##type *ret_set = malloc(sizeof(SET_THEORIC_##type)); \ - ret_set->id = id; \ - ret_set->set = malloc(id*sizeof(type)); \ - for(type i = 0; i < id; ++i) ret_set->set[i]=i; \ - return ret_set; \ - } \ - \ - bool IS_SET_THEORIC_##type(SET_THEORIC_##type *st){ \ - for(type i = 0; i < st->id; ++i){ \ - if(st->set[i] != i) return false; \ - return true; \ - } \ - } \ - -GENERATE_SET_THEORIC(TYPE_U_CHAR) -GENERATE_SET_THEORIC(TYPE_U_INT) -GENERATE_SET_THEORIC(TYPE_U_L_INT) -GENERATE_SET_THEORIC(TYPE_SIZE_T) - diff --git a/ytest_t/test/src/set_theoric_t/set_theoric_t.h b/ytest_t/test/src/set_theoric_t/set_theoric_t.h deleted file mode 100644 index a13122c..0000000 --- a/ytest_t/test/src/set_theoric_t/set_theoric_t.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef __SET_THEORIC_T_C__H -#define __SET_THEORIC_T_C__H - -#include - -#include "tools_t/tools_t.h" - -#define GENERATE_UNSIGNED_SET_THEORIC(type) \ - struct SET_THEORIC_##type{ \ - type id; \ - type *set; \ - }; \ - typedef struct SET_THEORIC_##type SET_THEORIC_##type; \ - SET_THEORIC_##type * CREATE_SET_THEORIC_##type(size_t id/*TYPE_##type*/); \ - bool IS_SET_THEORIC_##type(SET_THEORIC_##type *st); \ - -GENERATE_UNSIGNED_SET_THEORIC(TYPE_U_CHAR) -GENERATE_UNSIGNED_SET_THEORIC(TYPE_U_INT) -GENERATE_UNSIGNED_SET_THEORIC(TYPE_U_L_INT) -GENERATE_UNSIGNED_SET_THEORIC(TYPE_SIZE_T) - - - -#endif /*__SET_THEORIC_T_C__H*/ diff --git a/ytest_t/test/src/tensor/tens0neD/tens0neD.cpp b/ytest_t/test/src/tensor/tens0neD/tens0neD.cpp deleted file mode 100644 index efa83c8..0000000 --- a/ytest_t/test/src/tensor/tens0neD/tens0neD.cpp +++ /dev/null @@ -1,500 +0,0 @@ -#include -#include - -#include - -#include -#include - - -//#include "/home/fanasina/progr_/ptens0neD/tensor/tens0neD/tens0neD.h" -#include "tensor/tens0neD/tens0neD.h" -//#include "include/tens0neD.h" - - -//#include "cudatensor.h" -//#include "/home/fanasina/progr_/ptens0neD/permutation/permutation.h" -#include "permutation/permutation.h" - - -template -void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz) { - transform(Dst.Dim, Src.Dim, perm, sz); - dimension dsrc = Src.Dim; - dimension ddst = Dst.Dim; - int coor[dsrc.rank]; - int dcoor[ddst.rank], ldst; - for (int i = 0; i < Src.Dim.size; i++) { - dsrc.LinearToCoord(coor, i); - for (int j = 0; j < dsrc.rank; j++) dcoor[j] = coor[perm[j]]; - ldst = ddst.CoordToLinear(dcoor); - Dst.elements[ldst] = Src.elements[i]; - } -} - -template void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz); -template void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz); - -template -Tensor& Tensor::operator=(const Tensor& M) { - Dim = M.Dim; - for (int i = 0; i < Dim.size; ++i) elements[i] = M.elements[i]; - return *this; -} - -template -Tensor& Tensor::operator*=(const T& val) { - //for (int i = 0; i < rank.size; ++i) elements[i] *= val; - return *this; -} - -template -Tensor& operator*(const Tensor& M0, const Tensor& M1) { - struct dimension d; add(d, M0.Dim, M1.Dim); - Tensor Mret(d); - for (int i = 0; i < M0.Dim.size; ++i) Mret.elements[i] = M0.elements[i]; - Mret.Dim += M0.Dim; - return Mret; -} - - -void subArray(int* dst, int* src, int debDst, int finDst, int debSrc) { - for (int i = debDst; i < finDst; i++) { - dst[i] = src[i + debSrc]; - } -} - -void concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1) { - int i = debDst; - for (int j = debSrc0; j < finSrc0; j++) { - dst[i++] = src0[j]; - } - for (int j = debSrc1; j < finSrc1; j++) { - dst[i++] = src1[j]; - } -} - -template -void Tensor::initVal(T val) { - int* coord = new int[Dim.rank]; - T pp, mult = 0.5; - for (int i = 0; i < Dim.size; i++) { - Dim.LinearToCoord(coord, i); - elements[i] = val; - pp = mult; - for (int j = 0; j < Dim.rank; j++) { - elements[i] += (coord[j] + 1) * pp; - pp *= mult; - } - } -} -template -void Tensor::initVal(float val); -template -void Tensor::initVal(double val); - -template -void Tensor::print() { - Dim.print(); - int* coord = new int[Dim.rank]; - int begin = 0, end = Dim.rank - 1; - //int beginInv = Dim.rank - 1, endInv = 0; - int (*iter)(int) = incr; - //int (*iterInv)(int) = decr; - bool (*cond)(int, int) = isLessEqThan; - //bool (*condInv)(int, int) = isGreatEqThan; - if (Dim.endian == false) { - begin = Dim.rank - 1; end = 0; - //beginInv = 0; endInv = Dim.rank - 1; - iter = decr; cond = isGreatEqThan; - //iterInv = incr; condInv = isLessEqThan; - } - for (int i = 0; i < Dim.size; i++) { - Dim.LinearToCoord(coord, i); - //if (coord[Dim.rank - 1] == 0) { - if (coord[begin] == 0) { - for (int j = begin; cond(j, end); j = iter(j)) { - //for (int j = Dim.rank - 1; j >= 0; j--) { - if (coord[j] == 0) { - printf("("); - } - else break; - } - } - - //printf(" ");for (int j = 0; j < Dim.rank; j++) printf("[%d]", coord[j]); printf(" "); - //printf(" "); for (int j = beginInv; condInv(j, endInv); j = iterInv(j)) printf("[%d]", coord[j]); printf(" "); - //printf(" "); for (int k = beginInv; condInv(k, endInv); k = iterInv(k)) { printf("[%d]", coord[k]); } printf(" "); - - printf(" %.6f ", elements[i]); - - //if (coord[Dim.rank - 1] == Dim.dim[Dim.rank - 1] - 1) { - if (coord[begin] == Dim.dim[begin] - 1) { - for (int j = begin; cond(j, end); j = iter(j)) { - //for (int j = Dim.rank - 1; j >= 0; j--) { - if (coord[j] == Dim.dim[j] - 1) { - printf(")"); - } - else break; - } - } - } - - printf("\n"); -} -template -void Tensor::print(); -template -void Tensor::print(); - -template -void tensorProd(Tensor& M, const Tensor& M0, const Tensor& M1) { - add(M.Dim, M0.Dim, M1.Dim); - M.initTensor(); - int* coord = new int[M.Dim.rank]; - int* coord0 = new int[M0.Dim.rank], lin0; - int* coord1 = new int[M1.Dim.rank], lin1; - for (int i = 0; i < M.Dim.size; i++) { - M.Dim.LinearToCoord(coord, i); - subArray(coord0, coord, 0, M0.Dim.rank, 0); - subArray(coord1, coord, 0, M1.Dim.rank, M0.Dim.rank); - lin0 = (M0.Dim).CoordToLinear(coord0); - lin1 = (M1.Dim).CoordToLinear(coord1); - M.elements[i] = M0.elements[lin0] * M1.elements[lin1]; - } -} - -template -void tensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); -template -void tensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); - - - - -bool checkMatchProdTensor(const dimension& d0, const dimension& d1, int nestingDepth) { - if (d0.rank <= nestingDepth || d1.rank <= nestingDepth) return false; - for (int i = 0; i < nestingDepth;i++) { - if (d1.dim[i] != d0.dim[d0.rank - nestingDepth + i]) return false; - } - return true; -} - -bool checkMatchProdTensorReverse(const dimension& d0, const dimension& d1, int nestingDepth) { - if (d0.rank <= nestingDepth || d1.rank <= nestingDepth) return false; - for (int i = 0; i < nestingDepth;i++) { - if (d1.dim[i] != d0.dim[d0.rank - 1 - i]) return false; - } - return true; -} - -void extractDimNestingDepth(dimension& dM, const dimension& d0, const dimension& d1, int nestingDepth) { - int len0 = d0.rank - nestingDepth; - int len1 = d1.rank - nestingDepth; - - int* tsub0 = new int[len0]; - int* tsub1 = new int[len1]; - int* tDk1 = new int[nestingDepth]; - int* tDk0 = new int[nestingDepth]; - subArray(tsub0, d0.dim, 0, len0, 0); - subArray(tsub1, d1.dim, 0, len1, nestingDepth); - subArray(tDk1, d1.dim, 0, nestingDepth, 0); - subArray(tDk0, d0.dim, 0, nestingDepth, len0); - dimension dSub0(len0, tsub0); - dimension dSub1(len1, tsub1); - dimension dM1(nestingDepth, tDk1); - dimension dM0(nestingDepth, tDk0); - - min(dM, dM0, dM1); - //max(dM, dM0, dM1); -} - -// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l -// M[x0,x1,x3..xl x{l+1}...xn] X M[xn,x{n-1},x{n-2}...xl y{l+1} ..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) -//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] -template -void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth) { - if (!checkMatchProdTensor(M0.Dim, M1.Dim, nestingDepth)) { - printf("Deep = %d\n", nestingDepth); - //throw std::check_ProdTensor(" Failed imbrication order in Multiplication matrix "); - - //throw std::invalid_argument(" Failed imbrication order in Multiplication matrix "); - } - - int len0 = M0.Dim.rank - nestingDepth; - int len1 = M1.Dim.rank - nestingDepth; - - int* tsub0 = new int[len0]; - int* tsub1 = new int[len1]; - int* tDk1 = new int[nestingDepth]; - int* tDk0 = new int[nestingDepth]; - subArray(tsub0, M0.Dim.dim, 0, len0, 0); - subArray(tsub1, M1.Dim.dim, 0, len1, nestingDepth); - subArray(tDk1, M1.Dim.dim, 0, nestingDepth, 0); - subArray(tDk0, M0.Dim.dim, 0, nestingDepth, len0); - - dimension dSub0(len0, tsub0); - dimension dSub1(len1, tsub1); - dimension dM1(nestingDepth, tDk1); - dimension dM0(nestingDepth, tDk0); - dimension dM; - min(dM, dM0, dM1); - //max(dM, dM0, dM1); - - add(M.Dim, dSub0, dSub1); - M.initTensor(); - - int* coord = new int[M.Dim.rank]; - - int* coord0 = new int[len0], lin0; - int* coord1 = new int[len1], lin1; - - int* coordM0 = new int[M0.Dim.rank]; - int* coordM1 = new int[M1.Dim.rank]; - - int* Koord = new int[nestingDepth]; - for (int i = 0; i < M.Dim.size; i++) { - M.Dim.LinearToCoord(coord, i); - subArray(coord0, coord, 0, len0, 0); - subArray(coord1, coord, 0, len1, len0); - M.elements[i] = 0; - for (int k = 0; k < dM.size; k++) { - dM.LinearToCoord(Koord, k); - concatArray(coordM0, coord0, Koord, 0, 0, len0, 0, nestingDepth); - concatArray(coordM1, Koord, coord1, 0, 0, nestingDepth, 0, len1); - lin0 = (M0.Dim).CoordToLinear(coordM0); - lin1 = (M1.Dim).CoordToLinear(coordM1); - M.elements[i] += M0.elements[lin0] * M1.elements[lin1]; - } - } -} - -template -void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); -template -void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); - -void reverseDim(dimension& d, const dimension& d0) { - d.rank = d0.rank; - d.size = d0.size; - if (d.dim != NULL) free(d.dim); - d.dim = (int*)malloc(d.rank * sizeof(int)); - for (int i = 0; i < d.rank; i++) d.dim[i] = d0.dim[d.rank - i - 1]; -} - -template -void reverseTensor(Tensor& M, const Tensor& M0) { - reverseDim(M.Dim, M0.Dim); - size_t id; - int coor[M0.Dim.rank]; - for (size_t i = 0; i < M.Dim.size; i++) { - M0.Dim.LinearToCoord(coor, i); - reverseArray(coor, M0.Dim.rank); - id = M.Dim.CoordToLinear(coor); - M.elements[id] = M0.elements[i]; - } -} - -// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l -// M[x0,x1,x3..xl x{l+1}..xn] X M[xn,x{n-1},..x{l+1}xl y{l+1}..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) -//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] -template -void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth) { - if (!checkMatchProdTensorReverse(M0.Dim, M1.Dim, nestingDepth)) { - printf("Failed in Deep = %d\n", nestingDepth); - //throw std::check_ProdTensor(" Failed imbrication order in Multiplication matrix "); - - //throw std::invalid_argument(" Failed imbrication order in Multiplication matrix "); - } - - int len0 = M0.Dim.rank - nestingDepth; - int len1 = M1.Dim.rank - nestingDepth; - - int* tsub0 = new int[len0]; - int* tsub1 = new int[len1]; - int* tDk1 = new int[nestingDepth]; - int* tDk0 = new int[nestingDepth]; - subArray(tsub0, M0.Dim.dim, 0, len0, 0); - subArray(tsub1, M1.Dim.dim, 0, len1, nestingDepth); - subArray(tDk1, M1.Dim.dim, 0, nestingDepth, 0); - subArray(tDk0, M0.Dim.dim, 0, nestingDepth, len0); - - dimension dSub0(len0, tsub0); - dimension dSub1(len1, tsub1); - dimension dM1(nestingDepth, tDk1); - dimension dM0(nestingDepth, tDk0); - dimension dM; - bool rev; - minReverse(dM, dM0, dM1, rev); - if (rev) reverseArray(dM.dim, dM.rank); - //max(dM, dM0, dM1); - - add(M.Dim, dSub0, dSub1); - M.initTensor(); - - int* coord = new int[M.Dim.rank]; - - int* coord0 = new int[len0], lin0; - int* coord1 = new int[len1], lin1; - - int* coordM0 = new int[M0.Dim.rank]; - int* coordM1 = new int[M1.Dim.rank]; - - int* Koord = new int[nestingDepth]; - for (int i = 0; i < M.Dim.size; i++) { - M.Dim.LinearToCoord(coord, i); - subArray(coord0, coord, 0, len0, 0); - subArray(coord1, coord, 0, len1, len0); - M.elements[i] = 0; - for (int k = 0; k < dM.size; k++) { - dM.LinearToCoord(Koord, k); - concatArray(coordM0, coord0, Koord, 0, 0, len0, 0, nestingDepth); - reverseArray(Koord, nestingDepth); - concatArray(coordM1, Koord, coord1, 0, 0, nestingDepth, 0, len1); - lin0 = (M0.Dim).CoordToLinear(coordM0); - lin1 = (M1.Dim).CoordToLinear(coordM1); - M.elements[i] += M0.elements[lin0] * M1.elements[lin1]; - } - } -} - -template -void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); -template -void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); - -template -void permuteTensorDef(Tensor& M, const Tensor& M0, permutation p) { - if (p.size == M0.Dim.rank) { - M.Dim.rank = M0.Dim.rank; - M.Dim.size = M0.Dim.size; - M.Dim.initDim(); - M.initTensor(); - //permuteArray(M.Dim.dim, M0.Dim.dim, p); - //for (int i = 0; i < p.size; i++) { M.Dim.dim[i] = M0.Dim.dim[p.perm[i]]; } - p.permute(M.Dim.dim, M0.Dim.dim); - size_t img; - int coor[p.size]; - int rooc[p.size]; - for (size_t i = 0; i < M.Dim.size;i++) { - M0.Dim.LinearToCoord(coor, i); - p.permute(rooc, coor); - img = M.Dim.CoordToLinear(rooc); - if (img >= M.Dim.size) printf(" i: %ld vs img:%ld size: %ld\n", i, img, M.Dim.size); - M.elements[img] = M0.elements[i]; - - } - } -} - -template -void permuteTensorDef(Tensor& M, const Tensor& M0, permutation p); - -template -bool scanPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest) { - if (contractNest < Msecond.Dim.rank && contractNest < Mfirst.Dim.rank) { - std::vector founded; - int begin = Mfirst.Dim.rank - contractNest, tmp; - for (int i = 0; i < Msecond.Dim.rank;i++) perm[i] = i; - for (int i = begin; i < Mfirst.Dim.rank; i++) { - for (int j = 0; j < Msecond.Dim.rank;j++) { - if (std::find(founded.begin(), founded.end(), perm[j]) == founded.end()) {// not found - if (Msecond.Dim.dim[perm[j]] == Mfirst.Dim.dim[i]) { - founded.push_back(perm[j]); - tmp = perm[i - begin]; - perm[i - begin] = perm[j]; - perm[j] = tmp; - } - } - } - } - return (founded.size() == contractNest); - } - return false; -} -template -bool scanPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest); - - -template -bool scanInvPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest) { - if (contractNest < Msecond.Dim.rank && contractNest < Mfirst.Dim.rank) { - std::vector founded; - int begin = Mfirst.Dim.rank - contractNest, tmp; - for (int i = 0; i < Msecond.Dim.rank;i++) perm[i] = i; - for (int i = begin; i < Mfirst.Dim.rank; i++) { - for (int j = 0; j < Msecond.Dim.rank;j++) { - if (std::find(founded.begin(), founded.end(), j) == founded.end()) {// not found - if (Msecond.Dim.dim[j] == Mfirst.Dim.dim[perm[i - begin]]) { - founded.push_back(j); - tmp = perm[i - begin]; - perm[i - begin] = j; - perm[j] = tmp; - } - } - } - } - return (founded.size() == contractNest); - } - return false; -} -template -bool scanInvPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest); - - -void LinearTransformCoord(size_t& dst, size_t src, int* inversePerm, size_t Msize, dimension dDst, dimension dSrc) { - size_t sm = src; - size_t pp = Msize; - size_t s = 0; - size_t p = 1; - int ret;// = new int[rank]; - int i; - for (i = 0; i < dSrc.rank; ++i) { - pp /= dSrc.dim[i]; - ret = sm / pp; - p = 1; - for (int j = inversePerm[i] + 1; j < dDst.rank;j++) { - p *= dDst.dim[j]; - } - s += ret * p; - - sm %= pp; - - } - dst = s; - if (s > Msize) printf("I have a problem in LinearTransformCoord: s:%ld siez:%ld \n", s, Msize); - -} - - -template -void permuteTensor(Tensor& M, const Tensor& M0, permutation p) { - if (p.size == M0.Dim.rank) { - M.Dim.rank = M0.Dim.rank; - M.Dim.size = M0.Dim.size; - M.Dim.initDim(); - M.initTensor(); - - if (p.size == M0.Dim.rank) p.permute(M.Dim.dim, M0.Dim.dim); - else { - printf("something wrong perm, not the same size as M0.Dim.rank\n"); - exit(1); - } - size_t img = 0; - printf("in permuteTensor:\n"); - M0.Dim.print(); - M.Dim.print(); - setInit se(M.Dim.rank, 0); - int invP[M.Dim.rank]; - inverseArray(invP, p.perm, M.Dim.rank); - for (size_t i = 0; i < M.Dim.size;i++) { - //LinearTransformCoord(img, i, p.perm, M.Dim.size, M.Dim, M0.Dim); - LinearTransformCoord(img, i, invP, M.Dim.size, M.Dim, M0.Dim); - M.elements[img] = M0.elements[i]; - } - } -} - -template -void permuteTensor(Tensor& M, const Tensor& M0, permutation p); - diff --git a/ytest_t/test/src/tensor/tens0neD/tens0neD.h b/ytest_t/test/src/tensor/tens0neD/tens0neD.h deleted file mode 100644 index a901461..0000000 --- a/ytest_t/test/src/tensor/tens0neD/tens0neD.h +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef __TENS_0NE_D_H__ -#define __TENS_0NE_D_H__ - -#include -#include - -#include - -//#include "tensor.h" -//#include "cudatensor.h" -//#include "/home/fanasina/progr_/ptens0neD/dimension/dimension.h" -//#include "/home/fanasina/progr_/ptens0neD/permutation/permutation.h" -//#include "/home/fanasina/progr_/ptens0neD/tensor/tensCuda/tensCuda.h" - -#include "dimension/dimension.h" -#include "permutation/permutation.h" -#include "tensor/tensCuda/tensCuda.h" - -template -struct Tensor { - struct dimension Dim; - T* elements; - Tensor(struct dimension dm = dimension(1)) { - Dim = dm; - //elements = new T[Dim.size]; - elements = (T*)malloc(Dim.size * sizeof(T)); - } - void initTensor() { - //delete[]elements; - //elements = new T[Dim.size]; - if (elements != NULL) - free(elements); - elements = (T*)malloc(Dim.size * sizeof(T)); - } - void initVal(T val); // { for (int i = 0; i < Dim.size; i++) elements[i] = val + 0.001f * i; } - void print(); - Tensor& operator=(const Tensor& M); - Tensor& operator*=(const T& val); - template - friend Tensor& operator*(const Tensor& M0, const Tensor& M1); - - // M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l - // M[x0,x1,x3..xl x{l+1}...xn] X M[xn,x{n-1},x{n-2}...xl y{l+1} ..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) - template - friend void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); - - // M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l - // M[x0,x1,x3..xl x{l+1}..xn] X M[xn,x{n-1},..x{l+1}xl y{l+1}..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) - template - friend void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); - - template - friend void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth, bool strict); - - /*template - friend void cudaTensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); -*/ - - template - friend void tensorProd(Tensor& M, const Tensor& M0, const Tensor& M1); - - template - friend void cudaTensorProd(Tensor& M, const Tensor& M0, const Tensor& M1); - - template - friend void cudaTensorProdEnd(Tensor& M, const Tensor& M0, const Tensor& M1); - - template - friend void permuteTensor(Tensor& M, const Tensor& M0, permutation p); - template - friend void permuteTensorDef(Tensor& M, const Tensor& M0, permutation p); - template - friend bool scanPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest); - - //template - //friend void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p); - -}; - -template -void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz); - - -template -Tensor& operator*(const Tensor& M0, const Tensor& M1); - - -void subArray(int* dst, int* src, int debDst, int finDst, int debSrc); - -void concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1); - -void reverseArray(int* arr, int sz); - -template -void tensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); - -bool checkMatchProdTensor(const dimension& d0, const dimension& d1, int nestingDepth); - -void extractDimNestingDepth(dimension& dM, const dimension& d0, const dimension& d1, int nestingDepth); - -// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l - -//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] -template -void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); - -// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l - -//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] -template -void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); - -#endif - diff --git a/ytest_t/test/src/tensor/tensCuda/d_tensCuda.cu b/ytest_t/test/src/tensor/tensCuda/d_tensCuda.cu deleted file mode 100644 index 09ebcc2..0000000 --- a/ytest_t/test/src/tensor/tensCuda/d_tensCuda.cu +++ /dev/null @@ -1,493 +0,0 @@ -/*#include -#include - -#include "cuda.h" -#include "cuda_runtime.h" -*/ - -#include "d_tensCuda.h" -//#include "index.h" -#include - -//////////////////////////////////////////////////////// - -//1D grid of 1D blocks -__device__ -int d_getGlobalIdx_1D_1D() { - return blockIdx.x * blockDim.x + threadIdx.x; -} -//1D grid of 2D blocks -__device__ -int d_getGlobalIdx_1D_2D() { - return blockIdx.x * blockDim.x * blockDim.y - + threadIdx.y * blockDim.x + threadIdx.x; -} -//1D grid of 3D blocks -__device__ -int d_getGlobalIdx_1D_3D() { - return blockIdx.x * blockDim.x * blockDim.y * blockDim.z - + threadIdx.z * blockDim.y * blockDim.x - + threadIdx.y * blockDim.x + threadIdx.x; -} -//2D grid of 1D blocks -__device__ int d_getGlobalIdx_2D_1D() { - int blockId - = blockIdx.y * gridDim.x + blockIdx.x; - int threadId = blockId * blockDim.x + threadIdx.x; - return threadId; -} -//2D grid of 2D blocks -__device__ -int d_getGlobalIdx_2D_2D() { - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int threadId = blockId * (blockDim.x * blockDim.y) - + (threadIdx.y * blockDim.x) + threadIdx.x; - return threadId; -} -//2D grid of 3D blocks -__device__ -int d_getGlobalIdx_2D_3D() { - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z) - + (threadIdx.z * (blockDim.x * blockDim.y)) - + (threadIdx.y * blockDim.x) + threadIdx.x; - return threadId; -} -//3D grid of 1D blocks -__device__ -int d_getGlobalIdx_3D_1D() { - int blockId = blockIdx.x + blockIdx.y * gridDim.x - + gridDim.x * gridDim.y * blockIdx.z; - int threadId = blockId * blockDim.x + threadIdx.x; - return threadId; -} -//3D grid of 2D blocks -__device__ -int d_getGlobalIdx_3D_2D() { - int blockId = blockIdx.x + blockIdx.y * gridDim.x - + gridDim.x * gridDim.y * blockIdx.z; - int threadId = blockId * (blockDim.x * blockDim.y) - + (threadIdx.y * blockDim.x) + threadIdx.x; - return threadId; -} -//3D grid of 3D blocks -__device__ -int d_getGlobalIdx_3D_3D() { - int blockId = blockIdx.x + blockIdx.y * gridDim.x - + gridDim.x * gridDim.y * blockIdx.z; - int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z) - + (threadIdx.z * (blockDim.x * blockDim.y)) - + (threadIdx.y * blockDim.x) + threadIdx.x; - return threadId; -} - - -/////////////////////////////////////////////////////////////////////////// - - -__device__ void d_LinearToCoordEnd(int* ret, size_t lin, int* dim, int rank, size_t size) { - size_t sm = lin; - size_t pp = size; - for (int i = rank - 1;i > 0; --i) { - pp /= dim[i]; - ret[i] = sm / pp; - sm %= pp; - } - ret[0] = sm; -} - -__device__ size_t d_CoordToLinearEnd(int* coo, int* dim, int rank) { - size_t pp = 1; - size_t sm = 0; - for (int i = 0; i < rank; ++i) { - sm += (coo[i] * pp); - pp *= dim[i]; - } - return sm; -} - -__device__ size_t d_CoordToLinear(int* coo, int* dim, int rank) { - size_t pp = 1; - size_t sm = 0; - for (int i = rank - 1; i >= 0; --i) { - sm += (coo[i] * pp); - pp *= dim[i]; - } - return sm; -} - - - -__device__ void d_LinearToCoord(int* ret, size_t lin, int* dim, int rank, size_t size) { - size_t sm = lin; - size_t pp = size; - for (int i = 0; i < rank - 1; ++i) { - pp /= dim[i]; - ret[i] = sm / pp; - sm %= pp; - } - ret[rank - 1] = sm; -} -/*__device__ void d_LinearToSplitSubrankLimSz(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, size_t size, size_t sizeA) { - size_t sm = lin; - size_t pp = size; - size_t s = 0; - size_t p = sizeA; - int ret;// = new int[rank]; - for (int i = 0; i < rank; ++i) { - pp /= dim[i]; - ret = sm / pp; - p /= dim[i]; - s += ret * p; - - sm %= pp; - if (i == rankA - 1) { - part0 = s; - s = 0; - p = size / sizeA; - } - - } - part1 = s; - -}*/ -__device__ void d_LinearToSplitSubrankLimSz(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, size_t size, size_t sizeA) { - size_t sm = lin; - size_t pp = size; - size_t s = 0; - size_t p = sizeA; - int ret;// = new int[rank]; - int i; - for (i = 0; i < rankA; ++i) { - pp /= dim[i]; - ret = sm / pp; - p /= dim[i]; - s += ret * p; - - sm %= pp; - - } - part0 = s; - s = 0; - p = size / sizeA;//sizeB - for (; i < rank; ++i) { - pp /= dim[i]; - ret = sm / pp; - p /= dim[i]; - s += ret * p; - - sm %= pp; - - } - - part1 = s; - -} -__device__ void d_LinearToSplitSubrankLimSzEnd(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, size_t size, size_t sizeA) { - size_t sm = lin; - size_t pp = size; - size_t s = 0; - size_t p = sizeA; - int ret;// = new int[rank]; - for (int i = rank - 1; i >= 0; --i) { - pp /= dim[i]; - ret = sm / pp; - p /= dim[i]; - s += ret * p; - - sm %= pp; - if (i == rankA) { - part1 = s; - s = 0; - p = size / sizeA; - } - - } - part0 = s; - -} - - -__device__ void d_subArray(int* dst, int* src, int debDst, int finDst, int debSrc) { - for (int i = debDst; i < finDst; i++) { - dst[i] = src[i + debSrc]; - } -} - -template -__global__ void d_prodTensor(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB) { - size_t lin0, lin1; - - size_t i = threadIdx.x + blockIdx.x * blockDim.x; - if (i < size) { - d_LinearToSplitSubrankLimSz(lin0, lin1, i, dimC, rankC, rankA, size, sizeA); - - C[i] = A[lin0] * B[lin1]; - - } -} - -template __global__ void d_prodTensor(float* C, int* dimC, int rankC, size_t size, float* A, int* dimA, int rankA, size_t sizeA, float* B, int* dimB, int rankB); - -template -__global__ void d_prodTensorEnd(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB) { - size_t lin0, lin1; - - size_t i = threadIdx.x + blockIdx.x * blockDim.x; - if (i < size) { - d_LinearToSplitSubrankLimSzEnd(lin0, lin1, i, dimC, rankC, rankA, size, sizeA); - - C[i] = A[lin0] * B[lin1]; - - } -} - -template __global__ void d_prodTensorEnd(float* C, int* dimC, int rankC, size_t size, float* A, int* dimA, int rankA, size_t sizeA, float* B, int* dimB, int rankB); - -__device__ void d_minReverse(int* dim, int& rank, const int* dim0, int rank0, const int* dim1, int rank1, bool& rev) { - if (rank0 > rank1) { - rank = rank1; - for (int i = 0; i < rank1; ++i) dim[i] = dim1[i]; - rev = true; - } - else if (rank0 < rank1) { - rank = rank0; - for (int i = 0; i < rank1; ++i) dim[i] = dim0[i]; - rev = false; - } - else {// rank0 == rank1 - rank = rank0; - for (int i = 0; i < rank0; i++) { - if (dim[i] > dim1[rank1 - 1 - i]) dim[i] = dim1[rank1 - 1 - i]; - else dim[i] = dim0[i]; - } - rev = false; - } -} - -__device__ void d_reverseArray(int* arr, int sz) { - int* tmp; - //tmp = (int*)malloc(sz * sizeof(int)); - - tmp = new int[sz]; - if (tmp == NULL) { - size_t limit = 0; - cudaDeviceGetLimit(&limit, cudaLimitStackSize); - printf("cudaLimitStackSize: %u | %d (%d) %d | \n", (unsigned)limit, blockIdx.x, blockDim.x, threadIdx.x); - cudaDeviceGetLimit(&limit, cudaLimitPrintfFifoSize); - printf("cudaLimitPrintfFifoSize: %u | %d (%d) %d | \n", (unsigned)limit, blockIdx.x, blockDim.x, threadIdx.x); - cudaDeviceGetLimit(&limit, cudaLimitMallocHeapSize); - printf("cudaLimitMallocHeapSize: %u | %d (%d) %d | \n", (unsigned)limit, blockIdx.x, blockDim.x, threadIdx.x); - - printf("error Allocation in tmp = (int*)malloc(sz * sizeof(int)); | | "); - }int i = 0; - for (; i < sz / 2; i++) { - tmp[i] = arr[i]; - arr[i] = arr[sz - 1 - i]; - } - for (; i < sz; i++) { - arr[i] = tmp[sz - 1 - i]; - } - //free(tmp); - delete[]tmp; -} - -__device__ int d_min(int a, int b) { - if (a < b) return a; - return b; -} - -__device__ void d_concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1) { - int i = debDst; - for (int j = debSrc0; j < finSrc0; j++) { - dst[i++] = src0[j]; - } - for (int j = debSrc1; j < finSrc1; j++) { - dst[i++] = src1[j]; - } -} - - - -__device__ void d_ConcatLinearToSplitSubrankLimSz(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, int rankB, size_t size, size_t sizeA, size_t sizeB, int* dM, int dMrank, size_t dMsize, int ind) { - size_t sm = lin; - size_t pp = size; - size_t s = 0; - size_t p = sizeA; - //size_t sz_dA = sizeA / dMsize; - int rankdA = rankA - dMrank; - - int ret; - int i; - for (i = 0; i < rankdA; ++i) { - pp /= dim[i]; - ret = sm / pp; - p /= dim[i]; - s += ret * p; - sm %= pp; - } - size_t s1 = 0; - - size_t pb = sizeB / dMsize; - for (; i < rank; ++i) { - pp /= dim[i]; - ret = sm / pp; - pb /= dim[i]; - s1 += ret * pb; - sm %= pp; - } - - size_t smd = ind; - size_t ppb = dMsize; - //size_t pb = size / sz_dA; - pb = sizeB; - p = dMsize; - for (int j = 0;j < dMrank;j++) { - ppb /= dM[j]; - ret = smd / ppb; - p /= dM[j]; - s += ret * p; - pb /= dM[j]; - s1 += ret * pb; - smd %= ppb; - } - //pp = size / sz_dA; - part0 = s; - part1 = s1; -} - -__device__ void d_SplitLineardToSubrank(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, int rankB, size_t size, size_t sizeA, size_t sizeB, int* dM, int dMrank, size_t dMsize) { - size_t sm = lin; - size_t pp = size; - size_t s = 0; - size_t p = sizeA; - //size_t sz_dA = sizeA / dMsize; - int rankdA = rankA - dMrank; - - int ret; - int i; - for (i = 0; i < rankdA; ++i) { - pp /= dim[i]; - ret = sm / pp; - p /= dim[i]; - s += ret * p; - sm %= pp; - } - size_t s1 = 0; - - size_t pb = sizeB / dMsize; - for (; i < rank; ++i) { - pp /= dim[i]; - ret = sm / pp; - pb /= dim[i]; - s1 += ret * pb; - sm %= pp; - } - part0 = s; - part1 = s1; -} - - -__device__ void d_UnionConcatLinearSplitedSubrank(size_t& part0, size_t& part1, size_t p0, size_t p1, size_t size, size_t sizeB, int* dM, int dMrank, size_t dMsize, int ind) { - size_t s = p0; - size_t s1 = p1; - int ret; - size_t smd = ind; - size_t ppb = dMsize; - //size_t pb = size / sz_dA; - size_t pb = sizeB; - size_t p = dMsize; - for (int j = 0;j < dMrank;j++) { - ppb /= dM[j]; - ret = smd / ppb; - p /= dM[j]; - s += ret * p; - pb /= dM[j]; - s1 += ret * pb; - smd %= ppb; - } - //pp = size / sz_dA; - part0 = s; - part1 = s1; -} - -template -__global__ void d_TensorContractnReverseProd(T* C, int* dimC, int rankC, size_t sizeC, T* A, int rankA, size_t sizeA, T* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize) { - - size_t p0, p1; - size_t lin0, lin1; - - - //size_t i = threadIdx.x + blockIdx.x * blockDim.x; - size_t i = d_getGlobalIdx_1D_1D(); - - if (i < sizeC) { - - d_SplitLineardToSubrank(p0, p1, i, dimC, rankC, rankA, rankB, sizeC, sizeA, sizeB, dM, dMrank, dMsize); - - C[i] = 0; - for (size_t k = 0; k < dMsize; k++) { - - d_UnionConcatLinearSplitedSubrank(lin0, lin1, p0, p1, sizeC, sizeB, dM, dMrank, dMsize, k); - - //d_ConcatLinearToSplitSubrankLimSz(lin0, lin1, i, dimC, rankC, rankA, rankB, sizeC, sizeA, sizeB, dM, dMrank, dMsize, k); - - C[i] += A[lin0] * B[lin1]; - } - } - -} - -template -__global__ void d_TensorContractnReverseProd(float* C, int* dimC, int rankC, size_t size, float* A, int rankA, size_t sizeA, float* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize); - -__device__ void d_LinearTransformCoord(size_t& dst, size_t src, int* inversePerm, size_t sizeA, int rankDst, int rankSrc, int* dDst, int* dSrc) { - size_t sm = src; - size_t pp = sizeA; - size_t s = 0; - size_t p = 1; - int ret;// = new int[rank]; - int i, j; - for (i = 0; i < rankSrc; ++i) { - pp /= dSrc[i]; - ret = sm / pp; - p = 1; - for (j = inversePerm[i] + 1; j < rankDst;j++) { - p *= dDst[j]; - } - s += ret * p; - - sm %= pp; - - } - dst = s; - if (s > sizeA) printf("I have a problem in LinearTransformCoord: s:%ld siez:%ld \n", s, sizeA); - -} - -template -__global__ void d_PermLinearTransformCoord(T* C, int* dimC, int rankC, size_t sizeC, T* A, int* dimA, int rankA, size_t sizeA, int* invPerm) { - - //size_t i = threadIdx.x + blockIdx.x * blockDim.x; - size_t i = d_getGlobalIdx_1D_1D(); - - if (i < sizeC) { - //printf("(float* C, int* dimC, int rankC, size_t size, float* A, int* dimA, int rankA, size_t sizeA, int* invPerm); - diff --git a/ytest_t/test/src/tensor/tensCuda/d_tensCuda.h b/ytest_t/test/src/tensor/tensCuda/d_tensCuda.h deleted file mode 100644 index c2b8870..0000000 --- a/ytest_t/test/src/tensor/tensCuda/d_tensCuda.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef __D_CUDA_TENSOR_H__ -#define __D_CUDA_TENSOR_H__ - -#include "cuda.h" -#include "cuda_runtime.h" - -//#include "cuda_device_runtime_api.h" - -//#include "/home/fanasina/progr_/ptens0neD/tensor/tensCuda/d_tensCuda.h" -#include "tensor/tensCuda/d_tensCuda.h" - - -//#1D grid of 1D blocks -__device__ int d_getGlobalIdx_1D_1D(); -//#1D grid of 2D blocks -__device__ int d_getGlobalIdx_1D_2D(); -//#1D grid of 3D blocks -__device__ int d_getGlobalIdx_1D_3D(); -//#1D grid of 1D blocks -__device__ int d_getGlobalIdx_2D_1D(); -//#1D grid of 2D blocks -__device__ int d_getGlobalIdx_2D_2D(); -//2D grid of 3D blocks -__device__ int d_getGlobalIdx_2D_3D(); -//#1D grid of 1D blocks -__device__ int d_getGlobalIdx_3D_1D(); -//#1D grid of 2D blocks -__device__ int d_getGlobalIdx_3D_2D(); -//#1D grid of 3D blocks -__device__ int d_getGlobalIdx_3D_3D(); - - - -extern cudaError_t cudaDeviceGetLimit(size_t* pValue, enum cudaLimit limit); - - -__device__ void d_LinearToCoordEnd(int* ret, size_t lin, int* dim, int rank, size_t size); - -__device__ size_t d_CoordToLinearEnd(int* coo, int* dim, int rank); - -__device__ size_t d_CoordToLinear(int* coo, int* dim, int rank); - - -__device__ void d_LinearToCoord(int* ret, size_t lin, int* dim, int rank, size_t size); - -__device__ void d_subArray(int* dst, int* src, int debDst, int finDst, int debSrc); - -__device__ void d_minReverse(int* dim, int& rank, const int* dim0, int rank0, const int* dim1, int rank1, bool& rev); - -__device__ void d_reverseArray(int* arr, int sz); - -__device__ int d_min(int a, int b); - -__device__ void d_concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1); - - -template -__global__ void d_prodTensor(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB); - -template -__global__ void d_prodTensorEnd(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB); - -template -__global__ void d_TensorContractnReverseProd(T* C, int* dimC, int rankC, size_t size, T* A, int rankA, size_t sizeA, T* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize); - -template -__global__ void d_PermLinearTransformCoord(T* C, int* dimC, int rankC, size_t sizeC, T* A, int* dimA, int rankA, size_t sizeA, int* invPerm); - -#endif \ No newline at end of file diff --git a/ytest_t/test/src/tensor/tensCuda/tensCuda.cu b/ytest_t/test/src/tensor/tensCuda/tensCuda.cu deleted file mode 100644 index 9d4ec29..0000000 --- a/ytest_t/test/src/tensor/tensCuda/tensCuda.cu +++ /dev/null @@ -1,574 +0,0 @@ -#include -#include - -#include - -#include -#include - - -//#include "/home/fanasina/progr_/ptens0neD/tensor/tens0neD/tens0neD.h" - -//#include "/home/fanasina/progr_/ptens0neD/tensor/tensCuda/tensCuda.h" -#include "tensor/tensCuda/tensCuda.h" - - - - -template -void cudaTensorProd(Tensor& M, const Tensor& M0, const Tensor& M1) { - add(M.Dim, M0.Dim, M1.Dim); - M.initTensor(); - - int* d_imM, * d_imM0, * d_imM1; - cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - T* e, * e0, * e1; - errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - int BLOCKSIZE = 256;//1024; - int DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; - //int DIMBLOCKS = (M.Dim.size) / BLOCKSIZE; - - d_prodTensor << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, e1, d_imM1, M1.Dim.rank); - - errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaFree(e); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(e0); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(e1); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(e1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(d_imM); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(d_imM0); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(d_imM1); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(d_imM1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } -} - - -//template void cudaTensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); -template void cudaTensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); - - -template -void cudaTensorProdEnd(Tensor& M, const Tensor& M0, const Tensor& M1) { - add(M.Dim, M0.Dim, M1.Dim); - M.initTensor(); - - int* d_imM, * d_imM0, * d_imM1; - cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - T* e, * e0, * e1; - errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - size_t BLOCKSIZE = 1024; - size_t DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; - - d_prodTensorEnd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, e1, d_imM1, M1.Dim.rank); - - cudaDeviceSynchronize(); - - errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaFree(e); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(e0); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(e1); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(e1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(d_imM); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(d_imM0); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(d_imM1); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(d_imM1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } -} - - -//template void cudaTensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); -template void cudaTensorProdEnd(Tensor& M, const Tensor& M1, const Tensor& M0); - - -template -void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p) { - if (p.size == M0.Dim.rank) { - M.Dim.rank = M0.Dim.rank; - M.Dim.size = M0.Dim.size; - M.Dim.initDim(); - M.initTensor(); - - p.permute(M.Dim.dim, M0.Dim.dim); - - - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - - cudaEventRecord(start); - - - int* d_imM, * d_imM0; - cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - - T* e, * e0; - errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - - errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - - size_t BLOCKSIZE = 256; //1024;//512; - size_t DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; - dim3 blckSZ, gridSZ; - blckSZ.x = BLOCKSIZE; - gridSZ.x = DIMBLOCKS; - - int* invP, * d_invP; - invP = (int*)malloc(M.Dim.rank * sizeof(int)); - inverseArray(invP, p.perm, M.Dim.rank); - errCu = cudaMalloc((void**)&d_invP, M.Dim.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_invP, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaMemcpy(d_invP, invP, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_invP, invP, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - //printf("size: %ld\n", M.Dim.size); - - //d_prodTensorEnd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank); - //d_TensorContractnReverseProd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); - //d_TensorContractnReverseProd << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); - d_PermLinearTransformCoord << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, d_invP); - //d_PermLinearTransformCoord << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, p.perm); - //cudaDeviceSynchronize(); - - - errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaFree(e); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(e0); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaFree(d_imM); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(d_imM0); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - cudaEventRecord(stop); - cudaEventSynchronize(stop); - float milliseconds = 0; - cudaEventElapsedTime(&milliseconds, start, stop); - printf("ellaps time cuda permute tensor: %f ms\n", milliseconds); - - } -} - -template -void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p); - - -// strict match contract ! if no strict, we take the minimum -template -void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M11, int nestingDepth, bool strict) { - - - int perm[M11.Dim.rank]; - struct Tensor M1; - if (scanPermuteMatchContractTensorfromSrcToDst(perm, M11, M0, nestingDepth)) { - for (int i = 0; i < M11.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(": last perm \n"); - struct permutation p(M11.Dim.rank, perm); - permuteTensor(M1, M11, p); - M1.Dim.print(); - - } - else { - printf("Failed in Deep = %d\n", nestingDepth); - //throw std::check_ProdTensor(" Failed imbrication order in Multiplication matrix "); - - throw std::invalid_argument(" Failed imbrication order in Multiplication matrix "); - exit(1); - } - - - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - - cudaEventRecord(start); - - int len0 = M0.Dim.rank - nestingDepth; - int len1 = M1.Dim.rank - nestingDepth; - - int* tsub0 = new int[len0]; - int* tsub1 = new int[len1]; - int* tDk1 = new int[nestingDepth]; - int* tDk0 = new int[nestingDepth]; - subArray(tsub0, M0.Dim.dim, 0, len0, 0); - subArray(tsub1, M1.Dim.dim, 0, len1, nestingDepth); - subArray(tDk1, M1.Dim.dim, 0, nestingDepth, 0); - subArray(tDk0, M0.Dim.dim, 0, nestingDepth, len0); - - dimension dSub0(len0, tsub0); - dimension dSub1(len1, tsub1); - dimension dM1(nestingDepth, tDk1); - dimension dM0(nestingDepth, tDk0); - dimension dM(dM0); - //bool rev; - //minReverse(dM, dM0, dM1, rev); - //if (rev) reverseArray(dM.dim, dM.rank); - //max(dM, dM0, dM1); - - add(M.Dim, dSub0, dSub1); - M.initTensor(); - - - - int* d_imM, * d_imM0, * d_imM1, * d_imdM; - cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&d_imdM, dM.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_imdM, dM.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMemcpy(d_imdM, dM.dim, dM.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_imdM, dM.dim, dM.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - T* e, * e0, * e1; - errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - size_t BLOCKSIZE = 256; //1024;//512; - size_t DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; - dim3 blckSZ, gridSZ; - blckSZ.x = BLOCKSIZE; - gridSZ.x = DIMBLOCKS; - - - //d_prodTensorEnd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank); - //d_TensorContractnReverseProd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); - //d_TensorContractnReverseProd << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); - d_TensorContractnReverseProd << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, M0.Dim.rank, M0.Dim.size, e1, M1.Dim.rank, M1.Dim.size, d_imdM, dM.rank, dM.size); - - //cudaDeviceSynchronize(); - - - errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - - errCu = cudaFree(e); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(e0); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(e1); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(e1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(d_imM); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(d_imM0); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - errCu = cudaFree(d_imM1); - if (cudaSuccess != errCu) { - printf("device fnc failed cudaFree(d_imM1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); - exit(errCu); - } - cudaEventRecord(stop); - cudaEventSynchronize(stop); - float milliseconds = 0; - cudaEventElapsedTime(&milliseconds, start, stop); - printf("ellaps time cuda prod contract prod: %f ms\n", milliseconds); - - -} - -template -void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth, bool strict); -//template void cudaTensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); - diff --git a/ytest_t/test/src/tensor/tensCuda/tensCuda.h b/ytest_t/test/src/tensor/tensCuda/tensCuda.h deleted file mode 100644 index be5d0d6..0000000 --- a/ytest_t/test/src/tensor/tensCuda/tensCuda.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef __TENS_CUDA_H__ -#define __TENS_CUDA_H__ - -#include -#include - -#include - -//#include "/home/fanasina/progr_/ptens0neD/tensor/tens0neD/tens0neD.h" -#include "tensor/tens0neD/tens0neD.h" - -//#include "/home/fanasina/progr_/ptens0neD/tensor/tensCuda/d_tensCuda.h" -#include "tensor/tensCuda/d_tensCuda.h" -//#include "dimension/dimension.h" - -template -struct Tensor; - -template -void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth, bool strict = true); - -template -void cudaTensorProd(Tensor& M, const Tensor& M0, const Tensor& M1); -template -void cudaTensorProdEnd(Tensor& M, const Tensor& M0, const Tensor& M1); -template -void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p); - - -#endif -