From 8e529cb426990ccc7d214ec09ebb81a1836c8a27 Mon Sep 17 00:00:00 2001 From: fanasina Date: Sun, 16 Jul 2023 00:15:15 +0200 Subject: [PATCH] trying TEMPLATE C --- .vscode/c_cpp_properties.json | 18 + .vscode/settings.json | 8 + Makefile | 53 +++ cmdcreate.sh | 3 + create.sh | 4 + kmemcheck.sh | 4 + krun.sh | 4 + memcheck.sh | 3 + run.sh | 3 + src/coordinate/coordinate.h | 21 + src/dimension/dimension.cpp | 181 ++++++++ src/dimension/dimension.h | 31 ++ src/dimension/dimension.hpp | 90 ++++ src/permutation/permutation.c | 105 +++++ src/permutation/permutation.cpp | 292 +++++++++++++ src/permutation/permutation.h | 25 ++ src/permutation/permutation.hpp | 99 +++++ src/permutation_t/permutation_t.c | 114 +++++ src/permutation_t/permutation_t.h | 52 +++ src/set_theoric/set_theoric.c | 26 ++ src/set_theoric/set_theoric.h | 22 + src/set_theoric_t/set_theoric_t.c | 24 ++ src/set_theoric_t/set_theoric_t.h | 31 ++ src/tensor/tens0neD/tens0neD.cpp | 500 ++++++++++++++++++++++ src/tensor/tens0neD/tens0neD.h | 114 +++++ src/tensor/tensCuda/d_tensCuda.cu | 493 ++++++++++++++++++++++ src/tensor/tensCuda/d_tensCuda.h | 69 +++ src/tensor/tensCuda/tensCuda.cu | 574 +++++++++++++++++++++++++ src/tensor/tensCuda/tensCuda.h | 31 ++ src/tools/tools.c | 144 +++++++ src/tools/tools.h | 48 +++ src/tools_t/tools_t.c | 137 ++++++ src/tools_t/tools_t.h | 92 ++++ test/isgood.cu | 652 +++++++++++++++++++++++++++++ testool | Bin 0 -> 26680 bytes try/CMakeLists.txt | 37 ++ try/saved0work/CMakeLists.txt | 71 ++++ try/try_cmakelists0/CMakeLists.txt | 37 ++ try/try_cmakelists1/CMakeLists.txt | 80 ++++ try/try_cmakelists2/CMakeLists.txt | 10 + 40 files changed, 4302 insertions(+) create mode 100644 .vscode/c_cpp_properties.json create mode 100644 .vscode/settings.json create mode 100644 Makefile create mode 100644 cmdcreate.sh create mode 100644 create.sh create mode 100644 kmemcheck.sh create mode 100644 krun.sh create mode 100644 memcheck.sh create mode 100644 run.sh create mode 100644 src/coordinate/coordinate.h create mode 100644 src/dimension/dimension.cpp create mode 100644 src/dimension/dimension.h create mode 100644 src/dimension/dimension.hpp create mode 100644 src/permutation/permutation.c create mode 100644 src/permutation/permutation.cpp create mode 100644 src/permutation/permutation.h create mode 100644 src/permutation/permutation.hpp create mode 100644 src/permutation_t/permutation_t.c create mode 100644 src/permutation_t/permutation_t.h create mode 100644 src/set_theoric/set_theoric.c create mode 100644 src/set_theoric/set_theoric.h create mode 100644 src/set_theoric_t/set_theoric_t.c create mode 100644 src/set_theoric_t/set_theoric_t.h create mode 100644 src/tensor/tens0neD/tens0neD.cpp create mode 100644 src/tensor/tens0neD/tens0neD.h create mode 100644 src/tensor/tensCuda/d_tensCuda.cu create mode 100644 src/tensor/tensCuda/d_tensCuda.h create mode 100644 src/tensor/tensCuda/tensCuda.cu create mode 100644 src/tensor/tensCuda/tensCuda.h create mode 100644 src/tools/tools.c create mode 100644 src/tools/tools.h create mode 100644 src/tools_t/tools_t.c create mode 100644 src/tools_t/tools_t.h create mode 100644 test/isgood.cu create mode 100644 testool create mode 100644 try/CMakeLists.txt create mode 100644 try/saved0work/CMakeLists.txt create mode 100644 try/try_cmakelists0/CMakeLists.txt create mode 100644 try/try_cmakelists1/CMakeLists.txt create mode 100644 try/try_cmakelists2/CMakeLists.txt diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..fcdf68d --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,18 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "${workspaceFolder}/**", + "/home/fanasina/progr_/ptens0neD/**", + "/usr/include/boost/predef/language" + ], + "defines": [], + "compilerPath": "/usr/bin/clang-13", + "cStandard": "c17", + "cppStandard": "c++14", + "intelliSenseMode": "linux-clang-x64" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..decddbd --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "files.associations": { + "array": "cpp", + "string_view": "cpp", + "initializer_list": "cpp", + "utility": "cpp" + } +} \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1ced397 --- /dev/null +++ b/Makefile @@ -0,0 +1,53 @@ +CC=nvcc +LDFLAGS=-lgtest -lpthread +ROOT_DIR=$(shell pwd) +INCLUDE_DIR=$(ROOT_DIR) +CFLAGS=-I$(INCLUDE_DIR) +SRC_DIR=$(ROOT_DIR)/src +SRC=$(wildcard src/*/*.c*) $(wildcard src/*/*/*.c*) +OBJPP=$(SRC:.cpp=.o) +OBJS=$(OBJPP:.cu=.o) +#HEADS=$(OBJS:.o=.h) +TEST_DIR=$(ROOT_DIR)/test +EXEC=$(TEST_DIR)/isgood +PERMSRC=$(wildcard src/*/*perm*.cpp) +PERMSRC_O=$(PERMSRC:.cpp=.o) +DIMSRC=$(wildcard src/*/*dim*.cpp) +DIMSRC_O=$(DIMSRC:.cpp=.o) +TENSRCPP=$(wildcard src/*/*/tens*.cpp) +TENSRCPP_O=$(TENSRCPP:.cpp=.o) +TENSRCU=$(wildcard src/*/*/tens*.cu) +TENSRCU_O=$(TENSRCU:.cu=.o) +DTENSRCU=$(wildcard src/*/*/d_tens*.cu) +DTENSRCU_O=$(DTENSRCU:.cu=.o) + + +TENSRC=$(TENSRCPP) $(TENSRCU) +all: $(EXEC) + +$(EXEC): $(EXEC).cu $(OBJS) + $(CC) -o $@ $^ -I$(INCLUDE_DIR) $(LDFLAGS) + +$(DIMSRC_O): $(DIMSRC) $(PERMSRC_O) + $(CC) -o $@ -c $< $(CFLAGS) + +$(TENSRCPP_O): $(TENSRCPP) $(DIMSRC_O) + $(CC) -o $@ -c $< $(CFLAGS) + +$(TENSRCU_O): $(TENSRCU) $(DTENSRCU_O) $(DIMSRC_O) + $(CC) -o $@ -c $< $(CFLAGS) + +$(PERMSRC_O): $(PERMSRC) + $(CC) -o $@ -c $< $(CFLAGS) + +$(DTENSRCU_O) : $(DTENSRCU) + $(CC) -o $@ -c $< $(CFLAGS) + +.PHONY: clean mrproper + +clean: + rm -f $(OBJS) + +mrproper: clean + rm -f $(EXEC) + diff --git a/cmdcreate.sh b/cmdcreate.sh new file mode 100644 index 0000000..4cf833f --- /dev/null +++ b/cmdcreate.sh @@ -0,0 +1,3 @@ +#!/bin/bash + + nvcc isgood.cu tensor.cu cudatensor.cu ../permutation/permutation.cpp -o isgood -lgtest -lpthread -g --relocatable-device-code=true diff --git a/create.sh b/create.sh new file mode 100644 index 0000000..9c9ed63 --- /dev/null +++ b/create.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +make "$@" + diff --git a/kmemcheck.sh b/kmemcheck.sh new file mode 100644 index 0000000..c32bafa --- /dev/null +++ b/kmemcheck.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +make "$@" +compute-sanitizer --tool memcheck ./test/isgood diff --git a/krun.sh b/krun.sh new file mode 100644 index 0000000..44f67fb --- /dev/null +++ b/krun.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +make "$@" +./test/isgood diff --git a/memcheck.sh b/memcheck.sh new file mode 100644 index 0000000..6ac3e9a --- /dev/null +++ b/memcheck.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +compute-sanitizer --tool memcheck ./build/isgood diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..acb1995 --- /dev/null +++ b/run.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +./test/isgood diff --git a/src/coordinate/coordinate.h b/src/coordinate/coordinate.h new file mode 100644 index 0000000..2521882 --- /dev/null +++ b/src/coordinate/coordinate.h @@ -0,0 +1,21 @@ +#ifndef __COORDINATE_C__H__ +#define __COORDINATE_C__H__ + +#include "src/dimension/dimension.h" + + +struct coordinate +{ + size_t lin_coo; + unsigned int *coord; + struct dimension *dimension; +}; + +typedef coordinate coordinate; + +void LinearToCoord(struct coordinate *coor); +void CoordToLinear(struct coordinate *coor); + + + +#endif diff --git a/src/dimension/dimension.cpp b/src/dimension/dimension.cpp new file mode 100644 index 0000000..e351cff --- /dev/null +++ b/src/dimension/dimension.cpp @@ -0,0 +1,181 @@ +#include +#include + +#include + +#include +#include + + + +//#include "/home/fanasina/progr_/ptens0neD/src/dimension/dimension.h" + +//#include "/home/fanasina/progr_/ptens0neD/src/permutation/permutation.h" + + +#include "src/dimension/dimension.hpp" + +#include "src/permutation/permutation.hpp" +//#include "permutation.h" + +/*void dimension::initDim(int* arr, bool end = true) { + endian = end; + delete[]dim; + dim = new int[rank]; + size = 1; + for (int i = 0; i < rank; ++i) { + dim[i] = arr[i]; + size *= dim[i]; + } +}*/ + +dimension& dimension::operator=(const dimension& d) { + int oldRank = rank; + rank = d.rank; + size = d.size; + initDim(d.dim, oldRank); + //for (int i = 0; i < rank; i++) dim[i] = d.dim[i]; + return *this; +} + +dimension& dimension::operator+=(const dimension& d) { + int oldRank = rank; + int* t = new int[rank + d.rank]; + for (int i = 0; i < rank; i++) t[i] = dim[i]; + for (int i = 0; i < d.rank; i++) t[rank + i] = d.dim[i]; + size *= d.size; + rank += d.rank; + initDim(t, oldRank); + return *this; +} + +void dimension::LinearToCoord(int* ret, int lin) const { + int begin = 0, end = rank - 1; + int (*iter)(int) = incr; + bool (*cond)(int, int) = isLessThan; + if (endian == false) { + //if (endian) { + begin = rank - 1; end = 0; + iter = decr; cond = isGreatThan; + } + //printf("to coor begin = %d end = %d \n", begin, end); + + int sm = lin; + int pp = size; + for (int i = begin; cond(i, end); i = iter(i)) { + //printf(" i: %d ", i); + pp /= dim[i]; + ret[i] = sm / pp; + sm %= pp; + //printf("sm[%d] = %d , pp=%d ; ", i, sm, pp); + } + ret[end] = sm; +} + +int dimension::CoordToLinear(int* coo) const { + int begin = 0; + int end = rank - 1; + int (*iter)(int); iter = &incr; + bool (*cond)(int, int); cond = &isLessEqThan; + + if (endian) { + begin = rank - 1; end = 0; + iter = &decr; cond = &isGreatEqThan; + } + + int pp = 1; + int sm = 0; + for (int i = begin; cond(i, end); i = iter(i)) { + sm += (coo[i] * pp); + pp *= dim[i]; + } + return sm; +} + +bool isLessEqThan(int a, int b) { return a <= b; } +bool isLessThan(int a, int b) { return a < b; } +bool isGreatEqThan(int a, int b) { return a >= b; } +bool isGreatThan(int a, int b) { return a > b; } +int incr(int i) { return i + 1; } +int decr(int i) { return i - 1; } + + +void add(dimension& d, const dimension& d0, const dimension& d1) { + int oldRank = d.rank; + int* t = new int[d0.rank + d1.rank]; + for (int i = 0; i < d0.rank; i++) t[i] = d0.dim[i]; + for (int i = 0; i < d1.rank; i++) t[d0.rank + i] = d1.dim[i]; + d.rank = d0.rank + d1.rank; + d.initDim(t, oldRank); +} + +void max(dimension& d, const dimension& d0, const dimension& d1) { + if (d0.rank > d1.rank) { + d = d0; + } + else if (d0.rank < d1.rank) { + d = d1; + } + else {// d0.rank = d1.rank + d = d0; + for (int i = 0; i < d.rank; i++) { + if (d.dim[i] < d1.dim[i]) d.dim[i] = d1.dim[i]; + } + } +} + +void min(dimension& d, const dimension& d0, const dimension& d1) { + if (d0.rank > d1.rank) { + d = d1; + } + else if (d0.rank < d1.rank) { + d = d0; + } + else {// d0.rank = d1.rank + d = d0; + for (int i = 0; i < d.rank; i++) { + if (d.dim[i] > d1.dim[i]) d.dim[i] = d1.dim[i]; + } + } +} + +void minReverse(dimension& d, const dimension& d0, const dimension& d1, bool& rev) { + if (d0.rank > d1.rank) { + d = d1; + rev = true; + } + else if (d0.rank < d1.rank) { + d = d0; + rev = false; + } + else {// d0.rank = d1.rank + d = d0; + for (int i = 0; i < d.rank; i++) { + if (d.dim[i] > d1.dim[d.rank - 1 - i]) d.dim[i] = d1.dim[d.rank - 1 - i]; + } + rev = false; + } +} + +void reverseArray(int* arr, int sz) { + int tmp[sz], i = 0; + for (; i < sz / 2; i++) { + tmp[i] = arr[i]; + arr[i] = arr[sz - 1 - i]; + } + for (; i < sz; i++) { + arr[i] = tmp[sz - 1 - i]; + } +} + +void transform(dimension& dDst, const dimension& dSrc, int* perm, int sz) { + dDst = dSrc; + setInit setIn(sz); + if (sz == dSrc.rank) { + if (isPermutation(perm, setIn, sz)) { + for (int i = 0; i < sz; i++) dDst.dim[i] = dSrc.dim[perm[i]]; + } + } +} + + diff --git a/src/dimension/dimension.h b/src/dimension/dimension.h new file mode 100644 index 0000000..d522d2d --- /dev/null +++ b/src/dimension/dimension.h @@ -0,0 +1,31 @@ +#ifndef __DIM__ +#define __DIM__ + +#include +#include + +struct dimension +{ + unsigned int rank; + unsigned int* dim; + size_t size; +}; +typedef dimension dimension; + + +void print_dimension(dimension d); + + +void add(dimension* d, const dimension* d0, const dimension* d1); + +void max(dimension* d, const dimension* d0, const dimension* d1); + +void min(dimension* d, const dimension* d0, const dimension* d1); + +bool minReverse(dimension* d, const dimension* d0, const dimension* d1); + +void transform(dimension* dDst, const dimension* dSrc, int* perm); + + +#endif + diff --git a/src/dimension/dimension.hpp b/src/dimension/dimension.hpp new file mode 100644 index 0000000..cf8bf66 --- /dev/null +++ b/src/dimension/dimension.hpp @@ -0,0 +1,90 @@ +#ifndef __DIMENSION__ +#define __DIMENSION__ + +#include +#include + +#include + +//#include "tensor.h" + +//#include "dimension.h" + +static int iArray1[1] = { 1 }; + + + +struct dimension { + //friend dimension& operator+(const dimension& d, const dimension& d1); + friend void add(dimension& d, const dimension& d0, const dimension& d1); + friend void max(dimension& d, const dimension& d0, const dimension& d1); + friend void min(dimension& d, const dimension& d0, const dimension& d1); + friend void minReverse(dimension& d, const dimension& d0, const dimension& d1, bool& Rev); + friend bool checkMatchProdTensor(dimension& d0, const dimension& d1, int nestingDepth); + friend bool checkMatchProdTensorReverse(dimension& d0, const dimension& d1, int nestingDepth); + friend void extractDimNestingDepth(dimension& dM, const dimension& d0, const dimension& d1, int nestingDepth); + + + int rank; + int* dim; + size_t size; + bool endian; //LitleEndian : true, BigEndian : false, + void initDim(int* arr, int oldRank) { + + //delete[]dim; + //dim = new int[rank]; + if (rank > oldRank) { + free(dim); + dim = (int*)malloc(rank * sizeof(int)); + } + size = 1; + for (int i = 0; i < rank; ++i) { + dim[i] = arr[i]; + size *= dim[i]; + } + } + void initDim(bool end = true) { + endian = end; + //delete[]dim; + //dim = new int[rank]; + + if (dim != NULL) free(dim); + dim = (int*)malloc(rank * sizeof(int)); + } + dimension& operator=(const dimension& d); + dimension& operator+=(const dimension& d); + //dimension& operator*=(const dimension& d); + dimension(int d = 1, int* arr = iArray1, bool end = true) { + endian = end; + rank = d; + //dim = new int[d]; + dim = (int*)malloc(d * sizeof(int)); + initDim(arr, rank); + } + void print() const { printf(" rank: %d\n", rank);for (int i = 0; i < rank; i++) printf(" %d ", dim[i]);printf("\nsize:%ld\n", size); } + void LinearToCoord(int* ret, int lin) const; + int CoordToLinear(int* coo) const; +}; + +bool isLessEqThan(int a, int b); // { return a <= b; } +bool isLessThan(int a, int b); // { return a < b; } +bool isGreatEqThan(int a, int b); // { return a >= b; } +bool isGreatThan(int a, int b); // { return a > b; } +int incr(int i); // { return i + 1; } +int decr(int i); // { return i - 1; } + + + +void add(dimension& d, const dimension& d0, const dimension& d1); + +void max(dimension& d, const dimension& d0, const dimension& d1); + +void min(dimension& d, const dimension& d0, const dimension& d1); + +void minReverse(dimension& d, const dimension& d0, const dimension& d1, bool& rev); + +void transform(dimension& dDst, const dimension& dSrc, int* perm, int sz); + + +#endif + diff --git a/src/permutation/permutation.c b/src/permutation/permutation.c new file mode 100644 index 0000000..484002b --- /dev/null +++ b/src/permutation/permutation.c @@ -0,0 +1,105 @@ +#include "src/permutation/permutation.h" + +permutation* +create_permutation(size_t sz) +{ + if(sz == 0) return NULL; + permutation *p=malloc(sizeof(permutation)); + p->size = sz; + p->perm = malloc(sz*sizeof(unsigned int)); +} + +/*void +copy_array_unsigned(unsigned int *dst, const unsigned int *src, size_t size) +{ + for(size_t i = 0; i < size ; ++i) + dst[i]=src[i]; +}*/ + +void +assign_permutation(permutation *p, unsigned int *arr) +{ + copy_array_unsigned(p->perm, arr, p->size); +} + +bool +is_permutation_set_theoric(const permutation *p) +{ + if(p==NULL) return false; + size_t size = p->size, j; + unsigned int *count_array_i = calloc(size, sizeof(unsigned int)); + if(count_array_i == NULL) + { + printf("can't allocate count_array_i\n"); + return false; + } + for(size_t i = 0; i < size; ++i) + { + j = p->perm[i]; + if((j >= size) || count_array_i[j]) + { + free(count_array_i); + return false; + } + count_array_i[j]++; + } + return true; +} +/* 2,7,4,1 is a permutation of 1,2,4,7 + *it is equivalent of 1,3,2,0 in set_theoric(4)=0,1,2,3 + this function calculate the permutation equivalent in set_theoric + * */ +permutation * +translate_set_theoric(const permutation *p, permutation *translate_p) +{ + if(p==NULL) return NULL; + size_t size = p->size; + permutation *translate_p = create_permutation(size); + unsigned int *temperm = malloc(size*sizeof(unsigned int)); + unsigned int *tmperm = malloc(size*sizeof(unsigned int)); + copy_array_unsigned_int(tmperm, p->perm, p->size); // copy + qsort(tmperm, size, sizeof(unsigned int), compare_unsigned_int); + // tmperm contain p->perm ordered + + size_t cur=0; + for(size_t i=0; i< size; ++i) + { + for(size_t j=0; jperm[j] == tmperm[i]) + { + bool found = false; + for(size_t c=0; cperm[i]=j; + temperm[cur++]=j; + break; + } + } + } + } + free(tmperm); + free(temperm); + return translate_p; +} + +bool +is_permutation(const permutation *p) +{ + bool ret = is_permutation_set_theoric(p); + if(ret == false) + { + permutation *t_p = translate_set_theoric(p); + ret = is_permutation_set_theoric(t_p); + free(t_p); + } + return ret; +} diff --git a/src/permutation/permutation.cpp b/src/permutation/permutation.cpp new file mode 100644 index 0000000..dcf5f0a --- /dev/null +++ b/src/permutation/permutation.cpp @@ -0,0 +1,292 @@ +#include +#include +#include +#include + +//#include "/home/fanasina/progr_/ptens0neD/src/permutation/permutation.h" + +#include "src/permutation/permutation.h" + +int sign(int a) { + if (a < 0) return -1; + return 1; +} + +bool isPermutation(int* perm, setInit se, int sz) { + std::vector tmp; + for (int i = 0; i < sz; i++) { + for (int j = 0; j < sz; j++) { + if (perm[i] == se.setinit[j]) { + if (find(tmp.begin(), tmp.end(), j) == tmp.end()) { + tmp.push_back(j); + break; + } + } + } + } + return tmp.size() == sz; + +} + +int permutation::signature() { + int ss = 1; + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + ss *= sign(perm[j] - perm[i]);// * sign(j - i); + } + } + return ss; +} + +int signature(int* tab, int sz) { + int ss = 1; + for (int i = 0; i < sz; i++) { + for (int j = i + 1; j < sz; j++) { + ss *= sign(tab[j] - tab[i]) * sign(j - i); + } + } + return ss; +} + +template +void permutation::permute(T* dst, T* src) { + for (int i = 0; i < size;i++) { + dst[i] = src[perm[i]]; + } +} + +template +void permutation::permute(int* dst, int* src); +template +void permutation::permute(float* dst, float* src); + +// complexité sz*(sz+1)/2 +size_t TabToPlaceAlgo(int* tb, int sz) { + int cnt = 0; + int pl; + + int* tPlace = new int[sz]; + + for (int i = sz - 1; i >= 0; i--) { + cnt++; + pl = 0; + for (int j = i + 1; j < sz; j++) { + cnt++; + if (tb[j] < tb[i]) { + pl++; + } + if (pl == tb[i]) break; + } + tPlace[tb[i]] = pl; + + } + size_t q = 0; + for (int i = 0; i < sz;i++) { + cnt++; + //printf("tPlace[%d] == %d et tb[%d] == %d\n", i, tPlace[i], i, tb[i]); + q = (i + 1) * q + tPlace[i]; + } + //printf("algo cnt = %d ", cnt); + return q; +} + +// complexité sz*(sz+1)/2 +size_t TabToPlaceOpt1(int* tb, int sz) { + int cnt = 0; + int mx; + int* tPlace = new int[sz]; + for (int i = sz - 1; i >= 0; i--) { + cnt++; + if (i == sz - 1) { + mx = tb[i]; + tPlace[mx] = 0; + } + else if (mx > tb[i]) { + int pli = 0; // si c est le plus à droite 0 si pas de superieur à lui, on incremente si on trouve plus petit + for (int j = sz - 1; j > i; j--) { + cnt++; + if (tb[i] > tb[j]) { + pli++; + } + else if (tb[i] == tb[j]) { + //return -1; // something wrong + throw "something wrong here, tb[i]==tb[j]"; + } + } + tPlace[tb[i]] = pli; + } + else if (mx < tb[i]) { + mx = tb[i]; + tPlace[mx] = sz - 1 - i; + } + else { + //return -1; // something wrong + throw "something wrong here, tb[i]==mx"; + + } + } + size_t q = 0; + for (int i = 0; i < sz; i++) { + cnt++; + //printf("tab tPlace[%d] == %d et tb[%d] == %d [ q=%d, cnt = %d\n", i, tPlace[i], i, tb[i], q, cnt); + q = (i + 1) * q + tPlace[i]; + } + //printf("Opt cnt = %d ", cnt); + + return q; +} + +// complexité sz*(sz+1) +size_t TabToPlaceNotab(int* tb, int sz) { + int cnt = 0; + int mx = sz - 1; + size_t q = 0; + int pl; + for (int i = 0; i < sz; i++) { + cnt++; + int j; + for (j = 0; j < sz;j++) { + cnt++; + if (tb[j] == i) break; + } + pl = 0; + j++; + for (;j < sz;j++) { + cnt++; + if (tb[j] < i) { + pl++; + } + if (pl == i) break; + } + q = (i + 1) * q + pl; + //q = (sz - tb[i]) * q + pl; + //printf("notab tPlace[tb[%d]] == %d et tb[%d] == %d [ q=%d, cnt = %d\n", i, pl, i, tb[i], q, cnt); + + } + //printf(" notab cnt = %d ", cnt); + + return q; +} + + + + + +//complexité sz*sz/2 +void PlaceToTab(int* tb, size_t pl, int sz) { + int cnt = 0; + size_t a = pl; + int pltbi; + int size = 1; + // s'assurer que tb soit null + for (int i = 0;i < sz;i++) tb[i] = 0; + + for (int i = sz - 1; i >= 0; i--) { + cnt++; + pltbi = a % (i + 1); + a /= (i + 1); + if (i == sz - 1) { + tb[sz - 1 - pltbi] = i; + } + else { + int lt = 0, j = sz - 1; + while (lt < pltbi && j >= 0) { + cnt++; + if (tb[j--] < i) { + lt++; + } + } + while (tb[j] > i) { + cnt++; + j--; + } + tb[j] = i; + + } + } + //printf("cnt PlaceToTab :%d ", cnt); +} + +size_t factorial(int n) { + size_t ret = 1; + for (size_t i = 2; i <= n; i++) { + ret *= i; + } + return ret; +} + +// src1 o src0 = dst; dst(i) = src1(src0(i)) +void compose(int* dst, int* src0, int* src1, int sz) { + for (int i = 0; i < sz; i++) { + dst[i] = src1[src0[i]]; + } +} +// src1 o src0 = dst; dst(i) = src1(src0(i)) +void compose(size_t& rdst, size_t psrc0, size_t psrc1, int sz) { + int dst[sz], src0[sz], src1[sz]; + PlaceToTab(src0, psrc0, sz); + PlaceToTab(src1, psrc1, sz); + for (int i = 0; i < sz; i++) { + dst[i] = src1[src0[i]]; + } + rdst = TabToPlaceOpt1(dst, sz); +} +/* +template +void transform(T* dst, T* src, int* perm, int sz) { + for (int i = 0; i < sz; i++) { + dst[i] = src[perm[i]]; + } +} +template +void transform(T* dst, T* src, size_t pl, int sz) { + int perm[sz]; + PlaceToTab(perm, pl, sz); + for (int i = 0; i < sz; i++) { + dst[i] = src[perm[i]]; + } +} +*/ +void permuteArray(int* dst, int* src, int* perm, int sz) { + for (int i = 0; i < sz; i++) { + dst[i] = src[perm[i]]; + } +} + +void inverseArray(int* dst, int* src, int sz) { + for (int i = 0; i < sz; i++) { + dst[src[i]] = i; + } +} + +// seek?/ seek o src = seek(src)=dst => seek = dst o inv(src) +void permCorrespondance(int* sk, int* dst, int* src, int sz) { + int inv[sz]; + inverseArray(inv, src, sz); + compose(sk, dst, inv, sz); +} + + +// SRC o transf = DST, SRC:{a,b,c,d,g,f} o transf = DST:{g,b,d,f,c,a} +// SRC[0]=a, SRC[1]=b, SRC[2]=c SRC[3]=d SRC[4]=g SRC[5]=f +// DST[0]=g=SRC[4] DST[1]=b=SRC[1] DST[2]=d=SRC[3] DST[3]=f=SRC[5] DST[4]=c=SRC[2] DST[5]=a=SRC[1] +// trans[0]=4 trans[1]=1 trans[2]=3 trans[3]=5 trans[4]=2 trans[5]=0 +// (*cmp) (a,b) = 0 if a==b, -1 if ab +template +void CorrespondacePerm(T* src, T* dst, int* transf, int sz, int (*cmp)(T, T)) { + int tmp[sz]; + std::vector tmpV; + int curt = 0; + for (int i = 0; i < sz; i++) { + for (int j = 0; j < sz; j++) { + if (cmp(dst[i], src[j]) == 0) { + if (std::find(tmpV.begin(), tmpV.end(), j) == tmpV.end()) {// not found + transf[i] = j; + tmpV.push_back(j); + break; + } + } + } + } +} +template void CorrespondacePerm(char* src, char* dst, int* transf, int sz, int (*cmp)(char, char)); \ No newline at end of file diff --git a/src/permutation/permutation.h b/src/permutation/permutation.h new file mode 100644 index 0000000..5a0c55a --- /dev/null +++ b/src/permutation/permutation.h @@ -0,0 +1,25 @@ +#ifndef __PERMUTATION_C_H__ +#define __PERMUTATION_C_H__ + +#include "src/set_theoric/set_theoric.h" + +/* struct of permutation of unsigned int array, not necessarly set_theoric + * + * */ +struct permutation +{ + size_t size; + unsigned int *perm; +}; + +typedef struct permutation permutation; + +permutation * create_permutation(size_t sz); +void assign_permutation(permutation *p, unsigned int *arr); + +bool is_permutation_set_theoric(const permutation *p); + +// more general! need translation and use is_permutation_set_theoric +bool is_permutation(const permutation *p); + +#endif /*__PERMUTATION_C_H__*/ diff --git a/src/permutation/permutation.hpp b/src/permutation/permutation.hpp new file mode 100644 index 0000000..29861b6 --- /dev/null +++ b/src/permutation/permutation.hpp @@ -0,0 +1,99 @@ + +#ifndef __PERMUTATION_H__ +#define __PERMUTATION_H__ + +#include + + +struct setInit { + int size; + int* setinit; + setInit(int sz = 1, int beg = 0) { + size = sz; + setinit = new int[sz]; + for (int i = 0; i < sz; i++) setinit[i] = beg + i; + } +}; + +struct permutation { + int size;// type + int rang; //place;//rang; + int* perm; + permutation(int sz, bool b) { + size = sz; + perm = new int[size]; + } + permutation(int sz = 1, int* tb = { 0 }) { + size = sz; + perm = new int[size]; + for (int i = 0; i < size; i++) perm[i] = tb[i]; + } + //int TabToPlace(int* tb , int sz ); + //void PlaceToTab(int* tb , int pl , int sz); + int signature(); + template + void permute(T* dst, T* src); +}; + +bool isPermutation(int* perm, setInit se, int sz); + +int signature(int* tab, int sz); + +// complexité sz*(sz+1)/2 +size_t TabToPlaceAlgo(int* tb, int sz); + +// complexité sz*(sz+1)/2 +size_t TabToPlaceOpt1(int* tb, int sz); + +// complexité sz*(sz+1) +size_t TabToPlaceNotab(int* tb, int sz); + +//complexité sz*sz/2 +void PlaceToTab(int* tb, size_t pl, int sz); + +size_t factorial(int n); + +// src1 o src0 = dst; dst(i) = src1(src0(i)) +void compose(int* dst, int* src0, int* src1, int sz); +// src1 o src0 = dst; dst(i) = src1(src0(i)) +void compose(size_t& rdst, size_t psrc0, size_t psrc1, int sz); + +/*template +void transform(T* dst, T* src, int* perm, int sz); +template +void transform(T* dst, T* src, size_t pl, int sz); + +template +void transform(T* dst, T* src, int* perm, int sz) { + for (int i = 0; i < sz; i++) { + dst[i] = src[perm[i]]; + } +} +template +void transform(T* dst, T* src, size_t pl, int sz) { + int perm[sz]; + PlaceToTab(perm, pl, sz); + for (int i = 0; i < sz; i++) { + dst[i] = src[perm[i]]; + } +}*/ + +void permuteArray(int* dst, int* src, int* perm, int sz); +void inverseArray(int* dst, int* src, int sz); + +// seek?/ seek o src = seek(src)=dst => seek = dst o inv(src) +void permCorrespondance(int* sk, int* dst, int* src, int sz); + +// SRC o transf = DST, SRC:{a,b,c,d,g,f} o transf = DST:{g,b,d,f,c,a} +// SRC[0]=a, SRC[1]=b, SRC[2]=c SRC[3]=d SRC[4]=g SRC[5]=f +// DST[0]=g=SRC[4] DST[1]=b=SRC[1] DST[2]=d=SRC[3] DST[3]=f=SRC[5] DST[4]=c=SRC[2] DST[5]=a=SRC[0] +// trans[0]=4 trans[1]=1 trans[2]=3 trans[3]=5 trans[4]=2 trans[5]=0 +// (*cmp) (a,b) = 0 if a==b, -1 if ab +template +void CorrespondacePerm(T* src, T* dst, int* transf, int sz, int (*cmp)(T, T)); + + + +#endif + + diff --git a/src/permutation_t/permutation_t.c b/src/permutation_t/permutation_t.c new file mode 100644 index 0000000..457e68b --- /dev/null +++ b/src/permutation_t/permutation_t.c @@ -0,0 +1,114 @@ +#include "src/permutation_t/permutation_t.h" + +#define CREATE_PERMUTATION(type, size)\ + type * CREATE_PERMUTATION_##type(size_t size){\ + if (sz == 0) return NULL;\ + PERMUTATION_##type *p = malloc(sizeof(PERMUTATION_##type));\ + p->size = size;\ + p->perm = malloc(size * sizeof(type));\ + return p; }\ + +permutation* +create_permutation(size_t sz) +{ + if(sz == 0) return NULL; + permutation *p=malloc(sizeof(permutation)); + p->size = sz; + p->perm = malloc(sz*sizeof(unsigned int)); + return p; +} + +/*void +copy_array_unsigned(unsigned int *dst, const unsigned int *src, size_t size) +{ + for(size_t i = 0; i < size ; ++i) + dst[i]=src[i]; +}*/ + +void +assign_permutation(permutation *p, unsigned int *arr) +{ + copy_array_unsigned(p->perm, arr, p->size); +} + +bool +is_permutation_set_theoric(const permutation *p) +{ + if(p==NULL) return false; + size_t size = p->size, j; + unsigned int *count_array_i = calloc(size, sizeof(unsigned int)); + if(count_array_i == NULL) + { + printf("can't allocate count_array_i\n"); + return false; + } + for(size_t i = 0; i < size; ++i) + { + j = p->perm[i]; + if((j >= size) || count_array_i[j]) + { + free(count_array_i); + return false; + } + count_array_i[j]++; + } + return true; +} +/* 2,7,4,1 is a permutation of 1,2,4,7 + *it is equivalent of 1,3,2,0 in set_theoric(4)=0,1,2,3 + this function calculate the permutation equivalent in set_theoric + * */ +permutation * +translate_set_theoric(const permutation *p, permutation *translate_p) +{ + if(p==NULL) return NULL; + size_t size = p->size; + permutation *translate_p = create_permutation(size); + unsigned int *temperm = malloc(size*sizeof(unsigned int)); + unsigned int *tmperm = malloc(size*sizeof(unsigned int)); + copy_array_unsigned_int(tmperm, p->perm, p->size); // copy + qsort(tmperm, size, sizeof(unsigned int), compare_unsigned_int); + // tmperm contain p->perm ordered + + size_t cur=0; + for(size_t i=0; i< size; ++i) + { + for(size_t j=0; jperm[j] == tmperm[i]) + { + bool found = false; + for(size_t c=0; cperm[i]=j; + temperm[cur++]=j; + break; + } + } + } + } + free(tmperm); + free(temperm); + return translate_p; +} + +bool +is_permutation(const permutation *p) +{ + bool ret = is_permutation_set_theoric(p); + if(ret == false) + { + permutation *t_p = translate_set_theoric(p); + ret = is_permutation_set_theoric(t_p); + free(t_p); + } + return ret; +} diff --git a/src/permutation_t/permutation_t.h b/src/permutation_t/permutation_t.h new file mode 100644 index 0000000..862b2e8 --- /dev/null +++ b/src/permutation_t/permutation_t.h @@ -0,0 +1,52 @@ +#ifndef __PERMUTATION_T_C_H__ +#define __PERMUTATION_T_C_H__ + +#include "src/tools_t/tools_t.h" +#include "src/set_theoric_t/set_theoric_t.h" + +#define STRUCT_PERMUTATION(type)\ + struct PERMUTATION_##type{\ + size_t size;\ + type * perm; }; +STRUCT_PERMUTATION(TYPE_CHAR) +STRUCT_PERMUTATION(TYPE_U_CHAR) +STRUCT_PERMUTATION(TYPE_INT) +STRUCT_PERMUTATION(TYPE_U_INT) +STRUCT_PERMUTATION(TYPE_L_INT) +STRUCT_PERMUTATION(TYPE_U_L_INT) +STRUCT_PERMUTATION(TYPE_FLOAT) +STRUCT_PERMUTATION(TYPE_DOUBLE) +STRUCT_PERMUTATION(TYPE_L_DOUBLE) +STRUCT_PERMUTATION(TYPE_STRING) + +typedef struct PERMUTATION_TYPE_CHAR PERMUTATION_TYPE_CHAR; +typedef struct PERMUTATION_TYPE_U_CHAR PERMUTATION_TYPE_U_CHAR; +typedef struct PERMUTATION_TYPE_INT PERMUTATION_TYPE_INT; +typedef struct PERMUTATION_TYPE_U_INT PERMUTATION_TYPE_U_INT; +typedef struct PERMUTATION_TYPE_L_INT PERMUTATION_TYPE_L_INT; +typedef struct PERMUTATION_TYPE_U_L_INT PERMUTATION_TYPE_U_L_INT; +typedef struct PERMUTATION_TYPE_FLOAT PERMUTATION_TYPE_FLOAT; +typedef struct PERMUTATION_TYPE_DOUBLE PERMUTATION_TYPE_DOUBLE; +typedef struct PERMUTATION_TYPE_L_DOUBLE PERMUTATION_TYPE_L_DOUBLE; +typedef struct PERMUTATION_TYPE_STRING PERMUTATION_TYPE_STRING; + +/* struct of permutation of unsigned int array, not necessarly set_theoric + * + * */ +struct permutation +{ + size_t size; + unsigned int *perm; +}; + +typedef struct permutation permutation; + +permutation * create_permutation(size_t sz); +void assign_permutation(permutation *p, unsigned int *arr); + +bool is_permutation_set_theoric(const permutation *p); + +// more general! need translation and use is_permutation_set_theoric +bool is_permutation(const permutation *p); + +#endif /*__PERMUTATION_T_C_H__*/ diff --git a/src/set_theoric/set_theoric.c b/src/set_theoric/set_theoric.c new file mode 100644 index 0000000..69b8174 --- /dev/null +++ b/src/set_theoric/set_theoric.c @@ -0,0 +1,26 @@ + +#include "src/set_theoric/set_theoric.h" + +set_theoric * create_set_theoric(unsigned int id) +{ + if(id == 0) return NULL; + + set_theoric *ret_set = malloc(sizeof(set_theoric)); + ret_set.set=malloc(id*sizeof(unsigned int)); + ret_set.id = id; + for(int i = 0; i < id; ++i) + ret_set.set[i] = i; + + return ret_set; + +} + +bool is_set_theoric(set_theoric *st) +{ + if(st == NULL) return true; + + for(int i = 0; i < st->id; ++i) + if(st->set[i] != i) return false; + + return true; +} diff --git a/src/set_theoric/set_theoric.h b/src/set_theoric/set_theoric.h new file mode 100644 index 0000000..1435e5e --- /dev/null +++ b/src/set_theoric/set_theoric.h @@ -0,0 +1,22 @@ +#ifndef __SET_THEORIC_C__H +#define __SET_THEORIC_C__H + +#include + +#include "src/tools/tools.h" + +struct set_theoric +{ + unsigned int id; + unsigned int *set; +}; + +typedef set_theoric set_theoric; + +set_theoric * create_set_theoric(unsigned int id); + +bool is_set_theoric(set_theoric *st); + + + +#endif /*__SET_THEORIC_C__H*/ diff --git a/src/set_theoric_t/set_theoric_t.c b/src/set_theoric_t/set_theoric_t.c new file mode 100644 index 0000000..6bab797 --- /dev/null +++ b/src/set_theoric_t/set_theoric_t.c @@ -0,0 +1,24 @@ + +#include "src/set_theoric_t/set_theoric_t.h" + +#define CREATE_SET_THEORIC(type, id)\ + type * CREATE_SET_THEORIC_##type(type id){\ + if(id == 0) return NULL;\ + SET_THEORIC_##type *ret_set = malloc(sizeof(SET_THEORIC_##type));\ + ret_set->id = id;\ + ret_set->set = malloc(id*sizeof(type));\ + for(type i = 0; i < id; ++i) ret_set->set[i]=i;\ + return ret_set; } +CREATE_SET_THEORIC(TYPE_U_CHAR) +CREATE_SET_THEORIC(TYPE_U_INT) +CREATE_SET_THEORIC(TYPE_U_LONG_INT) + +#define IS_SET_THEORIC(type, st)\ + bool IS_SET_THEORIC_##type(type *st){\ + for(type i = 0; i < st->id; ++i){\ + if(st->set[i] != i) return false;\ + return true; } +IS_SET_THEORIC(TYPE_U_CHAR,st) +IS_SET_THEORIC(TYPE_U_INT,st) +IS_SET_THEORIC(TYPE_U_LONG_INT,st) + diff --git a/src/set_theoric_t/set_theoric_t.h b/src/set_theoric_t/set_theoric_t.h new file mode 100644 index 0000000..bcfbd29 --- /dev/null +++ b/src/set_theoric_t/set_theoric_t.h @@ -0,0 +1,31 @@ +#ifndef __SET_THEORIC_T_C__H +#define __SET_THEORIC_T_C__H + +#include + +#include "src/tools_t/tools_t.h" + +#define STRUCT_SET_THEORIC(type)\ + struct SET_THEORIC_##type{\ + type id;\ + type *set;}; + +STRUCT_SET_THEORIC(TYPE_U_CHAR) +STRUCT_SET_THEORIC(TYPE_U_INT) +STRUCT_SET_THEORIC(TYPE_U_LONG_INT) + +typedef struct SET_THEORIC_TYPE_U_CHAR SET_THEORIC_TYPE_U_CHAR; +typedef struct SET_THEORIC_TYPE_U_INT SET_THEORIC_TYPE_U_INT; +typedef struct SET_THEORIC_TYPE_U_LONG_INT SET_THEORIC_TYPE_U_LONG_INT; + +SET_THEORIC_TYPE_U_CHAR * CREATE_SET_THEORIC_TYPE_U_CHAR(TYPE_U_CHAR); +SET_THEORIC_TYPE_U_INT * CREATE_SET_THEORIC_TYPE_U_INT(TYPE_U_INT); +SET_THEORIC_TYPE_U_LONG_INT * CREATE_SET_THEORIC_TYPE_U_LONG_INT(TYPE_U_LONG_INT); + +bool IS_SET_THEORIC_TYPE_U_CHAR(SET_THEORIC_TYPE_U_CHAR *st); +bool IS_SET_THEORIC_TYPE_U_INT(SET_THEORIC_TYPE_U_INT *st); +bool IS_SET_THEORIC_TYPE_U_LONG_INT(SET_THEORIC_TYPE_U_LONG_INT *st); + + + +#endif /*__SET_THEORIC_T_C__H*/ diff --git a/src/tensor/tens0neD/tens0neD.cpp b/src/tensor/tens0neD/tens0neD.cpp new file mode 100644 index 0000000..058e846 --- /dev/null +++ b/src/tensor/tens0neD/tens0neD.cpp @@ -0,0 +1,500 @@ +#include +#include + +#include + +#include +#include + + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tens0neD/tens0neD.h" +#include "src/tensor/tens0neD/tens0neD.h" +//#include "include/tens0neD.h" + + +//#include "cudatensor.h" +//#include "/home/fanasina/progr_/ptens0neD/src/permutation/permutation.h" +#include "src/permutation/permutation.h" + + +template +void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz) { + transform(Dst.Dim, Src.Dim, perm, sz); + dimension dsrc = Src.Dim; + dimension ddst = Dst.Dim; + int coor[dsrc.rank]; + int dcoor[ddst.rank], ldst; + for (int i = 0; i < Src.Dim.size; i++) { + dsrc.LinearToCoord(coor, i); + for (int j = 0; j < dsrc.rank; j++) dcoor[j] = coor[perm[j]]; + ldst = ddst.CoordToLinear(dcoor); + Dst.elements[ldst] = Src.elements[i]; + } +} + +template void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz); +template void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz); + +template +Tensor& Tensor::operator=(const Tensor& M) { + Dim = M.Dim; + for (int i = 0; i < Dim.size; ++i) elements[i] = M.elements[i]; + return *this; +} + +template +Tensor& Tensor::operator*=(const T& val) { + //for (int i = 0; i < rank.size; ++i) elements[i] *= val; + return *this; +} + +template +Tensor& operator*(const Tensor& M0, const Tensor& M1) { + struct dimension d; add(d, M0.Dim, M1.Dim); + Tensor Mret(d); + for (int i = 0; i < M0.Dim.size; ++i) Mret.elements[i] = M0.elements[i]; + Mret.Dim += M0.Dim; + return Mret; +} + + +void subArray(int* dst, int* src, int debDst, int finDst, int debSrc) { + for (int i = debDst; i < finDst; i++) { + dst[i] = src[i + debSrc]; + } +} + +void concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1) { + int i = debDst; + for (int j = debSrc0; j < finSrc0; j++) { + dst[i++] = src0[j]; + } + for (int j = debSrc1; j < finSrc1; j++) { + dst[i++] = src1[j]; + } +} + +template +void Tensor::initVal(T val) { + int* coord = new int[Dim.rank]; + T pp, mult = 0.5; + for (int i = 0; i < Dim.size; i++) { + Dim.LinearToCoord(coord, i); + elements[i] = val; + pp = mult; + for (int j = 0; j < Dim.rank; j++) { + elements[i] += (coord[j] + 1) * pp; + pp *= mult; + } + } +} +template +void Tensor::initVal(float val); +template +void Tensor::initVal(double val); + +template +void Tensor::print() { + Dim.print(); + int* coord = new int[Dim.rank]; + int begin = 0, end = Dim.rank - 1; + //int beginInv = Dim.rank - 1, endInv = 0; + int (*iter)(int) = incr; + //int (*iterInv)(int) = decr; + bool (*cond)(int, int) = isLessEqThan; + //bool (*condInv)(int, int) = isGreatEqThan; + if (Dim.endian == false) { + begin = Dim.rank - 1; end = 0; + //beginInv = 0; endInv = Dim.rank - 1; + iter = decr; cond = isGreatEqThan; + //iterInv = incr; condInv = isLessEqThan; + } + for (int i = 0; i < Dim.size; i++) { + Dim.LinearToCoord(coord, i); + //if (coord[Dim.rank - 1] == 0) { + if (coord[begin] == 0) { + for (int j = begin; cond(j, end); j = iter(j)) { + //for (int j = Dim.rank - 1; j >= 0; j--) { + if (coord[j] == 0) { + printf("("); + } + else break; + } + } + + //printf(" ");for (int j = 0; j < Dim.rank; j++) printf("[%d]", coord[j]); printf(" "); + //printf(" "); for (int j = beginInv; condInv(j, endInv); j = iterInv(j)) printf("[%d]", coord[j]); printf(" "); + //printf(" "); for (int k = beginInv; condInv(k, endInv); k = iterInv(k)) { printf("[%d]", coord[k]); } printf(" "); + + printf(" %.6f ", elements[i]); + + //if (coord[Dim.rank - 1] == Dim.dim[Dim.rank - 1] - 1) { + if (coord[begin] == Dim.dim[begin] - 1) { + for (int j = begin; cond(j, end); j = iter(j)) { + //for (int j = Dim.rank - 1; j >= 0; j--) { + if (coord[j] == Dim.dim[j] - 1) { + printf(")"); + } + else break; + } + } + } + + printf("\n"); +} +template +void Tensor::print(); +template +void Tensor::print(); + +template +void tensorProd(Tensor& M, const Tensor& M0, const Tensor& M1) { + add(M.Dim, M0.Dim, M1.Dim); + M.initTensor(); + int* coord = new int[M.Dim.rank]; + int* coord0 = new int[M0.Dim.rank], lin0; + int* coord1 = new int[M1.Dim.rank], lin1; + for (int i = 0; i < M.Dim.size; i++) { + M.Dim.LinearToCoord(coord, i); + subArray(coord0, coord, 0, M0.Dim.rank, 0); + subArray(coord1, coord, 0, M1.Dim.rank, M0.Dim.rank); + lin0 = (M0.Dim).CoordToLinear(coord0); + lin1 = (M1.Dim).CoordToLinear(coord1); + M.elements[i] = M0.elements[lin0] * M1.elements[lin1]; + } +} + +template +void tensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); +template +void tensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); + + + + +bool checkMatchProdTensor(const dimension& d0, const dimension& d1, int nestingDepth) { + if (d0.rank <= nestingDepth || d1.rank <= nestingDepth) return false; + for (int i = 0; i < nestingDepth;i++) { + if (d1.dim[i] != d0.dim[d0.rank - nestingDepth + i]) return false; + } + return true; +} + +bool checkMatchProdTensorReverse(const dimension& d0, const dimension& d1, int nestingDepth) { + if (d0.rank <= nestingDepth || d1.rank <= nestingDepth) return false; + for (int i = 0; i < nestingDepth;i++) { + if (d1.dim[i] != d0.dim[d0.rank - 1 - i]) return false; + } + return true; +} + +void extractDimNestingDepth(dimension& dM, const dimension& d0, const dimension& d1, int nestingDepth) { + int len0 = d0.rank - nestingDepth; + int len1 = d1.rank - nestingDepth; + + int* tsub0 = new int[len0]; + int* tsub1 = new int[len1]; + int* tDk1 = new int[nestingDepth]; + int* tDk0 = new int[nestingDepth]; + subArray(tsub0, d0.dim, 0, len0, 0); + subArray(tsub1, d1.dim, 0, len1, nestingDepth); + subArray(tDk1, d1.dim, 0, nestingDepth, 0); + subArray(tDk0, d0.dim, 0, nestingDepth, len0); + dimension dSub0(len0, tsub0); + dimension dSub1(len1, tsub1); + dimension dM1(nestingDepth, tDk1); + dimension dM0(nestingDepth, tDk0); + + min(dM, dM0, dM1); + //max(dM, dM0, dM1); +} + +// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l +// M[x0,x1,x3..xl x{l+1}...xn] X M[xn,x{n-1},x{n-2}...xl y{l+1} ..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) +//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] +template +void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth) { + if (!checkMatchProdTensor(M0.Dim, M1.Dim, nestingDepth)) { + printf("Deep = %d\n", nestingDepth); + //throw std::check_ProdTensor(" Failed imbrication order in Multiplication matrix "); + + //throw std::invalid_argument(" Failed imbrication order in Multiplication matrix "); + } + + int len0 = M0.Dim.rank - nestingDepth; + int len1 = M1.Dim.rank - nestingDepth; + + int* tsub0 = new int[len0]; + int* tsub1 = new int[len1]; + int* tDk1 = new int[nestingDepth]; + int* tDk0 = new int[nestingDepth]; + subArray(tsub0, M0.Dim.dim, 0, len0, 0); + subArray(tsub1, M1.Dim.dim, 0, len1, nestingDepth); + subArray(tDk1, M1.Dim.dim, 0, nestingDepth, 0); + subArray(tDk0, M0.Dim.dim, 0, nestingDepth, len0); + + dimension dSub0(len0, tsub0); + dimension dSub1(len1, tsub1); + dimension dM1(nestingDepth, tDk1); + dimension dM0(nestingDepth, tDk0); + dimension dM; + min(dM, dM0, dM1); + //max(dM, dM0, dM1); + + add(M.Dim, dSub0, dSub1); + M.initTensor(); + + int* coord = new int[M.Dim.rank]; + + int* coord0 = new int[len0], lin0; + int* coord1 = new int[len1], lin1; + + int* coordM0 = new int[M0.Dim.rank]; + int* coordM1 = new int[M1.Dim.rank]; + + int* Koord = new int[nestingDepth]; + for (int i = 0; i < M.Dim.size; i++) { + M.Dim.LinearToCoord(coord, i); + subArray(coord0, coord, 0, len0, 0); + subArray(coord1, coord, 0, len1, len0); + M.elements[i] = 0; + for (int k = 0; k < dM.size; k++) { + dM.LinearToCoord(Koord, k); + concatArray(coordM0, coord0, Koord, 0, 0, len0, 0, nestingDepth); + concatArray(coordM1, Koord, coord1, 0, 0, nestingDepth, 0, len1); + lin0 = (M0.Dim).CoordToLinear(coordM0); + lin1 = (M1.Dim).CoordToLinear(coordM1); + M.elements[i] += M0.elements[lin0] * M1.elements[lin1]; + } + } +} + +template +void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); +template +void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + +void reverseDim(dimension& d, const dimension& d0) { + d.rank = d0.rank; + d.size = d0.size; + if (d.dim != NULL) free(d.dim); + d.dim = (int*)malloc(d.rank * sizeof(int)); + for (int i = 0; i < d.rank; i++) d.dim[i] = d0.dim[d.rank - i - 1]; +} + +template +void reverseTensor(Tensor& M, const Tensor& M0) { + reverseDim(M.Dim, M0.Dim); + size_t id; + int coor[M0.Dim.rank]; + for (size_t i = 0; i < M.Dim.size; i++) { + M0.Dim.LinearToCoord(coor, i); + reverseArray(coor, M0.Dim.rank); + id = M.Dim.CoordToLinear(coor); + M.elements[id] = M0.elements[i]; + } +} + +// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l +// M[x0,x1,x3..xl x{l+1}..xn] X M[xn,x{n-1},..x{l+1}xl y{l+1}..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) +//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] +template +void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth) { + if (!checkMatchProdTensorReverse(M0.Dim, M1.Dim, nestingDepth)) { + printf("Failed in Deep = %d\n", nestingDepth); + //throw std::check_ProdTensor(" Failed imbrication order in Multiplication matrix "); + + //throw std::invalid_argument(" Failed imbrication order in Multiplication matrix "); + } + + int len0 = M0.Dim.rank - nestingDepth; + int len1 = M1.Dim.rank - nestingDepth; + + int* tsub0 = new int[len0]; + int* tsub1 = new int[len1]; + int* tDk1 = new int[nestingDepth]; + int* tDk0 = new int[nestingDepth]; + subArray(tsub0, M0.Dim.dim, 0, len0, 0); + subArray(tsub1, M1.Dim.dim, 0, len1, nestingDepth); + subArray(tDk1, M1.Dim.dim, 0, nestingDepth, 0); + subArray(tDk0, M0.Dim.dim, 0, nestingDepth, len0); + + dimension dSub0(len0, tsub0); + dimension dSub1(len1, tsub1); + dimension dM1(nestingDepth, tDk1); + dimension dM0(nestingDepth, tDk0); + dimension dM; + bool rev; + minReverse(dM, dM0, dM1, rev); + if (rev) reverseArray(dM.dim, dM.rank); + //max(dM, dM0, dM1); + + add(M.Dim, dSub0, dSub1); + M.initTensor(); + + int* coord = new int[M.Dim.rank]; + + int* coord0 = new int[len0], lin0; + int* coord1 = new int[len1], lin1; + + int* coordM0 = new int[M0.Dim.rank]; + int* coordM1 = new int[M1.Dim.rank]; + + int* Koord = new int[nestingDepth]; + for (int i = 0; i < M.Dim.size; i++) { + M.Dim.LinearToCoord(coord, i); + subArray(coord0, coord, 0, len0, 0); + subArray(coord1, coord, 0, len1, len0); + M.elements[i] = 0; + for (int k = 0; k < dM.size; k++) { + dM.LinearToCoord(Koord, k); + concatArray(coordM0, coord0, Koord, 0, 0, len0, 0, nestingDepth); + reverseArray(Koord, nestingDepth); + concatArray(coordM1, Koord, coord1, 0, 0, nestingDepth, 0, len1); + lin0 = (M0.Dim).CoordToLinear(coordM0); + lin1 = (M1.Dim).CoordToLinear(coordM1); + M.elements[i] += M0.elements[lin0] * M1.elements[lin1]; + } + } +} + +template +void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); +template +void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + +template +void permuteTensorDef(Tensor& M, const Tensor& M0, permutation p) { + if (p.size == M0.Dim.rank) { + M.Dim.rank = M0.Dim.rank; + M.Dim.size = M0.Dim.size; + M.Dim.initDim(); + M.initTensor(); + //permuteArray(M.Dim.dim, M0.Dim.dim, p); + //for (int i = 0; i < p.size; i++) { M.Dim.dim[i] = M0.Dim.dim[p.perm[i]]; } + p.permute(M.Dim.dim, M0.Dim.dim); + size_t img; + int coor[p.size]; + int rooc[p.size]; + for (size_t i = 0; i < M.Dim.size;i++) { + M0.Dim.LinearToCoord(coor, i); + p.permute(rooc, coor); + img = M.Dim.CoordToLinear(rooc); + if (img >= M.Dim.size) printf(" i: %ld vs img:%ld size: %ld\n", i, img, M.Dim.size); + M.elements[img] = M0.elements[i]; + + } + } +} + +template +void permuteTensorDef(Tensor& M, const Tensor& M0, permutation p); + +template +bool scanPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest) { + if (contractNest < Msecond.Dim.rank && contractNest < Mfirst.Dim.rank) { + std::vector founded; + int begin = Mfirst.Dim.rank - contractNest, tmp; + for (int i = 0; i < Msecond.Dim.rank;i++) perm[i] = i; + for (int i = begin; i < Mfirst.Dim.rank; i++) { + for (int j = 0; j < Msecond.Dim.rank;j++) { + if (std::find(founded.begin(), founded.end(), perm[j]) == founded.end()) {// not found + if (Msecond.Dim.dim[perm[j]] == Mfirst.Dim.dim[i]) { + founded.push_back(perm[j]); + tmp = perm[i - begin]; + perm[i - begin] = perm[j]; + perm[j] = tmp; + } + } + } + } + return (founded.size() == contractNest); + } + return false; +} +template +bool scanPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest); + + +template +bool scanInvPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest) { + if (contractNest < Msecond.Dim.rank && contractNest < Mfirst.Dim.rank) { + std::vector founded; + int begin = Mfirst.Dim.rank - contractNest, tmp; + for (int i = 0; i < Msecond.Dim.rank;i++) perm[i] = i; + for (int i = begin; i < Mfirst.Dim.rank; i++) { + for (int j = 0; j < Msecond.Dim.rank;j++) { + if (std::find(founded.begin(), founded.end(), j) == founded.end()) {// not found + if (Msecond.Dim.dim[j] == Mfirst.Dim.dim[perm[i - begin]]) { + founded.push_back(j); + tmp = perm[i - begin]; + perm[i - begin] = j; + perm[j] = tmp; + } + } + } + } + return (founded.size() == contractNest); + } + return false; +} +template +bool scanInvPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest); + + +void LinearTransformCoord(size_t& dst, size_t src, int* inversePerm, size_t Msize, dimension dDst, dimension dSrc) { + size_t sm = src; + size_t pp = Msize; + size_t s = 0; + size_t p = 1; + int ret;// = new int[rank]; + int i; + for (i = 0; i < dSrc.rank; ++i) { + pp /= dSrc.dim[i]; + ret = sm / pp; + p = 1; + for (int j = inversePerm[i] + 1; j < dDst.rank;j++) { + p *= dDst.dim[j]; + } + s += ret * p; + + sm %= pp; + + } + dst = s; + if (s > Msize) printf("I have a problem in LinearTransformCoord: s:%ld siez:%ld \n", s, Msize); + +} + + +template +void permuteTensor(Tensor& M, const Tensor& M0, permutation p) { + if (p.size == M0.Dim.rank) { + M.Dim.rank = M0.Dim.rank; + M.Dim.size = M0.Dim.size; + M.Dim.initDim(); + M.initTensor(); + + if (p.size == M0.Dim.rank) p.permute(M.Dim.dim, M0.Dim.dim); + else { + printf("something wrong perm, not the same size as M0.Dim.rank\n"); + exit(1); + } + size_t img = 0; + printf("in permuteTensor:\n"); + M0.Dim.print(); + M.Dim.print(); + setInit se(M.Dim.rank, 0); + int invP[M.Dim.rank]; + inverseArray(invP, p.perm, M.Dim.rank); + for (size_t i = 0; i < M.Dim.size;i++) { + //LinearTransformCoord(img, i, p.perm, M.Dim.size, M.Dim, M0.Dim); + LinearTransformCoord(img, i, invP, M.Dim.size, M.Dim, M0.Dim); + M.elements[img] = M0.elements[i]; + } + } +} + +template +void permuteTensor(Tensor& M, const Tensor& M0, permutation p); + diff --git a/src/tensor/tens0neD/tens0neD.h b/src/tensor/tens0neD/tens0neD.h new file mode 100644 index 0000000..db7b7c9 --- /dev/null +++ b/src/tensor/tens0neD/tens0neD.h @@ -0,0 +1,114 @@ +#ifndef __TENS_0NE_D_H__ +#define __TENS_0NE_D_H__ + +#include +#include + +#include + +//#include "tensor.h" +//#include "cudatensor.h" +//#include "/home/fanasina/progr_/ptens0neD/src/dimension/dimension.h" +//#include "/home/fanasina/progr_/ptens0neD/src/permutation/permutation.h" +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tensCuda/tensCuda.h" + +#include "src/dimension/dimension.h" +#include "src/permutation/permutation.h" +#include "src/tensor/tensCuda/tensCuda.h" + +template +struct Tensor { + struct dimension Dim; + T* elements; + Tensor(struct dimension dm = dimension(1)) { + Dim = dm; + //elements = new T[Dim.size]; + elements = (T*)malloc(Dim.size * sizeof(T)); + } + void initTensor() { + //delete[]elements; + //elements = new T[Dim.size]; + if (elements != NULL) + free(elements); + elements = (T*)malloc(Dim.size * sizeof(T)); + } + void initVal(T val); // { for (int i = 0; i < Dim.size; i++) elements[i] = val + 0.001f * i; } + void print(); + Tensor& operator=(const Tensor& M); + Tensor& operator*=(const T& val); + template + friend Tensor& operator*(const Tensor& M0, const Tensor& M1); + + // M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l + // M[x0,x1,x3..xl x{l+1}...xn] X M[xn,x{n-1},x{n-2}...xl y{l+1} ..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) + template + friend void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + + // M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l + // M[x0,x1,x3..xl x{l+1}..xn] X M[xn,x{n-1},..x{l+1}xl y{l+1}..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) + template + friend void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + + template + friend void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth, bool strict); + + /*template + friend void cudaTensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); +*/ + + template + friend void tensorProd(Tensor& M, const Tensor& M0, const Tensor& M1); + + template + friend void cudaTensorProd(Tensor& M, const Tensor& M0, const Tensor& M1); + + template + friend void cudaTensorProdEnd(Tensor& M, const Tensor& M0, const Tensor& M1); + + template + friend void permuteTensor(Tensor& M, const Tensor& M0, permutation p); + template + friend void permuteTensorDef(Tensor& M, const Tensor& M0, permutation p); + template + friend bool scanPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest); + + //template + //friend void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p); + +}; + +template +void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz); + + +template +Tensor& operator*(const Tensor& M0, const Tensor& M1); + + +void subArray(int* dst, int* src, int debDst, int finDst, int debSrc); + +void concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1); + +void reverseArray(int* arr, int sz); + +template +void tensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); + +bool checkMatchProdTensor(const dimension& d0, const dimension& d1, int nestingDepth); + +void extractDimNestingDepth(dimension& dM, const dimension& d0, const dimension& d1, int nestingDepth); + +// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l + +//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] +template +void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + +// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l + +//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] +template +void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + +#endif + diff --git a/src/tensor/tensCuda/d_tensCuda.cu b/src/tensor/tensCuda/d_tensCuda.cu new file mode 100644 index 0000000..09ebcc2 --- /dev/null +++ b/src/tensor/tensCuda/d_tensCuda.cu @@ -0,0 +1,493 @@ +/*#include +#include + +#include "cuda.h" +#include "cuda_runtime.h" +*/ + +#include "d_tensCuda.h" +//#include "index.h" +#include + +//////////////////////////////////////////////////////// + +//1D grid of 1D blocks +__device__ +int d_getGlobalIdx_1D_1D() { + return blockIdx.x * blockDim.x + threadIdx.x; +} +//1D grid of 2D blocks +__device__ +int d_getGlobalIdx_1D_2D() { + return blockIdx.x * blockDim.x * blockDim.y + + threadIdx.y * blockDim.x + threadIdx.x; +} +//1D grid of 3D blocks +__device__ +int d_getGlobalIdx_1D_3D() { + return blockIdx.x * blockDim.x * blockDim.y * blockDim.z + + threadIdx.z * blockDim.y * blockDim.x + + threadIdx.y * blockDim.x + threadIdx.x; +} +//2D grid of 1D blocks +__device__ int d_getGlobalIdx_2D_1D() { + int blockId + = blockIdx.y * gridDim.x + blockIdx.x; + int threadId = blockId * blockDim.x + threadIdx.x; + return threadId; +} +//2D grid of 2D blocks +__device__ +int d_getGlobalIdx_2D_2D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int threadId = blockId * (blockDim.x * blockDim.y) + + (threadIdx.y * blockDim.x) + threadIdx.x; + return threadId; +} +//2D grid of 3D blocks +__device__ +int d_getGlobalIdx_2D_3D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z) + + (threadIdx.z * (blockDim.x * blockDim.y)) + + (threadIdx.y * blockDim.x) + threadIdx.x; + return threadId; +} +//3D grid of 1D blocks +__device__ +int d_getGlobalIdx_3D_1D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x + + gridDim.x * gridDim.y * blockIdx.z; + int threadId = blockId * blockDim.x + threadIdx.x; + return threadId; +} +//3D grid of 2D blocks +__device__ +int d_getGlobalIdx_3D_2D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x + + gridDim.x * gridDim.y * blockIdx.z; + int threadId = blockId * (blockDim.x * blockDim.y) + + (threadIdx.y * blockDim.x) + threadIdx.x; + return threadId; +} +//3D grid of 3D blocks +__device__ +int d_getGlobalIdx_3D_3D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x + + gridDim.x * gridDim.y * blockIdx.z; + int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z) + + (threadIdx.z * (blockDim.x * blockDim.y)) + + (threadIdx.y * blockDim.x) + threadIdx.x; + return threadId; +} + + +/////////////////////////////////////////////////////////////////////////// + + +__device__ void d_LinearToCoordEnd(int* ret, size_t lin, int* dim, int rank, size_t size) { + size_t sm = lin; + size_t pp = size; + for (int i = rank - 1;i > 0; --i) { + pp /= dim[i]; + ret[i] = sm / pp; + sm %= pp; + } + ret[0] = sm; +} + +__device__ size_t d_CoordToLinearEnd(int* coo, int* dim, int rank) { + size_t pp = 1; + size_t sm = 0; + for (int i = 0; i < rank; ++i) { + sm += (coo[i] * pp); + pp *= dim[i]; + } + return sm; +} + +__device__ size_t d_CoordToLinear(int* coo, int* dim, int rank) { + size_t pp = 1; + size_t sm = 0; + for (int i = rank - 1; i >= 0; --i) { + sm += (coo[i] * pp); + pp *= dim[i]; + } + return sm; +} + + + +__device__ void d_LinearToCoord(int* ret, size_t lin, int* dim, int rank, size_t size) { + size_t sm = lin; + size_t pp = size; + for (int i = 0; i < rank - 1; ++i) { + pp /= dim[i]; + ret[i] = sm / pp; + sm %= pp; + } + ret[rank - 1] = sm; +} +/*__device__ void d_LinearToSplitSubrankLimSz(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, size_t size, size_t sizeA) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + int ret;// = new int[rank]; + for (int i = 0; i < rank; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + + sm %= pp; + if (i == rankA - 1) { + part0 = s; + s = 0; + p = size / sizeA; + } + + } + part1 = s; + +}*/ +__device__ void d_LinearToSplitSubrankLimSz(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, size_t size, size_t sizeA) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + int ret;// = new int[rank]; + int i; + for (i = 0; i < rankA; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + + sm %= pp; + + } + part0 = s; + s = 0; + p = size / sizeA;//sizeB + for (; i < rank; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + + sm %= pp; + + } + + part1 = s; + +} +__device__ void d_LinearToSplitSubrankLimSzEnd(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, size_t size, size_t sizeA) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + int ret;// = new int[rank]; + for (int i = rank - 1; i >= 0; --i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + + sm %= pp; + if (i == rankA) { + part1 = s; + s = 0; + p = size / sizeA; + } + + } + part0 = s; + +} + + +__device__ void d_subArray(int* dst, int* src, int debDst, int finDst, int debSrc) { + for (int i = debDst; i < finDst; i++) { + dst[i] = src[i + debSrc]; + } +} + +template +__global__ void d_prodTensor(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB) { + size_t lin0, lin1; + + size_t i = threadIdx.x + blockIdx.x * blockDim.x; + if (i < size) { + d_LinearToSplitSubrankLimSz(lin0, lin1, i, dimC, rankC, rankA, size, sizeA); + + C[i] = A[lin0] * B[lin1]; + + } +} + +template __global__ void d_prodTensor(float* C, int* dimC, int rankC, size_t size, float* A, int* dimA, int rankA, size_t sizeA, float* B, int* dimB, int rankB); + +template +__global__ void d_prodTensorEnd(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB) { + size_t lin0, lin1; + + size_t i = threadIdx.x + blockIdx.x * blockDim.x; + if (i < size) { + d_LinearToSplitSubrankLimSzEnd(lin0, lin1, i, dimC, rankC, rankA, size, sizeA); + + C[i] = A[lin0] * B[lin1]; + + } +} + +template __global__ void d_prodTensorEnd(float* C, int* dimC, int rankC, size_t size, float* A, int* dimA, int rankA, size_t sizeA, float* B, int* dimB, int rankB); + +__device__ void d_minReverse(int* dim, int& rank, const int* dim0, int rank0, const int* dim1, int rank1, bool& rev) { + if (rank0 > rank1) { + rank = rank1; + for (int i = 0; i < rank1; ++i) dim[i] = dim1[i]; + rev = true; + } + else if (rank0 < rank1) { + rank = rank0; + for (int i = 0; i < rank1; ++i) dim[i] = dim0[i]; + rev = false; + } + else {// rank0 == rank1 + rank = rank0; + for (int i = 0; i < rank0; i++) { + if (dim[i] > dim1[rank1 - 1 - i]) dim[i] = dim1[rank1 - 1 - i]; + else dim[i] = dim0[i]; + } + rev = false; + } +} + +__device__ void d_reverseArray(int* arr, int sz) { + int* tmp; + //tmp = (int*)malloc(sz * sizeof(int)); + + tmp = new int[sz]; + if (tmp == NULL) { + size_t limit = 0; + cudaDeviceGetLimit(&limit, cudaLimitStackSize); + printf("cudaLimitStackSize: %u | %d (%d) %d | \n", (unsigned)limit, blockIdx.x, blockDim.x, threadIdx.x); + cudaDeviceGetLimit(&limit, cudaLimitPrintfFifoSize); + printf("cudaLimitPrintfFifoSize: %u | %d (%d) %d | \n", (unsigned)limit, blockIdx.x, blockDim.x, threadIdx.x); + cudaDeviceGetLimit(&limit, cudaLimitMallocHeapSize); + printf("cudaLimitMallocHeapSize: %u | %d (%d) %d | \n", (unsigned)limit, blockIdx.x, blockDim.x, threadIdx.x); + + printf("error Allocation in tmp = (int*)malloc(sz * sizeof(int)); | | "); + }int i = 0; + for (; i < sz / 2; i++) { + tmp[i] = arr[i]; + arr[i] = arr[sz - 1 - i]; + } + for (; i < sz; i++) { + arr[i] = tmp[sz - 1 - i]; + } + //free(tmp); + delete[]tmp; +} + +__device__ int d_min(int a, int b) { + if (a < b) return a; + return b; +} + +__device__ void d_concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1) { + int i = debDst; + for (int j = debSrc0; j < finSrc0; j++) { + dst[i++] = src0[j]; + } + for (int j = debSrc1; j < finSrc1; j++) { + dst[i++] = src1[j]; + } +} + + + +__device__ void d_ConcatLinearToSplitSubrankLimSz(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, int rankB, size_t size, size_t sizeA, size_t sizeB, int* dM, int dMrank, size_t dMsize, int ind) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + //size_t sz_dA = sizeA / dMsize; + int rankdA = rankA - dMrank; + + int ret; + int i; + for (i = 0; i < rankdA; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + sm %= pp; + } + size_t s1 = 0; + + size_t pb = sizeB / dMsize; + for (; i < rank; ++i) { + pp /= dim[i]; + ret = sm / pp; + pb /= dim[i]; + s1 += ret * pb; + sm %= pp; + } + + size_t smd = ind; + size_t ppb = dMsize; + //size_t pb = size / sz_dA; + pb = sizeB; + p = dMsize; + for (int j = 0;j < dMrank;j++) { + ppb /= dM[j]; + ret = smd / ppb; + p /= dM[j]; + s += ret * p; + pb /= dM[j]; + s1 += ret * pb; + smd %= ppb; + } + //pp = size / sz_dA; + part0 = s; + part1 = s1; +} + +__device__ void d_SplitLineardToSubrank(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, int rankB, size_t size, size_t sizeA, size_t sizeB, int* dM, int dMrank, size_t dMsize) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + //size_t sz_dA = sizeA / dMsize; + int rankdA = rankA - dMrank; + + int ret; + int i; + for (i = 0; i < rankdA; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + sm %= pp; + } + size_t s1 = 0; + + size_t pb = sizeB / dMsize; + for (; i < rank; ++i) { + pp /= dim[i]; + ret = sm / pp; + pb /= dim[i]; + s1 += ret * pb; + sm %= pp; + } + part0 = s; + part1 = s1; +} + + +__device__ void d_UnionConcatLinearSplitedSubrank(size_t& part0, size_t& part1, size_t p0, size_t p1, size_t size, size_t sizeB, int* dM, int dMrank, size_t dMsize, int ind) { + size_t s = p0; + size_t s1 = p1; + int ret; + size_t smd = ind; + size_t ppb = dMsize; + //size_t pb = size / sz_dA; + size_t pb = sizeB; + size_t p = dMsize; + for (int j = 0;j < dMrank;j++) { + ppb /= dM[j]; + ret = smd / ppb; + p /= dM[j]; + s += ret * p; + pb /= dM[j]; + s1 += ret * pb; + smd %= ppb; + } + //pp = size / sz_dA; + part0 = s; + part1 = s1; +} + +template +__global__ void d_TensorContractnReverseProd(T* C, int* dimC, int rankC, size_t sizeC, T* A, int rankA, size_t sizeA, T* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize) { + + size_t p0, p1; + size_t lin0, lin1; + + + //size_t i = threadIdx.x + blockIdx.x * blockDim.x; + size_t i = d_getGlobalIdx_1D_1D(); + + if (i < sizeC) { + + d_SplitLineardToSubrank(p0, p1, i, dimC, rankC, rankA, rankB, sizeC, sizeA, sizeB, dM, dMrank, dMsize); + + C[i] = 0; + for (size_t k = 0; k < dMsize; k++) { + + d_UnionConcatLinearSplitedSubrank(lin0, lin1, p0, p1, sizeC, sizeB, dM, dMrank, dMsize, k); + + //d_ConcatLinearToSplitSubrankLimSz(lin0, lin1, i, dimC, rankC, rankA, rankB, sizeC, sizeA, sizeB, dM, dMrank, dMsize, k); + + C[i] += A[lin0] * B[lin1]; + } + } + +} + +template +__global__ void d_TensorContractnReverseProd(float* C, int* dimC, int rankC, size_t size, float* A, int rankA, size_t sizeA, float* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize); + +__device__ void d_LinearTransformCoord(size_t& dst, size_t src, int* inversePerm, size_t sizeA, int rankDst, int rankSrc, int* dDst, int* dSrc) { + size_t sm = src; + size_t pp = sizeA; + size_t s = 0; + size_t p = 1; + int ret;// = new int[rank]; + int i, j; + for (i = 0; i < rankSrc; ++i) { + pp /= dSrc[i]; + ret = sm / pp; + p = 1; + for (j = inversePerm[i] + 1; j < rankDst;j++) { + p *= dDst[j]; + } + s += ret * p; + + sm %= pp; + + } + dst = s; + if (s > sizeA) printf("I have a problem in LinearTransformCoord: s:%ld siez:%ld \n", s, sizeA); + +} + +template +__global__ void d_PermLinearTransformCoord(T* C, int* dimC, int rankC, size_t sizeC, T* A, int* dimA, int rankA, size_t sizeA, int* invPerm) { + + //size_t i = threadIdx.x + blockIdx.x * blockDim.x; + size_t i = d_getGlobalIdx_1D_1D(); + + if (i < sizeC) { + //printf("(float* C, int* dimC, int rankC, size_t size, float* A, int* dimA, int rankA, size_t sizeA, int* invPerm); + diff --git a/src/tensor/tensCuda/d_tensCuda.h b/src/tensor/tensCuda/d_tensCuda.h new file mode 100644 index 0000000..2a69dfd --- /dev/null +++ b/src/tensor/tensCuda/d_tensCuda.h @@ -0,0 +1,69 @@ +#ifndef __D_CUDA_TENSOR_H__ +#define __D_CUDA_TENSOR_H__ + +#include "cuda.h" +#include "cuda_runtime.h" + +//#include "cuda_device_runtime_api.h" + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tensCuda/d_tensCuda.h" +#include "src/tensor/tensCuda/d_tensCuda.h" + + +//#1D grid of 1D blocks +__device__ int d_getGlobalIdx_1D_1D(); +//#1D grid of 2D blocks +__device__ int d_getGlobalIdx_1D_2D(); +//#1D grid of 3D blocks +__device__ int d_getGlobalIdx_1D_3D(); +//#1D grid of 1D blocks +__device__ int d_getGlobalIdx_2D_1D(); +//#1D grid of 2D blocks +__device__ int d_getGlobalIdx_2D_2D(); +//2D grid of 3D blocks +__device__ int d_getGlobalIdx_2D_3D(); +//#1D grid of 1D blocks +__device__ int d_getGlobalIdx_3D_1D(); +//#1D grid of 2D blocks +__device__ int d_getGlobalIdx_3D_2D(); +//#1D grid of 3D blocks +__device__ int d_getGlobalIdx_3D_3D(); + + + +extern cudaError_t cudaDeviceGetLimit(size_t* pValue, enum cudaLimit limit); + + +__device__ void d_LinearToCoordEnd(int* ret, size_t lin, int* dim, int rank, size_t size); + +__device__ size_t d_CoordToLinearEnd(int* coo, int* dim, int rank); + +__device__ size_t d_CoordToLinear(int* coo, int* dim, int rank); + + +__device__ void d_LinearToCoord(int* ret, size_t lin, int* dim, int rank, size_t size); + +__device__ void d_subArray(int* dst, int* src, int debDst, int finDst, int debSrc); + +__device__ void d_minReverse(int* dim, int& rank, const int* dim0, int rank0, const int* dim1, int rank1, bool& rev); + +__device__ void d_reverseArray(int* arr, int sz); + +__device__ int d_min(int a, int b); + +__device__ void d_concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1); + + +template +__global__ void d_prodTensor(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB); + +template +__global__ void d_prodTensorEnd(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB); + +template +__global__ void d_TensorContractnReverseProd(T* C, int* dimC, int rankC, size_t size, T* A, int rankA, size_t sizeA, T* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize); + +template +__global__ void d_PermLinearTransformCoord(T* C, int* dimC, int rankC, size_t sizeC, T* A, int* dimA, int rankA, size_t sizeA, int* invPerm); + +#endif \ No newline at end of file diff --git a/src/tensor/tensCuda/tensCuda.cu b/src/tensor/tensCuda/tensCuda.cu new file mode 100644 index 0000000..cc69d43 --- /dev/null +++ b/src/tensor/tensCuda/tensCuda.cu @@ -0,0 +1,574 @@ +#include +#include + +#include + +#include +#include + + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tens0neD/tens0neD.h" + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tensCuda/tensCuda.h" +#include "src/tensor/tensCuda/tensCuda.h" + + + + +template +void cudaTensorProd(Tensor& M, const Tensor& M0, const Tensor& M1) { + add(M.Dim, M0.Dim, M1.Dim); + M.initTensor(); + + int* d_imM, * d_imM0, * d_imM1; + cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + T* e, * e0, * e1; + errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + int BLOCKSIZE = 256;//1024; + int DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; + //int DIMBLOCKS = (M.Dim.size) / BLOCKSIZE; + + d_prodTensor << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, e1, d_imM1, M1.Dim.rank); + + errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(e); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } +} + + +//template void cudaTensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); +template void cudaTensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); + + +template +void cudaTensorProdEnd(Tensor& M, const Tensor& M0, const Tensor& M1) { + add(M.Dim, M0.Dim, M1.Dim); + M.initTensor(); + + int* d_imM, * d_imM0, * d_imM1; + cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + T* e, * e0, * e1; + errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + size_t BLOCKSIZE = 1024; + size_t DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; + + d_prodTensorEnd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, e1, d_imM1, M1.Dim.rank); + + cudaDeviceSynchronize(); + + errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(e); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } +} + + +//template void cudaTensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); +template void cudaTensorProdEnd(Tensor& M, const Tensor& M1, const Tensor& M0); + + +template +void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p) { + if (p.size == M0.Dim.rank) { + M.Dim.rank = M0.Dim.rank; + M.Dim.size = M0.Dim.size; + M.Dim.initDim(); + M.initTensor(); + + p.permute(M.Dim.dim, M0.Dim.dim); + + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + + cudaEventRecord(start); + + + int* d_imM, * d_imM0; + cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + + T* e, * e0; + errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + + errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + + size_t BLOCKSIZE = 256; //1024;//512; + size_t DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; + dim3 blckSZ, gridSZ; + blckSZ.x = BLOCKSIZE; + gridSZ.x = DIMBLOCKS; + + int* invP, * d_invP; + invP = (int*)malloc(M.Dim.rank * sizeof(int)); + inverseArray(invP, p.perm, M.Dim.rank); + errCu = cudaMalloc((void**)&d_invP, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_invP, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_invP, invP, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_invP, invP, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + //printf("size: %ld\n", M.Dim.size); + + //d_prodTensorEnd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank); + //d_TensorContractnReverseProd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); + //d_TensorContractnReverseProd << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); + d_PermLinearTransformCoord << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, d_invP); + //d_PermLinearTransformCoord << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, p.perm); + //cudaDeviceSynchronize(); + + + errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(e); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(d_imM); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + cudaEventRecord(stop); + cudaEventSynchronize(stop); + float milliseconds = 0; + cudaEventElapsedTime(&milliseconds, start, stop); + printf("ellaps time cuda permute tensor: %f ms\n", milliseconds); + + } +} + +template +void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p); + + +// strict match contract ! if no strict, we take the minimum +template +void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M11, int nestingDepth, bool strict) { + + + int perm[M11.Dim.rank]; + struct Tensor M1; + if (scanPermuteMatchContractTensorfromSrcToDst(perm, M11, M0, nestingDepth)) { + for (int i = 0; i < M11.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(": last perm \n"); + struct permutation p(M11.Dim.rank, perm); + permuteTensor(M1, M11, p); + M1.Dim.print(); + + } + else { + printf("Failed in Deep = %d\n", nestingDepth); + //throw std::check_ProdTensor(" Failed imbrication order in Multiplication matrix "); + + throw std::invalid_argument(" Failed imbrication order in Multiplication matrix "); + exit(1); + } + + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + + cudaEventRecord(start); + + int len0 = M0.Dim.rank - nestingDepth; + int len1 = M1.Dim.rank - nestingDepth; + + int* tsub0 = new int[len0]; + int* tsub1 = new int[len1]; + int* tDk1 = new int[nestingDepth]; + int* tDk0 = new int[nestingDepth]; + subArray(tsub0, M0.Dim.dim, 0, len0, 0); + subArray(tsub1, M1.Dim.dim, 0, len1, nestingDepth); + subArray(tDk1, M1.Dim.dim, 0, nestingDepth, 0); + subArray(tDk0, M0.Dim.dim, 0, nestingDepth, len0); + + dimension dSub0(len0, tsub0); + dimension dSub1(len1, tsub1); + dimension dM1(nestingDepth, tDk1); + dimension dM0(nestingDepth, tDk0); + dimension dM(dM0); + //bool rev; + //minReverse(dM, dM0, dM1, rev); + //if (rev) reverseArray(dM.dim, dM.rank); + //max(dM, dM0, dM1); + + add(M.Dim, dSub0, dSub1); + M.initTensor(); + + + + int* d_imM, * d_imM0, * d_imM1, * d_imdM; + cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imdM, dM.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imdM, dM.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imdM, dM.dim, dM.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imdM, dM.dim, dM.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + T* e, * e0, * e1; + errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + size_t BLOCKSIZE = 256; //1024;//512; + size_t DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; + dim3 blckSZ, gridSZ; + blckSZ.x = BLOCKSIZE; + gridSZ.x = DIMBLOCKS; + + + //d_prodTensorEnd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank); + //d_TensorContractnReverseProd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); + //d_TensorContractnReverseProd << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); + d_TensorContractnReverseProd << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, M0.Dim.rank, M0.Dim.size, e1, M1.Dim.rank, M1.Dim.size, d_imdM, dM.rank, dM.size); + + //cudaDeviceSynchronize(); + + + errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(e); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + cudaEventRecord(stop); + cudaEventSynchronize(stop); + float milliseconds = 0; + cudaEventElapsedTime(&milliseconds, start, stop); + printf("ellaps time cuda prod contract prod: %f ms\n", milliseconds); + + +} + +template +void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth, bool strict); +//template void cudaTensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + diff --git a/src/tensor/tensCuda/tensCuda.h b/src/tensor/tensCuda/tensCuda.h new file mode 100644 index 0000000..e71a7e6 --- /dev/null +++ b/src/tensor/tensCuda/tensCuda.h @@ -0,0 +1,31 @@ +#ifndef __TENS_CUDA_H__ +#define __TENS_CUDA_H__ + +#include +#include + +#include + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tens0neD/tens0neD.h" +#include "src/tensor/tens0neD/tens0neD.h" + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tensCuda/d_tensCuda.h" +#include "src/tensor/tensCuda/d_tensCuda.h" +//#include "src/dimension/dimension.h" + +template +struct Tensor; + +template +void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth, bool strict = true); + +template +void cudaTensorProd(Tensor& M, const Tensor& M0, const Tensor& M1); +template +void cudaTensorProdEnd(Tensor& M, const Tensor& M0, const Tensor& M1); +template +void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p); + + +#endif + diff --git a/src/tools/tools.c b/src/tools/tools.c new file mode 100644 index 0000000..15a75ea --- /dev/null +++ b/src/tools/tools.c @@ -0,0 +1,144 @@ +#include "src/tools/tools.h" + +int +compare_int(void* a, void* b) +{ + return (*(int*)a - *(int*)b); +} +int +compare_unsigned_int(void* a, void* b) +{ + return ((unsigned int*)a-*(unsigned int*)b); +} +int +compare_float(void *a, void *b) +{ + if (*(float*)a == *(float*)b) return 0; + if (*(float*)a > *(float*)b) return 1; + return -1; +} +int +compare_double(void *a, void *b) +{ + if (*(double*)a == *(double*)b) return 0; + if (*(double*)a > *(double*)b) return 1; + return -1; +} +int +compare_string(void *a, void *b) +{ + return strcmp((char*)a, (char*)b); +} + +int +max_array_int(int * arr, size_t sz) +{ + if(sz == 0) return 0; + int mx = arr[0]; + for(size_t i = 1; i < sz; ++i) + if(mx < arr[i]) mx = arr[i]; + + return mx; +} + +unsigned int +max_array_unsigned_int(unsigned int *arr, size_t sz) +{ + if(sz == 0) return 0; + unsigned int mx = arr[0]; + for(size_t i = 1; i < sz; ++i) + if(mx < arr[i]) mx = arr[i]; + + return mx; +} +int +min_array_int(int * arr, size_t sz) +{ + if(sz == 0) return 0; + int mn = arr[0]; + for(size_t i = 1; i < sz; ++i) + if(mn > arr[i]) mn = arr[i]; + + return mn; +} +unsigned int +min_array_unsigned_int(unsigned int *arr, size_t sz) +{ + if(sz == 0) return 0; + unsigned int mn = arr[0]; + for(size_t i = 1; i < sz; ++i) + if(mn > arr[i]) mn = arr[i]; + + return mn; +} + +size_t +arg_max_array_int(int * arr, size_t sz) +{ + if(sz == 0) return 0; + size_t i_mx = 0; + for(size_t i = 1; i < sz; ++i) + if(arr[i_mx] < arr[i]) i_mx = i; + + return i_mx; +} + +size_t +arg_max_array_unsigned_int(unsigned int *arr, size_t sz) +{ + if(sz == 0) return 0; + size_t i_mx = 0; + for(size_t i = 1; i < sz; ++i) + if(arr[i_mx] < arr[i]) i_mx = i; + + return i_mx; +} + +size_t +arg_min_array_int(int * arr, size_t sz) +{ + if(sz == 0) return 0; + size_t i_mn = 0; + for(size_t i = 1; i < sz; ++i) + if(arr[i_mn] > arr[i]) i_mn = i; + + return i_mn; +} + +size_t +arg_min_array_unsigned_int(unsigned int *arr, size_t sz) +{ + if(sz == 0) return 0; + size_t i_mn = 0; + for(size_t i = 1; i < sz; ++i) + if(arr[i_mn] > arr[i]) i_mn = i; + + return i_mn; +} + +void +copy_array_unsigned_int(unsigned int *dst, const unsigned int *src, size_t size) +{ + for(size_t i=0; i< size; ++i) + dst[i]=src[i]; +} + +/* +bool is_less_eq_than_i(int a, int b) { return a <= b; } +bool is_less_than_i(int a, int b) { return a < b; } +bool is_great_eq_than_i(int a, int b) { return a >= b; } +bool is_great_than_i(int a, int b) { return a > b; } +*/ +int incr_i(int i) { return i + 1; } +int decr_i(int i) { return i - 1; } + +/* +bool is_less_eq_than_u(unsigned int a, unsigned int b) { return a <= b; } +bool is_less_than_u(unsigned int a, unsigned int b) { return a < b; } +bool is_great_eq_than_u(unsigned int a, unsigned int b) { return a >= b; } +bool is_great_than_u(unsigned int a, unsigned int b) { return a > b; } +*/ +unsigned int incr_u(unsigned int i) { return i + 1; } +unsigned int decr_u(unsigned int i) { return i - 1; } + + diff --git a/src/tools/tools.h b/src/tools/tools.h new file mode 100644 index 0000000..371d0d1 --- /dev/null +++ b/src/tools/tools.h @@ -0,0 +1,48 @@ +#ifndef __TOOLS_C_H__ +#define __TOOLS_C_H__ + +#include +#include + +#define FREE(x) { free((x)); (x) = NULL;} + +int compare_int(void* a, void* b); +int compare_unsigned_int(void* a, void* b); +int compare_float(void *a, void *b); +int compare_double(void *a, void *b); +int compare_string(void *a, void *b); + +int max_array_int(int *arr, size_t sz); +unsigned int max_array_unsigned_int(unsigned int *arr, size_t sz); +int min_array_int(int *arr, size_t sz); +unsigned int min_array_unsigned_int(unsigned int *arr, size_t sz); + +size_t arg_max_array_int(int *arr, size_t sz); +size_t arg_max_array_unsigned_int(unsigned int *arr, size_t sz); +size_t arg_min_array_int(int *arr, size_t sz); +size_t arg_min_array_unsigned_int(unsigned int *arr, size_t sz); + +void copy_array_unsigned_int(unsigned int *dst, const unsigned int *src, size_t size); + +/* +bool is_less_eq_than_i(int a, int b); // { return a <= b; } +bool is_less_than_i(int a, int b); // { return a < b; } +bool is_great_eq_than_i(int a, int b); // { return a >= b; } +bool is_great_than_i(int a, int b); // { return a > b; } +*/ +int incr_i(int i); // { return i + 1; } +int decr_i(int i); // { return i - 1; } + + +/* +bool is_less_eq_than_u(unsigned int a, unsigned int b); // { return a <= b; } +bool is_less_than_u(unsigned int a, unsigned int b); // { return a < b; } +bool is_great_eq_than_u(unsigned int a, unsigned int b); // { return a >= b; } +bool is_great_than_u(unsigned int a, unsigned int b); // { return a > b; } +*/ +unsigned int incr_u(unsigned int i); // { return i + 1; } +unsigned int decr_u(unsigned int i); // { return i - 1; } + + + +#endif /*__TOOLS_C_H__*/ diff --git a/src/tools_t/tools_t.c b/src/tools_t/tools_t.c new file mode 100644 index 0000000..f0732cc --- /dev/null +++ b/src/tools_t/tools_t.c @@ -0,0 +1,137 @@ +#include "src/tools_t/tools_t.h" + +#define COMPARE_N(type,a,b)\ + int COMPARE_N_##type(const void *a, const void *b){ \ + if (*(type*)a == *(type*)b) return 0; \ + if (*(type*)a > *(type*)b) return 1; \ + return -1; } + +COMPARE_N(TYPE_CHAR,a,b) +COMPARE_N(TYPE_U_CHAR,a,b) +COMPARE_N(TYPE_INT,a,b) +COMPARE_N(TYPE_U_INT,a,b) +COMPARE_N(TYPE_L_INT,a,b) +COMPARE_N(TYPE_U_L_INT,a,b) +COMPARE_N(TYPE_FLOAT,a,b) +COMPARE_N(TYPE_DOUBLE,a,b) +COMPARE_N(TYPE_L_DOUBLE,a,b) +int +COMPARE_N_TYPE_STRING(const void *a,const void* b) +{ + return strcmp(( char*)a,( char*)b); +} + +#define COPY_ARRAY(type, dst, src, size)\ + void COPY_ARRAY_##type(type *dst, const type *src, size_t size){\ + for(size_t i = 0; i < size; ++i) dst[i]=src[i]; } + +COPY_ARRAY(TYPE_CHAR,dst,src,size); +COPY_ARRAY(TYPE_U_CHAR,dst,src,size); +COPY_ARRAY(TYPE_INT,dst,src,size); +COPY_ARRAY(TYPE_U_INT,dst,src,size); +COPY_ARRAY(TYPE_L_INT,dst,src,size); +COPY_ARRAY(TYPE_U_L_INT,dst,src,size); +COPY_ARRAY(TYPE_FLOAT,dst,src,size); +COPY_ARRAY(TYPE_DOUBLE,dst,src,size); +COPY_ARRAY(TYPE_L_DOUBLE,dst,src,size); +void COPY_ARRAY_TYPE_STRING(char** dst, const char** src, size_t size) +{ + for(size_t i = 0; i < size; ++i) strcpy(dst[i],src[i]); +} + +#define MAX_ARRAY(type, array, size, compare)\ + type MAX_ARRAY_##type(const type *array, size_t size){\ + if(array == NULL) return 0;\ + type mx =(type)array[0];\ + for(size_t i = 0; i < size; ++i)\ + if(compare(&mx,&array[i]) < 0) mx =(type)array[i];\ + return mx;} +MAX_ARRAY(TYPE_CHAR,array,size,COMPARE_N_TYPE_CHAR); +MAX_ARRAY(TYPE_U_CHAR,array,size,COMPARE_N_TYPE_U_CHAR); +MAX_ARRAY(TYPE_INT,array,size,COMPARE_N_TYPE_INT); +MAX_ARRAY(TYPE_U_INT,array,size,COMPARE_N_TYPE_U_INT); +MAX_ARRAY(TYPE_L_INT,array,size,COMPARE_N_TYPE_L_INT); +MAX_ARRAY(TYPE_U_L_INT,array,size,COMPARE_N_TYPE_U_L_INT); +MAX_ARRAY(TYPE_FLOAT,array,size,COMPARE_N_TYPE_FLOAT); +MAX_ARRAY(TYPE_DOUBLE,array,size,COMPARE_N_TYPE_DOUBLE); +MAX_ARRAY(TYPE_L_DOUBLE,array,size,COMPARE_N_TYPE_L_DOUBLE); +MAX_ARRAY(TYPE_STRING,array,size,COMPARE_N_TYPE_STRING); + +#define ARG_MAX_ARRAY(type, array, size, compare)\ + size_t ARG_MAX_ARRAY_##type(const type *array, size_t size){\ + if(array == NULL) return 0;\ + size_t i_mx = 0;\ + for(size_t i = 0; i < size; ++i)\ + if(compare(&array[i_mx],&array[i]) < 0) i_mx = i;\ + return i_mx;} +ARG_MAX_ARRAY(TYPE_CHAR,array,size,COMPARE_N_TYPE_CHAR); +ARG_MAX_ARRAY(TYPE_U_CHAR,array,size,COMPARE_N_TYPE_U_CHAR); +ARG_MAX_ARRAY(TYPE_INT,array,size,COMPARE_N_TYPE_INT); +ARG_MAX_ARRAY(TYPE_U_INT,array,size,COMPARE_N_TYPE_U_INT); +ARG_MAX_ARRAY(TYPE_L_INT,array,size,COMPARE_N_TYPE_L_INT); +ARG_MAX_ARRAY(TYPE_U_L_INT,array,size,COMPARE_N_TYPE_U_L_INT); +ARG_MAX_ARRAY(TYPE_FLOAT,array,size,COMPARE_N_TYPE_FLOAT); +ARG_MAX_ARRAY(TYPE_DOUBLE,array,size,COMPARE_N_TYPE_DOUBLE); +ARG_MAX_ARRAY(TYPE_L_DOUBLE,array,size,COMPARE_N_TYPE_L_DOUBLE); +ARG_MAX_ARRAY(TYPE_STRING,array,size,COMPARE_N_TYPE_STRING); + +#define MIN_ARRAY(type, array, size, compare)\ + type MIN_ARRAY_##type(const type *array, size_t size){\ + if(array == NULL) return 0;\ + type mn =(type)array[0];\ + for(size_t i = 0; i < size; ++i)\ + if(compare(&mn,&array[i]) > 0) mn =(type)array[i];\ + return mn;} +MIN_ARRAY(TYPE_CHAR,array,size,COMPARE_N_TYPE_CHAR); +MIN_ARRAY(TYPE_U_CHAR,array,size,COMPARE_N_TYPE_U_CHAR); +MIN_ARRAY(TYPE_INT,array,size,COMPARE_N_TYPE_INT); +MIN_ARRAY(TYPE_U_INT,array,size,COMPARE_N_TYPE_U_INT); +MIN_ARRAY(TYPE_L_INT,array,size,COMPARE_N_TYPE_L_INT); +MIN_ARRAY(TYPE_U_L_INT,array,size,COMPARE_N_TYPE_U_L_INT); +MIN_ARRAY(TYPE_FLOAT,array,size,COMPARE_N_TYPE_FLOAT); +MIN_ARRAY(TYPE_DOUBLE,array,size,COMPARE_N_TYPE_DOUBLE); +MIN_ARRAY(TYPE_L_DOUBLE,array,size,COMPARE_N_TYPE_L_DOUBLE); +MIN_ARRAY(TYPE_STRING,array,size,COMPARE_N_TYPE_STRING); + +#define ARG_MIN_ARRAY(type, array, size, compare)\ + size_t ARG_MIN_ARRAY_##type(const type *array, size_t size){\ + if(array == NULL) return 0;\ + size_t i_mn = 0;\ + for(size_t i = 0; i < size; ++i)\ + if(compare(&array[i_mn],&array[i]) > 0) i_mn = i;\ + return i_mn;} +ARG_MIN_ARRAY(TYPE_CHAR,array,size,COMPARE_N_TYPE_CHAR); +ARG_MIN_ARRAY(TYPE_U_CHAR,array,size,COMPARE_N_TYPE_U_CHAR); +ARG_MIN_ARRAY(TYPE_INT,array,size,COMPARE_N_TYPE_INT); +ARG_MIN_ARRAY(TYPE_U_INT,array,size,COMPARE_N_TYPE_U_INT); +ARG_MIN_ARRAY(TYPE_L_INT,array,size,COMPARE_N_TYPE_L_INT); +ARG_MIN_ARRAY(TYPE_U_L_INT,array,size,COMPARE_N_TYPE_U_L_INT); +ARG_MIN_ARRAY(TYPE_FLOAT,array,size,COMPARE_N_TYPE_FLOAT); +ARG_MIN_ARRAY(TYPE_DOUBLE,array,size,COMPARE_N_TYPE_DOUBLE); +ARG_MIN_ARRAY(TYPE_L_DOUBLE,array,size,COMPARE_N_TYPE_L_DOUBLE); +ARG_MIN_ARRAY(TYPE_STRING,array,size,COMPARE_N_TYPE_STRING); + + +int main() +{ + unsigned int ui1 = 2545466; + unsigned int ui2 = 2544566; + printf(" %u >? %u = %d \n",ui1,ui2,COMPARE_N_TYPE_U_INT(&ui1,&ui2)); + printf(" %u >? %u = %d \n",ui1,ui1,COMPARE_N_TYPE_U_INT(&ui1,&ui1)); + printf(" %u >? %u = %d \n",ui2,ui1,COMPARE_N_TYPE_U_INT(&ui2,&ui1)); + int i1 = 2545466; + int i2 = 2544566; + printf(" %d >? %d = %d \n",i1,i2,COMPARE_N_TYPE_U_INT(&i1,&i2)); + printf(" %d >? %d = %d \n",i1,i1,COMPARE_N_TYPE_U_INT(&i1,&i1)); + printf(" %d >? %d = %d \n",i2,i1,COMPARE_N_TYPE_U_INT(&i2,&i1)); + + int tabi[]={5,2,6,4,3,1}; + int tabr[6]={0}; + COPY_ARRAY_TYPE_INT(tabr,tabi,6); + + for(size_t i=0; i<6; ++i) printf(" %d \n", tabr[i]); + + printf("MIN = %d \n",MIN_ARRAY_TYPE_INT(tabr,6)); + + return 0; +} diff --git a/src/tools_t/tools_t.h b/src/tools_t/tools_t.h new file mode 100644 index 0000000..ac0959b --- /dev/null +++ b/src/tools_t/tools_t.h @@ -0,0 +1,92 @@ +#ifndef __TOOLS_T_C_H__ +#define __TOOLS_T_C_H__ + +#include +#include +#include + +#define TYPE_CHAR char +#define TYPE_U_CHAR unsigned char +#define TYPE_INT int +#define TYPE_U_INT unsigned int +#define TYPE_L_INT long int +#define TYPE_U_L_INT unsigned long int +#define TYPE_FLOAT float +#define TYPE_DOUBLE double +#define TYPE_L_DOUBLE long double +#define TYPE_STRING char* + +#define FREE(x) { free((x)); (x) = NULL;} + +#define FOREACH(array, size, function)\ + for(size_t _ind = 0; _ind < size; ++_ind) function(array[_ind]); + + +int COMPARE_N_TYPE_CHAR(const void *,const void*); +int COMPARE_N_TYPE_U_CHAR(const void *,const void*); +int COMPARE_N_TYPE_INT(const void *,const void*); +int COMPARE_N_TYPE_U_INT(const void *,const void*); +int COMPARE_N_TYPE_L_INT(const void *,const void*); +int COMPARE_N_TYPE_U_L_INT(const void *,const void*); +int COMPARE_N_TYPE_FLOAT(const void *,const void*); +int COMPARE_N_TYPE_DOUBLE(const void *,const void*); +int COMPARE_N_TYPE_L_DOUBLE(const void *,const void*); +int COMPARE_N_TYPE_STRING(const void *,const void*); + +void COPY_ARRAY_TYPE_CHAR(TYPE_CHAR* dst, const TYPE_CHAR* src, size_t size); +void COPY_ARRAY_TYPE_U_CHAR(TYPE_U_CHAR* dst, const TYPE_U_CHAR* src, size_t size); +void COPY_ARRAY_TYPE_INT(TYPE_INT* dst, const TYPE_INT* src, size_t size); +void COPY_ARRAY_TYPE_U_INT(TYPE_U_INT* dst, const TYPE_U_INT* src, size_t size); +void COPY_ARRAY_TYPE_L_INT(TYPE_L_INT* dst, const TYPE_L_INT* src, size_t size); +void COPY_ARRAY_TYPE_U_L_INT(TYPE_U_L_INT* dst, const TYPE_U_L_INT* src, size_t size); +void COPY_ARRAY_TYPE_FLOAT(TYPE_FLOAT* dst, const TYPE_FLOAT* src, size_t size); +void COPY_ARRAY_TYPE_DOUBLE(TYPE_DOUBLE* dst, const TYPE_DOUBLE* src, size_t size); +void COPY_ARRAY_TYPE_L_DOUBLE(TYPE_L_DOUBLE* dst, const TYPE_L_DOUBLE* src, size_t size); +void COPY_ARRAY_TYPE_STRING(TYPE_STRING* dst, const TYPE_STRING* src, size_t size); + +TYPE_CHAR MAX_ARRAY_TYPE_CHAR(const TYPE_CHAR *array, size_t size); +TYPE_U_CHAR MAX_ARRAY_TYPE_U_CHAR(const TYPE_U_CHAR *array, size_t size); +TYPE_INT MAX_ARRAY_TYPE_INT(const TYPE_INT *array, size_t size); +TYPE_U_INT MAX_ARRAY_TYPE_U_INT(const TYPE_U_INT *array, size_t size); +TYPE_L_INT MAX_ARRAY_TYPE_L_INT(const TYPE_L_INT *array, size_t size); +TYPE_U_L_INT MAX_ARRAY_TYPE_U_L_INT(const TYPE_U_L_INT *array, size_t size); +TYPE_FLOAT MAX_ARRAY_TYPE_FLOAT(const TYPE_FLOAT *array, size_t size); +TYPE_DOUBLE MAX_ARRAY_TYPE_DOUBLE(const TYPE_DOUBLE *array, size_t size); +TYPE_L_DOUBLE MAX_ARRAY_TYPE_L_DOUBLE(const TYPE_L_DOUBLE *array, size_t size); +TYPE_STRING MAX_ARRAY_TYPE_STRING(const TYPE_STRING *array, size_t size); + +size_t ARG_MAX_ARRAY_TYPE_CHAR(const TYPE_CHAR *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_U_CHAR(const TYPE_U_CHAR *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_INT(const TYPE_INT *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_U_INT(const TYPE_U_INT *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_L_INT(const TYPE_L_INT *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_U_L_INT(const TYPE_U_L_INT *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_FLOAT(const TYPE_FLOAT *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_DOUBLE(const TYPE_DOUBLE *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_L_DOUBLE(const TYPE_L_DOUBLE *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_STRING(const TYPE_STRING *array, size_t size); + +TYPE_CHAR MIN_ARRAY_TYPE_CHAR(const TYPE_CHAR *array, size_t size); +TYPE_U_CHAR MIN_ARRAY_TYPE_U_CHAR(const TYPE_U_CHAR *array, size_t size); +TYPE_INT MIN_ARRAY_TYPE_INT(const TYPE_INT *array, size_t size); +TYPE_U_INT MIN_ARRAY_TYPE_U_INT(const TYPE_U_INT *array, size_t size); +TYPE_L_INT MIN_ARRAY_TYPE_L_INT(const TYPE_L_INT *array, size_t size); +TYPE_U_L_INT MIN_ARRAY_TYPE_U_L_INT(const TYPE_U_L_INT *array, size_t size); +TYPE_FLOAT MIN_ARRAY_TYPE_FLOAT(const TYPE_FLOAT *array, size_t size); +TYPE_DOUBLE MIN_ARRAY_TYPE_DOUBLE(const TYPE_DOUBLE *array, size_t size); +TYPE_L_DOUBLE MIN_ARRAY_TYPE_L_DOUBLE(const TYPE_L_DOUBLE *array, size_t size); +TYPE_STRING MIN_ARRAY_TYPE_STRING(const TYPE_STRING *array, size_t size); + +size_t ARG_MIN_ARRAY_TYPE_CHAR(const TYPE_CHAR *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_U_CHAR(const TYPE_U_CHAR *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_INT(const TYPE_INT *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_U_INT(const TYPE_U_INT *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_L_INT(const TYPE_L_INT *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_U_L_INT(const TYPE_U_L_INT *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_FLOAT(const TYPE_FLOAT *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_DOUBLE(const TYPE_DOUBLE *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_L_DOUBLE(const TYPE_L_DOUBLE *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_STRING(const TYPE_STRING *array, size_t size); + + +#endif /*__TOOLS_T_C_H__*/ diff --git a/test/isgood.cu b/test/isgood.cu new file mode 100644 index 0000000..6aeaced --- /dev/null +++ b/test/isgood.cu @@ -0,0 +1,652 @@ +#include +#include + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tens0neD/tens0neD.h" +#include "src/tensor/tens0neD/tens0neD.h" +//#include "cudatensor.h" +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tensCuda/tensCuda.h" +#include "src/tensor/tensCuda/tensCuda.h" +/*TEST(LineraCoodTransform, check_print) { + int t3[] = { [0] = 2,[1] = 4,[2] = 3 }; + + struct dimension D0(3, t3); + int coor0[3] = { 1,3,2 }; + int* coor1 = new int[3]; + + int l0 = D0.CoordToLinear(coor0); + + D0.print(); + + D0.LinearToCoord(coor1, l0); + + for (int i = 0; i < D0.rank; i++) { + EXPECT_EQ(coor0[i], coor1[i]) << " coor0: " << coor0[i] << " coor1: " << coor1[i] << " i: " << i; + } +}*/ + +TEST(subArray, concatArray) { + int t[] = { 1,5,6,2,3 }; + int t0[] = { 1,5,6 }; + int t1[] = { 2,3 }; + int n = 5; + int s0[3]; + int s1[2]; + int s[n]; + + subArray(s0, t, 0, 3, 0); + subArray(s1, t, 0, 2, 3); + ASSERT_EQ(0, memcmp(t0, s0, sizeof(int) * 3)); + ASSERT_EQ(0, memcmp(t1, s1, sizeof(int) * 2)); + + concatArray(s, s0, s1, 0, 0, 3, 0, 2); + ASSERT_EQ(0, memcmp(t, s, sizeof(int) * 5)); + +} +TEST(tensorProdpetit, floatTemp) { + + /*int t3[] = { 2, 4, 3 }; + + int t4[] = { 2, 4, 3, 2 };*/ + int t3[] = { 3, 6, 5 }; + + int t4[] = { 3, 5, 8, 4 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + tensorProd(M, M3, M4); + //tensorProd(M, M4, M3); + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int lin3, lin4, lin; + d = M.Dim; + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + for (int i = 0; i < d3.rank; i++) coord3[i] = idx3[i]; + for (int i = 0; i < d4.rank; i++) coord4[i] = idx4[i]; + + concatArray(coord, coord3, coord4, 0, 0, d3.rank, 0, d4.rank); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + lin = d.CoordToLinear(coord); + + //ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + //ASSERT_NEAR(M.elements[lin], M3.elements[lin3] * M4.elements[lin4], 0.0001) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + } + + + +} + +TEST(tensorProd, doubleTemp) { + + int t3[] = { 2, 4, 3 }; + + int t4[] = { 4, 3, 2,3 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + tensorProd(M, M3, M4); + //tensorProd(M, M4, M3); + d = M.Dim; + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int lin3, lin4, lin; + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + for (int i = 0; i < d3.rank; i++) coord3[i] = idx3[i]; + for (int i = 0; i < d4.rank; i++) coord4[i] = idx4[i]; + + concatArray(coord, coord3, coord4, 0, 0, d3.rank, 0, d4.rank); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + lin = d.CoordToLinear(coord); + + //ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]); + ASSERT_DOUBLE_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]); + //ASSERT_NEAR(M.elements[lin], M3.elements[lin3] * M4.elements[lin4], 0.001) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + } + + +} + + + + +void printArray(int* t, int sz) { + for (int i = 0; i < sz;i++) printf(" %d ", t[i]); +} + +TEST(tensorContractnProd, floatTemp) { + + int t3[] = { 2, 4, 3 }; + + int t4[] = { 4, 3, 2, 3 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + int dee = 2; + + try { + //tensorContractnProd(M, M3, M4, dee); + tensorContractnProd(M, M3, M4, dee); + } + catch (const std::invalid_argument& e) { + printf("bye from test tensorContractnProd floatTemp invalid arg! deep:\n"); + dimension dM; + extractDimNestingDepth(dM, d3, d4, dee); + dM.print(); + ASSERT_TRUE(false); + } + + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int l0, l1; + l0 = M3.Dim.rank - dee; + l1 = M4.Dim.rank - dee; + int pcoord3[l0]; + int pcoord4[l1]; + int r[dee]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + Tensor Msum(d); + //for (size_t idx = 0; idx < d.size; idx++) Msum.elements[idx] = 0.0f; + + //Msum.print(); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + + for (int i = 0; i < l0; i++) pcoord3[i] = idx3[i]; + for (int i = 0; i < l1; i++) pcoord4[i] = idx4[i + dee]; + concatArray(coord, pcoord3, pcoord4, 0, 0, l0, 0, l1); + lin = d.CoordToLinear(coord); + Msum.elements[lin] = 0.0f; + //for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + //for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + { + for (int i = 0; i < dee; i++) r[i] = idx4[i]; + + concatArray(coord3, pcoord3, r, 0, 0, l0, 0, dee); + concatArray(coord4, r, pcoord4, 0, 0, dee, 0, l1); + //printf("[");printArray(coord3, M3.Dim.rank); printf("]["); printArray(coord4, M4.Dim.rank);printf("] =*= ("); printArray(coord, Msum.Dim.rank); printf(") |||"); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + + Msum.elements[lin] += (M3.elements[lin3] * M4.elements[lin4]); + //printf("lin:%d lin3:%d lin4:%d el+:%f\n", lin, lin3, lin4, Msum.elements[lin]); + + } + + ASSERT_FLOAT_EQ(Msum.elements[lin], M.elements[lin]); + + } + + + +} + +TEST(tensorContractnProdD, doubleTemp) { + + int t3[] = { 2, 3, 4 }; + + int t4[] = { 3, 4, 2, 3 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + int dee = 2; + + try { + //tensorContractnProd(M, M3, M4, dee); + tensorContractnProd(M, M3, M4, dee); + } + catch (const std::invalid_argument& e) { + printf("bye from test tensorContractnProd floatTemp invalid arg! deep:\n"); + dimension dM; + extractDimNestingDepth(dM, d3, d4, dee); + dM.print(); + ASSERT_TRUE(false); + } + + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int l0, l1; + l0 = M3.Dim.rank - dee; + l1 = M4.Dim.rank - dee; + int pcoord3[l0]; + int pcoord4[l1]; + int r[dee]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + Tensor Msum(d); + //for (size_t idx = 0; idx < d.size; idx++) Msum.elements[idx] = 0.0f; + + //Msum.print(); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + + for (int i = 0; i < l0; i++) pcoord3[i] = idx3[i]; + for (int i = 0; i < l1; i++) pcoord4[i] = idx4[i + dee]; + concatArray(coord, pcoord3, pcoord4, 0, 0, l0, 0, l1); + lin = d.CoordToLinear(coord); + Msum.elements[lin] = 0.0f; + //for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + //for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + { + for (int i = 0; i < dee; i++) r[i] = idx4[i]; + + concatArray(coord3, pcoord3, r, 0, 0, l0, 0, dee); + concatArray(coord4, r, pcoord4, 0, 0, dee, 0, l1); + //printf("[");printArray(coord3, M3.Dim.rank); printf("]["); printArray(coord4, M4.Dim.rank);printf("] =*= ("); printArray(coord, Msum.Dim.rank); printf(") |||"); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + + Msum.elements[lin] += (M3.elements[lin3] * M4.elements[lin4]); + //printf("lin:%d lin3:%d lin4:%d el+:%f\n", lin, lin3, lin4, Msum.elements[lin]); + + } + + ASSERT_DOUBLE_EQ(Msum.elements[lin], M.elements[lin]); + + } + + + +} + +TEST(reverseArray, innt) { + int n = 6; + int t4[6] = { 3, 4, 2, 3 ,5, 1 }; + int revt4[6] = { 1,5,3,2, 4, 3 }; + reverseArray(t4, n); + for (int i = 0; i < n; i++) { + ASSERT_EQ(t4[i], revt4[i]); + } +} + +TEST(tensorContractnReverseProd, floatTemp) { + + int t3[] = { 4, 4, 3 }; + + int t4[] = { 3, 4, 7, 2 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + int dee = 2; + + try { + //tensorContractnProd(M, M3, M4, dee); + tensorContractnReverseProd(M, M3, M4, dee); + } + catch (const std::invalid_argument& e) { + printf("bye from test tensorContractnProd floatTemp invalid arg! deep:\n"); + dimension dM; + extractDimNestingDepth(dM, d3, d4, dee); + dM.print(); + ASSERT_TRUE(false); + } + + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int l0, l1; + l0 = M3.Dim.rank - dee; + l1 = M4.Dim.rank - dee; + int pcoord3[l0]; + int pcoord4[l1]; + int r[dee]; + int rev[dee]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + Tensor Msum(d); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + + for (int i = 0; i < l0; i++) pcoord3[i] = idx3[i]; + for (int i = 0; i < l1; i++) pcoord4[i] = idx4[i + dee]; + concatArray(coord, pcoord3, pcoord4, 0, 0, l0, 0, l1); + lin = d.CoordToLinear(coord); + Msum.elements[lin] = 0.0f; + //for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + //for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + { + for (int i = 0; i < dee; i++) { + r[i] = idx4[i]; + rev[i] = idx4[dee - 1 - i]; + } + + concatArray(coord3, pcoord3, rev, 0, 0, l0, 0, dee); + concatArray(coord4, r, pcoord4, 0, 0, dee, 0, l1); + //printf("[");printArray(coord3, M3.Dim.rank); printf("]["); printArray(coord4, M4.Dim.rank);printf("] =*= ("); printArray(coord, Msum.Dim.rank); printf(") |||"); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + + Msum.elements[lin] += (M3.elements[lin3] * M4.elements[lin4]); + //printf("lin:%d lin3:%d lin4:%d el+:%f\n", lin, lin3, lin4, Msum.elements[lin]); + } + ASSERT_FLOAT_EQ(Msum.elements[lin], M.elements[lin]); + } +} + + +TEST(cudaTensorProd, floatTemp) { + + int t3[] = { 15, 6, 24 }; + + int t4[] = { 23, 15, 6, 10 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(1.0f); // M3.print(); + M4.initVal(0.5f); // M4.print(); + + cudaTensorProd(M, M3, M4); + //tensorProd(M, M4, M3); + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + for (int i = 0; i < d3.rank; i++) coord3[i] = idx3[i]; + for (int i = 0; i < d4.rank; i++) coord4[i] = idx4[i]; + + concatArray(coord, coord3, coord4, 0, 0, d3.rank, 0, d4.rank); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + lin = d.CoordToLinear(coord); + + //ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + + //ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " M " << M.elements[lin] << " lin: " << lin << " M3: " << M3.elements[lin3] << " lin3:" << lin3 << " lin4: " << lin4 << " M4 " << M4.elements[lin4] << std::endl; + //std::cout << " M " << M.elements[lin] << " lin: " << lin << " M3: " << M3.elements[lin3] << " lin3:" << lin3 << " lin4: " << lin4 << " M4 " << M4.elements[lin4] << std::endl; + + //ASSERT_NEAR(M.elements[lin], M3.elements[lin3] * M4.elements[lin4], 0.0001) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + } + +} + + +TEST(permuteTensor, float) { + int t4[] = { 3, 8, 2, 4 }; + struct dimension d4(4, t4); + struct Tensor M4(d4), M; + M4.initVal(1.0f); + permutation p(4, true); + int n = 5; + //for (int n = 0; n < 24;n++) { + PlaceToTab(p.perm, n, p.size); + printf(" %*d : ", 2, n); + for (int i = 0; i < p.size; i++)printf("(%d)%d ", i, p.perm[i]);printf("\n"); + permuteTensor(M, M4, p); + //permuteTensorDef(M, M4, p); + int ind[4]; + int coor[4]; + size_t cM, cM4; + for (ind[0] = 0; ind[0] < M4.Dim.dim[0]; ind[0]++) + for (ind[1] = 0; ind[1] < M4.Dim.dim[1]; ind[1]++) + for (ind[2] = 0; ind[2] < M4.Dim.dim[2]; ind[2]++) + for (ind[3] = 0; ind[3] < M4.Dim.dim[3]; ind[3]++) { + p.permute(coor, ind); + cM = M.Dim.CoordToLinear(coor); + cM4 = M4.Dim.CoordToLinear(ind); + //printf("M[%ld]=%f M4[%ld]=%f \n", cM, M.elements[cM], cM4, M4.elements[cM4]); + ASSERT_FLOAT_EQ(M.elements[cM], M4.elements[cM4]); + } + +} + + +TEST(cudapermuteTensor, float) { + int t4[] = { 3, 8, 2, 4 }; + struct dimension d4(4, t4); + struct Tensor M4(d4), M; + M4.initVal(1.0f); + permutation p(4, true); + int n = 5; + //for (int n = 0; n < 24;n++) { + PlaceToTab(p.perm, n, p.size); + printf(" %*d : ", 2, n); + for (int i = 0; i < p.size; i++)printf("{%d}%d ", i, p.perm[i]);printf("\n"); + cudapermuteTensor(M, M4, p); + //permuteTensor(M, M4, p); + //permuteTensorDef(M, M4, p); + int ind[4]; + int coor[4]; + size_t cM, cM4; + for (ind[0] = 0; ind[0] < M4.Dim.dim[0]; ind[0]++) + for (ind[1] = 0; ind[1] < M4.Dim.dim[1]; ind[1]++) + for (ind[2] = 0; ind[2] < M4.Dim.dim[2]; ind[2]++) + for (ind[3] = 0; ind[3] < M4.Dim.dim[3]; ind[3]++) { + p.permute(coor, ind); + cM = M.Dim.CoordToLinear(coor); + cM4 = M4.Dim.CoordToLinear(ind); + //printf("M[%ld]=%f M4[%ld]=%f \n", cM, M.elements[cM], cM4, M4.elements[cM4]); + ASSERT_FLOAT_EQ(M.elements[cM], M4.elements[cM4]); + } +} + +TEST(scanPermuteMatchContractTensorfromSrcToDst1, permId) { + int t[] = { 3, 8, 2, 3, 4 }; + //int tm[] = { 4, 2, 7, 3 }; + int tm[] = { 2, 3,4,7 }; + struct dimension d(5, t); + struct dimension dm(4, tm); + struct Tensor M4(d), M(dm); + M4.initVal(1.0f); + M.initVal(1.0f); + int dee = 3; + //int result[4] = { 1,3,0,2 }; + int result[4] = { 0,1,2,3 }; + int perm[M.Dim.rank]; + ASSERT_TRUE(scanPermuteMatchContractTensorfromSrcToDst(perm, M, M4, dee)); + for (int i = 0; i < M.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(" first perm \n"); + ASSERT_EQ(0, memcmp(result, perm, sizeof(int) * M.Dim.rank)); + + Tensor tM; + permutation p(M.Dim.rank, perm); + permuteTensor(tM, M, p); + + ASSERT_FALSE(scanPermuteMatchContractTensorfromSrcToDst(perm, M, M4, 4)); + for (int i = 0; i < M.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(": last perm \n"); + tM.Dim.print(); + int resultDim[] = { 2,3,4,7 }; + ASSERT_EQ(0, memcmp(resultDim, tM.Dim.dim, sizeof(int) * tM.Dim.rank)); + +} + +TEST(scanPermuteMatchContractTensorfromSrcToDst2, floatest) { + int t[] = { 3, 8, 2, 3, 4 }; + int tm[] = { 4, 2, 7, 3 }; + //int tm[] = { 2, 3,4,7 }; + struct dimension d(5, t); + struct dimension dm(4, tm); + struct Tensor M4(d), M(dm); + M4.initVal(1.0f); + M.initVal(1.0f); + int dee = 3; + int result[4] = { 1,3,0,2 }; + //int result[4] = { 0,1,2,3 }; + int perm[M.Dim.rank]; + ASSERT_TRUE(scanPermuteMatchContractTensorfromSrcToDst(perm, M, M4, dee)); + for (int i = 0; i < M.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(" first perm \n"); + ASSERT_EQ(0, memcmp(result, perm, sizeof(int) * M.Dim.rank)); + + Tensor tM; + permutation p(M.Dim.rank, perm); + permuteTensor(tM, M, p); + + ASSERT_FALSE(scanPermuteMatchContractTensorfromSrcToDst(perm, M, M4, 4)); + for (int i = 0; i < M.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(": last perm \n"); + tM.Dim.print(); + int resultDim[] = { 2,3,4,7 }; + ASSERT_EQ(0, memcmp(resultDim, tM.Dim.dim, sizeof(int) * tM.Dim.rank)); + + + +} + + +TEST(cudaTensorContractNestProd, floatTemp) { + + int t3[] = { 77, 8, 25 }; + + int t4[] = { 8, 25, 52, 144 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(1.0f); // M3.print(); + M4.initVal(0.0f); // M4.print(); + + int dee = 2; + + M4.Dim.print(); + + try { + //tensorContractnProd(M, M3, M4, dee); + cudaTensorContractNestProd(M, M3, M4, dee); + } + catch (const std::invalid_argument& e) { + printf("bye from test tensorContractnProd floatTemp invalid arg! deep: \n"); + dimension dM; + extractDimNestingDepth(dM, d3, d4, dee); + dM.print(); + ASSERT_TRUE(false); + } + + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int l0, l1; + l0 = M3.Dim.rank - dee; + l1 = M4.Dim.rank - dee; + int pcoord3[l0]; + int pcoord4[l1]; + int r[dee]; + //int rev[dee]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + Tensor Msum(d); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + + for (int i = 0; i < l0; i++) pcoord3[i] = idx3[i]; + for (int i = 0; i < l1; i++) pcoord4[i] = idx4[i + dee]; + concatArray(coord, pcoord3, pcoord4, 0, 0, l0, 0, l1); + lin = d.CoordToLinear(coord); + Msum.elements[lin] = 0.0f; + //for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + //for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + { + for (int i = 0; i < dee; i++) { + r[i] = idx4[i]; + //rev[i] = idx4[dee - 1 - i]; + } + + //concatArray(coord3, pcoord3, rev, 0, 0, l0, 0, dee); + concatArray(coord3, pcoord3, r, 0, 0, l0, 0, dee); + concatArray(coord4, r, pcoord4, 0, 0, dee, 0, l1); + //printf("[");printArray(coord3, M3.Dim.rank); printf("]["); printArray(coord4, M4.Dim.rank);printf("] =*= ("); printArray(coord, Msum.Dim.rank); printf(") |||"); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + + Msum.elements[lin] += (M3.elements[lin3] * M4.elements[lin4]); + //printf("lin:%d lin3:%d lin4:%d el+:%f\n", lin, lin3, lin4, Msum.elements[lin]); + } + ASSERT_FLOAT_EQ(Msum.elements[lin], M.elements[lin]) << " lin: " << lin << " Msumelem: " << Msum.elements[lin] << " Melem: " << M.elements[lin]; + } +} + + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/testool b/testool new file mode 100644 index 0000000000000000000000000000000000000000..1170e5dad63451809f21ec4ec5a252ee638a4ea7 GIT binary patch literal 26680 zcmeHQ3v^UPny!SmMiUSe@jc@3fbt*>v-!Fy3~OsS3LF$d%3W41e?WUR=p>S}Deg{t3q>zWn^9AIIF#cdDDC|adfc@} z@jKq7oAbE6MEy{}?Fn*t)%dc~rI$?}UsgP>thAzf^|;kjE*p2*>2DBDH{7THyi!f?bcFD$K4RgrK} z`AQ8d*MJXJ(T#S|iW}`^g{5U`Ww^8=vP_W{_3~wER_^Q>IrhZhWanyP@KWb`(qv`N zUOdk(E(w<`FRhA{gcr}tDXXj~SzNfZjGA9wURlA-+eS35AoXqvdbuxNeQ-tBR0u7g zNQYFl0(B}f-^e)MJlN2;@_Zun0;UfvFnpm) zz1cQ&$~B6;lol7=ou92P`T!9IcDU%$2C)yh=#s-k-^p~cA!CZ@Q5T)YiC|aKc(84kQ>O*o!M4Yw z4r^<*wV6uWLw%gDKH675$5$Wbu4n#!cHNsdSaly;wV!q@TpY@bX6~^zPJ88CrDDUr zp0Cs&mIc#C-U%mggBz!v30-Srzaa!y9EuFXt-lK)y7fV^mh_P|B=;gC$iVX~mwJb{?iTe_LicifnVyo&g`Bv?;JLuA? z`z(@X)la(%mDaW+u~=&{g7ZqhwWz0D4_%Lb+s$B-p?v6Bb!!hu0PRu#@`dK);=^61)DzZW~&46o%@xL-pzk$DgywpHA}8c+)8@ zH{~;OG@e+-idBzJcAObW&UiRSC~DfBK@&nK&1k)t!a?;>(^-$nb{ZaL+{w*Af3Cz}KtwZD0PGZ__u z_x61F)&Ay$p4PT!bmnZn&ReVQU8}CKb;6%xu|4j5Y(-+XSatc$R%7Up&gOtspWi}_ zSt(6c{k&FGarr7T{_a$(HgqUO^QMNH!|!g|s6i4&BpI^^8}jCWUpWD>h(J=uW8Kq{eJm7{XU__Cv}bY3x<>?U4VG+#^_1? zI(@M6rpAlk>ARSB1xCyZiOgddr5@~t`ElCST`hgBIx>r7G1fI@7bs+W-zEE|c>Uhv z@w>Kt0>)9-wEim-hoR!ANky86DI0}KE$DvLBk!(5v6#25r#EeK4Mo#dGlpYRu~px9 z^hK2Ab)iFx8Z^>OV=?q4Mr;SA(1FPqqBK2p&`oXx!;JN5hhwqz(KUlR zxlOEC8|t7@++j@(HHBYDe-bJfM2=v9)zoRr>oVw})@4@ux~PB11>rXs z6;aRJnuU3MH|Ex8*SU4Ks_y&V7Y7OvfZbSnt!+UfH|u#d8>D8-d4}vZfo|bEo@%^4 zql-|Ju~Hjqrb)ILlWa8n#J+O9rs|CFfc?#_cN{#X81u)C?hLw*FkX5*=-&Fu=Wf7x zI@YdjbskZ9G7|vJl5!$C2}78clz0l$l9HZP=np+1+=D2xU{;o%e~Ifx6wK9unM4+2gagj*~IrgCaD)Yj`H4n>c#KSv59-2=U&K&cE!R) zl05kJ{|hfFPOum2dD?cx!bOr;Ogh0{xWX^l=^}|2DaXZ&f-YkuOOFv}Vs%2{S)B6- zdAlQ7Za&%L5#qY-B$3#X)Qheogg!SM&$awbzo#gy>(*NS>~`O@eY~AJrw7j6exN(f z9s3IT1q1;B?7>?8o!@;wo?hQ$Pseo2)9x%Q{)pZ~RC~6Tzpbr1f$fgFe|J3GeXaGF z-0jI)ew^{Q$7}f!-FCTi{VkvFzGE%_I&u#gAuh}8iVrE>Sj#W{?f0~nuQXowWG(;X z=f{*5Jy^>RGtTyG&<*grdwf>adtesb|2?neA3QFt_H-3(C-tK18X2dL*XKVR^NqzXf*7EOs zdThB_;M+553~jygHy5hLP{WgNU5Kx#^Bb_~zLh_&{L{y=SlzDr{D%6_*1A{dJ8%3s zDZY8rU*xrJX~CBO8}qli6e?I@t0;Vw?owFUsnEbJOzTu=&@F6tD@=z%V`#f^a0ffM z1MO{R^&|Kj0&;LieQ3K@zr(GrOncj13a_!kc2W4DTj7ll^&OGI4sPMWPK6!1g@9Y( zAt*G4qWv&*8bdG9pDi?ocGI7UHiq`nUzaw9n&@vA8bfc>SLpcY*a9GIWz^q!SR3a> z(PYchq-oW?PTy@l_X%8S4CzQ8xBwpvch=8b-=aO(cB(6;XOqjdp`9uCVr6H)Rqv9W z=Zu}k)-bjYO<3G2Z6Cy9+OB`MF$%^W{MiP5!>|9Q!}%)?^0v41MxZwWy%Fe*KyL(k zBhVXx-U#$Ypf>`&5$KJ;UvdOe@SD-V=<2{#)9HO$V03ZdBoK-_&fS-=b4ip2_mHv?;cuLCy&&wCsCz&XGc;JrZl(fh~10C3tn&F+@w_!O`PSoGh}2M*Z>ec&!&3o!CN^zm6e zb3gQf&A=?+-{Nz40q|R34X_HI@izkx1Eav(v9Hkr?1#Mxg&(-q0|UUp*i6X+-UloI zo`pSx8sHY-X5dxWD~ba50b79Uv4f@XW7_f9Weos70A>L<;+u*BU>3eth50GH(~RH=yaCD!q;Q53rS$+0R7YEQMjCp*Zr@> zVhf3WYR1*44xf{L^34Nl)btToUNULa`9vmt`mJ*ma$1pyC`6CowG;F#pcHCr173U3 z-fSX;Gc!u4!QMj)9U(O+Ck4s!cW6}mw-Rlt)Du> zX@4yEo09Nz!G8w)#UA~+j(!>VUxI(FhdrVeIcJ#}@FHgeX z0RAK3>z{x-`qw!64dBzSmJ;#zf`1X_q$M8z7draw;1_~l=HVAP{4~r_{|5d)dia0u z@W+CG3Ffv${9N!K2LBO{{uW2S4E#*YeTn!Rz<&n(Z65u9b@Us+pMyCv5q~fE?}7ig zNBycW5M49exmks!5@h^d6h@M+|e%szd8wj z1Ni?5{#_pZwT^xR_?ejF6Y=+ge_s-QJNVxw;iuslLC=mv_Q!(1Cka0n{PB3!-0AWE zR>%J`@P7e*BKsS_KMa1N{AvJy6`o;<_HK1&W5l?8Hah&dH(hpm8uh%V$xfBBhVXxq!EyFd*qxREV*>C7}FT#pdcI)q|4ix z2G8WWoCkCk*D*A88Nn#$0@1peitxWV604;8c0B8;V9M8JH=cb|Fa_$;%>0wMuD_tf z^~qcx&Nz%wfYsDyKBtGK3MvKq-K_8F5>t&XXE5H+4GX=Q9TUH!EQgONx=8!@O}H*1 z_rEc7e~7Cl9pU=@Y%kWy*LodXuVMaSuFHL}`Qrw1?wjA>1pW||#ki2M!2RLKuau-N z=RM8J$+aBzVRA6@VrC>Ur_YM&tGB_OXv^dBA)+=?&~paXQ)~|M=W$=JUh>8`7@M1 zA9icC)0}hk#L7M`KinBtLjMXELvUXscp&P}W}x*$K;oU}cD|-(sBgy@3wo`9%c)GC zcAl4hA=957ZRmP!h|B3rpTiT7UUPwd4d?+Q>UsYP6G1>>_A2m@)hy%<`TyPphOXDH zxD0FiURkQG*Yph4#P@qWi#)*egPcE0Z& za-0wIIC`8JHDST3g`)SKfUh#WhUG(= zi}gR#|07@s68}R?U&3+YVW3WB|F`giT<@Jg^%ItVfEOeZpV2J;6V5jokK394J{uPK zsZ7sc|K+~&K*y}7h8Gw=<^3Do?-|Z{w-++wC+yGaiwxmera#Z)`t%AAsh#b`%&+aLF7J`f0^SW`S~i-zv2a{JQv2m4&}K& z4wtk2ZQPG~j}!LifgUifc;4UV_|Nj$U(R-3_swGqSbjNoxb)*XmVbc9%bncHFG1(f zc-nn}>QABa;+V*m}||`-<rgUBn5L+=w^iaqlDFHo|e-FLgd^~TdQ7$*VP z!8oQ8-hbjAmD}=WtJBZ6jiZ)%-th&SZR(Sm;~ux{4{YL;b~WcdSlW}NdVY+TG%|=( zuxd?tq;M%P5;nL(u5jFRNqD6SR#Zkxg3BwagDb<8D@($WH7?T9>QbCQURung>>0Di z(UU=ouPCfqp@PM0D$s<%NZ4>zm4vHGD=Rz|8@zBySs^L#)ylGn3epU(g18JWujGrW zk|Gt1l&nUT9u<(T)bmrYWCcGQRusdU)Qk=DlrS1{jiw6AOJU5YBkfcWkEe1xbv)0| z?o?>`izPKaqnk1u>oqEZvX5n`Mpmu~FTiU6UZOvR^#v!8kgMp;Ay@q}T)XxC=jM-c z$A(N`z$g6~EGPSsf(1;c_*2aM`ddJwm`MLP>qlNk z^2a+!nBvc$2SIl02Ut&Vp(rvk+iB)upZ-b~5EOYaBXz;;KK&YI2xf^Qqv#1f<UIU^h)aQNrvTq_N``{w)kKfCzFa58wSd#H_t_t||Uju{UFZzl8k3}N=eLnq# zoJfLWC1Kcruc|%+9X|@vV~|8-e_#I3gWiTr;O8h=zoPx|pu&?tVQFZ=(S7m&141^T`7P4or(L)oqGC@eHO(vDaV zJ*lTbnP}3!eE!T}eZM@l