commit 8e529cb426990ccc7d214ec09ebb81a1836c8a27 Author: fanasina Date: Sun Jul 16 00:15:15 2023 +0200 trying TEMPLATE C diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..fcdf68d --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,18 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "${workspaceFolder}/**", + "/home/fanasina/progr_/ptens0neD/**", + "/usr/include/boost/predef/language" + ], + "defines": [], + "compilerPath": "/usr/bin/clang-13", + "cStandard": "c17", + "cppStandard": "c++14", + "intelliSenseMode": "linux-clang-x64" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..decddbd --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "files.associations": { + "array": "cpp", + "string_view": "cpp", + "initializer_list": "cpp", + "utility": "cpp" + } +} \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1ced397 --- /dev/null +++ b/Makefile @@ -0,0 +1,53 @@ +CC=nvcc +LDFLAGS=-lgtest -lpthread +ROOT_DIR=$(shell pwd) +INCLUDE_DIR=$(ROOT_DIR) +CFLAGS=-I$(INCLUDE_DIR) +SRC_DIR=$(ROOT_DIR)/src +SRC=$(wildcard src/*/*.c*) $(wildcard src/*/*/*.c*) +OBJPP=$(SRC:.cpp=.o) +OBJS=$(OBJPP:.cu=.o) +#HEADS=$(OBJS:.o=.h) +TEST_DIR=$(ROOT_DIR)/test +EXEC=$(TEST_DIR)/isgood +PERMSRC=$(wildcard src/*/*perm*.cpp) +PERMSRC_O=$(PERMSRC:.cpp=.o) +DIMSRC=$(wildcard src/*/*dim*.cpp) +DIMSRC_O=$(DIMSRC:.cpp=.o) +TENSRCPP=$(wildcard src/*/*/tens*.cpp) +TENSRCPP_O=$(TENSRCPP:.cpp=.o) +TENSRCU=$(wildcard src/*/*/tens*.cu) +TENSRCU_O=$(TENSRCU:.cu=.o) +DTENSRCU=$(wildcard src/*/*/d_tens*.cu) +DTENSRCU_O=$(DTENSRCU:.cu=.o) + + +TENSRC=$(TENSRCPP) $(TENSRCU) +all: $(EXEC) + +$(EXEC): $(EXEC).cu $(OBJS) + $(CC) -o $@ $^ -I$(INCLUDE_DIR) $(LDFLAGS) + +$(DIMSRC_O): $(DIMSRC) $(PERMSRC_O) + $(CC) -o $@ -c $< $(CFLAGS) + +$(TENSRCPP_O): $(TENSRCPP) $(DIMSRC_O) + $(CC) -o $@ -c $< $(CFLAGS) + +$(TENSRCU_O): $(TENSRCU) $(DTENSRCU_O) $(DIMSRC_O) + $(CC) -o $@ -c $< $(CFLAGS) + +$(PERMSRC_O): $(PERMSRC) + $(CC) -o $@ -c $< $(CFLAGS) + +$(DTENSRCU_O) : $(DTENSRCU) + $(CC) -o $@ -c $< $(CFLAGS) + +.PHONY: clean mrproper + +clean: + rm -f $(OBJS) + +mrproper: clean + rm -f $(EXEC) + diff --git a/cmdcreate.sh b/cmdcreate.sh new file mode 100644 index 0000000..4cf833f --- /dev/null +++ b/cmdcreate.sh @@ -0,0 +1,3 @@ +#!/bin/bash + + nvcc isgood.cu tensor.cu cudatensor.cu ../permutation/permutation.cpp -o isgood -lgtest -lpthread -g --relocatable-device-code=true diff --git a/create.sh b/create.sh new file mode 100644 index 0000000..9c9ed63 --- /dev/null +++ b/create.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +make "$@" + diff --git a/kmemcheck.sh b/kmemcheck.sh new file mode 100644 index 0000000..c32bafa --- /dev/null +++ b/kmemcheck.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +make "$@" +compute-sanitizer --tool memcheck ./test/isgood diff --git a/krun.sh b/krun.sh new file mode 100644 index 0000000..44f67fb --- /dev/null +++ b/krun.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +make "$@" +./test/isgood diff --git a/memcheck.sh b/memcheck.sh new file mode 100644 index 0000000..6ac3e9a --- /dev/null +++ b/memcheck.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +compute-sanitizer --tool memcheck ./build/isgood diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..acb1995 --- /dev/null +++ b/run.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +./test/isgood diff --git a/src/coordinate/coordinate.h b/src/coordinate/coordinate.h new file mode 100644 index 0000000..2521882 --- /dev/null +++ b/src/coordinate/coordinate.h @@ -0,0 +1,21 @@ +#ifndef __COORDINATE_C__H__ +#define __COORDINATE_C__H__ + +#include "src/dimension/dimension.h" + + +struct coordinate +{ + size_t lin_coo; + unsigned int *coord; + struct dimension *dimension; +}; + +typedef coordinate coordinate; + +void LinearToCoord(struct coordinate *coor); +void CoordToLinear(struct coordinate *coor); + + + +#endif diff --git a/src/dimension/dimension.cpp b/src/dimension/dimension.cpp new file mode 100644 index 0000000..e351cff --- /dev/null +++ b/src/dimension/dimension.cpp @@ -0,0 +1,181 @@ +#include +#include + +#include + +#include +#include + + + +//#include "/home/fanasina/progr_/ptens0neD/src/dimension/dimension.h" + +//#include "/home/fanasina/progr_/ptens0neD/src/permutation/permutation.h" + + +#include "src/dimension/dimension.hpp" + +#include "src/permutation/permutation.hpp" +//#include "permutation.h" + +/*void dimension::initDim(int* arr, bool end = true) { + endian = end; + delete[]dim; + dim = new int[rank]; + size = 1; + for (int i = 0; i < rank; ++i) { + dim[i] = arr[i]; + size *= dim[i]; + } +}*/ + +dimension& dimension::operator=(const dimension& d) { + int oldRank = rank; + rank = d.rank; + size = d.size; + initDim(d.dim, oldRank); + //for (int i = 0; i < rank; i++) dim[i] = d.dim[i]; + return *this; +} + +dimension& dimension::operator+=(const dimension& d) { + int oldRank = rank; + int* t = new int[rank + d.rank]; + for (int i = 0; i < rank; i++) t[i] = dim[i]; + for (int i = 0; i < d.rank; i++) t[rank + i] = d.dim[i]; + size *= d.size; + rank += d.rank; + initDim(t, oldRank); + return *this; +} + +void dimension::LinearToCoord(int* ret, int lin) const { + int begin = 0, end = rank - 1; + int (*iter)(int) = incr; + bool (*cond)(int, int) = isLessThan; + if (endian == false) { + //if (endian) { + begin = rank - 1; end = 0; + iter = decr; cond = isGreatThan; + } + //printf("to coor begin = %d end = %d \n", begin, end); + + int sm = lin; + int pp = size; + for (int i = begin; cond(i, end); i = iter(i)) { + //printf(" i: %d ", i); + pp /= dim[i]; + ret[i] = sm / pp; + sm %= pp; + //printf("sm[%d] = %d , pp=%d ; ", i, sm, pp); + } + ret[end] = sm; +} + +int dimension::CoordToLinear(int* coo) const { + int begin = 0; + int end = rank - 1; + int (*iter)(int); iter = &incr; + bool (*cond)(int, int); cond = &isLessEqThan; + + if (endian) { + begin = rank - 1; end = 0; + iter = &decr; cond = &isGreatEqThan; + } + + int pp = 1; + int sm = 0; + for (int i = begin; cond(i, end); i = iter(i)) { + sm += (coo[i] * pp); + pp *= dim[i]; + } + return sm; +} + +bool isLessEqThan(int a, int b) { return a <= b; } +bool isLessThan(int a, int b) { return a < b; } +bool isGreatEqThan(int a, int b) { return a >= b; } +bool isGreatThan(int a, int b) { return a > b; } +int incr(int i) { return i + 1; } +int decr(int i) { return i - 1; } + + +void add(dimension& d, const dimension& d0, const dimension& d1) { + int oldRank = d.rank; + int* t = new int[d0.rank + d1.rank]; + for (int i = 0; i < d0.rank; i++) t[i] = d0.dim[i]; + for (int i = 0; i < d1.rank; i++) t[d0.rank + i] = d1.dim[i]; + d.rank = d0.rank + d1.rank; + d.initDim(t, oldRank); +} + +void max(dimension& d, const dimension& d0, const dimension& d1) { + if (d0.rank > d1.rank) { + d = d0; + } + else if (d0.rank < d1.rank) { + d = d1; + } + else {// d0.rank = d1.rank + d = d0; + for (int i = 0; i < d.rank; i++) { + if (d.dim[i] < d1.dim[i]) d.dim[i] = d1.dim[i]; + } + } +} + +void min(dimension& d, const dimension& d0, const dimension& d1) { + if (d0.rank > d1.rank) { + d = d1; + } + else if (d0.rank < d1.rank) { + d = d0; + } + else {// d0.rank = d1.rank + d = d0; + for (int i = 0; i < d.rank; i++) { + if (d.dim[i] > d1.dim[i]) d.dim[i] = d1.dim[i]; + } + } +} + +void minReverse(dimension& d, const dimension& d0, const dimension& d1, bool& rev) { + if (d0.rank > d1.rank) { + d = d1; + rev = true; + } + else if (d0.rank < d1.rank) { + d = d0; + rev = false; + } + else {// d0.rank = d1.rank + d = d0; + for (int i = 0; i < d.rank; i++) { + if (d.dim[i] > d1.dim[d.rank - 1 - i]) d.dim[i] = d1.dim[d.rank - 1 - i]; + } + rev = false; + } +} + +void reverseArray(int* arr, int sz) { + int tmp[sz], i = 0; + for (; i < sz / 2; i++) { + tmp[i] = arr[i]; + arr[i] = arr[sz - 1 - i]; + } + for (; i < sz; i++) { + arr[i] = tmp[sz - 1 - i]; + } +} + +void transform(dimension& dDst, const dimension& dSrc, int* perm, int sz) { + dDst = dSrc; + setInit setIn(sz); + if (sz == dSrc.rank) { + if (isPermutation(perm, setIn, sz)) { + for (int i = 0; i < sz; i++) dDst.dim[i] = dSrc.dim[perm[i]]; + } + } +} + + diff --git a/src/dimension/dimension.h b/src/dimension/dimension.h new file mode 100644 index 0000000..d522d2d --- /dev/null +++ b/src/dimension/dimension.h @@ -0,0 +1,31 @@ +#ifndef __DIM__ +#define __DIM__ + +#include +#include + +struct dimension +{ + unsigned int rank; + unsigned int* dim; + size_t size; +}; +typedef dimension dimension; + + +void print_dimension(dimension d); + + +void add(dimension* d, const dimension* d0, const dimension* d1); + +void max(dimension* d, const dimension* d0, const dimension* d1); + +void min(dimension* d, const dimension* d0, const dimension* d1); + +bool minReverse(dimension* d, const dimension* d0, const dimension* d1); + +void transform(dimension* dDst, const dimension* dSrc, int* perm); + + +#endif + diff --git a/src/dimension/dimension.hpp b/src/dimension/dimension.hpp new file mode 100644 index 0000000..cf8bf66 --- /dev/null +++ b/src/dimension/dimension.hpp @@ -0,0 +1,90 @@ +#ifndef __DIMENSION__ +#define __DIMENSION__ + +#include +#include + +#include + +//#include "tensor.h" + +//#include "dimension.h" + +static int iArray1[1] = { 1 }; + + + +struct dimension { + //friend dimension& operator+(const dimension& d, const dimension& d1); + friend void add(dimension& d, const dimension& d0, const dimension& d1); + friend void max(dimension& d, const dimension& d0, const dimension& d1); + friend void min(dimension& d, const dimension& d0, const dimension& d1); + friend void minReverse(dimension& d, const dimension& d0, const dimension& d1, bool& Rev); + friend bool checkMatchProdTensor(dimension& d0, const dimension& d1, int nestingDepth); + friend bool checkMatchProdTensorReverse(dimension& d0, const dimension& d1, int nestingDepth); + friend void extractDimNestingDepth(dimension& dM, const dimension& d0, const dimension& d1, int nestingDepth); + + + int rank; + int* dim; + size_t size; + bool endian; //LitleEndian : true, BigEndian : false, + void initDim(int* arr, int oldRank) { + + //delete[]dim; + //dim = new int[rank]; + if (rank > oldRank) { + free(dim); + dim = (int*)malloc(rank * sizeof(int)); + } + size = 1; + for (int i = 0; i < rank; ++i) { + dim[i] = arr[i]; + size *= dim[i]; + } + } + void initDim(bool end = true) { + endian = end; + //delete[]dim; + //dim = new int[rank]; + + if (dim != NULL) free(dim); + dim = (int*)malloc(rank * sizeof(int)); + } + dimension& operator=(const dimension& d); + dimension& operator+=(const dimension& d); + //dimension& operator*=(const dimension& d); + dimension(int d = 1, int* arr = iArray1, bool end = true) { + endian = end; + rank = d; + //dim = new int[d]; + dim = (int*)malloc(d * sizeof(int)); + initDim(arr, rank); + } + void print() const { printf(" rank: %d\n", rank);for (int i = 0; i < rank; i++) printf(" %d ", dim[i]);printf("\nsize:%ld\n", size); } + void LinearToCoord(int* ret, int lin) const; + int CoordToLinear(int* coo) const; +}; + +bool isLessEqThan(int a, int b); // { return a <= b; } +bool isLessThan(int a, int b); // { return a < b; } +bool isGreatEqThan(int a, int b); // { return a >= b; } +bool isGreatThan(int a, int b); // { return a > b; } +int incr(int i); // { return i + 1; } +int decr(int i); // { return i - 1; } + + + +void add(dimension& d, const dimension& d0, const dimension& d1); + +void max(dimension& d, const dimension& d0, const dimension& d1); + +void min(dimension& d, const dimension& d0, const dimension& d1); + +void minReverse(dimension& d, const dimension& d0, const dimension& d1, bool& rev); + +void transform(dimension& dDst, const dimension& dSrc, int* perm, int sz); + + +#endif + diff --git a/src/permutation/permutation.c b/src/permutation/permutation.c new file mode 100644 index 0000000..484002b --- /dev/null +++ b/src/permutation/permutation.c @@ -0,0 +1,105 @@ +#include "src/permutation/permutation.h" + +permutation* +create_permutation(size_t sz) +{ + if(sz == 0) return NULL; + permutation *p=malloc(sizeof(permutation)); + p->size = sz; + p->perm = malloc(sz*sizeof(unsigned int)); +} + +/*void +copy_array_unsigned(unsigned int *dst, const unsigned int *src, size_t size) +{ + for(size_t i = 0; i < size ; ++i) + dst[i]=src[i]; +}*/ + +void +assign_permutation(permutation *p, unsigned int *arr) +{ + copy_array_unsigned(p->perm, arr, p->size); +} + +bool +is_permutation_set_theoric(const permutation *p) +{ + if(p==NULL) return false; + size_t size = p->size, j; + unsigned int *count_array_i = calloc(size, sizeof(unsigned int)); + if(count_array_i == NULL) + { + printf("can't allocate count_array_i\n"); + return false; + } + for(size_t i = 0; i < size; ++i) + { + j = p->perm[i]; + if((j >= size) || count_array_i[j]) + { + free(count_array_i); + return false; + } + count_array_i[j]++; + } + return true; +} +/* 2,7,4,1 is a permutation of 1,2,4,7 + *it is equivalent of 1,3,2,0 in set_theoric(4)=0,1,2,3 + this function calculate the permutation equivalent in set_theoric + * */ +permutation * +translate_set_theoric(const permutation *p, permutation *translate_p) +{ + if(p==NULL) return NULL; + size_t size = p->size; + permutation *translate_p = create_permutation(size); + unsigned int *temperm = malloc(size*sizeof(unsigned int)); + unsigned int *tmperm = malloc(size*sizeof(unsigned int)); + copy_array_unsigned_int(tmperm, p->perm, p->size); // copy + qsort(tmperm, size, sizeof(unsigned int), compare_unsigned_int); + // tmperm contain p->perm ordered + + size_t cur=0; + for(size_t i=0; i< size; ++i) + { + for(size_t j=0; jperm[j] == tmperm[i]) + { + bool found = false; + for(size_t c=0; cperm[i]=j; + temperm[cur++]=j; + break; + } + } + } + } + free(tmperm); + free(temperm); + return translate_p; +} + +bool +is_permutation(const permutation *p) +{ + bool ret = is_permutation_set_theoric(p); + if(ret == false) + { + permutation *t_p = translate_set_theoric(p); + ret = is_permutation_set_theoric(t_p); + free(t_p); + } + return ret; +} diff --git a/src/permutation/permutation.cpp b/src/permutation/permutation.cpp new file mode 100644 index 0000000..dcf5f0a --- /dev/null +++ b/src/permutation/permutation.cpp @@ -0,0 +1,292 @@ +#include +#include +#include +#include + +//#include "/home/fanasina/progr_/ptens0neD/src/permutation/permutation.h" + +#include "src/permutation/permutation.h" + +int sign(int a) { + if (a < 0) return -1; + return 1; +} + +bool isPermutation(int* perm, setInit se, int sz) { + std::vector tmp; + for (int i = 0; i < sz; i++) { + for (int j = 0; j < sz; j++) { + if (perm[i] == se.setinit[j]) { + if (find(tmp.begin(), tmp.end(), j) == tmp.end()) { + tmp.push_back(j); + break; + } + } + } + } + return tmp.size() == sz; + +} + +int permutation::signature() { + int ss = 1; + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + ss *= sign(perm[j] - perm[i]);// * sign(j - i); + } + } + return ss; +} + +int signature(int* tab, int sz) { + int ss = 1; + for (int i = 0; i < sz; i++) { + for (int j = i + 1; j < sz; j++) { + ss *= sign(tab[j] - tab[i]) * sign(j - i); + } + } + return ss; +} + +template +void permutation::permute(T* dst, T* src) { + for (int i = 0; i < size;i++) { + dst[i] = src[perm[i]]; + } +} + +template +void permutation::permute(int* dst, int* src); +template +void permutation::permute(float* dst, float* src); + +// complexité sz*(sz+1)/2 +size_t TabToPlaceAlgo(int* tb, int sz) { + int cnt = 0; + int pl; + + int* tPlace = new int[sz]; + + for (int i = sz - 1; i >= 0; i--) { + cnt++; + pl = 0; + for (int j = i + 1; j < sz; j++) { + cnt++; + if (tb[j] < tb[i]) { + pl++; + } + if (pl == tb[i]) break; + } + tPlace[tb[i]] = pl; + + } + size_t q = 0; + for (int i = 0; i < sz;i++) { + cnt++; + //printf("tPlace[%d] == %d et tb[%d] == %d\n", i, tPlace[i], i, tb[i]); + q = (i + 1) * q + tPlace[i]; + } + //printf("algo cnt = %d ", cnt); + return q; +} + +// complexité sz*(sz+1)/2 +size_t TabToPlaceOpt1(int* tb, int sz) { + int cnt = 0; + int mx; + int* tPlace = new int[sz]; + for (int i = sz - 1; i >= 0; i--) { + cnt++; + if (i == sz - 1) { + mx = tb[i]; + tPlace[mx] = 0; + } + else if (mx > tb[i]) { + int pli = 0; // si c est le plus à droite 0 si pas de superieur à lui, on incremente si on trouve plus petit + for (int j = sz - 1; j > i; j--) { + cnt++; + if (tb[i] > tb[j]) { + pli++; + } + else if (tb[i] == tb[j]) { + //return -1; // something wrong + throw "something wrong here, tb[i]==tb[j]"; + } + } + tPlace[tb[i]] = pli; + } + else if (mx < tb[i]) { + mx = tb[i]; + tPlace[mx] = sz - 1 - i; + } + else { + //return -1; // something wrong + throw "something wrong here, tb[i]==mx"; + + } + } + size_t q = 0; + for (int i = 0; i < sz; i++) { + cnt++; + //printf("tab tPlace[%d] == %d et tb[%d] == %d [ q=%d, cnt = %d\n", i, tPlace[i], i, tb[i], q, cnt); + q = (i + 1) * q + tPlace[i]; + } + //printf("Opt cnt = %d ", cnt); + + return q; +} + +// complexité sz*(sz+1) +size_t TabToPlaceNotab(int* tb, int sz) { + int cnt = 0; + int mx = sz - 1; + size_t q = 0; + int pl; + for (int i = 0; i < sz; i++) { + cnt++; + int j; + for (j = 0; j < sz;j++) { + cnt++; + if (tb[j] == i) break; + } + pl = 0; + j++; + for (;j < sz;j++) { + cnt++; + if (tb[j] < i) { + pl++; + } + if (pl == i) break; + } + q = (i + 1) * q + pl; + //q = (sz - tb[i]) * q + pl; + //printf("notab tPlace[tb[%d]] == %d et tb[%d] == %d [ q=%d, cnt = %d\n", i, pl, i, tb[i], q, cnt); + + } + //printf(" notab cnt = %d ", cnt); + + return q; +} + + + + + +//complexité sz*sz/2 +void PlaceToTab(int* tb, size_t pl, int sz) { + int cnt = 0; + size_t a = pl; + int pltbi; + int size = 1; + // s'assurer que tb soit null + for (int i = 0;i < sz;i++) tb[i] = 0; + + for (int i = sz - 1; i >= 0; i--) { + cnt++; + pltbi = a % (i + 1); + a /= (i + 1); + if (i == sz - 1) { + tb[sz - 1 - pltbi] = i; + } + else { + int lt = 0, j = sz - 1; + while (lt < pltbi && j >= 0) { + cnt++; + if (tb[j--] < i) { + lt++; + } + } + while (tb[j] > i) { + cnt++; + j--; + } + tb[j] = i; + + } + } + //printf("cnt PlaceToTab :%d ", cnt); +} + +size_t factorial(int n) { + size_t ret = 1; + for (size_t i = 2; i <= n; i++) { + ret *= i; + } + return ret; +} + +// src1 o src0 = dst; dst(i) = src1(src0(i)) +void compose(int* dst, int* src0, int* src1, int sz) { + for (int i = 0; i < sz; i++) { + dst[i] = src1[src0[i]]; + } +} +// src1 o src0 = dst; dst(i) = src1(src0(i)) +void compose(size_t& rdst, size_t psrc0, size_t psrc1, int sz) { + int dst[sz], src0[sz], src1[sz]; + PlaceToTab(src0, psrc0, sz); + PlaceToTab(src1, psrc1, sz); + for (int i = 0; i < sz; i++) { + dst[i] = src1[src0[i]]; + } + rdst = TabToPlaceOpt1(dst, sz); +} +/* +template +void transform(T* dst, T* src, int* perm, int sz) { + for (int i = 0; i < sz; i++) { + dst[i] = src[perm[i]]; + } +} +template +void transform(T* dst, T* src, size_t pl, int sz) { + int perm[sz]; + PlaceToTab(perm, pl, sz); + for (int i = 0; i < sz; i++) { + dst[i] = src[perm[i]]; + } +} +*/ +void permuteArray(int* dst, int* src, int* perm, int sz) { + for (int i = 0; i < sz; i++) { + dst[i] = src[perm[i]]; + } +} + +void inverseArray(int* dst, int* src, int sz) { + for (int i = 0; i < sz; i++) { + dst[src[i]] = i; + } +} + +// seek?/ seek o src = seek(src)=dst => seek = dst o inv(src) +void permCorrespondance(int* sk, int* dst, int* src, int sz) { + int inv[sz]; + inverseArray(inv, src, sz); + compose(sk, dst, inv, sz); +} + + +// SRC o transf = DST, SRC:{a,b,c,d,g,f} o transf = DST:{g,b,d,f,c,a} +// SRC[0]=a, SRC[1]=b, SRC[2]=c SRC[3]=d SRC[4]=g SRC[5]=f +// DST[0]=g=SRC[4] DST[1]=b=SRC[1] DST[2]=d=SRC[3] DST[3]=f=SRC[5] DST[4]=c=SRC[2] DST[5]=a=SRC[1] +// trans[0]=4 trans[1]=1 trans[2]=3 trans[3]=5 trans[4]=2 trans[5]=0 +// (*cmp) (a,b) = 0 if a==b, -1 if ab +template +void CorrespondacePerm(T* src, T* dst, int* transf, int sz, int (*cmp)(T, T)) { + int tmp[sz]; + std::vector tmpV; + int curt = 0; + for (int i = 0; i < sz; i++) { + for (int j = 0; j < sz; j++) { + if (cmp(dst[i], src[j]) == 0) { + if (std::find(tmpV.begin(), tmpV.end(), j) == tmpV.end()) {// not found + transf[i] = j; + tmpV.push_back(j); + break; + } + } + } + } +} +template void CorrespondacePerm(char* src, char* dst, int* transf, int sz, int (*cmp)(char, char)); \ No newline at end of file diff --git a/src/permutation/permutation.h b/src/permutation/permutation.h new file mode 100644 index 0000000..5a0c55a --- /dev/null +++ b/src/permutation/permutation.h @@ -0,0 +1,25 @@ +#ifndef __PERMUTATION_C_H__ +#define __PERMUTATION_C_H__ + +#include "src/set_theoric/set_theoric.h" + +/* struct of permutation of unsigned int array, not necessarly set_theoric + * + * */ +struct permutation +{ + size_t size; + unsigned int *perm; +}; + +typedef struct permutation permutation; + +permutation * create_permutation(size_t sz); +void assign_permutation(permutation *p, unsigned int *arr); + +bool is_permutation_set_theoric(const permutation *p); + +// more general! need translation and use is_permutation_set_theoric +bool is_permutation(const permutation *p); + +#endif /*__PERMUTATION_C_H__*/ diff --git a/src/permutation/permutation.hpp b/src/permutation/permutation.hpp new file mode 100644 index 0000000..29861b6 --- /dev/null +++ b/src/permutation/permutation.hpp @@ -0,0 +1,99 @@ + +#ifndef __PERMUTATION_H__ +#define __PERMUTATION_H__ + +#include + + +struct setInit { + int size; + int* setinit; + setInit(int sz = 1, int beg = 0) { + size = sz; + setinit = new int[sz]; + for (int i = 0; i < sz; i++) setinit[i] = beg + i; + } +}; + +struct permutation { + int size;// type + int rang; //place;//rang; + int* perm; + permutation(int sz, bool b) { + size = sz; + perm = new int[size]; + } + permutation(int sz = 1, int* tb = { 0 }) { + size = sz; + perm = new int[size]; + for (int i = 0; i < size; i++) perm[i] = tb[i]; + } + //int TabToPlace(int* tb , int sz ); + //void PlaceToTab(int* tb , int pl , int sz); + int signature(); + template + void permute(T* dst, T* src); +}; + +bool isPermutation(int* perm, setInit se, int sz); + +int signature(int* tab, int sz); + +// complexité sz*(sz+1)/2 +size_t TabToPlaceAlgo(int* tb, int sz); + +// complexité sz*(sz+1)/2 +size_t TabToPlaceOpt1(int* tb, int sz); + +// complexité sz*(sz+1) +size_t TabToPlaceNotab(int* tb, int sz); + +//complexité sz*sz/2 +void PlaceToTab(int* tb, size_t pl, int sz); + +size_t factorial(int n); + +// src1 o src0 = dst; dst(i) = src1(src0(i)) +void compose(int* dst, int* src0, int* src1, int sz); +// src1 o src0 = dst; dst(i) = src1(src0(i)) +void compose(size_t& rdst, size_t psrc0, size_t psrc1, int sz); + +/*template +void transform(T* dst, T* src, int* perm, int sz); +template +void transform(T* dst, T* src, size_t pl, int sz); + +template +void transform(T* dst, T* src, int* perm, int sz) { + for (int i = 0; i < sz; i++) { + dst[i] = src[perm[i]]; + } +} +template +void transform(T* dst, T* src, size_t pl, int sz) { + int perm[sz]; + PlaceToTab(perm, pl, sz); + for (int i = 0; i < sz; i++) { + dst[i] = src[perm[i]]; + } +}*/ + +void permuteArray(int* dst, int* src, int* perm, int sz); +void inverseArray(int* dst, int* src, int sz); + +// seek?/ seek o src = seek(src)=dst => seek = dst o inv(src) +void permCorrespondance(int* sk, int* dst, int* src, int sz); + +// SRC o transf = DST, SRC:{a,b,c,d,g,f} o transf = DST:{g,b,d,f,c,a} +// SRC[0]=a, SRC[1]=b, SRC[2]=c SRC[3]=d SRC[4]=g SRC[5]=f +// DST[0]=g=SRC[4] DST[1]=b=SRC[1] DST[2]=d=SRC[3] DST[3]=f=SRC[5] DST[4]=c=SRC[2] DST[5]=a=SRC[0] +// trans[0]=4 trans[1]=1 trans[2]=3 trans[3]=5 trans[4]=2 trans[5]=0 +// (*cmp) (a,b) = 0 if a==b, -1 if ab +template +void CorrespondacePerm(T* src, T* dst, int* transf, int sz, int (*cmp)(T, T)); + + + +#endif + + diff --git a/src/permutation_t/permutation_t.c b/src/permutation_t/permutation_t.c new file mode 100644 index 0000000..457e68b --- /dev/null +++ b/src/permutation_t/permutation_t.c @@ -0,0 +1,114 @@ +#include "src/permutation_t/permutation_t.h" + +#define CREATE_PERMUTATION(type, size)\ + type * CREATE_PERMUTATION_##type(size_t size){\ + if (sz == 0) return NULL;\ + PERMUTATION_##type *p = malloc(sizeof(PERMUTATION_##type));\ + p->size = size;\ + p->perm = malloc(size * sizeof(type));\ + return p; }\ + +permutation* +create_permutation(size_t sz) +{ + if(sz == 0) return NULL; + permutation *p=malloc(sizeof(permutation)); + p->size = sz; + p->perm = malloc(sz*sizeof(unsigned int)); + return p; +} + +/*void +copy_array_unsigned(unsigned int *dst, const unsigned int *src, size_t size) +{ + for(size_t i = 0; i < size ; ++i) + dst[i]=src[i]; +}*/ + +void +assign_permutation(permutation *p, unsigned int *arr) +{ + copy_array_unsigned(p->perm, arr, p->size); +} + +bool +is_permutation_set_theoric(const permutation *p) +{ + if(p==NULL) return false; + size_t size = p->size, j; + unsigned int *count_array_i = calloc(size, sizeof(unsigned int)); + if(count_array_i == NULL) + { + printf("can't allocate count_array_i\n"); + return false; + } + for(size_t i = 0; i < size; ++i) + { + j = p->perm[i]; + if((j >= size) || count_array_i[j]) + { + free(count_array_i); + return false; + } + count_array_i[j]++; + } + return true; +} +/* 2,7,4,1 is a permutation of 1,2,4,7 + *it is equivalent of 1,3,2,0 in set_theoric(4)=0,1,2,3 + this function calculate the permutation equivalent in set_theoric + * */ +permutation * +translate_set_theoric(const permutation *p, permutation *translate_p) +{ + if(p==NULL) return NULL; + size_t size = p->size; + permutation *translate_p = create_permutation(size); + unsigned int *temperm = malloc(size*sizeof(unsigned int)); + unsigned int *tmperm = malloc(size*sizeof(unsigned int)); + copy_array_unsigned_int(tmperm, p->perm, p->size); // copy + qsort(tmperm, size, sizeof(unsigned int), compare_unsigned_int); + // tmperm contain p->perm ordered + + size_t cur=0; + for(size_t i=0; i< size; ++i) + { + for(size_t j=0; jperm[j] == tmperm[i]) + { + bool found = false; + for(size_t c=0; cperm[i]=j; + temperm[cur++]=j; + break; + } + } + } + } + free(tmperm); + free(temperm); + return translate_p; +} + +bool +is_permutation(const permutation *p) +{ + bool ret = is_permutation_set_theoric(p); + if(ret == false) + { + permutation *t_p = translate_set_theoric(p); + ret = is_permutation_set_theoric(t_p); + free(t_p); + } + return ret; +} diff --git a/src/permutation_t/permutation_t.h b/src/permutation_t/permutation_t.h new file mode 100644 index 0000000..862b2e8 --- /dev/null +++ b/src/permutation_t/permutation_t.h @@ -0,0 +1,52 @@ +#ifndef __PERMUTATION_T_C_H__ +#define __PERMUTATION_T_C_H__ + +#include "src/tools_t/tools_t.h" +#include "src/set_theoric_t/set_theoric_t.h" + +#define STRUCT_PERMUTATION(type)\ + struct PERMUTATION_##type{\ + size_t size;\ + type * perm; }; +STRUCT_PERMUTATION(TYPE_CHAR) +STRUCT_PERMUTATION(TYPE_U_CHAR) +STRUCT_PERMUTATION(TYPE_INT) +STRUCT_PERMUTATION(TYPE_U_INT) +STRUCT_PERMUTATION(TYPE_L_INT) +STRUCT_PERMUTATION(TYPE_U_L_INT) +STRUCT_PERMUTATION(TYPE_FLOAT) +STRUCT_PERMUTATION(TYPE_DOUBLE) +STRUCT_PERMUTATION(TYPE_L_DOUBLE) +STRUCT_PERMUTATION(TYPE_STRING) + +typedef struct PERMUTATION_TYPE_CHAR PERMUTATION_TYPE_CHAR; +typedef struct PERMUTATION_TYPE_U_CHAR PERMUTATION_TYPE_U_CHAR; +typedef struct PERMUTATION_TYPE_INT PERMUTATION_TYPE_INT; +typedef struct PERMUTATION_TYPE_U_INT PERMUTATION_TYPE_U_INT; +typedef struct PERMUTATION_TYPE_L_INT PERMUTATION_TYPE_L_INT; +typedef struct PERMUTATION_TYPE_U_L_INT PERMUTATION_TYPE_U_L_INT; +typedef struct PERMUTATION_TYPE_FLOAT PERMUTATION_TYPE_FLOAT; +typedef struct PERMUTATION_TYPE_DOUBLE PERMUTATION_TYPE_DOUBLE; +typedef struct PERMUTATION_TYPE_L_DOUBLE PERMUTATION_TYPE_L_DOUBLE; +typedef struct PERMUTATION_TYPE_STRING PERMUTATION_TYPE_STRING; + +/* struct of permutation of unsigned int array, not necessarly set_theoric + * + * */ +struct permutation +{ + size_t size; + unsigned int *perm; +}; + +typedef struct permutation permutation; + +permutation * create_permutation(size_t sz); +void assign_permutation(permutation *p, unsigned int *arr); + +bool is_permutation_set_theoric(const permutation *p); + +// more general! need translation and use is_permutation_set_theoric +bool is_permutation(const permutation *p); + +#endif /*__PERMUTATION_T_C_H__*/ diff --git a/src/set_theoric/set_theoric.c b/src/set_theoric/set_theoric.c new file mode 100644 index 0000000..69b8174 --- /dev/null +++ b/src/set_theoric/set_theoric.c @@ -0,0 +1,26 @@ + +#include "src/set_theoric/set_theoric.h" + +set_theoric * create_set_theoric(unsigned int id) +{ + if(id == 0) return NULL; + + set_theoric *ret_set = malloc(sizeof(set_theoric)); + ret_set.set=malloc(id*sizeof(unsigned int)); + ret_set.id = id; + for(int i = 0; i < id; ++i) + ret_set.set[i] = i; + + return ret_set; + +} + +bool is_set_theoric(set_theoric *st) +{ + if(st == NULL) return true; + + for(int i = 0; i < st->id; ++i) + if(st->set[i] != i) return false; + + return true; +} diff --git a/src/set_theoric/set_theoric.h b/src/set_theoric/set_theoric.h new file mode 100644 index 0000000..1435e5e --- /dev/null +++ b/src/set_theoric/set_theoric.h @@ -0,0 +1,22 @@ +#ifndef __SET_THEORIC_C__H +#define __SET_THEORIC_C__H + +#include + +#include "src/tools/tools.h" + +struct set_theoric +{ + unsigned int id; + unsigned int *set; +}; + +typedef set_theoric set_theoric; + +set_theoric * create_set_theoric(unsigned int id); + +bool is_set_theoric(set_theoric *st); + + + +#endif /*__SET_THEORIC_C__H*/ diff --git a/src/set_theoric_t/set_theoric_t.c b/src/set_theoric_t/set_theoric_t.c new file mode 100644 index 0000000..6bab797 --- /dev/null +++ b/src/set_theoric_t/set_theoric_t.c @@ -0,0 +1,24 @@ + +#include "src/set_theoric_t/set_theoric_t.h" + +#define CREATE_SET_THEORIC(type, id)\ + type * CREATE_SET_THEORIC_##type(type id){\ + if(id == 0) return NULL;\ + SET_THEORIC_##type *ret_set = malloc(sizeof(SET_THEORIC_##type));\ + ret_set->id = id;\ + ret_set->set = malloc(id*sizeof(type));\ + for(type i = 0; i < id; ++i) ret_set->set[i]=i;\ + return ret_set; } +CREATE_SET_THEORIC(TYPE_U_CHAR) +CREATE_SET_THEORIC(TYPE_U_INT) +CREATE_SET_THEORIC(TYPE_U_LONG_INT) + +#define IS_SET_THEORIC(type, st)\ + bool IS_SET_THEORIC_##type(type *st){\ + for(type i = 0; i < st->id; ++i){\ + if(st->set[i] != i) return false;\ + return true; } +IS_SET_THEORIC(TYPE_U_CHAR,st) +IS_SET_THEORIC(TYPE_U_INT,st) +IS_SET_THEORIC(TYPE_U_LONG_INT,st) + diff --git a/src/set_theoric_t/set_theoric_t.h b/src/set_theoric_t/set_theoric_t.h new file mode 100644 index 0000000..bcfbd29 --- /dev/null +++ b/src/set_theoric_t/set_theoric_t.h @@ -0,0 +1,31 @@ +#ifndef __SET_THEORIC_T_C__H +#define __SET_THEORIC_T_C__H + +#include + +#include "src/tools_t/tools_t.h" + +#define STRUCT_SET_THEORIC(type)\ + struct SET_THEORIC_##type{\ + type id;\ + type *set;}; + +STRUCT_SET_THEORIC(TYPE_U_CHAR) +STRUCT_SET_THEORIC(TYPE_U_INT) +STRUCT_SET_THEORIC(TYPE_U_LONG_INT) + +typedef struct SET_THEORIC_TYPE_U_CHAR SET_THEORIC_TYPE_U_CHAR; +typedef struct SET_THEORIC_TYPE_U_INT SET_THEORIC_TYPE_U_INT; +typedef struct SET_THEORIC_TYPE_U_LONG_INT SET_THEORIC_TYPE_U_LONG_INT; + +SET_THEORIC_TYPE_U_CHAR * CREATE_SET_THEORIC_TYPE_U_CHAR(TYPE_U_CHAR); +SET_THEORIC_TYPE_U_INT * CREATE_SET_THEORIC_TYPE_U_INT(TYPE_U_INT); +SET_THEORIC_TYPE_U_LONG_INT * CREATE_SET_THEORIC_TYPE_U_LONG_INT(TYPE_U_LONG_INT); + +bool IS_SET_THEORIC_TYPE_U_CHAR(SET_THEORIC_TYPE_U_CHAR *st); +bool IS_SET_THEORIC_TYPE_U_INT(SET_THEORIC_TYPE_U_INT *st); +bool IS_SET_THEORIC_TYPE_U_LONG_INT(SET_THEORIC_TYPE_U_LONG_INT *st); + + + +#endif /*__SET_THEORIC_T_C__H*/ diff --git a/src/tensor/tens0neD/tens0neD.cpp b/src/tensor/tens0neD/tens0neD.cpp new file mode 100644 index 0000000..058e846 --- /dev/null +++ b/src/tensor/tens0neD/tens0neD.cpp @@ -0,0 +1,500 @@ +#include +#include + +#include + +#include +#include + + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tens0neD/tens0neD.h" +#include "src/tensor/tens0neD/tens0neD.h" +//#include "include/tens0neD.h" + + +//#include "cudatensor.h" +//#include "/home/fanasina/progr_/ptens0neD/src/permutation/permutation.h" +#include "src/permutation/permutation.h" + + +template +void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz) { + transform(Dst.Dim, Src.Dim, perm, sz); + dimension dsrc = Src.Dim; + dimension ddst = Dst.Dim; + int coor[dsrc.rank]; + int dcoor[ddst.rank], ldst; + for (int i = 0; i < Src.Dim.size; i++) { + dsrc.LinearToCoord(coor, i); + for (int j = 0; j < dsrc.rank; j++) dcoor[j] = coor[perm[j]]; + ldst = ddst.CoordToLinear(dcoor); + Dst.elements[ldst] = Src.elements[i]; + } +} + +template void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz); +template void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz); + +template +Tensor& Tensor::operator=(const Tensor& M) { + Dim = M.Dim; + for (int i = 0; i < Dim.size; ++i) elements[i] = M.elements[i]; + return *this; +} + +template +Tensor& Tensor::operator*=(const T& val) { + //for (int i = 0; i < rank.size; ++i) elements[i] *= val; + return *this; +} + +template +Tensor& operator*(const Tensor& M0, const Tensor& M1) { + struct dimension d; add(d, M0.Dim, M1.Dim); + Tensor Mret(d); + for (int i = 0; i < M0.Dim.size; ++i) Mret.elements[i] = M0.elements[i]; + Mret.Dim += M0.Dim; + return Mret; +} + + +void subArray(int* dst, int* src, int debDst, int finDst, int debSrc) { + for (int i = debDst; i < finDst; i++) { + dst[i] = src[i + debSrc]; + } +} + +void concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1) { + int i = debDst; + for (int j = debSrc0; j < finSrc0; j++) { + dst[i++] = src0[j]; + } + for (int j = debSrc1; j < finSrc1; j++) { + dst[i++] = src1[j]; + } +} + +template +void Tensor::initVal(T val) { + int* coord = new int[Dim.rank]; + T pp, mult = 0.5; + for (int i = 0; i < Dim.size; i++) { + Dim.LinearToCoord(coord, i); + elements[i] = val; + pp = mult; + for (int j = 0; j < Dim.rank; j++) { + elements[i] += (coord[j] + 1) * pp; + pp *= mult; + } + } +} +template +void Tensor::initVal(float val); +template +void Tensor::initVal(double val); + +template +void Tensor::print() { + Dim.print(); + int* coord = new int[Dim.rank]; + int begin = 0, end = Dim.rank - 1; + //int beginInv = Dim.rank - 1, endInv = 0; + int (*iter)(int) = incr; + //int (*iterInv)(int) = decr; + bool (*cond)(int, int) = isLessEqThan; + //bool (*condInv)(int, int) = isGreatEqThan; + if (Dim.endian == false) { + begin = Dim.rank - 1; end = 0; + //beginInv = 0; endInv = Dim.rank - 1; + iter = decr; cond = isGreatEqThan; + //iterInv = incr; condInv = isLessEqThan; + } + for (int i = 0; i < Dim.size; i++) { + Dim.LinearToCoord(coord, i); + //if (coord[Dim.rank - 1] == 0) { + if (coord[begin] == 0) { + for (int j = begin; cond(j, end); j = iter(j)) { + //for (int j = Dim.rank - 1; j >= 0; j--) { + if (coord[j] == 0) { + printf("("); + } + else break; + } + } + + //printf(" ");for (int j = 0; j < Dim.rank; j++) printf("[%d]", coord[j]); printf(" "); + //printf(" "); for (int j = beginInv; condInv(j, endInv); j = iterInv(j)) printf("[%d]", coord[j]); printf(" "); + //printf(" "); for (int k = beginInv; condInv(k, endInv); k = iterInv(k)) { printf("[%d]", coord[k]); } printf(" "); + + printf(" %.6f ", elements[i]); + + //if (coord[Dim.rank - 1] == Dim.dim[Dim.rank - 1] - 1) { + if (coord[begin] == Dim.dim[begin] - 1) { + for (int j = begin; cond(j, end); j = iter(j)) { + //for (int j = Dim.rank - 1; j >= 0; j--) { + if (coord[j] == Dim.dim[j] - 1) { + printf(")"); + } + else break; + } + } + } + + printf("\n"); +} +template +void Tensor::print(); +template +void Tensor::print(); + +template +void tensorProd(Tensor& M, const Tensor& M0, const Tensor& M1) { + add(M.Dim, M0.Dim, M1.Dim); + M.initTensor(); + int* coord = new int[M.Dim.rank]; + int* coord0 = new int[M0.Dim.rank], lin0; + int* coord1 = new int[M1.Dim.rank], lin1; + for (int i = 0; i < M.Dim.size; i++) { + M.Dim.LinearToCoord(coord, i); + subArray(coord0, coord, 0, M0.Dim.rank, 0); + subArray(coord1, coord, 0, M1.Dim.rank, M0.Dim.rank); + lin0 = (M0.Dim).CoordToLinear(coord0); + lin1 = (M1.Dim).CoordToLinear(coord1); + M.elements[i] = M0.elements[lin0] * M1.elements[lin1]; + } +} + +template +void tensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); +template +void tensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); + + + + +bool checkMatchProdTensor(const dimension& d0, const dimension& d1, int nestingDepth) { + if (d0.rank <= nestingDepth || d1.rank <= nestingDepth) return false; + for (int i = 0; i < nestingDepth;i++) { + if (d1.dim[i] != d0.dim[d0.rank - nestingDepth + i]) return false; + } + return true; +} + +bool checkMatchProdTensorReverse(const dimension& d0, const dimension& d1, int nestingDepth) { + if (d0.rank <= nestingDepth || d1.rank <= nestingDepth) return false; + for (int i = 0; i < nestingDepth;i++) { + if (d1.dim[i] != d0.dim[d0.rank - 1 - i]) return false; + } + return true; +} + +void extractDimNestingDepth(dimension& dM, const dimension& d0, const dimension& d1, int nestingDepth) { + int len0 = d0.rank - nestingDepth; + int len1 = d1.rank - nestingDepth; + + int* tsub0 = new int[len0]; + int* tsub1 = new int[len1]; + int* tDk1 = new int[nestingDepth]; + int* tDk0 = new int[nestingDepth]; + subArray(tsub0, d0.dim, 0, len0, 0); + subArray(tsub1, d1.dim, 0, len1, nestingDepth); + subArray(tDk1, d1.dim, 0, nestingDepth, 0); + subArray(tDk0, d0.dim, 0, nestingDepth, len0); + dimension dSub0(len0, tsub0); + dimension dSub1(len1, tsub1); + dimension dM1(nestingDepth, tDk1); + dimension dM0(nestingDepth, tDk0); + + min(dM, dM0, dM1); + //max(dM, dM0, dM1); +} + +// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l +// M[x0,x1,x3..xl x{l+1}...xn] X M[xn,x{n-1},x{n-2}...xl y{l+1} ..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) +//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] +template +void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth) { + if (!checkMatchProdTensor(M0.Dim, M1.Dim, nestingDepth)) { + printf("Deep = %d\n", nestingDepth); + //throw std::check_ProdTensor(" Failed imbrication order in Multiplication matrix "); + + //throw std::invalid_argument(" Failed imbrication order in Multiplication matrix "); + } + + int len0 = M0.Dim.rank - nestingDepth; + int len1 = M1.Dim.rank - nestingDepth; + + int* tsub0 = new int[len0]; + int* tsub1 = new int[len1]; + int* tDk1 = new int[nestingDepth]; + int* tDk0 = new int[nestingDepth]; + subArray(tsub0, M0.Dim.dim, 0, len0, 0); + subArray(tsub1, M1.Dim.dim, 0, len1, nestingDepth); + subArray(tDk1, M1.Dim.dim, 0, nestingDepth, 0); + subArray(tDk0, M0.Dim.dim, 0, nestingDepth, len0); + + dimension dSub0(len0, tsub0); + dimension dSub1(len1, tsub1); + dimension dM1(nestingDepth, tDk1); + dimension dM0(nestingDepth, tDk0); + dimension dM; + min(dM, dM0, dM1); + //max(dM, dM0, dM1); + + add(M.Dim, dSub0, dSub1); + M.initTensor(); + + int* coord = new int[M.Dim.rank]; + + int* coord0 = new int[len0], lin0; + int* coord1 = new int[len1], lin1; + + int* coordM0 = new int[M0.Dim.rank]; + int* coordM1 = new int[M1.Dim.rank]; + + int* Koord = new int[nestingDepth]; + for (int i = 0; i < M.Dim.size; i++) { + M.Dim.LinearToCoord(coord, i); + subArray(coord0, coord, 0, len0, 0); + subArray(coord1, coord, 0, len1, len0); + M.elements[i] = 0; + for (int k = 0; k < dM.size; k++) { + dM.LinearToCoord(Koord, k); + concatArray(coordM0, coord0, Koord, 0, 0, len0, 0, nestingDepth); + concatArray(coordM1, Koord, coord1, 0, 0, nestingDepth, 0, len1); + lin0 = (M0.Dim).CoordToLinear(coordM0); + lin1 = (M1.Dim).CoordToLinear(coordM1); + M.elements[i] += M0.elements[lin0] * M1.elements[lin1]; + } + } +} + +template +void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); +template +void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + +void reverseDim(dimension& d, const dimension& d0) { + d.rank = d0.rank; + d.size = d0.size; + if (d.dim != NULL) free(d.dim); + d.dim = (int*)malloc(d.rank * sizeof(int)); + for (int i = 0; i < d.rank; i++) d.dim[i] = d0.dim[d.rank - i - 1]; +} + +template +void reverseTensor(Tensor& M, const Tensor& M0) { + reverseDim(M.Dim, M0.Dim); + size_t id; + int coor[M0.Dim.rank]; + for (size_t i = 0; i < M.Dim.size; i++) { + M0.Dim.LinearToCoord(coor, i); + reverseArray(coor, M0.Dim.rank); + id = M.Dim.CoordToLinear(coor); + M.elements[id] = M0.elements[i]; + } +} + +// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l +// M[x0,x1,x3..xl x{l+1}..xn] X M[xn,x{n-1},..x{l+1}xl y{l+1}..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) +//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] +template +void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth) { + if (!checkMatchProdTensorReverse(M0.Dim, M1.Dim, nestingDepth)) { + printf("Failed in Deep = %d\n", nestingDepth); + //throw std::check_ProdTensor(" Failed imbrication order in Multiplication matrix "); + + //throw std::invalid_argument(" Failed imbrication order in Multiplication matrix "); + } + + int len0 = M0.Dim.rank - nestingDepth; + int len1 = M1.Dim.rank - nestingDepth; + + int* tsub0 = new int[len0]; + int* tsub1 = new int[len1]; + int* tDk1 = new int[nestingDepth]; + int* tDk0 = new int[nestingDepth]; + subArray(tsub0, M0.Dim.dim, 0, len0, 0); + subArray(tsub1, M1.Dim.dim, 0, len1, nestingDepth); + subArray(tDk1, M1.Dim.dim, 0, nestingDepth, 0); + subArray(tDk0, M0.Dim.dim, 0, nestingDepth, len0); + + dimension dSub0(len0, tsub0); + dimension dSub1(len1, tsub1); + dimension dM1(nestingDepth, tDk1); + dimension dM0(nestingDepth, tDk0); + dimension dM; + bool rev; + minReverse(dM, dM0, dM1, rev); + if (rev) reverseArray(dM.dim, dM.rank); + //max(dM, dM0, dM1); + + add(M.Dim, dSub0, dSub1); + M.initTensor(); + + int* coord = new int[M.Dim.rank]; + + int* coord0 = new int[len0], lin0; + int* coord1 = new int[len1], lin1; + + int* coordM0 = new int[M0.Dim.rank]; + int* coordM1 = new int[M1.Dim.rank]; + + int* Koord = new int[nestingDepth]; + for (int i = 0; i < M.Dim.size; i++) { + M.Dim.LinearToCoord(coord, i); + subArray(coord0, coord, 0, len0, 0); + subArray(coord1, coord, 0, len1, len0); + M.elements[i] = 0; + for (int k = 0; k < dM.size; k++) { + dM.LinearToCoord(Koord, k); + concatArray(coordM0, coord0, Koord, 0, 0, len0, 0, nestingDepth); + reverseArray(Koord, nestingDepth); + concatArray(coordM1, Koord, coord1, 0, 0, nestingDepth, 0, len1); + lin0 = (M0.Dim).CoordToLinear(coordM0); + lin1 = (M1.Dim).CoordToLinear(coordM1); + M.elements[i] += M0.elements[lin0] * M1.elements[lin1]; + } + } +} + +template +void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); +template +void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + +template +void permuteTensorDef(Tensor& M, const Tensor& M0, permutation p) { + if (p.size == M0.Dim.rank) { + M.Dim.rank = M0.Dim.rank; + M.Dim.size = M0.Dim.size; + M.Dim.initDim(); + M.initTensor(); + //permuteArray(M.Dim.dim, M0.Dim.dim, p); + //for (int i = 0; i < p.size; i++) { M.Dim.dim[i] = M0.Dim.dim[p.perm[i]]; } + p.permute(M.Dim.dim, M0.Dim.dim); + size_t img; + int coor[p.size]; + int rooc[p.size]; + for (size_t i = 0; i < M.Dim.size;i++) { + M0.Dim.LinearToCoord(coor, i); + p.permute(rooc, coor); + img = M.Dim.CoordToLinear(rooc); + if (img >= M.Dim.size) printf(" i: %ld vs img:%ld size: %ld\n", i, img, M.Dim.size); + M.elements[img] = M0.elements[i]; + + } + } +} + +template +void permuteTensorDef(Tensor& M, const Tensor& M0, permutation p); + +template +bool scanPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest) { + if (contractNest < Msecond.Dim.rank && contractNest < Mfirst.Dim.rank) { + std::vector founded; + int begin = Mfirst.Dim.rank - contractNest, tmp; + for (int i = 0; i < Msecond.Dim.rank;i++) perm[i] = i; + for (int i = begin; i < Mfirst.Dim.rank; i++) { + for (int j = 0; j < Msecond.Dim.rank;j++) { + if (std::find(founded.begin(), founded.end(), perm[j]) == founded.end()) {// not found + if (Msecond.Dim.dim[perm[j]] == Mfirst.Dim.dim[i]) { + founded.push_back(perm[j]); + tmp = perm[i - begin]; + perm[i - begin] = perm[j]; + perm[j] = tmp; + } + } + } + } + return (founded.size() == contractNest); + } + return false; +} +template +bool scanPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest); + + +template +bool scanInvPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest) { + if (contractNest < Msecond.Dim.rank && contractNest < Mfirst.Dim.rank) { + std::vector founded; + int begin = Mfirst.Dim.rank - contractNest, tmp; + for (int i = 0; i < Msecond.Dim.rank;i++) perm[i] = i; + for (int i = begin; i < Mfirst.Dim.rank; i++) { + for (int j = 0; j < Msecond.Dim.rank;j++) { + if (std::find(founded.begin(), founded.end(), j) == founded.end()) {// not found + if (Msecond.Dim.dim[j] == Mfirst.Dim.dim[perm[i - begin]]) { + founded.push_back(j); + tmp = perm[i - begin]; + perm[i - begin] = j; + perm[j] = tmp; + } + } + } + } + return (founded.size() == contractNest); + } + return false; +} +template +bool scanInvPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest); + + +void LinearTransformCoord(size_t& dst, size_t src, int* inversePerm, size_t Msize, dimension dDst, dimension dSrc) { + size_t sm = src; + size_t pp = Msize; + size_t s = 0; + size_t p = 1; + int ret;// = new int[rank]; + int i; + for (i = 0; i < dSrc.rank; ++i) { + pp /= dSrc.dim[i]; + ret = sm / pp; + p = 1; + for (int j = inversePerm[i] + 1; j < dDst.rank;j++) { + p *= dDst.dim[j]; + } + s += ret * p; + + sm %= pp; + + } + dst = s; + if (s > Msize) printf("I have a problem in LinearTransformCoord: s:%ld siez:%ld \n", s, Msize); + +} + + +template +void permuteTensor(Tensor& M, const Tensor& M0, permutation p) { + if (p.size == M0.Dim.rank) { + M.Dim.rank = M0.Dim.rank; + M.Dim.size = M0.Dim.size; + M.Dim.initDim(); + M.initTensor(); + + if (p.size == M0.Dim.rank) p.permute(M.Dim.dim, M0.Dim.dim); + else { + printf("something wrong perm, not the same size as M0.Dim.rank\n"); + exit(1); + } + size_t img = 0; + printf("in permuteTensor:\n"); + M0.Dim.print(); + M.Dim.print(); + setInit se(M.Dim.rank, 0); + int invP[M.Dim.rank]; + inverseArray(invP, p.perm, M.Dim.rank); + for (size_t i = 0; i < M.Dim.size;i++) { + //LinearTransformCoord(img, i, p.perm, M.Dim.size, M.Dim, M0.Dim); + LinearTransformCoord(img, i, invP, M.Dim.size, M.Dim, M0.Dim); + M.elements[img] = M0.elements[i]; + } + } +} + +template +void permuteTensor(Tensor& M, const Tensor& M0, permutation p); + diff --git a/src/tensor/tens0neD/tens0neD.h b/src/tensor/tens0neD/tens0neD.h new file mode 100644 index 0000000..db7b7c9 --- /dev/null +++ b/src/tensor/tens0neD/tens0neD.h @@ -0,0 +1,114 @@ +#ifndef __TENS_0NE_D_H__ +#define __TENS_0NE_D_H__ + +#include +#include + +#include + +//#include "tensor.h" +//#include "cudatensor.h" +//#include "/home/fanasina/progr_/ptens0neD/src/dimension/dimension.h" +//#include "/home/fanasina/progr_/ptens0neD/src/permutation/permutation.h" +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tensCuda/tensCuda.h" + +#include "src/dimension/dimension.h" +#include "src/permutation/permutation.h" +#include "src/tensor/tensCuda/tensCuda.h" + +template +struct Tensor { + struct dimension Dim; + T* elements; + Tensor(struct dimension dm = dimension(1)) { + Dim = dm; + //elements = new T[Dim.size]; + elements = (T*)malloc(Dim.size * sizeof(T)); + } + void initTensor() { + //delete[]elements; + //elements = new T[Dim.size]; + if (elements != NULL) + free(elements); + elements = (T*)malloc(Dim.size * sizeof(T)); + } + void initVal(T val); // { for (int i = 0; i < Dim.size; i++) elements[i] = val + 0.001f * i; } + void print(); + Tensor& operator=(const Tensor& M); + Tensor& operator*=(const T& val); + template + friend Tensor& operator*(const Tensor& M0, const Tensor& M1); + + // M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l + // M[x0,x1,x3..xl x{l+1}...xn] X M[xn,x{n-1},x{n-2}...xl y{l+1} ..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) + template + friend void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + + // M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l + // M[x0,x1,x3..xl x{l+1}..xn] X M[xn,x{n-1},..x{l+1}xl y{l+1}..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) + template + friend void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + + template + friend void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth, bool strict); + + /*template + friend void cudaTensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); +*/ + + template + friend void tensorProd(Tensor& M, const Tensor& M0, const Tensor& M1); + + template + friend void cudaTensorProd(Tensor& M, const Tensor& M0, const Tensor& M1); + + template + friend void cudaTensorProdEnd(Tensor& M, const Tensor& M0, const Tensor& M1); + + template + friend void permuteTensor(Tensor& M, const Tensor& M0, permutation p); + template + friend void permuteTensorDef(Tensor& M, const Tensor& M0, permutation p); + template + friend bool scanPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest); + + //template + //friend void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p); + +}; + +template +void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz); + + +template +Tensor& operator*(const Tensor& M0, const Tensor& M1); + + +void subArray(int* dst, int* src, int debDst, int finDst, int debSrc); + +void concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1); + +void reverseArray(int* arr, int sz); + +template +void tensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); + +bool checkMatchProdTensor(const dimension& d0, const dimension& d1, int nestingDepth); + +void extractDimNestingDepth(dimension& dM, const dimension& d0, const dimension& d1, int nestingDepth); + +// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l + +//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] +template +void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + +// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l + +//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] +template +void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + +#endif + diff --git a/src/tensor/tensCuda/d_tensCuda.cu b/src/tensor/tensCuda/d_tensCuda.cu new file mode 100644 index 0000000..09ebcc2 --- /dev/null +++ b/src/tensor/tensCuda/d_tensCuda.cu @@ -0,0 +1,493 @@ +/*#include +#include + +#include "cuda.h" +#include "cuda_runtime.h" +*/ + +#include "d_tensCuda.h" +//#include "index.h" +#include + +//////////////////////////////////////////////////////// + +//1D grid of 1D blocks +__device__ +int d_getGlobalIdx_1D_1D() { + return blockIdx.x * blockDim.x + threadIdx.x; +} +//1D grid of 2D blocks +__device__ +int d_getGlobalIdx_1D_2D() { + return blockIdx.x * blockDim.x * blockDim.y + + threadIdx.y * blockDim.x + threadIdx.x; +} +//1D grid of 3D blocks +__device__ +int d_getGlobalIdx_1D_3D() { + return blockIdx.x * blockDim.x * blockDim.y * blockDim.z + + threadIdx.z * blockDim.y * blockDim.x + + threadIdx.y * blockDim.x + threadIdx.x; +} +//2D grid of 1D blocks +__device__ int d_getGlobalIdx_2D_1D() { + int blockId + = blockIdx.y * gridDim.x + blockIdx.x; + int threadId = blockId * blockDim.x + threadIdx.x; + return threadId; +} +//2D grid of 2D blocks +__device__ +int d_getGlobalIdx_2D_2D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int threadId = blockId * (blockDim.x * blockDim.y) + + (threadIdx.y * blockDim.x) + threadIdx.x; + return threadId; +} +//2D grid of 3D blocks +__device__ +int d_getGlobalIdx_2D_3D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z) + + (threadIdx.z * (blockDim.x * blockDim.y)) + + (threadIdx.y * blockDim.x) + threadIdx.x; + return threadId; +} +//3D grid of 1D blocks +__device__ +int d_getGlobalIdx_3D_1D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x + + gridDim.x * gridDim.y * blockIdx.z; + int threadId = blockId * blockDim.x + threadIdx.x; + return threadId; +} +//3D grid of 2D blocks +__device__ +int d_getGlobalIdx_3D_2D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x + + gridDim.x * gridDim.y * blockIdx.z; + int threadId = blockId * (blockDim.x * blockDim.y) + + (threadIdx.y * blockDim.x) + threadIdx.x; + return threadId; +} +//3D grid of 3D blocks +__device__ +int d_getGlobalIdx_3D_3D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x + + gridDim.x * gridDim.y * blockIdx.z; + int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z) + + (threadIdx.z * (blockDim.x * blockDim.y)) + + (threadIdx.y * blockDim.x) + threadIdx.x; + return threadId; +} + + +/////////////////////////////////////////////////////////////////////////// + + +__device__ void d_LinearToCoordEnd(int* ret, size_t lin, int* dim, int rank, size_t size) { + size_t sm = lin; + size_t pp = size; + for (int i = rank - 1;i > 0; --i) { + pp /= dim[i]; + ret[i] = sm / pp; + sm %= pp; + } + ret[0] = sm; +} + +__device__ size_t d_CoordToLinearEnd(int* coo, int* dim, int rank) { + size_t pp = 1; + size_t sm = 0; + for (int i = 0; i < rank; ++i) { + sm += (coo[i] * pp); + pp *= dim[i]; + } + return sm; +} + +__device__ size_t d_CoordToLinear(int* coo, int* dim, int rank) { + size_t pp = 1; + size_t sm = 0; + for (int i = rank - 1; i >= 0; --i) { + sm += (coo[i] * pp); + pp *= dim[i]; + } + return sm; +} + + + +__device__ void d_LinearToCoord(int* ret, size_t lin, int* dim, int rank, size_t size) { + size_t sm = lin; + size_t pp = size; + for (int i = 0; i < rank - 1; ++i) { + pp /= dim[i]; + ret[i] = sm / pp; + sm %= pp; + } + ret[rank - 1] = sm; +} +/*__device__ void d_LinearToSplitSubrankLimSz(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, size_t size, size_t sizeA) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + int ret;// = new int[rank]; + for (int i = 0; i < rank; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + + sm %= pp; + if (i == rankA - 1) { + part0 = s; + s = 0; + p = size / sizeA; + } + + } + part1 = s; + +}*/ +__device__ void d_LinearToSplitSubrankLimSz(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, size_t size, size_t sizeA) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + int ret;// = new int[rank]; + int i; + for (i = 0; i < rankA; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + + sm %= pp; + + } + part0 = s; + s = 0; + p = size / sizeA;//sizeB + for (; i < rank; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + + sm %= pp; + + } + + part1 = s; + +} +__device__ void d_LinearToSplitSubrankLimSzEnd(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, size_t size, size_t sizeA) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + int ret;// = new int[rank]; + for (int i = rank - 1; i >= 0; --i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + + sm %= pp; + if (i == rankA) { + part1 = s; + s = 0; + p = size / sizeA; + } + + } + part0 = s; + +} + + +__device__ void d_subArray(int* dst, int* src, int debDst, int finDst, int debSrc) { + for (int i = debDst; i < finDst; i++) { + dst[i] = src[i + debSrc]; + } +} + +template +__global__ void d_prodTensor(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB) { + size_t lin0, lin1; + + size_t i = threadIdx.x + blockIdx.x * blockDim.x; + if (i < size) { + d_LinearToSplitSubrankLimSz(lin0, lin1, i, dimC, rankC, rankA, size, sizeA); + + C[i] = A[lin0] * B[lin1]; + + } +} + +template __global__ void d_prodTensor(float* C, int* dimC, int rankC, size_t size, float* A, int* dimA, int rankA, size_t sizeA, float* B, int* dimB, int rankB); + +template +__global__ void d_prodTensorEnd(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB) { + size_t lin0, lin1; + + size_t i = threadIdx.x + blockIdx.x * blockDim.x; + if (i < size) { + d_LinearToSplitSubrankLimSzEnd(lin0, lin1, i, dimC, rankC, rankA, size, sizeA); + + C[i] = A[lin0] * B[lin1]; + + } +} + +template __global__ void d_prodTensorEnd(float* C, int* dimC, int rankC, size_t size, float* A, int* dimA, int rankA, size_t sizeA, float* B, int* dimB, int rankB); + +__device__ void d_minReverse(int* dim, int& rank, const int* dim0, int rank0, const int* dim1, int rank1, bool& rev) { + if (rank0 > rank1) { + rank = rank1; + for (int i = 0; i < rank1; ++i) dim[i] = dim1[i]; + rev = true; + } + else if (rank0 < rank1) { + rank = rank0; + for (int i = 0; i < rank1; ++i) dim[i] = dim0[i]; + rev = false; + } + else {// rank0 == rank1 + rank = rank0; + for (int i = 0; i < rank0; i++) { + if (dim[i] > dim1[rank1 - 1 - i]) dim[i] = dim1[rank1 - 1 - i]; + else dim[i] = dim0[i]; + } + rev = false; + } +} + +__device__ void d_reverseArray(int* arr, int sz) { + int* tmp; + //tmp = (int*)malloc(sz * sizeof(int)); + + tmp = new int[sz]; + if (tmp == NULL) { + size_t limit = 0; + cudaDeviceGetLimit(&limit, cudaLimitStackSize); + printf("cudaLimitStackSize: %u | %d (%d) %d | \n", (unsigned)limit, blockIdx.x, blockDim.x, threadIdx.x); + cudaDeviceGetLimit(&limit, cudaLimitPrintfFifoSize); + printf("cudaLimitPrintfFifoSize: %u | %d (%d) %d | \n", (unsigned)limit, blockIdx.x, blockDim.x, threadIdx.x); + cudaDeviceGetLimit(&limit, cudaLimitMallocHeapSize); + printf("cudaLimitMallocHeapSize: %u | %d (%d) %d | \n", (unsigned)limit, blockIdx.x, blockDim.x, threadIdx.x); + + printf("error Allocation in tmp = (int*)malloc(sz * sizeof(int)); | | "); + }int i = 0; + for (; i < sz / 2; i++) { + tmp[i] = arr[i]; + arr[i] = arr[sz - 1 - i]; + } + for (; i < sz; i++) { + arr[i] = tmp[sz - 1 - i]; + } + //free(tmp); + delete[]tmp; +} + +__device__ int d_min(int a, int b) { + if (a < b) return a; + return b; +} + +__device__ void d_concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1) { + int i = debDst; + for (int j = debSrc0; j < finSrc0; j++) { + dst[i++] = src0[j]; + } + for (int j = debSrc1; j < finSrc1; j++) { + dst[i++] = src1[j]; + } +} + + + +__device__ void d_ConcatLinearToSplitSubrankLimSz(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, int rankB, size_t size, size_t sizeA, size_t sizeB, int* dM, int dMrank, size_t dMsize, int ind) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + //size_t sz_dA = sizeA / dMsize; + int rankdA = rankA - dMrank; + + int ret; + int i; + for (i = 0; i < rankdA; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + sm %= pp; + } + size_t s1 = 0; + + size_t pb = sizeB / dMsize; + for (; i < rank; ++i) { + pp /= dim[i]; + ret = sm / pp; + pb /= dim[i]; + s1 += ret * pb; + sm %= pp; + } + + size_t smd = ind; + size_t ppb = dMsize; + //size_t pb = size / sz_dA; + pb = sizeB; + p = dMsize; + for (int j = 0;j < dMrank;j++) { + ppb /= dM[j]; + ret = smd / ppb; + p /= dM[j]; + s += ret * p; + pb /= dM[j]; + s1 += ret * pb; + smd %= ppb; + } + //pp = size / sz_dA; + part0 = s; + part1 = s1; +} + +__device__ void d_SplitLineardToSubrank(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, int rankB, size_t size, size_t sizeA, size_t sizeB, int* dM, int dMrank, size_t dMsize) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + //size_t sz_dA = sizeA / dMsize; + int rankdA = rankA - dMrank; + + int ret; + int i; + for (i = 0; i < rankdA; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + sm %= pp; + } + size_t s1 = 0; + + size_t pb = sizeB / dMsize; + for (; i < rank; ++i) { + pp /= dim[i]; + ret = sm / pp; + pb /= dim[i]; + s1 += ret * pb; + sm %= pp; + } + part0 = s; + part1 = s1; +} + + +__device__ void d_UnionConcatLinearSplitedSubrank(size_t& part0, size_t& part1, size_t p0, size_t p1, size_t size, size_t sizeB, int* dM, int dMrank, size_t dMsize, int ind) { + size_t s = p0; + size_t s1 = p1; + int ret; + size_t smd = ind; + size_t ppb = dMsize; + //size_t pb = size / sz_dA; + size_t pb = sizeB; + size_t p = dMsize; + for (int j = 0;j < dMrank;j++) { + ppb /= dM[j]; + ret = smd / ppb; + p /= dM[j]; + s += ret * p; + pb /= dM[j]; + s1 += ret * pb; + smd %= ppb; + } + //pp = size / sz_dA; + part0 = s; + part1 = s1; +} + +template +__global__ void d_TensorContractnReverseProd(T* C, int* dimC, int rankC, size_t sizeC, T* A, int rankA, size_t sizeA, T* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize) { + + size_t p0, p1; + size_t lin0, lin1; + + + //size_t i = threadIdx.x + blockIdx.x * blockDim.x; + size_t i = d_getGlobalIdx_1D_1D(); + + if (i < sizeC) { + + d_SplitLineardToSubrank(p0, p1, i, dimC, rankC, rankA, rankB, sizeC, sizeA, sizeB, dM, dMrank, dMsize); + + C[i] = 0; + for (size_t k = 0; k < dMsize; k++) { + + d_UnionConcatLinearSplitedSubrank(lin0, lin1, p0, p1, sizeC, sizeB, dM, dMrank, dMsize, k); + + //d_ConcatLinearToSplitSubrankLimSz(lin0, lin1, i, dimC, rankC, rankA, rankB, sizeC, sizeA, sizeB, dM, dMrank, dMsize, k); + + C[i] += A[lin0] * B[lin1]; + } + } + +} + +template +__global__ void d_TensorContractnReverseProd(float* C, int* dimC, int rankC, size_t size, float* A, int rankA, size_t sizeA, float* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize); + +__device__ void d_LinearTransformCoord(size_t& dst, size_t src, int* inversePerm, size_t sizeA, int rankDst, int rankSrc, int* dDst, int* dSrc) { + size_t sm = src; + size_t pp = sizeA; + size_t s = 0; + size_t p = 1; + int ret;// = new int[rank]; + int i, j; + for (i = 0; i < rankSrc; ++i) { + pp /= dSrc[i]; + ret = sm / pp; + p = 1; + for (j = inversePerm[i] + 1; j < rankDst;j++) { + p *= dDst[j]; + } + s += ret * p; + + sm %= pp; + + } + dst = s; + if (s > sizeA) printf("I have a problem in LinearTransformCoord: s:%ld siez:%ld \n", s, sizeA); + +} + +template +__global__ void d_PermLinearTransformCoord(T* C, int* dimC, int rankC, size_t sizeC, T* A, int* dimA, int rankA, size_t sizeA, int* invPerm) { + + //size_t i = threadIdx.x + blockIdx.x * blockDim.x; + size_t i = d_getGlobalIdx_1D_1D(); + + if (i < sizeC) { + //printf("(float* C, int* dimC, int rankC, size_t size, float* A, int* dimA, int rankA, size_t sizeA, int* invPerm); + diff --git a/src/tensor/tensCuda/d_tensCuda.h b/src/tensor/tensCuda/d_tensCuda.h new file mode 100644 index 0000000..2a69dfd --- /dev/null +++ b/src/tensor/tensCuda/d_tensCuda.h @@ -0,0 +1,69 @@ +#ifndef __D_CUDA_TENSOR_H__ +#define __D_CUDA_TENSOR_H__ + +#include "cuda.h" +#include "cuda_runtime.h" + +//#include "cuda_device_runtime_api.h" + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tensCuda/d_tensCuda.h" +#include "src/tensor/tensCuda/d_tensCuda.h" + + +//#1D grid of 1D blocks +__device__ int d_getGlobalIdx_1D_1D(); +//#1D grid of 2D blocks +__device__ int d_getGlobalIdx_1D_2D(); +//#1D grid of 3D blocks +__device__ int d_getGlobalIdx_1D_3D(); +//#1D grid of 1D blocks +__device__ int d_getGlobalIdx_2D_1D(); +//#1D grid of 2D blocks +__device__ int d_getGlobalIdx_2D_2D(); +//2D grid of 3D blocks +__device__ int d_getGlobalIdx_2D_3D(); +//#1D grid of 1D blocks +__device__ int d_getGlobalIdx_3D_1D(); +//#1D grid of 2D blocks +__device__ int d_getGlobalIdx_3D_2D(); +//#1D grid of 3D blocks +__device__ int d_getGlobalIdx_3D_3D(); + + + +extern cudaError_t cudaDeviceGetLimit(size_t* pValue, enum cudaLimit limit); + + +__device__ void d_LinearToCoordEnd(int* ret, size_t lin, int* dim, int rank, size_t size); + +__device__ size_t d_CoordToLinearEnd(int* coo, int* dim, int rank); + +__device__ size_t d_CoordToLinear(int* coo, int* dim, int rank); + + +__device__ void d_LinearToCoord(int* ret, size_t lin, int* dim, int rank, size_t size); + +__device__ void d_subArray(int* dst, int* src, int debDst, int finDst, int debSrc); + +__device__ void d_minReverse(int* dim, int& rank, const int* dim0, int rank0, const int* dim1, int rank1, bool& rev); + +__device__ void d_reverseArray(int* arr, int sz); + +__device__ int d_min(int a, int b); + +__device__ void d_concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1); + + +template +__global__ void d_prodTensor(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB); + +template +__global__ void d_prodTensorEnd(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB); + +template +__global__ void d_TensorContractnReverseProd(T* C, int* dimC, int rankC, size_t size, T* A, int rankA, size_t sizeA, T* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize); + +template +__global__ void d_PermLinearTransformCoord(T* C, int* dimC, int rankC, size_t sizeC, T* A, int* dimA, int rankA, size_t sizeA, int* invPerm); + +#endif \ No newline at end of file diff --git a/src/tensor/tensCuda/tensCuda.cu b/src/tensor/tensCuda/tensCuda.cu new file mode 100644 index 0000000..cc69d43 --- /dev/null +++ b/src/tensor/tensCuda/tensCuda.cu @@ -0,0 +1,574 @@ +#include +#include + +#include + +#include +#include + + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tens0neD/tens0neD.h" + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tensCuda/tensCuda.h" +#include "src/tensor/tensCuda/tensCuda.h" + + + + +template +void cudaTensorProd(Tensor& M, const Tensor& M0, const Tensor& M1) { + add(M.Dim, M0.Dim, M1.Dim); + M.initTensor(); + + int* d_imM, * d_imM0, * d_imM1; + cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + T* e, * e0, * e1; + errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + int BLOCKSIZE = 256;//1024; + int DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; + //int DIMBLOCKS = (M.Dim.size) / BLOCKSIZE; + + d_prodTensor << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, e1, d_imM1, M1.Dim.rank); + + errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(e); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } +} + + +//template void cudaTensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); +template void cudaTensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); + + +template +void cudaTensorProdEnd(Tensor& M, const Tensor& M0, const Tensor& M1) { + add(M.Dim, M0.Dim, M1.Dim); + M.initTensor(); + + int* d_imM, * d_imM0, * d_imM1; + cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + T* e, * e0, * e1; + errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + size_t BLOCKSIZE = 1024; + size_t DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; + + d_prodTensorEnd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, e1, d_imM1, M1.Dim.rank); + + cudaDeviceSynchronize(); + + errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(e); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } +} + + +//template void cudaTensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); +template void cudaTensorProdEnd(Tensor& M, const Tensor& M1, const Tensor& M0); + + +template +void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p) { + if (p.size == M0.Dim.rank) { + M.Dim.rank = M0.Dim.rank; + M.Dim.size = M0.Dim.size; + M.Dim.initDim(); + M.initTensor(); + + p.permute(M.Dim.dim, M0.Dim.dim); + + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + + cudaEventRecord(start); + + + int* d_imM, * d_imM0; + cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + + T* e, * e0; + errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + + errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + + size_t BLOCKSIZE = 256; //1024;//512; + size_t DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; + dim3 blckSZ, gridSZ; + blckSZ.x = BLOCKSIZE; + gridSZ.x = DIMBLOCKS; + + int* invP, * d_invP; + invP = (int*)malloc(M.Dim.rank * sizeof(int)); + inverseArray(invP, p.perm, M.Dim.rank); + errCu = cudaMalloc((void**)&d_invP, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_invP, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_invP, invP, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_invP, invP, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + //printf("size: %ld\n", M.Dim.size); + + //d_prodTensorEnd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank); + //d_TensorContractnReverseProd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); + //d_TensorContractnReverseProd << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); + d_PermLinearTransformCoord << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, d_invP); + //d_PermLinearTransformCoord << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, p.perm); + //cudaDeviceSynchronize(); + + + errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(e); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(d_imM); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + cudaEventRecord(stop); + cudaEventSynchronize(stop); + float milliseconds = 0; + cudaEventElapsedTime(&milliseconds, start, stop); + printf("ellaps time cuda permute tensor: %f ms\n", milliseconds); + + } +} + +template +void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p); + + +// strict match contract ! if no strict, we take the minimum +template +void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M11, int nestingDepth, bool strict) { + + + int perm[M11.Dim.rank]; + struct Tensor M1; + if (scanPermuteMatchContractTensorfromSrcToDst(perm, M11, M0, nestingDepth)) { + for (int i = 0; i < M11.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(": last perm \n"); + struct permutation p(M11.Dim.rank, perm); + permuteTensor(M1, M11, p); + M1.Dim.print(); + + } + else { + printf("Failed in Deep = %d\n", nestingDepth); + //throw std::check_ProdTensor(" Failed imbrication order in Multiplication matrix "); + + throw std::invalid_argument(" Failed imbrication order in Multiplication matrix "); + exit(1); + } + + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + + cudaEventRecord(start); + + int len0 = M0.Dim.rank - nestingDepth; + int len1 = M1.Dim.rank - nestingDepth; + + int* tsub0 = new int[len0]; + int* tsub1 = new int[len1]; + int* tDk1 = new int[nestingDepth]; + int* tDk0 = new int[nestingDepth]; + subArray(tsub0, M0.Dim.dim, 0, len0, 0); + subArray(tsub1, M1.Dim.dim, 0, len1, nestingDepth); + subArray(tDk1, M1.Dim.dim, 0, nestingDepth, 0); + subArray(tDk0, M0.Dim.dim, 0, nestingDepth, len0); + + dimension dSub0(len0, tsub0); + dimension dSub1(len1, tsub1); + dimension dM1(nestingDepth, tDk1); + dimension dM0(nestingDepth, tDk0); + dimension dM(dM0); + //bool rev; + //minReverse(dM, dM0, dM1, rev); + //if (rev) reverseArray(dM.dim, dM.rank); + //max(dM, dM0, dM1); + + add(M.Dim, dSub0, dSub1); + M.initTensor(); + + + + int* d_imM, * d_imM0, * d_imM1, * d_imdM; + cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imdM, dM.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imdM, dM.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imdM, dM.dim, dM.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imdM, dM.dim, dM.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + T* e, * e0, * e1; + errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + size_t BLOCKSIZE = 256; //1024;//512; + size_t DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; + dim3 blckSZ, gridSZ; + blckSZ.x = BLOCKSIZE; + gridSZ.x = DIMBLOCKS; + + + //d_prodTensorEnd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank); + //d_TensorContractnReverseProd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); + //d_TensorContractnReverseProd << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); + d_TensorContractnReverseProd << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, M0.Dim.rank, M0.Dim.size, e1, M1.Dim.rank, M1.Dim.size, d_imdM, dM.rank, dM.size); + + //cudaDeviceSynchronize(); + + + errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(e); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + cudaEventRecord(stop); + cudaEventSynchronize(stop); + float milliseconds = 0; + cudaEventElapsedTime(&milliseconds, start, stop); + printf("ellaps time cuda prod contract prod: %f ms\n", milliseconds); + + +} + +template +void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth, bool strict); +//template void cudaTensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + diff --git a/src/tensor/tensCuda/tensCuda.h b/src/tensor/tensCuda/tensCuda.h new file mode 100644 index 0000000..e71a7e6 --- /dev/null +++ b/src/tensor/tensCuda/tensCuda.h @@ -0,0 +1,31 @@ +#ifndef __TENS_CUDA_H__ +#define __TENS_CUDA_H__ + +#include +#include + +#include + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tens0neD/tens0neD.h" +#include "src/tensor/tens0neD/tens0neD.h" + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tensCuda/d_tensCuda.h" +#include "src/tensor/tensCuda/d_tensCuda.h" +//#include "src/dimension/dimension.h" + +template +struct Tensor; + +template +void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth, bool strict = true); + +template +void cudaTensorProd(Tensor& M, const Tensor& M0, const Tensor& M1); +template +void cudaTensorProdEnd(Tensor& M, const Tensor& M0, const Tensor& M1); +template +void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p); + + +#endif + diff --git a/src/tools/tools.c b/src/tools/tools.c new file mode 100644 index 0000000..15a75ea --- /dev/null +++ b/src/tools/tools.c @@ -0,0 +1,144 @@ +#include "src/tools/tools.h" + +int +compare_int(void* a, void* b) +{ + return (*(int*)a - *(int*)b); +} +int +compare_unsigned_int(void* a, void* b) +{ + return ((unsigned int*)a-*(unsigned int*)b); +} +int +compare_float(void *a, void *b) +{ + if (*(float*)a == *(float*)b) return 0; + if (*(float*)a > *(float*)b) return 1; + return -1; +} +int +compare_double(void *a, void *b) +{ + if (*(double*)a == *(double*)b) return 0; + if (*(double*)a > *(double*)b) return 1; + return -1; +} +int +compare_string(void *a, void *b) +{ + return strcmp((char*)a, (char*)b); +} + +int +max_array_int(int * arr, size_t sz) +{ + if(sz == 0) return 0; + int mx = arr[0]; + for(size_t i = 1; i < sz; ++i) + if(mx < arr[i]) mx = arr[i]; + + return mx; +} + +unsigned int +max_array_unsigned_int(unsigned int *arr, size_t sz) +{ + if(sz == 0) return 0; + unsigned int mx = arr[0]; + for(size_t i = 1; i < sz; ++i) + if(mx < arr[i]) mx = arr[i]; + + return mx; +} +int +min_array_int(int * arr, size_t sz) +{ + if(sz == 0) return 0; + int mn = arr[0]; + for(size_t i = 1; i < sz; ++i) + if(mn > arr[i]) mn = arr[i]; + + return mn; +} +unsigned int +min_array_unsigned_int(unsigned int *arr, size_t sz) +{ + if(sz == 0) return 0; + unsigned int mn = arr[0]; + for(size_t i = 1; i < sz; ++i) + if(mn > arr[i]) mn = arr[i]; + + return mn; +} + +size_t +arg_max_array_int(int * arr, size_t sz) +{ + if(sz == 0) return 0; + size_t i_mx = 0; + for(size_t i = 1; i < sz; ++i) + if(arr[i_mx] < arr[i]) i_mx = i; + + return i_mx; +} + +size_t +arg_max_array_unsigned_int(unsigned int *arr, size_t sz) +{ + if(sz == 0) return 0; + size_t i_mx = 0; + for(size_t i = 1; i < sz; ++i) + if(arr[i_mx] < arr[i]) i_mx = i; + + return i_mx; +} + +size_t +arg_min_array_int(int * arr, size_t sz) +{ + if(sz == 0) return 0; + size_t i_mn = 0; + for(size_t i = 1; i < sz; ++i) + if(arr[i_mn] > arr[i]) i_mn = i; + + return i_mn; +} + +size_t +arg_min_array_unsigned_int(unsigned int *arr, size_t sz) +{ + if(sz == 0) return 0; + size_t i_mn = 0; + for(size_t i = 1; i < sz; ++i) + if(arr[i_mn] > arr[i]) i_mn = i; + + return i_mn; +} + +void +copy_array_unsigned_int(unsigned int *dst, const unsigned int *src, size_t size) +{ + for(size_t i=0; i< size; ++i) + dst[i]=src[i]; +} + +/* +bool is_less_eq_than_i(int a, int b) { return a <= b; } +bool is_less_than_i(int a, int b) { return a < b; } +bool is_great_eq_than_i(int a, int b) { return a >= b; } +bool is_great_than_i(int a, int b) { return a > b; } +*/ +int incr_i(int i) { return i + 1; } +int decr_i(int i) { return i - 1; } + +/* +bool is_less_eq_than_u(unsigned int a, unsigned int b) { return a <= b; } +bool is_less_than_u(unsigned int a, unsigned int b) { return a < b; } +bool is_great_eq_than_u(unsigned int a, unsigned int b) { return a >= b; } +bool is_great_than_u(unsigned int a, unsigned int b) { return a > b; } +*/ +unsigned int incr_u(unsigned int i) { return i + 1; } +unsigned int decr_u(unsigned int i) { return i - 1; } + + diff --git a/src/tools/tools.h b/src/tools/tools.h new file mode 100644 index 0000000..371d0d1 --- /dev/null +++ b/src/tools/tools.h @@ -0,0 +1,48 @@ +#ifndef __TOOLS_C_H__ +#define __TOOLS_C_H__ + +#include +#include + +#define FREE(x) { free((x)); (x) = NULL;} + +int compare_int(void* a, void* b); +int compare_unsigned_int(void* a, void* b); +int compare_float(void *a, void *b); +int compare_double(void *a, void *b); +int compare_string(void *a, void *b); + +int max_array_int(int *arr, size_t sz); +unsigned int max_array_unsigned_int(unsigned int *arr, size_t sz); +int min_array_int(int *arr, size_t sz); +unsigned int min_array_unsigned_int(unsigned int *arr, size_t sz); + +size_t arg_max_array_int(int *arr, size_t sz); +size_t arg_max_array_unsigned_int(unsigned int *arr, size_t sz); +size_t arg_min_array_int(int *arr, size_t sz); +size_t arg_min_array_unsigned_int(unsigned int *arr, size_t sz); + +void copy_array_unsigned_int(unsigned int *dst, const unsigned int *src, size_t size); + +/* +bool is_less_eq_than_i(int a, int b); // { return a <= b; } +bool is_less_than_i(int a, int b); // { return a < b; } +bool is_great_eq_than_i(int a, int b); // { return a >= b; } +bool is_great_than_i(int a, int b); // { return a > b; } +*/ +int incr_i(int i); // { return i + 1; } +int decr_i(int i); // { return i - 1; } + + +/* +bool is_less_eq_than_u(unsigned int a, unsigned int b); // { return a <= b; } +bool is_less_than_u(unsigned int a, unsigned int b); // { return a < b; } +bool is_great_eq_than_u(unsigned int a, unsigned int b); // { return a >= b; } +bool is_great_than_u(unsigned int a, unsigned int b); // { return a > b; } +*/ +unsigned int incr_u(unsigned int i); // { return i + 1; } +unsigned int decr_u(unsigned int i); // { return i - 1; } + + + +#endif /*__TOOLS_C_H__*/ diff --git a/src/tools_t/tools_t.c b/src/tools_t/tools_t.c new file mode 100644 index 0000000..f0732cc --- /dev/null +++ b/src/tools_t/tools_t.c @@ -0,0 +1,137 @@ +#include "src/tools_t/tools_t.h" + +#define COMPARE_N(type,a,b)\ + int COMPARE_N_##type(const void *a, const void *b){ \ + if (*(type*)a == *(type*)b) return 0; \ + if (*(type*)a > *(type*)b) return 1; \ + return -1; } + +COMPARE_N(TYPE_CHAR,a,b) +COMPARE_N(TYPE_U_CHAR,a,b) +COMPARE_N(TYPE_INT,a,b) +COMPARE_N(TYPE_U_INT,a,b) +COMPARE_N(TYPE_L_INT,a,b) +COMPARE_N(TYPE_U_L_INT,a,b) +COMPARE_N(TYPE_FLOAT,a,b) +COMPARE_N(TYPE_DOUBLE,a,b) +COMPARE_N(TYPE_L_DOUBLE,a,b) +int +COMPARE_N_TYPE_STRING(const void *a,const void* b) +{ + return strcmp(( char*)a,( char*)b); +} + +#define COPY_ARRAY(type, dst, src, size)\ + void COPY_ARRAY_##type(type *dst, const type *src, size_t size){\ + for(size_t i = 0; i < size; ++i) dst[i]=src[i]; } + +COPY_ARRAY(TYPE_CHAR,dst,src,size); +COPY_ARRAY(TYPE_U_CHAR,dst,src,size); +COPY_ARRAY(TYPE_INT,dst,src,size); +COPY_ARRAY(TYPE_U_INT,dst,src,size); +COPY_ARRAY(TYPE_L_INT,dst,src,size); +COPY_ARRAY(TYPE_U_L_INT,dst,src,size); +COPY_ARRAY(TYPE_FLOAT,dst,src,size); +COPY_ARRAY(TYPE_DOUBLE,dst,src,size); +COPY_ARRAY(TYPE_L_DOUBLE,dst,src,size); +void COPY_ARRAY_TYPE_STRING(char** dst, const char** src, size_t size) +{ + for(size_t i = 0; i < size; ++i) strcpy(dst[i],src[i]); +} + +#define MAX_ARRAY(type, array, size, compare)\ + type MAX_ARRAY_##type(const type *array, size_t size){\ + if(array == NULL) return 0;\ + type mx =(type)array[0];\ + for(size_t i = 0; i < size; ++i)\ + if(compare(&mx,&array[i]) < 0) mx =(type)array[i];\ + return mx;} +MAX_ARRAY(TYPE_CHAR,array,size,COMPARE_N_TYPE_CHAR); +MAX_ARRAY(TYPE_U_CHAR,array,size,COMPARE_N_TYPE_U_CHAR); +MAX_ARRAY(TYPE_INT,array,size,COMPARE_N_TYPE_INT); +MAX_ARRAY(TYPE_U_INT,array,size,COMPARE_N_TYPE_U_INT); +MAX_ARRAY(TYPE_L_INT,array,size,COMPARE_N_TYPE_L_INT); +MAX_ARRAY(TYPE_U_L_INT,array,size,COMPARE_N_TYPE_U_L_INT); +MAX_ARRAY(TYPE_FLOAT,array,size,COMPARE_N_TYPE_FLOAT); +MAX_ARRAY(TYPE_DOUBLE,array,size,COMPARE_N_TYPE_DOUBLE); +MAX_ARRAY(TYPE_L_DOUBLE,array,size,COMPARE_N_TYPE_L_DOUBLE); +MAX_ARRAY(TYPE_STRING,array,size,COMPARE_N_TYPE_STRING); + +#define ARG_MAX_ARRAY(type, array, size, compare)\ + size_t ARG_MAX_ARRAY_##type(const type *array, size_t size){\ + if(array == NULL) return 0;\ + size_t i_mx = 0;\ + for(size_t i = 0; i < size; ++i)\ + if(compare(&array[i_mx],&array[i]) < 0) i_mx = i;\ + return i_mx;} +ARG_MAX_ARRAY(TYPE_CHAR,array,size,COMPARE_N_TYPE_CHAR); +ARG_MAX_ARRAY(TYPE_U_CHAR,array,size,COMPARE_N_TYPE_U_CHAR); +ARG_MAX_ARRAY(TYPE_INT,array,size,COMPARE_N_TYPE_INT); +ARG_MAX_ARRAY(TYPE_U_INT,array,size,COMPARE_N_TYPE_U_INT); +ARG_MAX_ARRAY(TYPE_L_INT,array,size,COMPARE_N_TYPE_L_INT); +ARG_MAX_ARRAY(TYPE_U_L_INT,array,size,COMPARE_N_TYPE_U_L_INT); +ARG_MAX_ARRAY(TYPE_FLOAT,array,size,COMPARE_N_TYPE_FLOAT); +ARG_MAX_ARRAY(TYPE_DOUBLE,array,size,COMPARE_N_TYPE_DOUBLE); +ARG_MAX_ARRAY(TYPE_L_DOUBLE,array,size,COMPARE_N_TYPE_L_DOUBLE); +ARG_MAX_ARRAY(TYPE_STRING,array,size,COMPARE_N_TYPE_STRING); + +#define MIN_ARRAY(type, array, size, compare)\ + type MIN_ARRAY_##type(const type *array, size_t size){\ + if(array == NULL) return 0;\ + type mn =(type)array[0];\ + for(size_t i = 0; i < size; ++i)\ + if(compare(&mn,&array[i]) > 0) mn =(type)array[i];\ + return mn;} +MIN_ARRAY(TYPE_CHAR,array,size,COMPARE_N_TYPE_CHAR); +MIN_ARRAY(TYPE_U_CHAR,array,size,COMPARE_N_TYPE_U_CHAR); +MIN_ARRAY(TYPE_INT,array,size,COMPARE_N_TYPE_INT); +MIN_ARRAY(TYPE_U_INT,array,size,COMPARE_N_TYPE_U_INT); +MIN_ARRAY(TYPE_L_INT,array,size,COMPARE_N_TYPE_L_INT); +MIN_ARRAY(TYPE_U_L_INT,array,size,COMPARE_N_TYPE_U_L_INT); +MIN_ARRAY(TYPE_FLOAT,array,size,COMPARE_N_TYPE_FLOAT); +MIN_ARRAY(TYPE_DOUBLE,array,size,COMPARE_N_TYPE_DOUBLE); +MIN_ARRAY(TYPE_L_DOUBLE,array,size,COMPARE_N_TYPE_L_DOUBLE); +MIN_ARRAY(TYPE_STRING,array,size,COMPARE_N_TYPE_STRING); + +#define ARG_MIN_ARRAY(type, array, size, compare)\ + size_t ARG_MIN_ARRAY_##type(const type *array, size_t size){\ + if(array == NULL) return 0;\ + size_t i_mn = 0;\ + for(size_t i = 0; i < size; ++i)\ + if(compare(&array[i_mn],&array[i]) > 0) i_mn = i;\ + return i_mn;} +ARG_MIN_ARRAY(TYPE_CHAR,array,size,COMPARE_N_TYPE_CHAR); +ARG_MIN_ARRAY(TYPE_U_CHAR,array,size,COMPARE_N_TYPE_U_CHAR); +ARG_MIN_ARRAY(TYPE_INT,array,size,COMPARE_N_TYPE_INT); +ARG_MIN_ARRAY(TYPE_U_INT,array,size,COMPARE_N_TYPE_U_INT); +ARG_MIN_ARRAY(TYPE_L_INT,array,size,COMPARE_N_TYPE_L_INT); +ARG_MIN_ARRAY(TYPE_U_L_INT,array,size,COMPARE_N_TYPE_U_L_INT); +ARG_MIN_ARRAY(TYPE_FLOAT,array,size,COMPARE_N_TYPE_FLOAT); +ARG_MIN_ARRAY(TYPE_DOUBLE,array,size,COMPARE_N_TYPE_DOUBLE); +ARG_MIN_ARRAY(TYPE_L_DOUBLE,array,size,COMPARE_N_TYPE_L_DOUBLE); +ARG_MIN_ARRAY(TYPE_STRING,array,size,COMPARE_N_TYPE_STRING); + + +int main() +{ + unsigned int ui1 = 2545466; + unsigned int ui2 = 2544566; + printf(" %u >? %u = %d \n",ui1,ui2,COMPARE_N_TYPE_U_INT(&ui1,&ui2)); + printf(" %u >? %u = %d \n",ui1,ui1,COMPARE_N_TYPE_U_INT(&ui1,&ui1)); + printf(" %u >? %u = %d \n",ui2,ui1,COMPARE_N_TYPE_U_INT(&ui2,&ui1)); + int i1 = 2545466; + int i2 = 2544566; + printf(" %d >? %d = %d \n",i1,i2,COMPARE_N_TYPE_U_INT(&i1,&i2)); + printf(" %d >? %d = %d \n",i1,i1,COMPARE_N_TYPE_U_INT(&i1,&i1)); + printf(" %d >? %d = %d \n",i2,i1,COMPARE_N_TYPE_U_INT(&i2,&i1)); + + int tabi[]={5,2,6,4,3,1}; + int tabr[6]={0}; + COPY_ARRAY_TYPE_INT(tabr,tabi,6); + + for(size_t i=0; i<6; ++i) printf(" %d \n", tabr[i]); + + printf("MIN = %d \n",MIN_ARRAY_TYPE_INT(tabr,6)); + + return 0; +} diff --git a/src/tools_t/tools_t.h b/src/tools_t/tools_t.h new file mode 100644 index 0000000..ac0959b --- /dev/null +++ b/src/tools_t/tools_t.h @@ -0,0 +1,92 @@ +#ifndef __TOOLS_T_C_H__ +#define __TOOLS_T_C_H__ + +#include +#include +#include + +#define TYPE_CHAR char +#define TYPE_U_CHAR unsigned char +#define TYPE_INT int +#define TYPE_U_INT unsigned int +#define TYPE_L_INT long int +#define TYPE_U_L_INT unsigned long int +#define TYPE_FLOAT float +#define TYPE_DOUBLE double +#define TYPE_L_DOUBLE long double +#define TYPE_STRING char* + +#define FREE(x) { free((x)); (x) = NULL;} + +#define FOREACH(array, size, function)\ + for(size_t _ind = 0; _ind < size; ++_ind) function(array[_ind]); + + +int COMPARE_N_TYPE_CHAR(const void *,const void*); +int COMPARE_N_TYPE_U_CHAR(const void *,const void*); +int COMPARE_N_TYPE_INT(const void *,const void*); +int COMPARE_N_TYPE_U_INT(const void *,const void*); +int COMPARE_N_TYPE_L_INT(const void *,const void*); +int COMPARE_N_TYPE_U_L_INT(const void *,const void*); +int COMPARE_N_TYPE_FLOAT(const void *,const void*); +int COMPARE_N_TYPE_DOUBLE(const void *,const void*); +int COMPARE_N_TYPE_L_DOUBLE(const void *,const void*); +int COMPARE_N_TYPE_STRING(const void *,const void*); + +void COPY_ARRAY_TYPE_CHAR(TYPE_CHAR* dst, const TYPE_CHAR* src, size_t size); +void COPY_ARRAY_TYPE_U_CHAR(TYPE_U_CHAR* dst, const TYPE_U_CHAR* src, size_t size); +void COPY_ARRAY_TYPE_INT(TYPE_INT* dst, const TYPE_INT* src, size_t size); +void COPY_ARRAY_TYPE_U_INT(TYPE_U_INT* dst, const TYPE_U_INT* src, size_t size); +void COPY_ARRAY_TYPE_L_INT(TYPE_L_INT* dst, const TYPE_L_INT* src, size_t size); +void COPY_ARRAY_TYPE_U_L_INT(TYPE_U_L_INT* dst, const TYPE_U_L_INT* src, size_t size); +void COPY_ARRAY_TYPE_FLOAT(TYPE_FLOAT* dst, const TYPE_FLOAT* src, size_t size); +void COPY_ARRAY_TYPE_DOUBLE(TYPE_DOUBLE* dst, const TYPE_DOUBLE* src, size_t size); +void COPY_ARRAY_TYPE_L_DOUBLE(TYPE_L_DOUBLE* dst, const TYPE_L_DOUBLE* src, size_t size); +void COPY_ARRAY_TYPE_STRING(TYPE_STRING* dst, const TYPE_STRING* src, size_t size); + +TYPE_CHAR MAX_ARRAY_TYPE_CHAR(const TYPE_CHAR *array, size_t size); +TYPE_U_CHAR MAX_ARRAY_TYPE_U_CHAR(const TYPE_U_CHAR *array, size_t size); +TYPE_INT MAX_ARRAY_TYPE_INT(const TYPE_INT *array, size_t size); +TYPE_U_INT MAX_ARRAY_TYPE_U_INT(const TYPE_U_INT *array, size_t size); +TYPE_L_INT MAX_ARRAY_TYPE_L_INT(const TYPE_L_INT *array, size_t size); +TYPE_U_L_INT MAX_ARRAY_TYPE_U_L_INT(const TYPE_U_L_INT *array, size_t size); +TYPE_FLOAT MAX_ARRAY_TYPE_FLOAT(const TYPE_FLOAT *array, size_t size); +TYPE_DOUBLE MAX_ARRAY_TYPE_DOUBLE(const TYPE_DOUBLE *array, size_t size); +TYPE_L_DOUBLE MAX_ARRAY_TYPE_L_DOUBLE(const TYPE_L_DOUBLE *array, size_t size); +TYPE_STRING MAX_ARRAY_TYPE_STRING(const TYPE_STRING *array, size_t size); + +size_t ARG_MAX_ARRAY_TYPE_CHAR(const TYPE_CHAR *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_U_CHAR(const TYPE_U_CHAR *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_INT(const TYPE_INT *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_U_INT(const TYPE_U_INT *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_L_INT(const TYPE_L_INT *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_U_L_INT(const TYPE_U_L_INT *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_FLOAT(const TYPE_FLOAT *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_DOUBLE(const TYPE_DOUBLE *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_L_DOUBLE(const TYPE_L_DOUBLE *array, size_t size); +size_t ARG_MAX_ARRAY_TYPE_STRING(const TYPE_STRING *array, size_t size); + +TYPE_CHAR MIN_ARRAY_TYPE_CHAR(const TYPE_CHAR *array, size_t size); +TYPE_U_CHAR MIN_ARRAY_TYPE_U_CHAR(const TYPE_U_CHAR *array, size_t size); +TYPE_INT MIN_ARRAY_TYPE_INT(const TYPE_INT *array, size_t size); +TYPE_U_INT MIN_ARRAY_TYPE_U_INT(const TYPE_U_INT *array, size_t size); +TYPE_L_INT MIN_ARRAY_TYPE_L_INT(const TYPE_L_INT *array, size_t size); +TYPE_U_L_INT MIN_ARRAY_TYPE_U_L_INT(const TYPE_U_L_INT *array, size_t size); +TYPE_FLOAT MIN_ARRAY_TYPE_FLOAT(const TYPE_FLOAT *array, size_t size); +TYPE_DOUBLE MIN_ARRAY_TYPE_DOUBLE(const TYPE_DOUBLE *array, size_t size); +TYPE_L_DOUBLE MIN_ARRAY_TYPE_L_DOUBLE(const TYPE_L_DOUBLE *array, size_t size); +TYPE_STRING MIN_ARRAY_TYPE_STRING(const TYPE_STRING *array, size_t size); + +size_t ARG_MIN_ARRAY_TYPE_CHAR(const TYPE_CHAR *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_U_CHAR(const TYPE_U_CHAR *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_INT(const TYPE_INT *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_U_INT(const TYPE_U_INT *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_L_INT(const TYPE_L_INT *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_U_L_INT(const TYPE_U_L_INT *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_FLOAT(const TYPE_FLOAT *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_DOUBLE(const TYPE_DOUBLE *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_L_DOUBLE(const TYPE_L_DOUBLE *array, size_t size); +size_t ARG_MIN_ARRAY_TYPE_STRING(const TYPE_STRING *array, size_t size); + + +#endif /*__TOOLS_T_C_H__*/ diff --git a/test/isgood.cu b/test/isgood.cu new file mode 100644 index 0000000..6aeaced --- /dev/null +++ b/test/isgood.cu @@ -0,0 +1,652 @@ +#include +#include + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tens0neD/tens0neD.h" +#include "src/tensor/tens0neD/tens0neD.h" +//#include "cudatensor.h" +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tensCuda/tensCuda.h" +#include "src/tensor/tensCuda/tensCuda.h" +/*TEST(LineraCoodTransform, check_print) { + int t3[] = { [0] = 2,[1] = 4,[2] = 3 }; + + struct dimension D0(3, t3); + int coor0[3] = { 1,3,2 }; + int* coor1 = new int[3]; + + int l0 = D0.CoordToLinear(coor0); + + D0.print(); + + D0.LinearToCoord(coor1, l0); + + for (int i = 0; i < D0.rank; i++) { + EXPECT_EQ(coor0[i], coor1[i]) << " coor0: " << coor0[i] << " coor1: " << coor1[i] << " i: " << i; + } +}*/ + +TEST(subArray, concatArray) { + int t[] = { 1,5,6,2,3 }; + int t0[] = { 1,5,6 }; + int t1[] = { 2,3 }; + int n = 5; + int s0[3]; + int s1[2]; + int s[n]; + + subArray(s0, t, 0, 3, 0); + subArray(s1, t, 0, 2, 3); + ASSERT_EQ(0, memcmp(t0, s0, sizeof(int) * 3)); + ASSERT_EQ(0, memcmp(t1, s1, sizeof(int) * 2)); + + concatArray(s, s0, s1, 0, 0, 3, 0, 2); + ASSERT_EQ(0, memcmp(t, s, sizeof(int) * 5)); + +} +TEST(tensorProdpetit, floatTemp) { + + /*int t3[] = { 2, 4, 3 }; + + int t4[] = { 2, 4, 3, 2 };*/ + int t3[] = { 3, 6, 5 }; + + int t4[] = { 3, 5, 8, 4 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + tensorProd(M, M3, M4); + //tensorProd(M, M4, M3); + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int lin3, lin4, lin; + d = M.Dim; + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + for (int i = 0; i < d3.rank; i++) coord3[i] = idx3[i]; + for (int i = 0; i < d4.rank; i++) coord4[i] = idx4[i]; + + concatArray(coord, coord3, coord4, 0, 0, d3.rank, 0, d4.rank); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + lin = d.CoordToLinear(coord); + + //ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + //ASSERT_NEAR(M.elements[lin], M3.elements[lin3] * M4.elements[lin4], 0.0001) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + } + + + +} + +TEST(tensorProd, doubleTemp) { + + int t3[] = { 2, 4, 3 }; + + int t4[] = { 4, 3, 2,3 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + tensorProd(M, M3, M4); + //tensorProd(M, M4, M3); + d = M.Dim; + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int lin3, lin4, lin; + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + for (int i = 0; i < d3.rank; i++) coord3[i] = idx3[i]; + for (int i = 0; i < d4.rank; i++) coord4[i] = idx4[i]; + + concatArray(coord, coord3, coord4, 0, 0, d3.rank, 0, d4.rank); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + lin = d.CoordToLinear(coord); + + //ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]); + ASSERT_DOUBLE_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]); + //ASSERT_NEAR(M.elements[lin], M3.elements[lin3] * M4.elements[lin4], 0.001) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + } + + +} + + + + +void printArray(int* t, int sz) { + for (int i = 0; i < sz;i++) printf(" %d ", t[i]); +} + +TEST(tensorContractnProd, floatTemp) { + + int t3[] = { 2, 4, 3 }; + + int t4[] = { 4, 3, 2, 3 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + int dee = 2; + + try { + //tensorContractnProd(M, M3, M4, dee); + tensorContractnProd(M, M3, M4, dee); + } + catch (const std::invalid_argument& e) { + printf("bye from test tensorContractnProd floatTemp invalid arg! deep:\n"); + dimension dM; + extractDimNestingDepth(dM, d3, d4, dee); + dM.print(); + ASSERT_TRUE(false); + } + + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int l0, l1; + l0 = M3.Dim.rank - dee; + l1 = M4.Dim.rank - dee; + int pcoord3[l0]; + int pcoord4[l1]; + int r[dee]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + Tensor Msum(d); + //for (size_t idx = 0; idx < d.size; idx++) Msum.elements[idx] = 0.0f; + + //Msum.print(); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + + for (int i = 0; i < l0; i++) pcoord3[i] = idx3[i]; + for (int i = 0; i < l1; i++) pcoord4[i] = idx4[i + dee]; + concatArray(coord, pcoord3, pcoord4, 0, 0, l0, 0, l1); + lin = d.CoordToLinear(coord); + Msum.elements[lin] = 0.0f; + //for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + //for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + { + for (int i = 0; i < dee; i++) r[i] = idx4[i]; + + concatArray(coord3, pcoord3, r, 0, 0, l0, 0, dee); + concatArray(coord4, r, pcoord4, 0, 0, dee, 0, l1); + //printf("[");printArray(coord3, M3.Dim.rank); printf("]["); printArray(coord4, M4.Dim.rank);printf("] =*= ("); printArray(coord, Msum.Dim.rank); printf(") |||"); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + + Msum.elements[lin] += (M3.elements[lin3] * M4.elements[lin4]); + //printf("lin:%d lin3:%d lin4:%d el+:%f\n", lin, lin3, lin4, Msum.elements[lin]); + + } + + ASSERT_FLOAT_EQ(Msum.elements[lin], M.elements[lin]); + + } + + + +} + +TEST(tensorContractnProdD, doubleTemp) { + + int t3[] = { 2, 3, 4 }; + + int t4[] = { 3, 4, 2, 3 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + int dee = 2; + + try { + //tensorContractnProd(M, M3, M4, dee); + tensorContractnProd(M, M3, M4, dee); + } + catch (const std::invalid_argument& e) { + printf("bye from test tensorContractnProd floatTemp invalid arg! deep:\n"); + dimension dM; + extractDimNestingDepth(dM, d3, d4, dee); + dM.print(); + ASSERT_TRUE(false); + } + + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int l0, l1; + l0 = M3.Dim.rank - dee; + l1 = M4.Dim.rank - dee; + int pcoord3[l0]; + int pcoord4[l1]; + int r[dee]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + Tensor Msum(d); + //for (size_t idx = 0; idx < d.size; idx++) Msum.elements[idx] = 0.0f; + + //Msum.print(); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + + for (int i = 0; i < l0; i++) pcoord3[i] = idx3[i]; + for (int i = 0; i < l1; i++) pcoord4[i] = idx4[i + dee]; + concatArray(coord, pcoord3, pcoord4, 0, 0, l0, 0, l1); + lin = d.CoordToLinear(coord); + Msum.elements[lin] = 0.0f; + //for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + //for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + { + for (int i = 0; i < dee; i++) r[i] = idx4[i]; + + concatArray(coord3, pcoord3, r, 0, 0, l0, 0, dee); + concatArray(coord4, r, pcoord4, 0, 0, dee, 0, l1); + //printf("[");printArray(coord3, M3.Dim.rank); printf("]["); printArray(coord4, M4.Dim.rank);printf("] =*= ("); printArray(coord, Msum.Dim.rank); printf(") |||"); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + + Msum.elements[lin] += (M3.elements[lin3] * M4.elements[lin4]); + //printf("lin:%d lin3:%d lin4:%d el+:%f\n", lin, lin3, lin4, Msum.elements[lin]); + + } + + ASSERT_DOUBLE_EQ(Msum.elements[lin], M.elements[lin]); + + } + + + +} + +TEST(reverseArray, innt) { + int n = 6; + int t4[6] = { 3, 4, 2, 3 ,5, 1 }; + int revt4[6] = { 1,5,3,2, 4, 3 }; + reverseArray(t4, n); + for (int i = 0; i < n; i++) { + ASSERT_EQ(t4[i], revt4[i]); + } +} + +TEST(tensorContractnReverseProd, floatTemp) { + + int t3[] = { 4, 4, 3 }; + + int t4[] = { 3, 4, 7, 2 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + int dee = 2; + + try { + //tensorContractnProd(M, M3, M4, dee); + tensorContractnReverseProd(M, M3, M4, dee); + } + catch (const std::invalid_argument& e) { + printf("bye from test tensorContractnProd floatTemp invalid arg! deep:\n"); + dimension dM; + extractDimNestingDepth(dM, d3, d4, dee); + dM.print(); + ASSERT_TRUE(false); + } + + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int l0, l1; + l0 = M3.Dim.rank - dee; + l1 = M4.Dim.rank - dee; + int pcoord3[l0]; + int pcoord4[l1]; + int r[dee]; + int rev[dee]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + Tensor Msum(d); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + + for (int i = 0; i < l0; i++) pcoord3[i] = idx3[i]; + for (int i = 0; i < l1; i++) pcoord4[i] = idx4[i + dee]; + concatArray(coord, pcoord3, pcoord4, 0, 0, l0, 0, l1); + lin = d.CoordToLinear(coord); + Msum.elements[lin] = 0.0f; + //for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + //for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + { + for (int i = 0; i < dee; i++) { + r[i] = idx4[i]; + rev[i] = idx4[dee - 1 - i]; + } + + concatArray(coord3, pcoord3, rev, 0, 0, l0, 0, dee); + concatArray(coord4, r, pcoord4, 0, 0, dee, 0, l1); + //printf("[");printArray(coord3, M3.Dim.rank); printf("]["); printArray(coord4, M4.Dim.rank);printf("] =*= ("); printArray(coord, Msum.Dim.rank); printf(") |||"); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + + Msum.elements[lin] += (M3.elements[lin3] * M4.elements[lin4]); + //printf("lin:%d lin3:%d lin4:%d el+:%f\n", lin, lin3, lin4, Msum.elements[lin]); + } + ASSERT_FLOAT_EQ(Msum.elements[lin], M.elements[lin]); + } +} + + +TEST(cudaTensorProd, floatTemp) { + + int t3[] = { 15, 6, 24 }; + + int t4[] = { 23, 15, 6, 10 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(1.0f); // M3.print(); + M4.initVal(0.5f); // M4.print(); + + cudaTensorProd(M, M3, M4); + //tensorProd(M, M4, M3); + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + for (int i = 0; i < d3.rank; i++) coord3[i] = idx3[i]; + for (int i = 0; i < d4.rank; i++) coord4[i] = idx4[i]; + + concatArray(coord, coord3, coord4, 0, 0, d3.rank, 0, d4.rank); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + lin = d.CoordToLinear(coord); + + //ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + + //ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " M " << M.elements[lin] << " lin: " << lin << " M3: " << M3.elements[lin3] << " lin3:" << lin3 << " lin4: " << lin4 << " M4 " << M4.elements[lin4] << std::endl; + //std::cout << " M " << M.elements[lin] << " lin: " << lin << " M3: " << M3.elements[lin3] << " lin3:" << lin3 << " lin4: " << lin4 << " M4 " << M4.elements[lin4] << std::endl; + + //ASSERT_NEAR(M.elements[lin], M3.elements[lin3] * M4.elements[lin4], 0.0001) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + } + +} + + +TEST(permuteTensor, float) { + int t4[] = { 3, 8, 2, 4 }; + struct dimension d4(4, t4); + struct Tensor M4(d4), M; + M4.initVal(1.0f); + permutation p(4, true); + int n = 5; + //for (int n = 0; n < 24;n++) { + PlaceToTab(p.perm, n, p.size); + printf(" %*d : ", 2, n); + for (int i = 0; i < p.size; i++)printf("(%d)%d ", i, p.perm[i]);printf("\n"); + permuteTensor(M, M4, p); + //permuteTensorDef(M, M4, p); + int ind[4]; + int coor[4]; + size_t cM, cM4; + for (ind[0] = 0; ind[0] < M4.Dim.dim[0]; ind[0]++) + for (ind[1] = 0; ind[1] < M4.Dim.dim[1]; ind[1]++) + for (ind[2] = 0; ind[2] < M4.Dim.dim[2]; ind[2]++) + for (ind[3] = 0; ind[3] < M4.Dim.dim[3]; ind[3]++) { + p.permute(coor, ind); + cM = M.Dim.CoordToLinear(coor); + cM4 = M4.Dim.CoordToLinear(ind); + //printf("M[%ld]=%f M4[%ld]=%f \n", cM, M.elements[cM], cM4, M4.elements[cM4]); + ASSERT_FLOAT_EQ(M.elements[cM], M4.elements[cM4]); + } + +} + + +TEST(cudapermuteTensor, float) { + int t4[] = { 3, 8, 2, 4 }; + struct dimension d4(4, t4); + struct Tensor M4(d4), M; + M4.initVal(1.0f); + permutation p(4, true); + int n = 5; + //for (int n = 0; n < 24;n++) { + PlaceToTab(p.perm, n, p.size); + printf(" %*d : ", 2, n); + for (int i = 0; i < p.size; i++)printf("{%d}%d ", i, p.perm[i]);printf("\n"); + cudapermuteTensor(M, M4, p); + //permuteTensor(M, M4, p); + //permuteTensorDef(M, M4, p); + int ind[4]; + int coor[4]; + size_t cM, cM4; + for (ind[0] = 0; ind[0] < M4.Dim.dim[0]; ind[0]++) + for (ind[1] = 0; ind[1] < M4.Dim.dim[1]; ind[1]++) + for (ind[2] = 0; ind[2] < M4.Dim.dim[2]; ind[2]++) + for (ind[3] = 0; ind[3] < M4.Dim.dim[3]; ind[3]++) { + p.permute(coor, ind); + cM = M.Dim.CoordToLinear(coor); + cM4 = M4.Dim.CoordToLinear(ind); + //printf("M[%ld]=%f M4[%ld]=%f \n", cM, M.elements[cM], cM4, M4.elements[cM4]); + ASSERT_FLOAT_EQ(M.elements[cM], M4.elements[cM4]); + } +} + +TEST(scanPermuteMatchContractTensorfromSrcToDst1, permId) { + int t[] = { 3, 8, 2, 3, 4 }; + //int tm[] = { 4, 2, 7, 3 }; + int tm[] = { 2, 3,4,7 }; + struct dimension d(5, t); + struct dimension dm(4, tm); + struct Tensor M4(d), M(dm); + M4.initVal(1.0f); + M.initVal(1.0f); + int dee = 3; + //int result[4] = { 1,3,0,2 }; + int result[4] = { 0,1,2,3 }; + int perm[M.Dim.rank]; + ASSERT_TRUE(scanPermuteMatchContractTensorfromSrcToDst(perm, M, M4, dee)); + for (int i = 0; i < M.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(" first perm \n"); + ASSERT_EQ(0, memcmp(result, perm, sizeof(int) * M.Dim.rank)); + + Tensor tM; + permutation p(M.Dim.rank, perm); + permuteTensor(tM, M, p); + + ASSERT_FALSE(scanPermuteMatchContractTensorfromSrcToDst(perm, M, M4, 4)); + for (int i = 0; i < M.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(": last perm \n"); + tM.Dim.print(); + int resultDim[] = { 2,3,4,7 }; + ASSERT_EQ(0, memcmp(resultDim, tM.Dim.dim, sizeof(int) * tM.Dim.rank)); + +} + +TEST(scanPermuteMatchContractTensorfromSrcToDst2, floatest) { + int t[] = { 3, 8, 2, 3, 4 }; + int tm[] = { 4, 2, 7, 3 }; + //int tm[] = { 2, 3,4,7 }; + struct dimension d(5, t); + struct dimension dm(4, tm); + struct Tensor M4(d), M(dm); + M4.initVal(1.0f); + M.initVal(1.0f); + int dee = 3; + int result[4] = { 1,3,0,2 }; + //int result[4] = { 0,1,2,3 }; + int perm[M.Dim.rank]; + ASSERT_TRUE(scanPermuteMatchContractTensorfromSrcToDst(perm, M, M4, dee)); + for (int i = 0; i < M.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(" first perm \n"); + ASSERT_EQ(0, memcmp(result, perm, sizeof(int) * M.Dim.rank)); + + Tensor tM; + permutation p(M.Dim.rank, perm); + permuteTensor(tM, M, p); + + ASSERT_FALSE(scanPermuteMatchContractTensorfromSrcToDst(perm, M, M4, 4)); + for (int i = 0; i < M.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(": last perm \n"); + tM.Dim.print(); + int resultDim[] = { 2,3,4,7 }; + ASSERT_EQ(0, memcmp(resultDim, tM.Dim.dim, sizeof(int) * tM.Dim.rank)); + + + +} + + +TEST(cudaTensorContractNestProd, floatTemp) { + + int t3[] = { 77, 8, 25 }; + + int t4[] = { 8, 25, 52, 144 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(1.0f); // M3.print(); + M4.initVal(0.0f); // M4.print(); + + int dee = 2; + + M4.Dim.print(); + + try { + //tensorContractnProd(M, M3, M4, dee); + cudaTensorContractNestProd(M, M3, M4, dee); + } + catch (const std::invalid_argument& e) { + printf("bye from test tensorContractnProd floatTemp invalid arg! deep: \n"); + dimension dM; + extractDimNestingDepth(dM, d3, d4, dee); + dM.print(); + ASSERT_TRUE(false); + } + + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int l0, l1; + l0 = M3.Dim.rank - dee; + l1 = M4.Dim.rank - dee; + int pcoord3[l0]; + int pcoord4[l1]; + int r[dee]; + //int rev[dee]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + Tensor Msum(d); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + + for (int i = 0; i < l0; i++) pcoord3[i] = idx3[i]; + for (int i = 0; i < l1; i++) pcoord4[i] = idx4[i + dee]; + concatArray(coord, pcoord3, pcoord4, 0, 0, l0, 0, l1); + lin = d.CoordToLinear(coord); + Msum.elements[lin] = 0.0f; + //for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + //for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + { + for (int i = 0; i < dee; i++) { + r[i] = idx4[i]; + //rev[i] = idx4[dee - 1 - i]; + } + + //concatArray(coord3, pcoord3, rev, 0, 0, l0, 0, dee); + concatArray(coord3, pcoord3, r, 0, 0, l0, 0, dee); + concatArray(coord4, r, pcoord4, 0, 0, dee, 0, l1); + //printf("[");printArray(coord3, M3.Dim.rank); printf("]["); printArray(coord4, M4.Dim.rank);printf("] =*= ("); printArray(coord, Msum.Dim.rank); printf(") |||"); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + + Msum.elements[lin] += (M3.elements[lin3] * M4.elements[lin4]); + //printf("lin:%d lin3:%d lin4:%d el+:%f\n", lin, lin3, lin4, Msum.elements[lin]); + } + ASSERT_FLOAT_EQ(Msum.elements[lin], M.elements[lin]) << " lin: " << lin << " Msumelem: " << Msum.elements[lin] << " Melem: " << M.elements[lin]; + } +} + + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/testool b/testool new file mode 100644 index 0000000..1170e5d Binary files /dev/null and b/testool differ diff --git a/try/CMakeLists.txt b/try/CMakeLists.txt new file mode 100644 index 0000000..ee3b3a0 --- /dev/null +++ b/try/CMakeLists.txt @@ -0,0 +1,37 @@ +#cmake_minimum_required(VERSION 3.2) +cmake_minimum_required(VERSION 3.18) + +project(ptens0neD) + +include(FetchContent) +FetchContent_Declare(googletest + GIT_REPOSITORY https://github.com/google/googletest + GIT_TAG release-1.12.1 +) +FetchContent_GetProperties(googletest) + +if(NOT googletest_POPULATED) + FetchContent_Populate(googletest) + add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BUILD_DIR}) +endif() + +#add_library(add STATIC add.cu) +add_library(permutation STATIC src/permutation/permutation.cpp) +add_library(dimension STATIC src/dimension/dimension.cpp) +add_library(tens0ne STATIC src/tens0ne/tens0ne.cpp) +add_library(cutens0ne STATIC src/cutens0ne/cutens0ne.cu) +add_library(d_cutens0ne STATIC src/cutens0ne/d_cutens0ne.cu) + +#find_package(CUDA REQUIRED) +enable_language(CUDA) +#cuda_add_library(add STATIC add.cu) +#add_library(permutation STATIC ../permutation/permutation.cu) + +#cuda_add_executable( ) +add_executable(isgood isgood.cu device.cuh ../tensor/tensor.cpp ../permutation/permutation.cpp) +#cuda_add_executable(isgood isgood.cpp add.cu ) +#target_link_libraries(isgood add gtest_main gmock_main pthread) +target_link_libraries(isgood gtest_main gmock_main pthread) + +enable_testing() +add_test(Tester isgood) \ No newline at end of file diff --git a/try/saved0work/CMakeLists.txt b/try/saved0work/CMakeLists.txt new file mode 100644 index 0000000..e602a79 --- /dev/null +++ b/try/saved0work/CMakeLists.txt @@ -0,0 +1,71 @@ +#cmake_minimum_required(VERSION 3.20) +#cmake_minimum_required(VERSION 3.18) +cmake_minimum_required(VERSION 3.0.0) + +set(F0_PROJECT_NAME projectF0) + +set(F0_LIBRARIES + permutation + dimension + tens0neD +) +set(F0_CUDA_LIBRARIES + tensCuda + d_tensCuda +) + +set(F0_SOURCE_DIR + src +) + + + +project(${F0_PROJECT_NAME}) + +include(FetchContent) +FetchContent_Declare(googletest + GIT_REPOSITORY https://github.com/google/googletest + GIT_TAG release-1.12.1 +) +FetchContent_GetProperties(googletest) + +if(NOT googletest_POPULATED) + FetchContent_Populate(googletest) + add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BUILD_DIR}) +endif() + + +#add_library(add STATIC add.cu) + +foreach(libvarcu ${F0_CUDA_LIBRARIES}) + list(APPEND F0_SOURCES_CU ${F0_SOURCE_DIR}/tensor/tensCuda/${libvarcu}.cu) +endforeach() + +foreach(libvar ${F0_LIBRARIES}) + if( ${libvar} STREQUAL "tens0neD" ) + list(APPEND F0_SOURCES_CPP ${F0_SOURCE_DIR}/tensor/${libvar}/${libvar}.cpp) + else() + list(APPEND F0_SOURCES_CPP ${F0_SOURCE_DIR}/${libvar}/${libvar}.cpp) + endif() +endforeach() + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +#find_package(CUDA REQUIRED) # no need +enable_language(CUDA) + +#if version < 3.18 +if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 75) +endif() + +add_library(${F0_PROJECT_NAME} STATIC ${F0_SOURCES_CPP} ${F0_SOURCES_CU}) # no need if add sources in executable + + + +add_executable(isgood test/isgood.cu) # if library set, no need of ${F0_SOURCES_CPP} ${F0_SOURCES_CU}) but if not you nedd to add theses files src + +target_link_libraries(isgood gtest_main gmock_main pthread ${F0_PROJECT_NAME}) # no need if no local library set + +enable_testing() +add_test(Tester isgood) \ No newline at end of file diff --git a/try/try_cmakelists0/CMakeLists.txt b/try/try_cmakelists0/CMakeLists.txt new file mode 100644 index 0000000..79fc3f5 --- /dev/null +++ b/try/try_cmakelists0/CMakeLists.txt @@ -0,0 +1,37 @@ +cmake_minimum_required(VERSION 3.0.0) + +set(FOO_PROJECT_NAME + Foo +) +set(FOO_SOURCE_DIR + src +) +set(FOO_SOURCE + ${FOO_SOURCE_DIR}/main.cpp +) +set(FOO_LIBRARIES_DIR + libs +) +set(FOO_LIBRARIES + A + B +) + +project(${FOO_PROJECT_NAME}) + +######### +# GTest # +######### +enable_testing() +add_subdirectory(libs/gtest-1.7.0) +include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR}) + +########### +# Project # +########### +add_executable(${FOO_PROJECT_NAME} ${FOO_SOURCE}) + +foreach(LIBRARY ${FOO_LIBRARIES}) + add_subdirectory("${FOO_LIBRARIES_DIR}/${LIBRARY}") +endforeach(LIBRARY) +target_link_libraries(${FOO_PROJECT_NAME} ${FOO_LIBRARIES}) diff --git a/try/try_cmakelists1/CMakeLists.txt b/try/try_cmakelists1/CMakeLists.txt new file mode 100644 index 0000000..80bb9b2 --- /dev/null +++ b/try/try_cmakelists1/CMakeLists.txt @@ -0,0 +1,80 @@ +cmake_minimum_required(VERSION 3.18) + +project(ptens0neD VERSION 2023.0 + LANGUAGES CXX + HOMEPAGE_URL "https://github.com/fanasina/ptens0neD") + +##################################################################### +# DEPENDENCIES +##################################################################### + +find_package(Threads REQUIRED) +find_package(Boost REQUIRED) + +include(FetchContent) + +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.10.0) + +FetchContent_GetProperties(googletest) +if(NOT googletest_POPULATED) + FetchContent_Populate(googletest) + add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} + EXCLUDE_FROM_ALL) +endif() + +##################################################################### +# LIBRARY +##################################################################### + +add_library(${PROJECT_NAME} + src/ptens0neD/implementation.cpp + ) + +add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) + +set_target_properties(${PROJECT_NAME} PROPERTIES + VERSION ${PROJECT_VERSION}) + +target_include_directories(${PROJECT_NAME} + PUBLIC include + PRIVATE src) + +target_link_libraries(${PROJECT_NAME} + PUBLIC Threads::Threads + PRIVATE Boost::boost) + +target_compile_options(${PROJECT_NAME} + PRIVATE -Wall -Wextra -pedantic -Werror) + +target_compile_features(${PROJECT_NAME} + PRIVATE cxx_std_17) + +include(GNUInstallDirs) + +install(TARGETS ${PROJECT_NAME} + EXPORT ptens0neDTargets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + +install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +##################################################################### +# UNIT TESTS +##################################################################### + +add_executable(${PROJECT_NAME}-tests + test/test-myclass.cpp) + +target_link_libraries(${PROJECT_NAME}-tests + PRIVATE gtest_main Threads::Threads ${PROJECT_NAME}::${PROJECT_NAME}) + +target_compile_options(${PROJECT_NAME}-tests + PRIVATE -Wall -Wextra -pedantic -Werror) + +target_compile_features(${PROJECT_NAME} + PRIVATE cxx_std_17) + diff --git a/try/try_cmakelists2/CMakeLists.txt b/try/try_cmakelists2/CMakeLists.txt new file mode 100644 index 0000000..9407483 --- /dev/null +++ b/try/try_cmakelists2/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.20) +project(geometries LANGUAGES CXX) + +add_executable(app) +target_sources(app PRIVATE "app.cpp") +target_include_directories(app PRIVATE "${PROJECT_SOURCE_DIR}") +add_subdirectory("shape") +add_subdirectory("square") +install(TARGETS app) +