From c91910a2781b54dffce5dab8c74bde97b386f42d Mon Sep 17 00:00:00 2001 From: fanasina Date: Tue, 17 Oct 2023 16:45:57 +0200 Subject: [PATCH] add simple test, and mv permutation test, update README --- Makefile | 5 +- README.md | 16 +- permutation_test/Makefile | 52 ++ permutation_test/compile.sh | 22 + permutation_test/is_good.c | 374 ++++++++++ permutation_test/isgood.cu | 652 ++++++++++++++++++ permutation_test/src/Makefile | 52 ++ permutation_test/src/coordinate/coordinate.h | 21 + permutation_test/src/dimension/dimension.cpp | 181 +++++ permutation_test/src/dimension/dimension.h | 31 + permutation_test/src/dimension/dimension.hpp | 90 +++ permutation_test/src/is_good.c | 157 +++++ .../src/permutation_t/permutation_t.c | 125 ++++ .../src/permutation_t/permutation_t.h | 45 ++ .../src/set_theoric_t/set_theoric_t.c | 25 + .../src/set_theoric_t/set_theoric_t.h | 24 + .../src/tensor/tens0neD/tens0neD.cpp | 500 ++++++++++++++ .../src/tensor/tens0neD/tens0neD.h | 114 +++ .../src/tensor/tensCuda/d_tensCuda.cu | 493 +++++++++++++ .../src/tensor/tensCuda/d_tensCuda.h | 69 ++ .../src/tensor/tensCuda/tensCuda.cu | 574 +++++++++++++++ .../src/tensor/tensCuda/tensCuda.h | 31 + test/Makefile | 27 +- test/compile.sh | 4 +- test/is_good.c | 59 +- 25 files changed, 3660 insertions(+), 83 deletions(-) create mode 100644 permutation_test/Makefile create mode 100644 permutation_test/compile.sh create mode 100644 permutation_test/is_good.c create mode 100644 permutation_test/isgood.cu create mode 100644 permutation_test/src/Makefile create mode 100644 permutation_test/src/coordinate/coordinate.h create mode 100644 permutation_test/src/dimension/dimension.cpp create mode 100644 permutation_test/src/dimension/dimension.h create mode 100644 permutation_test/src/dimension/dimension.hpp create mode 100644 permutation_test/src/is_good.c create mode 100644 permutation_test/src/permutation_t/permutation_t.c create mode 100644 permutation_test/src/permutation_t/permutation_t.h create mode 100644 permutation_test/src/set_theoric_t/set_theoric_t.c create mode 100644 permutation_test/src/set_theoric_t/set_theoric_t.h create mode 100644 permutation_test/src/tensor/tens0neD/tens0neD.cpp create mode 100644 permutation_test/src/tensor/tens0neD/tens0neD.h create mode 100644 permutation_test/src/tensor/tensCuda/d_tensCuda.cu create mode 100644 permutation_test/src/tensor/tensCuda/d_tensCuda.h create mode 100644 permutation_test/src/tensor/tensCuda/tensCuda.cu create mode 100644 permutation_test/src/tensor/tensCuda/tensCuda.h diff --git a/Makefile b/Makefile index c117232..a956096 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,11 @@ # lib: -lytest PROJECT_LIB=libytest.so CC=gcc -#LDFLAGS= INCLUDE_DIRS=$(PWD) #$(wildcard $(PWD)/**/include) INCLUDE=-I$(PWD)/yftest/include -I$(PWD)/yfmock/include -I$(PWD)/ytools_t/include -I$(PWD)/ybar_progress/include CFLAGS=-lpthread -Wall -Werror -fpic $(INCLUDE) +#LDFLAGS= TOPTARGETS := all clean #update_headers @@ -26,7 +26,8 @@ all: $(PROJECT_LIB) update_headers $(PROJECT_LIB): $(OBJ) echo $(OBJ) #$(CC) -shared -o $@ $^ $(INCLUDE) $(LDFLAGS) - $(CC) -shared -o $@ $^ $(LDFLAGS) + #$(CC) -shared -o $@ $^ $(LDFLAGS) + $(CC) -shared -o $@ $^ $(CFLAGS) $(SUBDIRS): diff --git a/README.md b/README.md index a85b5a8..aa3a17d 100644 --- a/README.md +++ b/README.md @@ -36,8 +36,6 @@ We can copy `libytest.so` in a directory in `/usr/lib/` or copy `libytest.so` in add ```export LD_LIBRARY_PATH=/path_to/lib_ytest:$LD_LIBRARY_PATH``` in `~/.bashrc`. - - ## test examples `test/is_good.c` @@ -53,6 +51,20 @@ chmod +x compile.sh ./compile.sh "is_good.c" ./launch_is_good_c ``` +## error +if we have +``` +./launch_is_good_m +./launch_is_good_m: error while loading shared libraries: libytest.so: cannot open shared object file: No such file or directory +```` +we need to add LD_LIBRARY_PATH env permanantly or +``` +LD_LIBRARY_PATH=/path_to/lib_ytest ./launch_is_good_m +``` +it is th same if `./launch_is_good_c` do not find `libytest.so` + + + ## some compile options ### if need debug print `./compile "is_good.c" "-D DEBUG=1"` diff --git a/permutation_test/Makefile b/permutation_test/Makefile new file mode 100644 index 0000000..ae98de8 --- /dev/null +++ b/permutation_test/Makefile @@ -0,0 +1,52 @@ + + + + +NAME_TEST=is_good +CC=gcc +ROOT_DIR=$(PWD) +INCLUDE_DIR=$(ROOT_DIR)/src +CFLAGS=-I$(INCLUDE_DIR) -I../include_ytest/include +LDFLAGS=-L$(PWD)/.. -lytest + +#SRC_DIR=$(ROOT_DIR)/src +#SRC=$(wildcard */*/*.c) +SRC=$(wildcard **/**/*.c) +OBJ=$(SRC:.c=.o) +#HEADS=$(OBJS:.o=.h) +TEST_DIR=$(PWD) +EXECSRC=$(NAME_TEST).c +EXEC=launch_$(NAME_TEST)_m +PERMSRC=src/permutation_t/permutation_t.c +PERMSRC_O=$(PERMSRC:.c=.o) +SETTSRC=src/set_theoric_t/set_theoric_t.c +SETTSRC_O=$(SETTSRC:.c=.o) +TOOLSRC=../ytools_t/src/tools_t/tools_t.c +TOOLSRC_O=$(TOOLSRC:.c=.o) + +LIB_YTEST=$(PWD)/../libytest.so + +all: $(EXEC) $(LIB_YTEST) + +$(EXEC): $(EXECSRC) $(OBJ) + $(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS) + +$(PERMSRC_O): $(PERMSRC) $(SETTSRC_O) + $(CC) -o $@ -c $< $(CFLAGS) + +$(SETTSRC_O) : $(SETTSRC) $(TOOLSRC_O) + $(CC) -o $@ -c $< $(CFLAGS) + +$(TOOLSRC_O): $(TOOLSRC) + $(CC) -o $@ -c $< $(CFLAGS) + +.PHONY: clean mrproper + +clean: + rm -f $(OBJ) + +mrproper: clean + rm -f $(EXEC) + +run: $(EXEC) + $(EXEC) -h diff --git a/permutation_test/compile.sh b/permutation_test/compile.sh new file mode 100644 index 0000000..80e9ab9 --- /dev/null +++ b/permutation_test/compile.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +if [ "$#" -le 0 ] ; then + echo "Usage: $0 is_good.c" >&2 + echo "for example to compile: is_good.c" >&2 + exit 1 +fi +if [ "$#" -le 1 ] ; then + echo "Usage: $0 $1" >&2 + echo " we can add more option for example '-D DEBUG=1' to have debug print, '-D HK' to have gtest like prompt, od '-g' to gbd" >&2 + echo "for example: $0 $1 \"-D DEBUG=1 -D HK -g\"" +fi + + + +gcc -o launch_is_good_c $1 -L$PWD/../ $2 -lytest -I../include_ytest/include src/permutation_t/permutation_t.o src/set_theoric_t/set_theoric_t.o -I./src +#gcc -o launch_is_good_c $1 $2 -lytest -I../include_ytest src/permutation_t/permutation_t.o src/set_theoric_t/set_theoric_t.o -I./src + +export LD_LIBRARY_PATH=$PWD/../:LD_LIBRARY_PATH + + +#gcc $1 src/ftest/ftest.c src/fmock/fmock.c src/tools_t/tools_t.c src/bar_progress/bar_progress.c src/permutation_t/permutation_t.c src/set_theoric_t/set_theoric_t.c -I./include $2 -o launch_is_good_c -lpthread diff --git a/permutation_test/is_good.c b/permutation_test/is_good.c new file mode 100644 index 0000000..1c37377 --- /dev/null +++ b/permutation_test/is_good.c @@ -0,0 +1,374 @@ +#include +#include +#include + +// for sleep ! +#ifdef __linux__ + #include +#elif _WIN32 + #include +#endif + +#include "ftest/ftest.h" +#include "fmock/fmock.h" + +#if 1 + +#include "permutation_t/permutation_t.h" + + +TEST(size_permutation2){ + PRINTF("another size_permutation2 again\n"); + ASSERT_TRUE(false); +} + +TEST(size_permutation) +{ + PERMUTATION_TYPE_CHAR *p = CREATE_PERMUTATION_TYPE_CHAR(3); + + PRINTF(" size = %lu \n",p->size); + EXPECT_EQ(p->size, 3); + PRINTF("test size_permutation2\n"); +} + +#endif + +TEST(size_permutation2){ + PRINTF("another size_permutation2 again false\n"); + bool val_bool = false; + ASSERT_TRUE(val_bool); +} +TEST(size_permutation2) +{ + PRINTF("test size_permutation2\n"); + bool val_bool = true; + ASSERT_FALSE(val_bool); +/* + PERMUTATION_TYPE_CHAR *p = CREATE_PERMUTATION_TYPE_CHAR(3); + + PRINTF(" size = %u \n",p->size); + if(p->size == 3) print_OK_with_msg_endl(" FF yeah GOOD test size passed "); + else print_KO_with_msg_endl("NOT GOOD test size not passed "); +*/ +} +TEST(float_equal){ + PRINTF("another size_permutation2 float\n"); + ASSERT_TRUE(true); + float a = 1.00001f; + float b = 1.00001f; + ASSERT_EQ_TYPE_FLOAT(a,b); + b=1.0000101f; + ASSERT_EQ_TYPE_FLOAT(a,b); + ASSERT_EQ_TYPE_FLOAT(1.0000102f,b); +} +TEST(double_equal){ + PRINTF("another size_permutation2 double\n"); + ASSERT_TRUE(true); + double a = 1.00000001; + double b = 1.00000001; + ASSERT_EQ_TYPE_DOUBLE(a,b); + b=1.00000001000000001; + ASSERT_EQ_TYPE_DOUBLE(a,b); + ASSERT_EQ_TYPE_DOUBLE(1.0000000100000002,b); +} + +TEST(){ + unsigned char c = 'a'; + + debug_print("another size_permutation2, a = %c\n",c); + ASSERT_FALSE(true); + ASSERT_TRUE(true); + ASSERT_TRUE(true); +} + + +TEST(){ + sleep(3); + int a = 5; + long b = 5; + ASSERT_EQ(a,b); + a=4; + ASSERT_EQ(a,b); + +} + +TEST(expect){ + sleep(2); + int a = 5; + int b = 6; + EXPECT_EQ(a,b); + // SKIP(); + SKIP("%s\n","on skip eq string"); + EXPECT_EQ_TYPE_STRING("hello","hello"); + float f1 = 1.00019999, f2=1.00019999; + EXPECT_EQ_TYPE_FLOAT(f1,f2); + +} + +TEST(){ + PRINTF("no test, only print\n"); +} + +TEST(){ + PRINTF("no test, only print\n"); +} + +TEST(){ + PRINTF("no test, only print\n"); +} + + +TEST(){ + + PERMUTATION_TYPE_CHAR *p_char = CREATE_PERMUTATION_TYPE_CHAR(6); + p_char->perm[0]='B'; + p_char->perm[1]='A'; + p_char->perm[2]='Y'; + p_char->perm[3]='C'; + p_char->perm[4]='D'; + p_char->perm[5]='Z'; + + PERMUTATION_TYPE_SIZE_T *tr_p_char = TRANSLATE_TO_SET_THEORIC_SIZE_T_TYPE_CHAR(p_char); + + for(int i = 0; i < tr_p_char->size; ++i) PRINTF(" [%d ]%ld ,",i,tr_p_char->perm[i]); + PRINTF("p_char == %s\n",p_char->perm); +} + +TEST(lessThan){ + long int a=1,b=2; + EXPECT_LT(a,b); + EXPECT_LT(b,a); + +} + +TEST(sleep){sleep(2);} +TEST(sleep){sleep(2);} +TEST(sleep){sleep(2);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} + + +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} + +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} +TEST(sleep){sleep(1);} + + +MOCK_FUNC(int, f_mock, (), ()) + +EXPECT_MOCK_CALL(int,f_mock, (),false, 2) { + EXPECT_EQ_IN_MOCKF(21,21,f_mock); + EXPECT_EQ(1,3); + EXPECT_EQ(4,4); + EXPECT_EQ_IN_MOCKF(23,24,f_mock); return 12;} +EXPECT_MOCK_CALL(int,f_mock, (),1, 1) { EXPECT_EQ_IN_MOCKF(23,21,f_mock);return 10;} + +EXPECT_MOCK_CALL(int,f_mock, (),1==2||2<1, 1) {return 18;} +EXPECT_MOCK_CALL(int,f_mock, (),1, INFINITY) {return -18;} + +TEST(mockf1){ + INIT_CALLER_MOCK(f_mock); + + for(int i = 0; i<8; ++i){ + + LOG("call f_mock:%d: ret:%d\n",i,f_mock()); + // int val=f_mock(); + //PRINTF("call f_mock:%d: ret:%d\n",i,val); + } + +} + +MOCK_FUNC(int, f2_mock,(int a,int b),(a,b)) + +STR_PRINT_CUR_VAR(f2_mock, (int a,int b),(a,b)){ + char *ret=malloc(150); + //char ret[150]; + sprintf(ret,"(int)a: %d, (int)b: %d",a,b); + return ret; +} + + +EXPECT_MOCK_CALL(int, f2_mock, (int a,int b), (a call f2_mock:%d: %d\n",0,f2_mock(1,4)); + + + for(int i=0; i<8; ++i){ + + if(i<2) { + //int val = f2_mock(i,4); + //LOG("call f2_mock:%d: %d\n",i,val); + LOG("call f2_mock:%d: %d\n",i,f2_mock(i,4)); + + } + else if(i<4) LOG("call:%d: %d\n",i,f2_mock(i,3)); + else LOG("call:%d:%d\n",i,f2_mock(i,i*i)); + } + +} + + +TEST(f3_mock_test){ + INIT_CALLER_MOCK(f3_mock); + + + for(int i=0; i<7; ++i){ + + if(i<1) { + LOG("call f3_mock:%d: %d\n",i,f3_mock(1,i)); + + } + else LOG("call:%d:%d\n",i,f3_mock(i,i*i)); + } + for(int i=COLOR_SZ-1; i>=0; --i) + LOG("%s colors_fld\n",colors_f[i]); + +} + +MOCK_FUNC(int, f4_mock,(int a,int b),(a,b)) +STR_PRINT_CUR_VAR(f4_mock, (int a,int b),(a,b)){ + char *ret=malloc(150); + //char ret[150]; + sprintf(ret,"(int)a: %d, (int)b: %d",a,b); + return ret; +} + +TEST(f4_mock_test){ + //EXPECT_EQ(1,f4_mock(1,1)); + PRINTF("f4 no excepted create ret: %d\n",f4_mock(1,1)); + PRINTF("second call f4 : %d\n",f4_mock(2,0)); + +} + +MOCK_FUNC(int, f5_mock,(int a,int b, int c),(a,b,c)) + +TEST(f5__mock){ + LOG("f5 ???:%d\n",f5_mock(1,2,3)); + LOG("f5 !!!:%d\n",f5_mock(2,5,3)); +} + +MOCK_FUNC(int, f6_mock,(int a,int b, int c),(a,b,c)) +STR_PRINT_CUR_VAR(f6_mock,(int a,int b, int c),(a,b,c)){ + char *ret=malloc(150); + sprintf(ret,"(%d,%d,%d)",a,b,c); + return ret; +} + +EXPECT_MOCK_CALL(int, f6_mock,(int a, int b, int c),((ab),2){ + return a*b; +} + + +TEST(f7_mock_test){ + int v0=f7_mock(1,1); + PRINTF("f7 ret: %d\n",v0); + int v1=f7_mock(2,0); + PRINTF("second call f7 : %d\n",v1); + +} + + +int main(int argc, char **argv){ + + //run_all_tests(); + //run_all_tests_parallel(4); + + run_all_tests_args(argc, argv); + + //purge_tests(); + //run_some_tests(8, 1, 2, 2, 3, 3, 0, 4, 1); + //run_some_tests(8, 5, 7, 1, 1, 1, 1, 1, 1); + //run_some_tests_one_by_one(3, 1, 2, 2); + //run_all_tests_exept(2, 1, 3); + return 0; +} diff --git a/permutation_test/isgood.cu b/permutation_test/isgood.cu new file mode 100644 index 0000000..6aeaced --- /dev/null +++ b/permutation_test/isgood.cu @@ -0,0 +1,652 @@ +#include +#include + +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tens0neD/tens0neD.h" +#include "src/tensor/tens0neD/tens0neD.h" +//#include "cudatensor.h" +//#include "/home/fanasina/progr_/ptens0neD/src/tensor/tensCuda/tensCuda.h" +#include "src/tensor/tensCuda/tensCuda.h" +/*TEST(LineraCoodTransform, check_print) { + int t3[] = { [0] = 2,[1] = 4,[2] = 3 }; + + struct dimension D0(3, t3); + int coor0[3] = { 1,3,2 }; + int* coor1 = new int[3]; + + int l0 = D0.CoordToLinear(coor0); + + D0.print(); + + D0.LinearToCoord(coor1, l0); + + for (int i = 0; i < D0.rank; i++) { + EXPECT_EQ(coor0[i], coor1[i]) << " coor0: " << coor0[i] << " coor1: " << coor1[i] << " i: " << i; + } +}*/ + +TEST(subArray, concatArray) { + int t[] = { 1,5,6,2,3 }; + int t0[] = { 1,5,6 }; + int t1[] = { 2,3 }; + int n = 5; + int s0[3]; + int s1[2]; + int s[n]; + + subArray(s0, t, 0, 3, 0); + subArray(s1, t, 0, 2, 3); + ASSERT_EQ(0, memcmp(t0, s0, sizeof(int) * 3)); + ASSERT_EQ(0, memcmp(t1, s1, sizeof(int) * 2)); + + concatArray(s, s0, s1, 0, 0, 3, 0, 2); + ASSERT_EQ(0, memcmp(t, s, sizeof(int) * 5)); + +} +TEST(tensorProdpetit, floatTemp) { + + /*int t3[] = { 2, 4, 3 }; + + int t4[] = { 2, 4, 3, 2 };*/ + int t3[] = { 3, 6, 5 }; + + int t4[] = { 3, 5, 8, 4 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + tensorProd(M, M3, M4); + //tensorProd(M, M4, M3); + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int lin3, lin4, lin; + d = M.Dim; + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + for (int i = 0; i < d3.rank; i++) coord3[i] = idx3[i]; + for (int i = 0; i < d4.rank; i++) coord4[i] = idx4[i]; + + concatArray(coord, coord3, coord4, 0, 0, d3.rank, 0, d4.rank); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + lin = d.CoordToLinear(coord); + + //ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + //ASSERT_NEAR(M.elements[lin], M3.elements[lin3] * M4.elements[lin4], 0.0001) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + } + + + +} + +TEST(tensorProd, doubleTemp) { + + int t3[] = { 2, 4, 3 }; + + int t4[] = { 4, 3, 2,3 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + tensorProd(M, M3, M4); + //tensorProd(M, M4, M3); + d = M.Dim; + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int lin3, lin4, lin; + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + for (int i = 0; i < d3.rank; i++) coord3[i] = idx3[i]; + for (int i = 0; i < d4.rank; i++) coord4[i] = idx4[i]; + + concatArray(coord, coord3, coord4, 0, 0, d3.rank, 0, d4.rank); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + lin = d.CoordToLinear(coord); + + //ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]); + ASSERT_DOUBLE_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]); + //ASSERT_NEAR(M.elements[lin], M3.elements[lin3] * M4.elements[lin4], 0.001) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + } + + +} + + + + +void printArray(int* t, int sz) { + for (int i = 0; i < sz;i++) printf(" %d ", t[i]); +} + +TEST(tensorContractnProd, floatTemp) { + + int t3[] = { 2, 4, 3 }; + + int t4[] = { 4, 3, 2, 3 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + int dee = 2; + + try { + //tensorContractnProd(M, M3, M4, dee); + tensorContractnProd(M, M3, M4, dee); + } + catch (const std::invalid_argument& e) { + printf("bye from test tensorContractnProd floatTemp invalid arg! deep:\n"); + dimension dM; + extractDimNestingDepth(dM, d3, d4, dee); + dM.print(); + ASSERT_TRUE(false); + } + + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int l0, l1; + l0 = M3.Dim.rank - dee; + l1 = M4.Dim.rank - dee; + int pcoord3[l0]; + int pcoord4[l1]; + int r[dee]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + Tensor Msum(d); + //for (size_t idx = 0; idx < d.size; idx++) Msum.elements[idx] = 0.0f; + + //Msum.print(); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + + for (int i = 0; i < l0; i++) pcoord3[i] = idx3[i]; + for (int i = 0; i < l1; i++) pcoord4[i] = idx4[i + dee]; + concatArray(coord, pcoord3, pcoord4, 0, 0, l0, 0, l1); + lin = d.CoordToLinear(coord); + Msum.elements[lin] = 0.0f; + //for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + //for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + { + for (int i = 0; i < dee; i++) r[i] = idx4[i]; + + concatArray(coord3, pcoord3, r, 0, 0, l0, 0, dee); + concatArray(coord4, r, pcoord4, 0, 0, dee, 0, l1); + //printf("[");printArray(coord3, M3.Dim.rank); printf("]["); printArray(coord4, M4.Dim.rank);printf("] =*= ("); printArray(coord, Msum.Dim.rank); printf(") |||"); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + + Msum.elements[lin] += (M3.elements[lin3] * M4.elements[lin4]); + //printf("lin:%d lin3:%d lin4:%d el+:%f\n", lin, lin3, lin4, Msum.elements[lin]); + + } + + ASSERT_FLOAT_EQ(Msum.elements[lin], M.elements[lin]); + + } + + + +} + +TEST(tensorContractnProdD, doubleTemp) { + + int t3[] = { 2, 3, 4 }; + + int t4[] = { 3, 4, 2, 3 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + int dee = 2; + + try { + //tensorContractnProd(M, M3, M4, dee); + tensorContractnProd(M, M3, M4, dee); + } + catch (const std::invalid_argument& e) { + printf("bye from test tensorContractnProd floatTemp invalid arg! deep:\n"); + dimension dM; + extractDimNestingDepth(dM, d3, d4, dee); + dM.print(); + ASSERT_TRUE(false); + } + + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int l0, l1; + l0 = M3.Dim.rank - dee; + l1 = M4.Dim.rank - dee; + int pcoord3[l0]; + int pcoord4[l1]; + int r[dee]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + Tensor Msum(d); + //for (size_t idx = 0; idx < d.size; idx++) Msum.elements[idx] = 0.0f; + + //Msum.print(); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + + for (int i = 0; i < l0; i++) pcoord3[i] = idx3[i]; + for (int i = 0; i < l1; i++) pcoord4[i] = idx4[i + dee]; + concatArray(coord, pcoord3, pcoord4, 0, 0, l0, 0, l1); + lin = d.CoordToLinear(coord); + Msum.elements[lin] = 0.0f; + //for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + //for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + { + for (int i = 0; i < dee; i++) r[i] = idx4[i]; + + concatArray(coord3, pcoord3, r, 0, 0, l0, 0, dee); + concatArray(coord4, r, pcoord4, 0, 0, dee, 0, l1); + //printf("[");printArray(coord3, M3.Dim.rank); printf("]["); printArray(coord4, M4.Dim.rank);printf("] =*= ("); printArray(coord, Msum.Dim.rank); printf(") |||"); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + + Msum.elements[lin] += (M3.elements[lin3] * M4.elements[lin4]); + //printf("lin:%d lin3:%d lin4:%d el+:%f\n", lin, lin3, lin4, Msum.elements[lin]); + + } + + ASSERT_DOUBLE_EQ(Msum.elements[lin], M.elements[lin]); + + } + + + +} + +TEST(reverseArray, innt) { + int n = 6; + int t4[6] = { 3, 4, 2, 3 ,5, 1 }; + int revt4[6] = { 1,5,3,2, 4, 3 }; + reverseArray(t4, n); + for (int i = 0; i < n; i++) { + ASSERT_EQ(t4[i], revt4[i]); + } +} + +TEST(tensorContractnReverseProd, floatTemp) { + + int t3[] = { 4, 4, 3 }; + + int t4[] = { 3, 4, 7, 2 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(3.0f); // M3.print(); + M4.initVal(2.0f); // M4.print(); + + int dee = 2; + + try { + //tensorContractnProd(M, M3, M4, dee); + tensorContractnReverseProd(M, M3, M4, dee); + } + catch (const std::invalid_argument& e) { + printf("bye from test tensorContractnProd floatTemp invalid arg! deep:\n"); + dimension dM; + extractDimNestingDepth(dM, d3, d4, dee); + dM.print(); + ASSERT_TRUE(false); + } + + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int l0, l1; + l0 = M3.Dim.rank - dee; + l1 = M4.Dim.rank - dee; + int pcoord3[l0]; + int pcoord4[l1]; + int r[dee]; + int rev[dee]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + Tensor Msum(d); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + + for (int i = 0; i < l0; i++) pcoord3[i] = idx3[i]; + for (int i = 0; i < l1; i++) pcoord4[i] = idx4[i + dee]; + concatArray(coord, pcoord3, pcoord4, 0, 0, l0, 0, l1); + lin = d.CoordToLinear(coord); + Msum.elements[lin] = 0.0f; + //for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + //for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + { + for (int i = 0; i < dee; i++) { + r[i] = idx4[i]; + rev[i] = idx4[dee - 1 - i]; + } + + concatArray(coord3, pcoord3, rev, 0, 0, l0, 0, dee); + concatArray(coord4, r, pcoord4, 0, 0, dee, 0, l1); + //printf("[");printArray(coord3, M3.Dim.rank); printf("]["); printArray(coord4, M4.Dim.rank);printf("] =*= ("); printArray(coord, Msum.Dim.rank); printf(") |||"); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + + Msum.elements[lin] += (M3.elements[lin3] * M4.elements[lin4]); + //printf("lin:%d lin3:%d lin4:%d el+:%f\n", lin, lin3, lin4, Msum.elements[lin]); + } + ASSERT_FLOAT_EQ(Msum.elements[lin], M.elements[lin]); + } +} + + +TEST(cudaTensorProd, floatTemp) { + + int t3[] = { 15, 6, 24 }; + + int t4[] = { 23, 15, 6, 10 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(1.0f); // M3.print(); + M4.initVal(0.5f); // M4.print(); + + cudaTensorProd(M, M3, M4); + //tensorProd(M, M4, M3); + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + for (int i = 0; i < d3.rank; i++) coord3[i] = idx3[i]; + for (int i = 0; i < d4.rank; i++) coord4[i] = idx4[i]; + + concatArray(coord, coord3, coord4, 0, 0, d3.rank, 0, d4.rank); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + lin = d.CoordToLinear(coord); + + //ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + + //ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + ASSERT_FLOAT_EQ(M.elements[lin], M3.elements[lin3] * M4.elements[lin4]) << " M " << M.elements[lin] << " lin: " << lin << " M3: " << M3.elements[lin3] << " lin3:" << lin3 << " lin4: " << lin4 << " M4 " << M4.elements[lin4] << std::endl; + //std::cout << " M " << M.elements[lin] << " lin: " << lin << " M3: " << M3.elements[lin3] << " lin3:" << lin3 << " lin4: " << lin4 << " M4 " << M4.elements[lin4] << std::endl; + + //ASSERT_NEAR(M.elements[lin], M3.elements[lin3] * M4.elements[lin4], 0.0001) << " lin: " << lin << " lin3: " << lin3 << " lin4: " << lin4; + } + +} + + +TEST(permuteTensor, float) { + int t4[] = { 3, 8, 2, 4 }; + struct dimension d4(4, t4); + struct Tensor M4(d4), M; + M4.initVal(1.0f); + permutation p(4, true); + int n = 5; + //for (int n = 0; n < 24;n++) { + PlaceToTab(p.perm, n, p.size); + printf(" %*d : ", 2, n); + for (int i = 0; i < p.size; i++)printf("(%d)%d ", i, p.perm[i]);printf("\n"); + permuteTensor(M, M4, p); + //permuteTensorDef(M, M4, p); + int ind[4]; + int coor[4]; + size_t cM, cM4; + for (ind[0] = 0; ind[0] < M4.Dim.dim[0]; ind[0]++) + for (ind[1] = 0; ind[1] < M4.Dim.dim[1]; ind[1]++) + for (ind[2] = 0; ind[2] < M4.Dim.dim[2]; ind[2]++) + for (ind[3] = 0; ind[3] < M4.Dim.dim[3]; ind[3]++) { + p.permute(coor, ind); + cM = M.Dim.CoordToLinear(coor); + cM4 = M4.Dim.CoordToLinear(ind); + //printf("M[%ld]=%f M4[%ld]=%f \n", cM, M.elements[cM], cM4, M4.elements[cM4]); + ASSERT_FLOAT_EQ(M.elements[cM], M4.elements[cM4]); + } + +} + + +TEST(cudapermuteTensor, float) { + int t4[] = { 3, 8, 2, 4 }; + struct dimension d4(4, t4); + struct Tensor M4(d4), M; + M4.initVal(1.0f); + permutation p(4, true); + int n = 5; + //for (int n = 0; n < 24;n++) { + PlaceToTab(p.perm, n, p.size); + printf(" %*d : ", 2, n); + for (int i = 0; i < p.size; i++)printf("{%d}%d ", i, p.perm[i]);printf("\n"); + cudapermuteTensor(M, M4, p); + //permuteTensor(M, M4, p); + //permuteTensorDef(M, M4, p); + int ind[4]; + int coor[4]; + size_t cM, cM4; + for (ind[0] = 0; ind[0] < M4.Dim.dim[0]; ind[0]++) + for (ind[1] = 0; ind[1] < M4.Dim.dim[1]; ind[1]++) + for (ind[2] = 0; ind[2] < M4.Dim.dim[2]; ind[2]++) + for (ind[3] = 0; ind[3] < M4.Dim.dim[3]; ind[3]++) { + p.permute(coor, ind); + cM = M.Dim.CoordToLinear(coor); + cM4 = M4.Dim.CoordToLinear(ind); + //printf("M[%ld]=%f M4[%ld]=%f \n", cM, M.elements[cM], cM4, M4.elements[cM4]); + ASSERT_FLOAT_EQ(M.elements[cM], M4.elements[cM4]); + } +} + +TEST(scanPermuteMatchContractTensorfromSrcToDst1, permId) { + int t[] = { 3, 8, 2, 3, 4 }; + //int tm[] = { 4, 2, 7, 3 }; + int tm[] = { 2, 3,4,7 }; + struct dimension d(5, t); + struct dimension dm(4, tm); + struct Tensor M4(d), M(dm); + M4.initVal(1.0f); + M.initVal(1.0f); + int dee = 3; + //int result[4] = { 1,3,0,2 }; + int result[4] = { 0,1,2,3 }; + int perm[M.Dim.rank]; + ASSERT_TRUE(scanPermuteMatchContractTensorfromSrcToDst(perm, M, M4, dee)); + for (int i = 0; i < M.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(" first perm \n"); + ASSERT_EQ(0, memcmp(result, perm, sizeof(int) * M.Dim.rank)); + + Tensor tM; + permutation p(M.Dim.rank, perm); + permuteTensor(tM, M, p); + + ASSERT_FALSE(scanPermuteMatchContractTensorfromSrcToDst(perm, M, M4, 4)); + for (int i = 0; i < M.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(": last perm \n"); + tM.Dim.print(); + int resultDim[] = { 2,3,4,7 }; + ASSERT_EQ(0, memcmp(resultDim, tM.Dim.dim, sizeof(int) * tM.Dim.rank)); + +} + +TEST(scanPermuteMatchContractTensorfromSrcToDst2, floatest) { + int t[] = { 3, 8, 2, 3, 4 }; + int tm[] = { 4, 2, 7, 3 }; + //int tm[] = { 2, 3,4,7 }; + struct dimension d(5, t); + struct dimension dm(4, tm); + struct Tensor M4(d), M(dm); + M4.initVal(1.0f); + M.initVal(1.0f); + int dee = 3; + int result[4] = { 1,3,0,2 }; + //int result[4] = { 0,1,2,3 }; + int perm[M.Dim.rank]; + ASSERT_TRUE(scanPermuteMatchContractTensorfromSrcToDst(perm, M, M4, dee)); + for (int i = 0; i < M.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(" first perm \n"); + ASSERT_EQ(0, memcmp(result, perm, sizeof(int) * M.Dim.rank)); + + Tensor tM; + permutation p(M.Dim.rank, perm); + permuteTensor(tM, M, p); + + ASSERT_FALSE(scanPermuteMatchContractTensorfromSrcToDst(perm, M, M4, 4)); + for (int i = 0; i < M.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(": last perm \n"); + tM.Dim.print(); + int resultDim[] = { 2,3,4,7 }; + ASSERT_EQ(0, memcmp(resultDim, tM.Dim.dim, sizeof(int) * tM.Dim.rank)); + + + +} + + +TEST(cudaTensorContractNestProd, floatTemp) { + + int t3[] = { 77, 8, 25 }; + + int t4[] = { 8, 25, 52, 144 }; + struct dimension d3(3, t3), d4(4, t4), d; + + struct Tensor M3(d3), M4(d4), M; + M3.initVal(1.0f); // M3.print(); + M4.initVal(0.0f); // M4.print(); + + int dee = 2; + + M4.Dim.print(); + + try { + //tensorContractnProd(M, M3, M4, dee); + cudaTensorContractNestProd(M, M3, M4, dee); + } + catch (const std::invalid_argument& e) { + printf("bye from test tensorContractnProd floatTemp invalid arg! deep: \n"); + dimension dM; + extractDimNestingDepth(dM, d3, d4, dee); + dM.print(); + ASSERT_TRUE(false); + } + + int coord[M.Dim.rank]; + int coord3[M3.Dim.rank]; + int coord4[M4.Dim.rank]; + int idx3[M3.Dim.rank]; + int idx4[M4.Dim.rank]; + + int l0, l1; + l0 = M3.Dim.rank - dee; + l1 = M4.Dim.rank - dee; + int pcoord3[l0]; + int pcoord4[l1]; + int r[dee]; + //int rev[dee]; + + int lin3, lin4, lin; + d = M.Dim; + d.print(); + + Tensor Msum(d); + + for (idx3[0] = 0; idx3[0] < d3.dim[0];idx3[0]++) + for (idx4[2] = 0; idx4[2] < d4.dim[2];idx4[2]++) + for (idx4[3] = 0; idx4[3] < d4.dim[3];idx4[3]++) { + + for (int i = 0; i < l0; i++) pcoord3[i] = idx3[i]; + for (int i = 0; i < l1; i++) pcoord4[i] = idx4[i + dee]; + concatArray(coord, pcoord3, pcoord4, 0, 0, l0, 0, l1); + lin = d.CoordToLinear(coord); + Msum.elements[lin] = 0.0f; + //for (idx3[1] = 0; idx3[1] < d3.dim[1];idx3[1]++) + //for (idx3[2] = 0; idx3[2] < d3.dim[2]; idx3[2]++) + for (idx4[0] = 0; idx4[0] < d4.dim[0];idx4[0]++) + for (idx4[1] = 0; idx4[1] < d4.dim[1];idx4[1]++) + { + for (int i = 0; i < dee; i++) { + r[i] = idx4[i]; + //rev[i] = idx4[dee - 1 - i]; + } + + //concatArray(coord3, pcoord3, rev, 0, 0, l0, 0, dee); + concatArray(coord3, pcoord3, r, 0, 0, l0, 0, dee); + concatArray(coord4, r, pcoord4, 0, 0, dee, 0, l1); + //printf("[");printArray(coord3, M3.Dim.rank); printf("]["); printArray(coord4, M4.Dim.rank);printf("] =*= ("); printArray(coord, Msum.Dim.rank); printf(") |||"); + lin3 = d3.CoordToLinear(coord3); + lin4 = d4.CoordToLinear(coord4); + + Msum.elements[lin] += (M3.elements[lin3] * M4.elements[lin4]); + //printf("lin:%d lin3:%d lin4:%d el+:%f\n", lin, lin3, lin4, Msum.elements[lin]); + } + ASSERT_FLOAT_EQ(Msum.elements[lin], M.elements[lin]) << " lin: " << lin << " Msumelem: " << Msum.elements[lin] << " Melem: " << M.elements[lin]; + } +} + + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/permutation_test/src/Makefile b/permutation_test/src/Makefile new file mode 100644 index 0000000..897eb11 --- /dev/null +++ b/permutation_test/src/Makefile @@ -0,0 +1,52 @@ + +NAME_TEST=is_good +CC=gcc +LDFLAGS=-lpthread +ROOT_DIR=$(shell pwd) +INCLUDE_DIR=$(ROOT_DIR) +CFLAGS=-I$(INCLUDE_DIR) +SRC_DIR=$(ROOT_DIR) +SRC=$(wildcard src/*/*.c) +OBJ=$(SRC:.c=.o) +#HEADS=$(OBJS:.o=.h) +TEST_DIR=$(ROOT_DIR) +EXECSRC=$(TEST_DIR)/$(NAME_TEST).c +EXEC=$(ROOT_DIR)/launch_$(NAME_TEST) +PERMSRC=$(wildcard perm*/*perm*.c) +PERMSRC_O=$(PERMSRC:.c=.o) +SETTSRC=$(wildcard set*/set*.c) +SETTSRC_O=$(SETTSRC:.c=.o) +TOOLSRC=$(wildcard too*/tool*.c) +TOOLSRC_O=$(TOOLSRC:.c=.o) +TESTSRC=$(wildcard *test*/*test*.c) +TESTSRC_O=$(TESTSRC:.c=.o) + + +all: $(EXEC) + +$(EXEC): $(EXECSRC) $(OBJ) + $(CC) -o $@ $^ -I$(INCLUDE_DIR) $(LDFLAGS) + + +$(TESTSRC_O): $(TESTSRC) $(TOOLSRC_O) + $(CC) -o $@ -c $< $(CFLAGS) + +$(PERMSRC_O): $(PERMSRC) $(SETTSRC_O) + $(CC) -o $@ -c $< $(CFLAGS) + +$(SETTSRC_O) : $(SETTSRC) $(TOOLSRC_O) + $(CC) -o $@ -c $< $(CFLAGS) + +$(TOOLSRC_O): $(TOOLSRC) + $(CC) -o $@ -c $< $(CFLAGS) + +.PHONY: clean mrproper + +clean: + rm -f $(OBJS) + +mrproper: clean + rm -f $(EXEC) + +run: $(EXEC) + $(EXEC) diff --git a/permutation_test/src/coordinate/coordinate.h b/permutation_test/src/coordinate/coordinate.h new file mode 100644 index 0000000..2850d0a --- /dev/null +++ b/permutation_test/src/coordinate/coordinate.h @@ -0,0 +1,21 @@ +#ifndef __COORDINATE_C__H__ +#define __COORDINATE_C__H__ + +#include "dimension/dimension.h" + + +struct coordinate +{ + size_t lin_coo; + unsigned int *coord; + struct dimension *dimension; +}; + +typedef coordinate coordinate; + +void LinearToCoord(struct coordinate *coor); +void CoordToLinear(struct coordinate *coor); + + + +#endif diff --git a/permutation_test/src/dimension/dimension.cpp b/permutation_test/src/dimension/dimension.cpp new file mode 100644 index 0000000..aba7d8a --- /dev/null +++ b/permutation_test/src/dimension/dimension.cpp @@ -0,0 +1,181 @@ +#include +#include + +#include + +#include +#include + + + +//#include "/home/fanasina/progr_/ptens0neD/dimension/dimension.h" + +//#include "/home/fanasina/progr_/ptens0neD/permutation/permutation.h" + + +#include "dimension/dimension.hpp" + +#include "permutation/permutation.hpp" +//#include "permutation.h" + +/*void dimension::initDim(int* arr, bool end = true) { + endian = end; + delete[]dim; + dim = new int[rank]; + size = 1; + for (int i = 0; i < rank; ++i) { + dim[i] = arr[i]; + size *= dim[i]; + } +}*/ + +dimension& dimension::operator=(const dimension& d) { + int oldRank = rank; + rank = d.rank; + size = d.size; + initDim(d.dim, oldRank); + //for (int i = 0; i < rank; i++) dim[i] = d.dim[i]; + return *this; +} + +dimension& dimension::operator+=(const dimension& d) { + int oldRank = rank; + int* t = new int[rank + d.rank]; + for (int i = 0; i < rank; i++) t[i] = dim[i]; + for (int i = 0; i < d.rank; i++) t[rank + i] = d.dim[i]; + size *= d.size; + rank += d.rank; + initDim(t, oldRank); + return *this; +} + +void dimension::LinearToCoord(int* ret, int lin) const { + int begin = 0, end = rank - 1; + int (*iter)(int) = incr; + bool (*cond)(int, int) = isLessThan; + if (endian == false) { + //if (endian) { + begin = rank - 1; end = 0; + iter = decr; cond = isGreatThan; + } + //printf("to coor begin = %d end = %d \n", begin, end); + + int sm = lin; + int pp = size; + for (int i = begin; cond(i, end); i = iter(i)) { + //printf(" i: %d ", i); + pp /= dim[i]; + ret[i] = sm / pp; + sm %= pp; + //printf("sm[%d] = %d , pp=%d ; ", i, sm, pp); + } + ret[end] = sm; +} + +int dimension::CoordToLinear(int* coo) const { + int begin = 0; + int end = rank - 1; + int (*iter)(int); iter = &incr; + bool (*cond)(int, int); cond = &isLessEqThan; + + if (endian) { + begin = rank - 1; end = 0; + iter = &decr; cond = &isGreatEqThan; + } + + int pp = 1; + int sm = 0; + for (int i = begin; cond(i, end); i = iter(i)) { + sm += (coo[i] * pp); + pp *= dim[i]; + } + return sm; +} + +bool isLessEqThan(int a, int b) { return a <= b; } +bool isLessThan(int a, int b) { return a < b; } +bool isGreatEqThan(int a, int b) { return a >= b; } +bool isGreatThan(int a, int b) { return a > b; } +int incr(int i) { return i + 1; } +int decr(int i) { return i - 1; } + + +void add(dimension& d, const dimension& d0, const dimension& d1) { + int oldRank = d.rank; + int* t = new int[d0.rank + d1.rank]; + for (int i = 0; i < d0.rank; i++) t[i] = d0.dim[i]; + for (int i = 0; i < d1.rank; i++) t[d0.rank + i] = d1.dim[i]; + d.rank = d0.rank + d1.rank; + d.initDim(t, oldRank); +} + +void max(dimension& d, const dimension& d0, const dimension& d1) { + if (d0.rank > d1.rank) { + d = d0; + } + else if (d0.rank < d1.rank) { + d = d1; + } + else {// d0.rank = d1.rank + d = d0; + for (int i = 0; i < d.rank; i++) { + if (d.dim[i] < d1.dim[i]) d.dim[i] = d1.dim[i]; + } + } +} + +void min(dimension& d, const dimension& d0, const dimension& d1) { + if (d0.rank > d1.rank) { + d = d1; + } + else if (d0.rank < d1.rank) { + d = d0; + } + else {// d0.rank = d1.rank + d = d0; + for (int i = 0; i < d.rank; i++) { + if (d.dim[i] > d1.dim[i]) d.dim[i] = d1.dim[i]; + } + } +} + +void minReverse(dimension& d, const dimension& d0, const dimension& d1, bool& rev) { + if (d0.rank > d1.rank) { + d = d1; + rev = true; + } + else if (d0.rank < d1.rank) { + d = d0; + rev = false; + } + else {// d0.rank = d1.rank + d = d0; + for (int i = 0; i < d.rank; i++) { + if (d.dim[i] > d1.dim[d.rank - 1 - i]) d.dim[i] = d1.dim[d.rank - 1 - i]; + } + rev = false; + } +} + +void reverseArray(int* arr, int sz) { + int tmp[sz], i = 0; + for (; i < sz / 2; i++) { + tmp[i] = arr[i]; + arr[i] = arr[sz - 1 - i]; + } + for (; i < sz; i++) { + arr[i] = tmp[sz - 1 - i]; + } +} + +void transform(dimension& dDst, const dimension& dSrc, int* perm, int sz) { + dDst = dSrc; + setInit setIn(sz); + if (sz == dSrc.rank) { + if (isPermutation(perm, setIn, sz)) { + for (int i = 0; i < sz; i++) dDst.dim[i] = dSrc.dim[perm[i]]; + } + } +} + + diff --git a/permutation_test/src/dimension/dimension.h b/permutation_test/src/dimension/dimension.h new file mode 100644 index 0000000..d522d2d --- /dev/null +++ b/permutation_test/src/dimension/dimension.h @@ -0,0 +1,31 @@ +#ifndef __DIM__ +#define __DIM__ + +#include +#include + +struct dimension +{ + unsigned int rank; + unsigned int* dim; + size_t size; +}; +typedef dimension dimension; + + +void print_dimension(dimension d); + + +void add(dimension* d, const dimension* d0, const dimension* d1); + +void max(dimension* d, const dimension* d0, const dimension* d1); + +void min(dimension* d, const dimension* d0, const dimension* d1); + +bool minReverse(dimension* d, const dimension* d0, const dimension* d1); + +void transform(dimension* dDst, const dimension* dSrc, int* perm); + + +#endif + diff --git a/permutation_test/src/dimension/dimension.hpp b/permutation_test/src/dimension/dimension.hpp new file mode 100644 index 0000000..cf8bf66 --- /dev/null +++ b/permutation_test/src/dimension/dimension.hpp @@ -0,0 +1,90 @@ +#ifndef __DIMENSION__ +#define __DIMENSION__ + +#include +#include + +#include + +//#include "tensor.h" + +//#include "dimension.h" + +static int iArray1[1] = { 1 }; + + + +struct dimension { + //friend dimension& operator+(const dimension& d, const dimension& d1); + friend void add(dimension& d, const dimension& d0, const dimension& d1); + friend void max(dimension& d, const dimension& d0, const dimension& d1); + friend void min(dimension& d, const dimension& d0, const dimension& d1); + friend void minReverse(dimension& d, const dimension& d0, const dimension& d1, bool& Rev); + friend bool checkMatchProdTensor(dimension& d0, const dimension& d1, int nestingDepth); + friend bool checkMatchProdTensorReverse(dimension& d0, const dimension& d1, int nestingDepth); + friend void extractDimNestingDepth(dimension& dM, const dimension& d0, const dimension& d1, int nestingDepth); + + + int rank; + int* dim; + size_t size; + bool endian; //LitleEndian : true, BigEndian : false, + void initDim(int* arr, int oldRank) { + + //delete[]dim; + //dim = new int[rank]; + if (rank > oldRank) { + free(dim); + dim = (int*)malloc(rank * sizeof(int)); + } + size = 1; + for (int i = 0; i < rank; ++i) { + dim[i] = arr[i]; + size *= dim[i]; + } + } + void initDim(bool end = true) { + endian = end; + //delete[]dim; + //dim = new int[rank]; + + if (dim != NULL) free(dim); + dim = (int*)malloc(rank * sizeof(int)); + } + dimension& operator=(const dimension& d); + dimension& operator+=(const dimension& d); + //dimension& operator*=(const dimension& d); + dimension(int d = 1, int* arr = iArray1, bool end = true) { + endian = end; + rank = d; + //dim = new int[d]; + dim = (int*)malloc(d * sizeof(int)); + initDim(arr, rank); + } + void print() const { printf(" rank: %d\n", rank);for (int i = 0; i < rank; i++) printf(" %d ", dim[i]);printf("\nsize:%ld\n", size); } + void LinearToCoord(int* ret, int lin) const; + int CoordToLinear(int* coo) const; +}; + +bool isLessEqThan(int a, int b); // { return a <= b; } +bool isLessThan(int a, int b); // { return a < b; } +bool isGreatEqThan(int a, int b); // { return a >= b; } +bool isGreatThan(int a, int b); // { return a > b; } +int incr(int i); // { return i + 1; } +int decr(int i); // { return i - 1; } + + + +void add(dimension& d, const dimension& d0, const dimension& d1); + +void max(dimension& d, const dimension& d0, const dimension& d1); + +void min(dimension& d, const dimension& d0, const dimension& d1); + +void minReverse(dimension& d, const dimension& d0, const dimension& d1, bool& rev); + +void transform(dimension& dDst, const dimension& dSrc, int* perm, int sz); + + +#endif + diff --git a/permutation_test/src/is_good.c b/permutation_test/src/is_good.c new file mode 100644 index 0000000..b62472c --- /dev/null +++ b/permutation_test/src/is_good.c @@ -0,0 +1,157 @@ +#include +#include +#include + +// for sleep ! +#ifdef __linux__ + #include +#elif _WIN32 + #include +#endif + +#include "ftest/ftest.h" + +#include "permutation_t/permutation_t.h" + +TEST(size_permutation2){ + PRINTF("another size_permutation2 again\n"); + ASSERT_TRUE(false); +} + +TEST(size_permutation) +{ + PERMUTATION_TYPE_CHAR *p = CREATE_PERMUTATION_TYPE_CHAR(3); + + PRINTF(" size = %lu \n",p->size); + EXPECT_EQ(p->size, 3); + PRINTF("test size_permutation2\n"); +} +TEST(size_permutation2){ + PRINTF("another size_permutation2 again false\n"); + bool val_bool = false; + ASSERT_TRUE(val_bool); +} +TEST(size_permutation2) +{ + PRINTF("test size_permutation2\n"); + bool val_bool = true; + ASSERT_FALSE(val_bool); +/* + PERMUTATION_TYPE_CHAR *p = CREATE_PERMUTATION_TYPE_CHAR(3); + + PRINTF(" size = %u \n",p->size); + if(p->size == 3) print_OK_with_msg_endl(" FF yeah GOOD test size passed "); + else print_KO_with_msg_endl("NOT GOOD test size not passed "); +*/ +} +TEST(float_equal){ + PRINTF("another size_permutation2 float\n"); + ASSERT_TRUE(true); + float a = 1.00001f; + float b = 1.00001f; + ASSERT_EQ_TYPE_FLOAT(a,b); + b=1.0000101f; + ASSERT_EQ_TYPE_FLOAT(a,b); + ASSERT_EQ_TYPE_FLOAT(1.0000102f,b); +} +TEST(double_equal){ + PRINTF("another size_permutation2 double\n"); + ASSERT_TRUE(true); + double a = 1.00000001; + double b = 1.00000001; + ASSERT_EQ_TYPE_DOUBLE(a,b); + b=1.00000001000000001; + ASSERT_EQ_TYPE_DOUBLE(a,b); + ASSERT_EQ_TYPE_DOUBLE(1.0000000100000002,b); +} + +TEST(){ + unsigned char c = 'a'; + + debug_print("another size_permutation2, a = %c\n",c); + ASSERT_FALSE(true); + ASSERT_TRUE(true); + ASSERT_TRUE(true); +} + +TEST(){ + sleep(3); + int a = 5; + long b = 5; + ASSERT_EQ(a,b); + a=4; + ASSERT_EQ(a,b); + +} + +TEST(expect){ + sleep(2); + int a = 5; + int b = 6; + EXPECT_EQ(a,b); + //SKIP(); + SKIP("on skip eq string\n"); + EXPECT_EQ_TYPE_STRING("hello","hello"); + float f1 = 1.00019999, f2=1.00019999; + EXPECT_EQ_TYPE_FLOAT(f1,f2); + +} + +TEST(){ + PRINTF("no test, only print\n"); +} + +TEST(){ + PRINTF("no test, only print\n"); +} + +TEST(){ + PRINTF("no test, only print\n"); +} + + +TEST(){ + + PERMUTATION_TYPE_CHAR *p_char = CREATE_PERMUTATION_TYPE_CHAR(6); + p_char->perm[0]='B'; + p_char->perm[1]='A'; + p_char->perm[2]='Y'; + p_char->perm[3]='C'; + p_char->perm[4]='D'; + p_char->perm[5]='Z'; + + PERMUTATION_TYPE_SIZE_T *tr_p_char = TRANSLATE_TO_SET_THEORIC_SIZE_T_TYPE_CHAR(p_char); + + for(int i = 0; i < tr_p_char->size; ++i) PRINTF(" [%d ]%ld ,",i,tr_p_char->perm[i]); + PRINTF("p_char == %s\n",p_char->perm); +} + +TEST(lessThan){ + long int a=1,b=2; + EXPECT_LT(a,b); + EXPECT_LT(b,a); + +} + +TEST(sleep){sleep(2);} +TEST(sleep){sleep(2);} +TEST(sleep){sleep(2);} +TEST(sleep){sleep(2);} +TEST(sleep){sleep(2);} +TEST(sleep){sleep(2);} +TEST(sleep){sleep(2);} + +int main(int argc, char **argv){ + + //run_all_tests(); + //run_all_tests_parallel(4); + + run_all_tests_args(argc, argv); + + //purge_tests(); + //run_some_tests(8, 1, 2, 2, 3, 3, 0, 4, 1); + //run_some_tests(8, 5, 7, 1, 1, 1, 1, 1, 1); + //run_some_tests_one_by_one(3, 1, 2, 2); + //run_all_tests_exept(2, 1, 3); + return 0; +} diff --git a/permutation_test/src/permutation_t/permutation_t.c b/permutation_test/src/permutation_t/permutation_t.c new file mode 100644 index 0000000..4caf54b --- /dev/null +++ b/permutation_test/src/permutation_t/permutation_t.c @@ -0,0 +1,125 @@ +#include "permutation_t/permutation_t.h" + +#define GENERATE_PERMUTATION_FUNCTIONS_UNSIGNED(type)\ + bool IS_PERMUTATION_SET_THEORIC_##type(const PERMUTATION_##type *p){\ + if(p == NULL) return false;\ + size_t size = p->size;\ + type j;\ + size_t *count_array_i = calloc(size, sizeof(size_t));\ + if(count_array_i == NULL){\ + printf("can't alloc count_array_i\n"); return false;}\ + for(size_t i = 0; i < size; ++i){\ + j = p->perm[i];\ + if((COMPARE_N_##type(&j, (type*)&size) >= 0) || count_array_i[j]){\ + free(count_array_i); return false; }\ + ++count_array_i[j];}\ + free(count_array_i);\ + return true; }\ + +GENERATE_PERMUTATION_FUNCTIONS_UNSIGNED(TYPE_U_CHAR) +GENERATE_PERMUTATION_FUNCTIONS_UNSIGNED(TYPE_U_INT) +GENERATE_PERMUTATION_FUNCTIONS_UNSIGNED(TYPE_U_L_INT) +GENERATE_PERMUTATION_FUNCTIONS_UNSIGNED(TYPE_SIZE_T) + + +#define GENERATE_PERMUTATION_FUNCTIONS(type)\ + PERMUTATION_##type * CREATE_PERMUTATION_##type(size_t size){\ + if (size == 0) return NULL;\ + PERMUTATION_##type *p = malloc(sizeof(PERMUTATION_##type));\ + p->size = size;\ + p->perm = malloc(size * sizeof(type));\ + return p; }\ +\ + PERMUTATION_TYPE_SIZE_T * TRANSLATE_TO_SET_THEORIC_SIZE_T_##type(const PERMUTATION_##type *p ){\ + if (p == NULL) return NULL;\ + PERMUTATION_TYPE_SIZE_T *t_p = malloc(sizeof(PERMUTATION_TYPE_SIZE_T));\ + size_t size = p->size;\ + t_p->size = size;\ + t_p->perm = malloc(size * sizeof(TYPE_SIZE_T));\ + type *sorted_perm = malloc(size * sizeof(type));\ + COPY_ARRAY_##type(sorted_perm,(const type*)p->perm, size);\ + qsort(sorted_perm, size, sizeof(type), COMPARE_N_##type);\ + size_t *rec_index_visited = malloc(size * sizeof(size_t));\ + size_t cur_rec = 0; bool found_rec;\ + for(size_t i = 0; i < size; ++i){\ + for(size_t j = 0; j < size; ++j){\ + if(COMPARE_N_##type(&(p->perm[j]), &(sorted_perm[i])) == 0){\ + found_rec = false;\ + for(size_t k = 0; k < cur_rec; ++k){\ + if(rec_index_visited[k] == j){\ + found_rec == true; break; } } \ + if(found_rec == false){\ + /*t_p->perm[i] = j;*/\ + t_p->perm[j] = i;\ + rec_index_visited[cur_rec++] = j; \ + break; }\ + }\ + }\ + }\ + free(rec_index_visited);\ + free(sorted_perm);\ + return t_p; \ + }\ +\ + bool IS_PERMUTATION_##type(const PERMUTATION_##type *p){\ + if(p == NULL) return false;\ + PERMUTATION_TYPE_SIZE_T *t_p = TRANSLATE_TO_SET_THEORIC_SIZE_T_##type(p);\ + bool ret = IS_PERMUTATION_SET_THEORIC_TYPE_SIZE_T(t_p);\ + free(t_p);\ + return ret; }\ + + + +GENERATE_PERMUTATION_FUNCTIONS(TYPE_CHAR) +GENERATE_PERMUTATION_FUNCTIONS(TYPE_U_CHAR) +GENERATE_PERMUTATION_FUNCTIONS(TYPE_INT) +GENERATE_PERMUTATION_FUNCTIONS(TYPE_U_INT) +GENERATE_PERMUTATION_FUNCTIONS(TYPE_L_INT) +GENERATE_PERMUTATION_FUNCTIONS(TYPE_U_L_INT) +GENERATE_PERMUTATION_FUNCTIONS(TYPE_SIZE_T) +GENERATE_PERMUTATION_FUNCTIONS(TYPE_FLOAT) +GENERATE_PERMUTATION_FUNCTIONS(TYPE_DOUBLE) +GENERATE_PERMUTATION_FUNCTIONS(TYPE_L_DOUBLE) +GENERATE_PERMUTATION_FUNCTIONS(TYPE_STRING) + + + + + + +/* why TRANSLATE ? + * 2,7,4,1 is a permutation of 1,2,4,7 + *it is equivalent of 1,3,2,0 in set_theoric(4)=0,1,2,3 + this function calculate the permutation equivalent in set_theoric + 2,4,2,5 is translate to 0,1,0,2 + * */ + + +/* if need optimization in translate +#define GENERATE_UNSIGNED_SIZE_WITH_TYPED(type_unsigned, type)\ + PERMUTATION_##type_unsigned * TRANSLATE_TO_SET_THEORIC_##type_unsigned_##type(PERMUTATION_##type *p ){\ + if (p == NULL) return NULL;\ + PERMUTATION_##type_unsigned *t_p = malloc(sizeof(PERMUTATION_##type_unsigned));\ + type_unsigned size = p->size;\ + t_p->perm = malloc(size * sizeof(type_unsigned));\ + type *sorted_perm = malloc(size * sizeof(type));\ + COPY_ARRAY_##type(sorted_perm, p->perm, size);\ + qsort(sorted_perm, size, sizeof(type), COMPARE_N_##type);\ + type_unsigned *rec_index_visited = malloc(size * sizeof(type_unsigned));\ + type_unsigned cur_rec = 0; bool found_rec;\ + for(type_unsigned i = 0; i < size; ++i){\ + for(type_unsigned j = 0; j < size; ++j){\ + if(COMPARE_N_##type(&(p->perm[j]), &(sorted_perm[i])) == 0){\ + found_rec = false;\ + for(type_unsigned k = 0; k < cur_rec; ++k){\ + if(rec_index_visited[k] == j){\ + found_rec == true; break; } } \ + if(found_rec == false){\ + t_p->perm[i] = j;\ + rec_index_visited[cur++] = j; \ + break; } } } } \ + free(rec_index_visited);\ + free(sorted_perm);\ + return t_p; }\ + +*/ diff --git a/permutation_test/src/permutation_t/permutation_t.h b/permutation_test/src/permutation_t/permutation_t.h new file mode 100644 index 0000000..49f03de --- /dev/null +++ b/permutation_test/src/permutation_t/permutation_t.h @@ -0,0 +1,45 @@ +#ifndef __PERMUTATION_T_C_H__ +#define __PERMUTATION_T_C_H__ + +#include "tools_t/tools_t.h" +#include "set_theoric_t/set_theoric_t.h" + +/* struct of permutation, not necessarly set_theoric + * + * */ + + + +#define GENERATE_PERMUTATION(type)\ + struct PERMUTATION_##type{\ + size_t size;\ + type * perm; };\ +\ + typedef struct PERMUTATION_##type PERMUTATION_##type;\ + PERMUTATION_##type * CREATE_PERMUTATION_##type(size_t size);\ +PERMUTATION_TYPE_SIZE_T * TRANSLATE_TO_SET_THEORIC_SIZE_T_##type(const PERMUTATION_##type *p );\ + + +GENERATE_PERMUTATION(TYPE_SIZE_T) +GENERATE_PERMUTATION(TYPE_CHAR) +GENERATE_PERMUTATION(TYPE_U_CHAR) +GENERATE_PERMUTATION(TYPE_INT) +GENERATE_PERMUTATION(TYPE_U_INT) +GENERATE_PERMUTATION(TYPE_L_INT) +GENERATE_PERMUTATION(TYPE_U_L_INT) +GENERATE_PERMUTATION(TYPE_FLOAT) +GENERATE_PERMUTATION(TYPE_DOUBLE) +GENERATE_PERMUTATION(TYPE_L_DOUBLE) +GENERATE_PERMUTATION(TYPE_STRING) + +#define GENERATE_FUNCTIONS_UNSIGNED(type)\ + bool IS_PERMUTATION_SET_THEORIC_##type(const PERMUTATION_##type *p);\ + +GENERATE_FUNCTIONS_UNSIGNED(TYPE_U_CHAR) +GENERATE_FUNCTIONS_UNSIGNED(TYPE_U_INT) +GENERATE_FUNCTIONS_UNSIGNED(TYPE_U_L_INT) +GENERATE_FUNCTIONS_UNSIGNED(TYPE_SIZE_T) + + + +#endif /*__PERMUTATION_T_C_H__*/ diff --git a/permutation_test/src/set_theoric_t/set_theoric_t.c b/permutation_test/src/set_theoric_t/set_theoric_t.c new file mode 100644 index 0000000..ab589cd --- /dev/null +++ b/permutation_test/src/set_theoric_t/set_theoric_t.c @@ -0,0 +1,25 @@ + +#include "set_theoric_t/set_theoric_t.h" + +#define GENERATE_SET_THEORIC(type) \ + SET_THEORIC_##type * CREATE_SET_THEORIC_##type(size_t id){ \ + if(id == 0) return NULL; \ + SET_THEORIC_##type *ret_set = malloc(sizeof(SET_THEORIC_##type)); \ + ret_set->id = id; \ + ret_set->set = malloc(id*sizeof(type)); \ + for(type i = 0; i < id; ++i) ret_set->set[i]=i; \ + return ret_set; \ + } \ + \ + bool IS_SET_THEORIC_##type(SET_THEORIC_##type *st){ \ + for(type i = 0; i < st->id; ++i){ \ + if(st->set[i] != i) return false; \ + return true; \ + } \ + } \ + +GENERATE_SET_THEORIC(TYPE_U_CHAR) +GENERATE_SET_THEORIC(TYPE_U_INT) +GENERATE_SET_THEORIC(TYPE_U_L_INT) +GENERATE_SET_THEORIC(TYPE_SIZE_T) + diff --git a/permutation_test/src/set_theoric_t/set_theoric_t.h b/permutation_test/src/set_theoric_t/set_theoric_t.h new file mode 100644 index 0000000..a13122c --- /dev/null +++ b/permutation_test/src/set_theoric_t/set_theoric_t.h @@ -0,0 +1,24 @@ +#ifndef __SET_THEORIC_T_C__H +#define __SET_THEORIC_T_C__H + +#include + +#include "tools_t/tools_t.h" + +#define GENERATE_UNSIGNED_SET_THEORIC(type) \ + struct SET_THEORIC_##type{ \ + type id; \ + type *set; \ + }; \ + typedef struct SET_THEORIC_##type SET_THEORIC_##type; \ + SET_THEORIC_##type * CREATE_SET_THEORIC_##type(size_t id/*TYPE_##type*/); \ + bool IS_SET_THEORIC_##type(SET_THEORIC_##type *st); \ + +GENERATE_UNSIGNED_SET_THEORIC(TYPE_U_CHAR) +GENERATE_UNSIGNED_SET_THEORIC(TYPE_U_INT) +GENERATE_UNSIGNED_SET_THEORIC(TYPE_U_L_INT) +GENERATE_UNSIGNED_SET_THEORIC(TYPE_SIZE_T) + + + +#endif /*__SET_THEORIC_T_C__H*/ diff --git a/permutation_test/src/tensor/tens0neD/tens0neD.cpp b/permutation_test/src/tensor/tens0neD/tens0neD.cpp new file mode 100644 index 0000000..efa83c8 --- /dev/null +++ b/permutation_test/src/tensor/tens0neD/tens0neD.cpp @@ -0,0 +1,500 @@ +#include +#include + +#include + +#include +#include + + +//#include "/home/fanasina/progr_/ptens0neD/tensor/tens0neD/tens0neD.h" +#include "tensor/tens0neD/tens0neD.h" +//#include "include/tens0neD.h" + + +//#include "cudatensor.h" +//#include "/home/fanasina/progr_/ptens0neD/permutation/permutation.h" +#include "permutation/permutation.h" + + +template +void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz) { + transform(Dst.Dim, Src.Dim, perm, sz); + dimension dsrc = Src.Dim; + dimension ddst = Dst.Dim; + int coor[dsrc.rank]; + int dcoor[ddst.rank], ldst; + for (int i = 0; i < Src.Dim.size; i++) { + dsrc.LinearToCoord(coor, i); + for (int j = 0; j < dsrc.rank; j++) dcoor[j] = coor[perm[j]]; + ldst = ddst.CoordToLinear(dcoor); + Dst.elements[ldst] = Src.elements[i]; + } +} + +template void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz); +template void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz); + +template +Tensor& Tensor::operator=(const Tensor& M) { + Dim = M.Dim; + for (int i = 0; i < Dim.size; ++i) elements[i] = M.elements[i]; + return *this; +} + +template +Tensor& Tensor::operator*=(const T& val) { + //for (int i = 0; i < rank.size; ++i) elements[i] *= val; + return *this; +} + +template +Tensor& operator*(const Tensor& M0, const Tensor& M1) { + struct dimension d; add(d, M0.Dim, M1.Dim); + Tensor Mret(d); + for (int i = 0; i < M0.Dim.size; ++i) Mret.elements[i] = M0.elements[i]; + Mret.Dim += M0.Dim; + return Mret; +} + + +void subArray(int* dst, int* src, int debDst, int finDst, int debSrc) { + for (int i = debDst; i < finDst; i++) { + dst[i] = src[i + debSrc]; + } +} + +void concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1) { + int i = debDst; + for (int j = debSrc0; j < finSrc0; j++) { + dst[i++] = src0[j]; + } + for (int j = debSrc1; j < finSrc1; j++) { + dst[i++] = src1[j]; + } +} + +template +void Tensor::initVal(T val) { + int* coord = new int[Dim.rank]; + T pp, mult = 0.5; + for (int i = 0; i < Dim.size; i++) { + Dim.LinearToCoord(coord, i); + elements[i] = val; + pp = mult; + for (int j = 0; j < Dim.rank; j++) { + elements[i] += (coord[j] + 1) * pp; + pp *= mult; + } + } +} +template +void Tensor::initVal(float val); +template +void Tensor::initVal(double val); + +template +void Tensor::print() { + Dim.print(); + int* coord = new int[Dim.rank]; + int begin = 0, end = Dim.rank - 1; + //int beginInv = Dim.rank - 1, endInv = 0; + int (*iter)(int) = incr; + //int (*iterInv)(int) = decr; + bool (*cond)(int, int) = isLessEqThan; + //bool (*condInv)(int, int) = isGreatEqThan; + if (Dim.endian == false) { + begin = Dim.rank - 1; end = 0; + //beginInv = 0; endInv = Dim.rank - 1; + iter = decr; cond = isGreatEqThan; + //iterInv = incr; condInv = isLessEqThan; + } + for (int i = 0; i < Dim.size; i++) { + Dim.LinearToCoord(coord, i); + //if (coord[Dim.rank - 1] == 0) { + if (coord[begin] == 0) { + for (int j = begin; cond(j, end); j = iter(j)) { + //for (int j = Dim.rank - 1; j >= 0; j--) { + if (coord[j] == 0) { + printf("("); + } + else break; + } + } + + //printf(" ");for (int j = 0; j < Dim.rank; j++) printf("[%d]", coord[j]); printf(" "); + //printf(" "); for (int j = beginInv; condInv(j, endInv); j = iterInv(j)) printf("[%d]", coord[j]); printf(" "); + //printf(" "); for (int k = beginInv; condInv(k, endInv); k = iterInv(k)) { printf("[%d]", coord[k]); } printf(" "); + + printf(" %.6f ", elements[i]); + + //if (coord[Dim.rank - 1] == Dim.dim[Dim.rank - 1] - 1) { + if (coord[begin] == Dim.dim[begin] - 1) { + for (int j = begin; cond(j, end); j = iter(j)) { + //for (int j = Dim.rank - 1; j >= 0; j--) { + if (coord[j] == Dim.dim[j] - 1) { + printf(")"); + } + else break; + } + } + } + + printf("\n"); +} +template +void Tensor::print(); +template +void Tensor::print(); + +template +void tensorProd(Tensor& M, const Tensor& M0, const Tensor& M1) { + add(M.Dim, M0.Dim, M1.Dim); + M.initTensor(); + int* coord = new int[M.Dim.rank]; + int* coord0 = new int[M0.Dim.rank], lin0; + int* coord1 = new int[M1.Dim.rank], lin1; + for (int i = 0; i < M.Dim.size; i++) { + M.Dim.LinearToCoord(coord, i); + subArray(coord0, coord, 0, M0.Dim.rank, 0); + subArray(coord1, coord, 0, M1.Dim.rank, M0.Dim.rank); + lin0 = (M0.Dim).CoordToLinear(coord0); + lin1 = (M1.Dim).CoordToLinear(coord1); + M.elements[i] = M0.elements[lin0] * M1.elements[lin1]; + } +} + +template +void tensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); +template +void tensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); + + + + +bool checkMatchProdTensor(const dimension& d0, const dimension& d1, int nestingDepth) { + if (d0.rank <= nestingDepth || d1.rank <= nestingDepth) return false; + for (int i = 0; i < nestingDepth;i++) { + if (d1.dim[i] != d0.dim[d0.rank - nestingDepth + i]) return false; + } + return true; +} + +bool checkMatchProdTensorReverse(const dimension& d0, const dimension& d1, int nestingDepth) { + if (d0.rank <= nestingDepth || d1.rank <= nestingDepth) return false; + for (int i = 0; i < nestingDepth;i++) { + if (d1.dim[i] != d0.dim[d0.rank - 1 - i]) return false; + } + return true; +} + +void extractDimNestingDepth(dimension& dM, const dimension& d0, const dimension& d1, int nestingDepth) { + int len0 = d0.rank - nestingDepth; + int len1 = d1.rank - nestingDepth; + + int* tsub0 = new int[len0]; + int* tsub1 = new int[len1]; + int* tDk1 = new int[nestingDepth]; + int* tDk0 = new int[nestingDepth]; + subArray(tsub0, d0.dim, 0, len0, 0); + subArray(tsub1, d1.dim, 0, len1, nestingDepth); + subArray(tDk1, d1.dim, 0, nestingDepth, 0); + subArray(tDk0, d0.dim, 0, nestingDepth, len0); + dimension dSub0(len0, tsub0); + dimension dSub1(len1, tsub1); + dimension dM1(nestingDepth, tDk1); + dimension dM0(nestingDepth, tDk0); + + min(dM, dM0, dM1); + //max(dM, dM0, dM1); +} + +// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l +// M[x0,x1,x3..xl x{l+1}...xn] X M[xn,x{n-1},x{n-2}...xl y{l+1} ..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) +//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] +template +void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth) { + if (!checkMatchProdTensor(M0.Dim, M1.Dim, nestingDepth)) { + printf("Deep = %d\n", nestingDepth); + //throw std::check_ProdTensor(" Failed imbrication order in Multiplication matrix "); + + //throw std::invalid_argument(" Failed imbrication order in Multiplication matrix "); + } + + int len0 = M0.Dim.rank - nestingDepth; + int len1 = M1.Dim.rank - nestingDepth; + + int* tsub0 = new int[len0]; + int* tsub1 = new int[len1]; + int* tDk1 = new int[nestingDepth]; + int* tDk0 = new int[nestingDepth]; + subArray(tsub0, M0.Dim.dim, 0, len0, 0); + subArray(tsub1, M1.Dim.dim, 0, len1, nestingDepth); + subArray(tDk1, M1.Dim.dim, 0, nestingDepth, 0); + subArray(tDk0, M0.Dim.dim, 0, nestingDepth, len0); + + dimension dSub0(len0, tsub0); + dimension dSub1(len1, tsub1); + dimension dM1(nestingDepth, tDk1); + dimension dM0(nestingDepth, tDk0); + dimension dM; + min(dM, dM0, dM1); + //max(dM, dM0, dM1); + + add(M.Dim, dSub0, dSub1); + M.initTensor(); + + int* coord = new int[M.Dim.rank]; + + int* coord0 = new int[len0], lin0; + int* coord1 = new int[len1], lin1; + + int* coordM0 = new int[M0.Dim.rank]; + int* coordM1 = new int[M1.Dim.rank]; + + int* Koord = new int[nestingDepth]; + for (int i = 0; i < M.Dim.size; i++) { + M.Dim.LinearToCoord(coord, i); + subArray(coord0, coord, 0, len0, 0); + subArray(coord1, coord, 0, len1, len0); + M.elements[i] = 0; + for (int k = 0; k < dM.size; k++) { + dM.LinearToCoord(Koord, k); + concatArray(coordM0, coord0, Koord, 0, 0, len0, 0, nestingDepth); + concatArray(coordM1, Koord, coord1, 0, 0, nestingDepth, 0, len1); + lin0 = (M0.Dim).CoordToLinear(coordM0); + lin1 = (M1.Dim).CoordToLinear(coordM1); + M.elements[i] += M0.elements[lin0] * M1.elements[lin1]; + } + } +} + +template +void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); +template +void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + +void reverseDim(dimension& d, const dimension& d0) { + d.rank = d0.rank; + d.size = d0.size; + if (d.dim != NULL) free(d.dim); + d.dim = (int*)malloc(d.rank * sizeof(int)); + for (int i = 0; i < d.rank; i++) d.dim[i] = d0.dim[d.rank - i - 1]; +} + +template +void reverseTensor(Tensor& M, const Tensor& M0) { + reverseDim(M.Dim, M0.Dim); + size_t id; + int coor[M0.Dim.rank]; + for (size_t i = 0; i < M.Dim.size; i++) { + M0.Dim.LinearToCoord(coor, i); + reverseArray(coor, M0.Dim.rank); + id = M.Dim.CoordToLinear(coor); + M.elements[id] = M0.elements[i]; + } +} + +// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l +// M[x0,x1,x3..xl x{l+1}..xn] X M[xn,x{n-1},..x{l+1}xl y{l+1}..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) +//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] +template +void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth) { + if (!checkMatchProdTensorReverse(M0.Dim, M1.Dim, nestingDepth)) { + printf("Failed in Deep = %d\n", nestingDepth); + //throw std::check_ProdTensor(" Failed imbrication order in Multiplication matrix "); + + //throw std::invalid_argument(" Failed imbrication order in Multiplication matrix "); + } + + int len0 = M0.Dim.rank - nestingDepth; + int len1 = M1.Dim.rank - nestingDepth; + + int* tsub0 = new int[len0]; + int* tsub1 = new int[len1]; + int* tDk1 = new int[nestingDepth]; + int* tDk0 = new int[nestingDepth]; + subArray(tsub0, M0.Dim.dim, 0, len0, 0); + subArray(tsub1, M1.Dim.dim, 0, len1, nestingDepth); + subArray(tDk1, M1.Dim.dim, 0, nestingDepth, 0); + subArray(tDk0, M0.Dim.dim, 0, nestingDepth, len0); + + dimension dSub0(len0, tsub0); + dimension dSub1(len1, tsub1); + dimension dM1(nestingDepth, tDk1); + dimension dM0(nestingDepth, tDk0); + dimension dM; + bool rev; + minReverse(dM, dM0, dM1, rev); + if (rev) reverseArray(dM.dim, dM.rank); + //max(dM, dM0, dM1); + + add(M.Dim, dSub0, dSub1); + M.initTensor(); + + int* coord = new int[M.Dim.rank]; + + int* coord0 = new int[len0], lin0; + int* coord1 = new int[len1], lin1; + + int* coordM0 = new int[M0.Dim.rank]; + int* coordM1 = new int[M1.Dim.rank]; + + int* Koord = new int[nestingDepth]; + for (int i = 0; i < M.Dim.size; i++) { + M.Dim.LinearToCoord(coord, i); + subArray(coord0, coord, 0, len0, 0); + subArray(coord1, coord, 0, len1, len0); + M.elements[i] = 0; + for (int k = 0; k < dM.size; k++) { + dM.LinearToCoord(Koord, k); + concatArray(coordM0, coord0, Koord, 0, 0, len0, 0, nestingDepth); + reverseArray(Koord, nestingDepth); + concatArray(coordM1, Koord, coord1, 0, 0, nestingDepth, 0, len1); + lin0 = (M0.Dim).CoordToLinear(coordM0); + lin1 = (M1.Dim).CoordToLinear(coordM1); + M.elements[i] += M0.elements[lin0] * M1.elements[lin1]; + } + } +} + +template +void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); +template +void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + +template +void permuteTensorDef(Tensor& M, const Tensor& M0, permutation p) { + if (p.size == M0.Dim.rank) { + M.Dim.rank = M0.Dim.rank; + M.Dim.size = M0.Dim.size; + M.Dim.initDim(); + M.initTensor(); + //permuteArray(M.Dim.dim, M0.Dim.dim, p); + //for (int i = 0; i < p.size; i++) { M.Dim.dim[i] = M0.Dim.dim[p.perm[i]]; } + p.permute(M.Dim.dim, M0.Dim.dim); + size_t img; + int coor[p.size]; + int rooc[p.size]; + for (size_t i = 0; i < M.Dim.size;i++) { + M0.Dim.LinearToCoord(coor, i); + p.permute(rooc, coor); + img = M.Dim.CoordToLinear(rooc); + if (img >= M.Dim.size) printf(" i: %ld vs img:%ld size: %ld\n", i, img, M.Dim.size); + M.elements[img] = M0.elements[i]; + + } + } +} + +template +void permuteTensorDef(Tensor& M, const Tensor& M0, permutation p); + +template +bool scanPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest) { + if (contractNest < Msecond.Dim.rank && contractNest < Mfirst.Dim.rank) { + std::vector founded; + int begin = Mfirst.Dim.rank - contractNest, tmp; + for (int i = 0; i < Msecond.Dim.rank;i++) perm[i] = i; + for (int i = begin; i < Mfirst.Dim.rank; i++) { + for (int j = 0; j < Msecond.Dim.rank;j++) { + if (std::find(founded.begin(), founded.end(), perm[j]) == founded.end()) {// not found + if (Msecond.Dim.dim[perm[j]] == Mfirst.Dim.dim[i]) { + founded.push_back(perm[j]); + tmp = perm[i - begin]; + perm[i - begin] = perm[j]; + perm[j] = tmp; + } + } + } + } + return (founded.size() == contractNest); + } + return false; +} +template +bool scanPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest); + + +template +bool scanInvPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest) { + if (contractNest < Msecond.Dim.rank && contractNest < Mfirst.Dim.rank) { + std::vector founded; + int begin = Mfirst.Dim.rank - contractNest, tmp; + for (int i = 0; i < Msecond.Dim.rank;i++) perm[i] = i; + for (int i = begin; i < Mfirst.Dim.rank; i++) { + for (int j = 0; j < Msecond.Dim.rank;j++) { + if (std::find(founded.begin(), founded.end(), j) == founded.end()) {// not found + if (Msecond.Dim.dim[j] == Mfirst.Dim.dim[perm[i - begin]]) { + founded.push_back(j); + tmp = perm[i - begin]; + perm[i - begin] = j; + perm[j] = tmp; + } + } + } + } + return (founded.size() == contractNest); + } + return false; +} +template +bool scanInvPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest); + + +void LinearTransformCoord(size_t& dst, size_t src, int* inversePerm, size_t Msize, dimension dDst, dimension dSrc) { + size_t sm = src; + size_t pp = Msize; + size_t s = 0; + size_t p = 1; + int ret;// = new int[rank]; + int i; + for (i = 0; i < dSrc.rank; ++i) { + pp /= dSrc.dim[i]; + ret = sm / pp; + p = 1; + for (int j = inversePerm[i] + 1; j < dDst.rank;j++) { + p *= dDst.dim[j]; + } + s += ret * p; + + sm %= pp; + + } + dst = s; + if (s > Msize) printf("I have a problem in LinearTransformCoord: s:%ld siez:%ld \n", s, Msize); + +} + + +template +void permuteTensor(Tensor& M, const Tensor& M0, permutation p) { + if (p.size == M0.Dim.rank) { + M.Dim.rank = M0.Dim.rank; + M.Dim.size = M0.Dim.size; + M.Dim.initDim(); + M.initTensor(); + + if (p.size == M0.Dim.rank) p.permute(M.Dim.dim, M0.Dim.dim); + else { + printf("something wrong perm, not the same size as M0.Dim.rank\n"); + exit(1); + } + size_t img = 0; + printf("in permuteTensor:\n"); + M0.Dim.print(); + M.Dim.print(); + setInit se(M.Dim.rank, 0); + int invP[M.Dim.rank]; + inverseArray(invP, p.perm, M.Dim.rank); + for (size_t i = 0; i < M.Dim.size;i++) { + //LinearTransformCoord(img, i, p.perm, M.Dim.size, M.Dim, M0.Dim); + LinearTransformCoord(img, i, invP, M.Dim.size, M.Dim, M0.Dim); + M.elements[img] = M0.elements[i]; + } + } +} + +template +void permuteTensor(Tensor& M, const Tensor& M0, permutation p); + diff --git a/permutation_test/src/tensor/tens0neD/tens0neD.h b/permutation_test/src/tensor/tens0neD/tens0neD.h new file mode 100644 index 0000000..a901461 --- /dev/null +++ b/permutation_test/src/tensor/tens0neD/tens0neD.h @@ -0,0 +1,114 @@ +#ifndef __TENS_0NE_D_H__ +#define __TENS_0NE_D_H__ + +#include +#include + +#include + +//#include "tensor.h" +//#include "cudatensor.h" +//#include "/home/fanasina/progr_/ptens0neD/dimension/dimension.h" +//#include "/home/fanasina/progr_/ptens0neD/permutation/permutation.h" +//#include "/home/fanasina/progr_/ptens0neD/tensor/tensCuda/tensCuda.h" + +#include "dimension/dimension.h" +#include "permutation/permutation.h" +#include "tensor/tensCuda/tensCuda.h" + +template +struct Tensor { + struct dimension Dim; + T* elements; + Tensor(struct dimension dm = dimension(1)) { + Dim = dm; + //elements = new T[Dim.size]; + elements = (T*)malloc(Dim.size * sizeof(T)); + } + void initTensor() { + //delete[]elements; + //elements = new T[Dim.size]; + if (elements != NULL) + free(elements); + elements = (T*)malloc(Dim.size * sizeof(T)); + } + void initVal(T val); // { for (int i = 0; i < Dim.size; i++) elements[i] = val + 0.001f * i; } + void print(); + Tensor& operator=(const Tensor& M); + Tensor& operator*=(const T& val); + template + friend Tensor& operator*(const Tensor& M0, const Tensor& M1); + + // M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l + // M[x0,x1,x3..xl x{l+1}...xn] X M[xn,x{n-1},x{n-2}...xl y{l+1} ..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) + template + friend void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + + // M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l + // M[x0,x1,x3..xl x{l+1}..xn] X M[xn,x{n-1},..x{l+1}xl y{l+1}..ym] = M[x0,x1..xly{l+1}...y{n+m-2l}] (deep = l > 0) + template + friend void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + + template + friend void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth, bool strict); + + /*template + friend void cudaTensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); +*/ + + template + friend void tensorProd(Tensor& M, const Tensor& M0, const Tensor& M1); + + template + friend void cudaTensorProd(Tensor& M, const Tensor& M0, const Tensor& M1); + + template + friend void cudaTensorProdEnd(Tensor& M, const Tensor& M0, const Tensor& M1); + + template + friend void permuteTensor(Tensor& M, const Tensor& M0, permutation p); + template + friend void permuteTensorDef(Tensor& M, const Tensor& M0, permutation p); + template + friend bool scanPermuteMatchContractTensorfromSrcToDst(int* perm, const Tensor& Msecond, const Tensor& Mfirst, int contractNest); + + //template + //friend void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p); + +}; + +template +void transform(Tensor& Dst, const Tensor& Src, int* perm, int sz); + + +template +Tensor& operator*(const Tensor& M0, const Tensor& M1); + + +void subArray(int* dst, int* src, int debDst, int finDst, int debSrc); + +void concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1); + +void reverseArray(int* arr, int sz); + +template +void tensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); + +bool checkMatchProdTensor(const dimension& d0, const dimension& d1, int nestingDepth); + +void extractDimNestingDepth(dimension& dM, const dimension& d0, const dimension& d1, int nestingDepth); + +// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l + +//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] +template +void tensorContractnProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + +// M[x0,x1,x3..xn] X M[y0,y1,y3..ym] = M[z0,z1...zp] (deep = l > 0) /exists 1<= l<...=n-l alor p=n+m-2l + +//M[[i][j]]=sum_{[k]}M0[[i][k]]*M[[k][j]] +template +void tensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + +#endif + diff --git a/permutation_test/src/tensor/tensCuda/d_tensCuda.cu b/permutation_test/src/tensor/tensCuda/d_tensCuda.cu new file mode 100644 index 0000000..09ebcc2 --- /dev/null +++ b/permutation_test/src/tensor/tensCuda/d_tensCuda.cu @@ -0,0 +1,493 @@ +/*#include +#include + +#include "cuda.h" +#include "cuda_runtime.h" +*/ + +#include "d_tensCuda.h" +//#include "index.h" +#include + +//////////////////////////////////////////////////////// + +//1D grid of 1D blocks +__device__ +int d_getGlobalIdx_1D_1D() { + return blockIdx.x * blockDim.x + threadIdx.x; +} +//1D grid of 2D blocks +__device__ +int d_getGlobalIdx_1D_2D() { + return blockIdx.x * blockDim.x * blockDim.y + + threadIdx.y * blockDim.x + threadIdx.x; +} +//1D grid of 3D blocks +__device__ +int d_getGlobalIdx_1D_3D() { + return blockIdx.x * blockDim.x * blockDim.y * blockDim.z + + threadIdx.z * blockDim.y * blockDim.x + + threadIdx.y * blockDim.x + threadIdx.x; +} +//2D grid of 1D blocks +__device__ int d_getGlobalIdx_2D_1D() { + int blockId + = blockIdx.y * gridDim.x + blockIdx.x; + int threadId = blockId * blockDim.x + threadIdx.x; + return threadId; +} +//2D grid of 2D blocks +__device__ +int d_getGlobalIdx_2D_2D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int threadId = blockId * (blockDim.x * blockDim.y) + + (threadIdx.y * blockDim.x) + threadIdx.x; + return threadId; +} +//2D grid of 3D blocks +__device__ +int d_getGlobalIdx_2D_3D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z) + + (threadIdx.z * (blockDim.x * blockDim.y)) + + (threadIdx.y * blockDim.x) + threadIdx.x; + return threadId; +} +//3D grid of 1D blocks +__device__ +int d_getGlobalIdx_3D_1D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x + + gridDim.x * gridDim.y * blockIdx.z; + int threadId = blockId * blockDim.x + threadIdx.x; + return threadId; +} +//3D grid of 2D blocks +__device__ +int d_getGlobalIdx_3D_2D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x + + gridDim.x * gridDim.y * blockIdx.z; + int threadId = blockId * (blockDim.x * blockDim.y) + + (threadIdx.y * blockDim.x) + threadIdx.x; + return threadId; +} +//3D grid of 3D blocks +__device__ +int d_getGlobalIdx_3D_3D() { + int blockId = blockIdx.x + blockIdx.y * gridDim.x + + gridDim.x * gridDim.y * blockIdx.z; + int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z) + + (threadIdx.z * (blockDim.x * blockDim.y)) + + (threadIdx.y * blockDim.x) + threadIdx.x; + return threadId; +} + + +/////////////////////////////////////////////////////////////////////////// + + +__device__ void d_LinearToCoordEnd(int* ret, size_t lin, int* dim, int rank, size_t size) { + size_t sm = lin; + size_t pp = size; + for (int i = rank - 1;i > 0; --i) { + pp /= dim[i]; + ret[i] = sm / pp; + sm %= pp; + } + ret[0] = sm; +} + +__device__ size_t d_CoordToLinearEnd(int* coo, int* dim, int rank) { + size_t pp = 1; + size_t sm = 0; + for (int i = 0; i < rank; ++i) { + sm += (coo[i] * pp); + pp *= dim[i]; + } + return sm; +} + +__device__ size_t d_CoordToLinear(int* coo, int* dim, int rank) { + size_t pp = 1; + size_t sm = 0; + for (int i = rank - 1; i >= 0; --i) { + sm += (coo[i] * pp); + pp *= dim[i]; + } + return sm; +} + + + +__device__ void d_LinearToCoord(int* ret, size_t lin, int* dim, int rank, size_t size) { + size_t sm = lin; + size_t pp = size; + for (int i = 0; i < rank - 1; ++i) { + pp /= dim[i]; + ret[i] = sm / pp; + sm %= pp; + } + ret[rank - 1] = sm; +} +/*__device__ void d_LinearToSplitSubrankLimSz(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, size_t size, size_t sizeA) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + int ret;// = new int[rank]; + for (int i = 0; i < rank; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + + sm %= pp; + if (i == rankA - 1) { + part0 = s; + s = 0; + p = size / sizeA; + } + + } + part1 = s; + +}*/ +__device__ void d_LinearToSplitSubrankLimSz(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, size_t size, size_t sizeA) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + int ret;// = new int[rank]; + int i; + for (i = 0; i < rankA; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + + sm %= pp; + + } + part0 = s; + s = 0; + p = size / sizeA;//sizeB + for (; i < rank; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + + sm %= pp; + + } + + part1 = s; + +} +__device__ void d_LinearToSplitSubrankLimSzEnd(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, size_t size, size_t sizeA) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + int ret;// = new int[rank]; + for (int i = rank - 1; i >= 0; --i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + + sm %= pp; + if (i == rankA) { + part1 = s; + s = 0; + p = size / sizeA; + } + + } + part0 = s; + +} + + +__device__ void d_subArray(int* dst, int* src, int debDst, int finDst, int debSrc) { + for (int i = debDst; i < finDst; i++) { + dst[i] = src[i + debSrc]; + } +} + +template +__global__ void d_prodTensor(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB) { + size_t lin0, lin1; + + size_t i = threadIdx.x + blockIdx.x * blockDim.x; + if (i < size) { + d_LinearToSplitSubrankLimSz(lin0, lin1, i, dimC, rankC, rankA, size, sizeA); + + C[i] = A[lin0] * B[lin1]; + + } +} + +template __global__ void d_prodTensor(float* C, int* dimC, int rankC, size_t size, float* A, int* dimA, int rankA, size_t sizeA, float* B, int* dimB, int rankB); + +template +__global__ void d_prodTensorEnd(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB) { + size_t lin0, lin1; + + size_t i = threadIdx.x + blockIdx.x * blockDim.x; + if (i < size) { + d_LinearToSplitSubrankLimSzEnd(lin0, lin1, i, dimC, rankC, rankA, size, sizeA); + + C[i] = A[lin0] * B[lin1]; + + } +} + +template __global__ void d_prodTensorEnd(float* C, int* dimC, int rankC, size_t size, float* A, int* dimA, int rankA, size_t sizeA, float* B, int* dimB, int rankB); + +__device__ void d_minReverse(int* dim, int& rank, const int* dim0, int rank0, const int* dim1, int rank1, bool& rev) { + if (rank0 > rank1) { + rank = rank1; + for (int i = 0; i < rank1; ++i) dim[i] = dim1[i]; + rev = true; + } + else if (rank0 < rank1) { + rank = rank0; + for (int i = 0; i < rank1; ++i) dim[i] = dim0[i]; + rev = false; + } + else {// rank0 == rank1 + rank = rank0; + for (int i = 0; i < rank0; i++) { + if (dim[i] > dim1[rank1 - 1 - i]) dim[i] = dim1[rank1 - 1 - i]; + else dim[i] = dim0[i]; + } + rev = false; + } +} + +__device__ void d_reverseArray(int* arr, int sz) { + int* tmp; + //tmp = (int*)malloc(sz * sizeof(int)); + + tmp = new int[sz]; + if (tmp == NULL) { + size_t limit = 0; + cudaDeviceGetLimit(&limit, cudaLimitStackSize); + printf("cudaLimitStackSize: %u | %d (%d) %d | \n", (unsigned)limit, blockIdx.x, blockDim.x, threadIdx.x); + cudaDeviceGetLimit(&limit, cudaLimitPrintfFifoSize); + printf("cudaLimitPrintfFifoSize: %u | %d (%d) %d | \n", (unsigned)limit, blockIdx.x, blockDim.x, threadIdx.x); + cudaDeviceGetLimit(&limit, cudaLimitMallocHeapSize); + printf("cudaLimitMallocHeapSize: %u | %d (%d) %d | \n", (unsigned)limit, blockIdx.x, blockDim.x, threadIdx.x); + + printf("error Allocation in tmp = (int*)malloc(sz * sizeof(int)); | | "); + }int i = 0; + for (; i < sz / 2; i++) { + tmp[i] = arr[i]; + arr[i] = arr[sz - 1 - i]; + } + for (; i < sz; i++) { + arr[i] = tmp[sz - 1 - i]; + } + //free(tmp); + delete[]tmp; +} + +__device__ int d_min(int a, int b) { + if (a < b) return a; + return b; +} + +__device__ void d_concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1) { + int i = debDst; + for (int j = debSrc0; j < finSrc0; j++) { + dst[i++] = src0[j]; + } + for (int j = debSrc1; j < finSrc1; j++) { + dst[i++] = src1[j]; + } +} + + + +__device__ void d_ConcatLinearToSplitSubrankLimSz(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, int rankB, size_t size, size_t sizeA, size_t sizeB, int* dM, int dMrank, size_t dMsize, int ind) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + //size_t sz_dA = sizeA / dMsize; + int rankdA = rankA - dMrank; + + int ret; + int i; + for (i = 0; i < rankdA; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + sm %= pp; + } + size_t s1 = 0; + + size_t pb = sizeB / dMsize; + for (; i < rank; ++i) { + pp /= dim[i]; + ret = sm / pp; + pb /= dim[i]; + s1 += ret * pb; + sm %= pp; + } + + size_t smd = ind; + size_t ppb = dMsize; + //size_t pb = size / sz_dA; + pb = sizeB; + p = dMsize; + for (int j = 0;j < dMrank;j++) { + ppb /= dM[j]; + ret = smd / ppb; + p /= dM[j]; + s += ret * p; + pb /= dM[j]; + s1 += ret * pb; + smd %= ppb; + } + //pp = size / sz_dA; + part0 = s; + part1 = s1; +} + +__device__ void d_SplitLineardToSubrank(size_t& part0, size_t& part1, size_t lin, int* dim, int rank, int rankA, int rankB, size_t size, size_t sizeA, size_t sizeB, int* dM, int dMrank, size_t dMsize) { + size_t sm = lin; + size_t pp = size; + size_t s = 0; + size_t p = sizeA; + //size_t sz_dA = sizeA / dMsize; + int rankdA = rankA - dMrank; + + int ret; + int i; + for (i = 0; i < rankdA; ++i) { + pp /= dim[i]; + ret = sm / pp; + p /= dim[i]; + s += ret * p; + sm %= pp; + } + size_t s1 = 0; + + size_t pb = sizeB / dMsize; + for (; i < rank; ++i) { + pp /= dim[i]; + ret = sm / pp; + pb /= dim[i]; + s1 += ret * pb; + sm %= pp; + } + part0 = s; + part1 = s1; +} + + +__device__ void d_UnionConcatLinearSplitedSubrank(size_t& part0, size_t& part1, size_t p0, size_t p1, size_t size, size_t sizeB, int* dM, int dMrank, size_t dMsize, int ind) { + size_t s = p0; + size_t s1 = p1; + int ret; + size_t smd = ind; + size_t ppb = dMsize; + //size_t pb = size / sz_dA; + size_t pb = sizeB; + size_t p = dMsize; + for (int j = 0;j < dMrank;j++) { + ppb /= dM[j]; + ret = smd / ppb; + p /= dM[j]; + s += ret * p; + pb /= dM[j]; + s1 += ret * pb; + smd %= ppb; + } + //pp = size / sz_dA; + part0 = s; + part1 = s1; +} + +template +__global__ void d_TensorContractnReverseProd(T* C, int* dimC, int rankC, size_t sizeC, T* A, int rankA, size_t sizeA, T* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize) { + + size_t p0, p1; + size_t lin0, lin1; + + + //size_t i = threadIdx.x + blockIdx.x * blockDim.x; + size_t i = d_getGlobalIdx_1D_1D(); + + if (i < sizeC) { + + d_SplitLineardToSubrank(p0, p1, i, dimC, rankC, rankA, rankB, sizeC, sizeA, sizeB, dM, dMrank, dMsize); + + C[i] = 0; + for (size_t k = 0; k < dMsize; k++) { + + d_UnionConcatLinearSplitedSubrank(lin0, lin1, p0, p1, sizeC, sizeB, dM, dMrank, dMsize, k); + + //d_ConcatLinearToSplitSubrankLimSz(lin0, lin1, i, dimC, rankC, rankA, rankB, sizeC, sizeA, sizeB, dM, dMrank, dMsize, k); + + C[i] += A[lin0] * B[lin1]; + } + } + +} + +template +__global__ void d_TensorContractnReverseProd(float* C, int* dimC, int rankC, size_t size, float* A, int rankA, size_t sizeA, float* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize); + +__device__ void d_LinearTransformCoord(size_t& dst, size_t src, int* inversePerm, size_t sizeA, int rankDst, int rankSrc, int* dDst, int* dSrc) { + size_t sm = src; + size_t pp = sizeA; + size_t s = 0; + size_t p = 1; + int ret;// = new int[rank]; + int i, j; + for (i = 0; i < rankSrc; ++i) { + pp /= dSrc[i]; + ret = sm / pp; + p = 1; + for (j = inversePerm[i] + 1; j < rankDst;j++) { + p *= dDst[j]; + } + s += ret * p; + + sm %= pp; + + } + dst = s; + if (s > sizeA) printf("I have a problem in LinearTransformCoord: s:%ld siez:%ld \n", s, sizeA); + +} + +template +__global__ void d_PermLinearTransformCoord(T* C, int* dimC, int rankC, size_t sizeC, T* A, int* dimA, int rankA, size_t sizeA, int* invPerm) { + + //size_t i = threadIdx.x + blockIdx.x * blockDim.x; + size_t i = d_getGlobalIdx_1D_1D(); + + if (i < sizeC) { + //printf("(float* C, int* dimC, int rankC, size_t size, float* A, int* dimA, int rankA, size_t sizeA, int* invPerm); + diff --git a/permutation_test/src/tensor/tensCuda/d_tensCuda.h b/permutation_test/src/tensor/tensCuda/d_tensCuda.h new file mode 100644 index 0000000..c2b8870 --- /dev/null +++ b/permutation_test/src/tensor/tensCuda/d_tensCuda.h @@ -0,0 +1,69 @@ +#ifndef __D_CUDA_TENSOR_H__ +#define __D_CUDA_TENSOR_H__ + +#include "cuda.h" +#include "cuda_runtime.h" + +//#include "cuda_device_runtime_api.h" + +//#include "/home/fanasina/progr_/ptens0neD/tensor/tensCuda/d_tensCuda.h" +#include "tensor/tensCuda/d_tensCuda.h" + + +//#1D grid of 1D blocks +__device__ int d_getGlobalIdx_1D_1D(); +//#1D grid of 2D blocks +__device__ int d_getGlobalIdx_1D_2D(); +//#1D grid of 3D blocks +__device__ int d_getGlobalIdx_1D_3D(); +//#1D grid of 1D blocks +__device__ int d_getGlobalIdx_2D_1D(); +//#1D grid of 2D blocks +__device__ int d_getGlobalIdx_2D_2D(); +//2D grid of 3D blocks +__device__ int d_getGlobalIdx_2D_3D(); +//#1D grid of 1D blocks +__device__ int d_getGlobalIdx_3D_1D(); +//#1D grid of 2D blocks +__device__ int d_getGlobalIdx_3D_2D(); +//#1D grid of 3D blocks +__device__ int d_getGlobalIdx_3D_3D(); + + + +extern cudaError_t cudaDeviceGetLimit(size_t* pValue, enum cudaLimit limit); + + +__device__ void d_LinearToCoordEnd(int* ret, size_t lin, int* dim, int rank, size_t size); + +__device__ size_t d_CoordToLinearEnd(int* coo, int* dim, int rank); + +__device__ size_t d_CoordToLinear(int* coo, int* dim, int rank); + + +__device__ void d_LinearToCoord(int* ret, size_t lin, int* dim, int rank, size_t size); + +__device__ void d_subArray(int* dst, int* src, int debDst, int finDst, int debSrc); + +__device__ void d_minReverse(int* dim, int& rank, const int* dim0, int rank0, const int* dim1, int rank1, bool& rev); + +__device__ void d_reverseArray(int* arr, int sz); + +__device__ int d_min(int a, int b); + +__device__ void d_concatArray(int* dst, int* src0, int* src1, int debDst, int debSrc0, int finSrc0, int debSrc1, int finSrc1); + + +template +__global__ void d_prodTensor(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB); + +template +__global__ void d_prodTensorEnd(T* C, int* dimC, int rankC, size_t size, T* A, int* dimA, int rankA, size_t sizeA, T* B, int* dimB, int rankB); + +template +__global__ void d_TensorContractnReverseProd(T* C, int* dimC, int rankC, size_t size, T* A, int rankA, size_t sizeA, T* B, int rankB, size_t sizeB, int* dM, int dMrank, size_t dMsize); + +template +__global__ void d_PermLinearTransformCoord(T* C, int* dimC, int rankC, size_t sizeC, T* A, int* dimA, int rankA, size_t sizeA, int* invPerm); + +#endif \ No newline at end of file diff --git a/permutation_test/src/tensor/tensCuda/tensCuda.cu b/permutation_test/src/tensor/tensCuda/tensCuda.cu new file mode 100644 index 0000000..9d4ec29 --- /dev/null +++ b/permutation_test/src/tensor/tensCuda/tensCuda.cu @@ -0,0 +1,574 @@ +#include +#include + +#include + +#include +#include + + +//#include "/home/fanasina/progr_/ptens0neD/tensor/tens0neD/tens0neD.h" + +//#include "/home/fanasina/progr_/ptens0neD/tensor/tensCuda/tensCuda.h" +#include "tensor/tensCuda/tensCuda.h" + + + + +template +void cudaTensorProd(Tensor& M, const Tensor& M0, const Tensor& M1) { + add(M.Dim, M0.Dim, M1.Dim); + M.initTensor(); + + int* d_imM, * d_imM0, * d_imM1; + cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + T* e, * e0, * e1; + errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + int BLOCKSIZE = 256;//1024; + int DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; + //int DIMBLOCKS = (M.Dim.size) / BLOCKSIZE; + + d_prodTensor << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, e1, d_imM1, M1.Dim.rank); + + errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(e); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } +} + + +//template void cudaTensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); +template void cudaTensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); + + +template +void cudaTensorProdEnd(Tensor& M, const Tensor& M0, const Tensor& M1) { + add(M.Dim, M0.Dim, M1.Dim); + M.initTensor(); + + int* d_imM, * d_imM0, * d_imM1; + cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + T* e, * e0, * e1; + errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + size_t BLOCKSIZE = 1024; + size_t DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; + + d_prodTensorEnd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, e1, d_imM1, M1.Dim.rank); + + cudaDeviceSynchronize(); + + errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(e); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } +} + + +//template void cudaTensorProd(Tensor& M, const Tensor& M1, const Tensor& M0); +template void cudaTensorProdEnd(Tensor& M, const Tensor& M1, const Tensor& M0); + + +template +void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p) { + if (p.size == M0.Dim.rank) { + M.Dim.rank = M0.Dim.rank; + M.Dim.size = M0.Dim.size; + M.Dim.initDim(); + M.initTensor(); + + p.permute(M.Dim.dim, M0.Dim.dim); + + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + + cudaEventRecord(start); + + + int* d_imM, * d_imM0; + cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + + T* e, * e0; + errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + + errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + + size_t BLOCKSIZE = 256; //1024;//512; + size_t DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; + dim3 blckSZ, gridSZ; + blckSZ.x = BLOCKSIZE; + gridSZ.x = DIMBLOCKS; + + int* invP, * d_invP; + invP = (int*)malloc(M.Dim.rank * sizeof(int)); + inverseArray(invP, p.perm, M.Dim.rank); + errCu = cudaMalloc((void**)&d_invP, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_invP, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_invP, invP, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_invP, invP, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + //printf("size: %ld\n", M.Dim.size); + + //d_prodTensorEnd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank); + //d_TensorContractnReverseProd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); + //d_TensorContractnReverseProd << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); + d_PermLinearTransformCoord << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, d_invP); + //d_PermLinearTransformCoord << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, M0.Dim.size, p.perm); + //cudaDeviceSynchronize(); + + + errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(e); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(d_imM); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + cudaEventRecord(stop); + cudaEventSynchronize(stop); + float milliseconds = 0; + cudaEventElapsedTime(&milliseconds, start, stop); + printf("ellaps time cuda permute tensor: %f ms\n", milliseconds); + + } +} + +template +void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p); + + +// strict match contract ! if no strict, we take the minimum +template +void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M11, int nestingDepth, bool strict) { + + + int perm[M11.Dim.rank]; + struct Tensor M1; + if (scanPermuteMatchContractTensorfromSrcToDst(perm, M11, M0, nestingDepth)) { + for (int i = 0; i < M11.Dim.rank; i++) printf(" %d[%d] ", i, perm[i]); printf(": last perm \n"); + struct permutation p(M11.Dim.rank, perm); + permuteTensor(M1, M11, p); + M1.Dim.print(); + + } + else { + printf("Failed in Deep = %d\n", nestingDepth); + //throw std::check_ProdTensor(" Failed imbrication order in Multiplication matrix "); + + throw std::invalid_argument(" Failed imbrication order in Multiplication matrix "); + exit(1); + } + + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + + cudaEventRecord(start); + + int len0 = M0.Dim.rank - nestingDepth; + int len1 = M1.Dim.rank - nestingDepth; + + int* tsub0 = new int[len0]; + int* tsub1 = new int[len1]; + int* tDk1 = new int[nestingDepth]; + int* tDk0 = new int[nestingDepth]; + subArray(tsub0, M0.Dim.dim, 0, len0, 0); + subArray(tsub1, M1.Dim.dim, 0, len1, nestingDepth); + subArray(tDk1, M1.Dim.dim, 0, nestingDepth, 0); + subArray(tDk0, M0.Dim.dim, 0, nestingDepth, len0); + + dimension dSub0(len0, tsub0); + dimension dSub1(len1, tsub1); + dimension dM1(nestingDepth, tDk1); + dimension dM0(nestingDepth, tDk0); + dimension dM(dM0); + //bool rev; + //minReverse(dM, dM0, dM1, rev); + //if (rev) reverseArray(dM.dim, dM.rank); + //max(dM, dM0, dM1); + + add(M.Dim, dSub0, dSub1); + M.initTensor(); + + + + int* d_imM, * d_imM0, * d_imM1, * d_imdM; + cudaError_t errCu = cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM, M.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imdM, dM.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imdM, dM.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM0, M0.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&d_imM1, M1.Dim.rank * sizeof(int)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM, M.Dim.dim, M.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imdM, dM.dim, dM.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imdM, dM.dim, dM.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM0, M0.Dim.dim, M0.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(d_imM1, M1.Dim.dim, M1.Dim.rank * sizeof(int), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + T* e, * e0, * e1; + errCu = cudaMalloc((void**)&e, M.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e, M.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e0, M0.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMalloc((void**)&e1, M1.Dim.size * sizeof(T)) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e0, M0.elements, M0.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(e1, M1.elements, M1.Dim.size * sizeof(T), cudaMemcpyHostToDevice) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + size_t BLOCKSIZE = 256; //1024;//512; + size_t DIMBLOCKS = (M.Dim.size + BLOCKSIZE - 1) / BLOCKSIZE; + dim3 blckSZ, gridSZ; + blckSZ.x = BLOCKSIZE; + gridSZ.x = DIMBLOCKS; + + + //d_prodTensorEnd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank); + //d_TensorContractnReverseProd << < DIMBLOCKS, BLOCKSIZE >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); + //d_TensorContractnReverseProd << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, d_imdM, dM.rank, dM.size, e0, d_imM0, M0.Dim.rank, e1, d_imM1, M1.Dim.rank, nestingDepth); + d_TensorContractnReverseProd << < gridSZ, blckSZ, 0, 0 >> > (e, d_imM, M.Dim.rank, M.Dim.size, e0, M0.Dim.rank, M0.Dim.size, e1, M1.Dim.rank, M1.Dim.size, d_imdM, dM.rank, dM.size); + + //cudaDeviceSynchronize(); + + + errCu = cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaMemcpy(M.elements, e, M.Dim.size * sizeof(T), cudaMemcpyDeviceToHost) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + + errCu = cudaFree(e); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(e1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(e1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM0); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM0) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + errCu = cudaFree(d_imM1); + if (cudaSuccess != errCu) { + printf("device fnc failed cudaFree(d_imM1) \n ErrorCuda: %d : %s\n", errCu, cudaGetErrorString(errCu)); + exit(errCu); + } + cudaEventRecord(stop); + cudaEventSynchronize(stop); + float milliseconds = 0; + cudaEventElapsedTime(&milliseconds, start, stop); + printf("ellaps time cuda prod contract prod: %f ms\n", milliseconds); + + +} + +template +void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth, bool strict); +//template void cudaTensorContractnReverseProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth); + diff --git a/permutation_test/src/tensor/tensCuda/tensCuda.h b/permutation_test/src/tensor/tensCuda/tensCuda.h new file mode 100644 index 0000000..be5d0d6 --- /dev/null +++ b/permutation_test/src/tensor/tensCuda/tensCuda.h @@ -0,0 +1,31 @@ +#ifndef __TENS_CUDA_H__ +#define __TENS_CUDA_H__ + +#include +#include + +#include + +//#include "/home/fanasina/progr_/ptens0neD/tensor/tens0neD/tens0neD.h" +#include "tensor/tens0neD/tens0neD.h" + +//#include "/home/fanasina/progr_/ptens0neD/tensor/tensCuda/d_tensCuda.h" +#include "tensor/tensCuda/d_tensCuda.h" +//#include "dimension/dimension.h" + +template +struct Tensor; + +template +void cudaTensorContractNestProd(Tensor& M, const Tensor& M0, const Tensor& M1, int nestingDepth, bool strict = true); + +template +void cudaTensorProd(Tensor& M, const Tensor& M0, const Tensor& M1); +template +void cudaTensorProdEnd(Tensor& M, const Tensor& M0, const Tensor& M1); +template +void cudapermuteTensor(Tensor& M, const Tensor& M0, permutation p); + + +#endif + diff --git a/test/Makefile b/test/Makefile index c8f9214..0306a62 100644 --- a/test/Makefile +++ b/test/Makefile @@ -17,41 +17,16 @@ OBJ=$(SRC:.c=.o) TEST_DIR=$(PWD) EXECSRC=$(NAME_TEST).c EXEC=launch_$(NAME_TEST)_m -PERMSRC=src/permutation_t/permutation_t.c -PERMSRC_O=$(PERMSRC:.c=.o) -SETTSRC=src/set_theoric_t/set_theoric_t.c -SETTSRC_O=$(SETTSRC:.c=.o) -TOOLSRC=../ytools_t/src/tools_t/tools_t.c -TOOLSRC_O=$(TOOLSRC:.c=.o) - -FTESTSRC=src/ftest/ftest.c -FTESTSRC_O=$(FTESTSRC:.c=.o) -FMOCKSRC=src/fmock/fmock.c -FMOCKSRC_O=$(FMOCKSRC:.c=.o) -BPROGRESSRC=src/bar_progress/bar_progress.c -BPROGRESSRC_0=$(BPROGRESSRC:.c=.o) - -LIB_YTEST=../libytest.so +LIB_YTEST=$(PWD)/../libytest.so all: $(EXEC) $(LIB_YTEST) $(EXEC): $(EXECSRC) $(OBJ) $(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS) -$(PERMSRC_O): $(PERMSRC) $(SETTSRC_O) - $(CC) -o $@ -c $< $(CFLAGS) - -$(SETTSRC_O) : $(SETTSRC) $(TOOLSRC_O) - $(CC) -o $@ -c $< $(CFLAGS) - -$(TOOLSRC_O): $(TOOLSRC) - $(CC) -o $@ -c $< $(CFLAGS) - .PHONY: clean mrproper clean: - #echo "all src : $(SRC)" - #echo "all obj : $(OBJ)" rm -f $(OBJ) mrproper: clean diff --git a/test/compile.sh b/test/compile.sh index 1949482..c7a9339 100644 --- a/test/compile.sh +++ b/test/compile.sh @@ -12,11 +12,11 @@ if [ "$#" -le 1 ] ; then fi -export LD_LIBRARY_PATH=$PWD/../:LD_LIBRARY_PATH -gcc -o launch_is_good_c $1 -L$PWD/../ $2 -lytest -I../include_ytest/include src/permutation_t/permutation_t.o src/set_theoric_t/set_theoric_t.o -I./src +gcc -o launch_is_good_c $1 -L$PWD/../ $2 -lytest -I../include_ytest/include #gcc -o launch_is_good_c $1 $2 -lytest -I../include_ytest src/permutation_t/permutation_t.o src/set_theoric_t/set_theoric_t.o -I./src +export LD_LIBRARY_PATH=$PWD/../:LD_LIBRARY_PATH #gcc $1 src/ftest/ftest.c src/fmock/fmock.c src/tools_t/tools_t.c src/bar_progress/bar_progress.c src/permutation_t/permutation_t.c src/set_theoric_t/set_theoric_t.c -I./include $2 -o launch_is_good_c -lpthread diff --git a/test/is_good.c b/test/is_good.c index 1c37377..c533c49 100644 --- a/test/is_good.c +++ b/test/is_good.c @@ -12,47 +12,20 @@ #include "ftest/ftest.h" #include "fmock/fmock.h" -#if 1 - -#include "permutation_t/permutation_t.h" - - -TEST(size_permutation2){ - PRINTF("another size_permutation2 again\n"); - ASSERT_TRUE(false); -} - -TEST(size_permutation) -{ - PERMUTATION_TYPE_CHAR *p = CREATE_PERMUTATION_TYPE_CHAR(3); - - PRINTF(" size = %lu \n",p->size); - EXPECT_EQ(p->size, 3); - PRINTF("test size_permutation2\n"); -} - -#endif - -TEST(size_permutation2){ - PRINTF("another size_permutation2 again false\n"); +TEST(true__){ + PRINTF("another test again false\n"); bool val_bool = false; ASSERT_TRUE(val_bool); } -TEST(size_permutation2) +TEST(test) { - PRINTF("test size_permutation2\n"); + PRINTF("test test\n"); bool val_bool = true; ASSERT_FALSE(val_bool); -/* - PERMUTATION_TYPE_CHAR *p = CREATE_PERMUTATION_TYPE_CHAR(3); - - PRINTF(" size = %u \n",p->size); - if(p->size == 3) print_OK_with_msg_endl(" FF yeah GOOD test size passed "); - else print_KO_with_msg_endl("NOT GOOD test size not passed "); -*/ + } TEST(float_equal){ - PRINTF("another size_permutation2 float\n"); + PRINTF("another test float\n"); ASSERT_TRUE(true); float a = 1.00001f; float b = 1.00001f; @@ -62,7 +35,7 @@ TEST(float_equal){ ASSERT_EQ_TYPE_FLOAT(1.0000102f,b); } TEST(double_equal){ - PRINTF("another size_permutation2 double\n"); + PRINTF("another test double\n"); ASSERT_TRUE(true); double a = 1.00000001; double b = 1.00000001; @@ -75,7 +48,7 @@ TEST(double_equal){ TEST(){ unsigned char c = 'a'; - debug_print("another size_permutation2, a = %c\n",c); + debug_print("another test, a = %c\n",c); ASSERT_FALSE(true); ASSERT_TRUE(true); ASSERT_TRUE(true); @@ -118,22 +91,6 @@ TEST(){ } -TEST(){ - - PERMUTATION_TYPE_CHAR *p_char = CREATE_PERMUTATION_TYPE_CHAR(6); - p_char->perm[0]='B'; - p_char->perm[1]='A'; - p_char->perm[2]='Y'; - p_char->perm[3]='C'; - p_char->perm[4]='D'; - p_char->perm[5]='Z'; - - PERMUTATION_TYPE_SIZE_T *tr_p_char = TRANSLATE_TO_SET_THEORIC_SIZE_T_TYPE_CHAR(p_char); - - for(int i = 0; i < tr_p_char->size; ++i) PRINTF(" [%d ]%ld ,",i,tr_p_char->perm[i]); - PRINTF("p_char == %s\n",p_char->perm); -} - TEST(lessThan){ long int a=1,b=2; EXPECT_LT(a,b);