From b3de7fb171f302c0755f928a2a35016dd32bf2f2 Mon Sep 17 00:00:00 2001 From: fanasina Date: Tue, 14 May 2024 09:41:07 +0200 Subject: [PATCH] rename rabbit_learn to Frozen_Lake --- .../src/{rabbit_learn.c => Frozen_Lake.c} | 69 +++++++++++++---- .../src/{rabbit_learn.h => Frozen_Lake.h} | 4 +- qlearn_0/test/Makefile | 75 +++++++++++++++++++ qlearn_0/test/compile.sh | 23 ++++++ qlearn_0/test/is_good.c | 48 ++++++++++++ 5 files changed, 204 insertions(+), 15 deletions(-) rename qlearn_0/src/{rabbit_learn.c => Frozen_Lake.c} (76%) rename qlearn_0/src/{rabbit_learn.h => Frozen_Lake.h} (94%) create mode 100644 qlearn_0/test/Makefile create mode 100644 qlearn_0/test/compile.sh create mode 100644 qlearn_0/test/is_good.c diff --git a/qlearn_0/src/rabbit_learn.c b/qlearn_0/src/Frozen_Lake.c similarity index 76% rename from qlearn_0/src/rabbit_learn.c rename to qlearn_0/src/Frozen_Lake.c index 3ea69f9..bd1aad0 100644 --- a/qlearn_0/src/rabbit_learn.c +++ b/qlearn_0/src/Frozen_Lake.c @@ -1,7 +1,6 @@ -#include "rabbit_learn.h" +#include "Frozen_Lake.h" -int ADD_MOVE[ACTION_COUNT]={ MOVE_DOWN, MOVE_LEFT, MOVE_RIGHT, MOVE_UP }; char * action_name ="DLRU"; char * content_name ="ESCBF"; @@ -35,6 +34,17 @@ struct game_params * create_game_params ( gm_param->limit_EPISODES_number = limit_EPISODES_number; gm_param->limit_MOVE_number = limit_MOVE_number; + gm_param->add_move = malloc(ACTION_COUNT * sizeof(long int)); +/* + gm_param->add_move[DOWN] = signedLineFromCoord((long int []){0,-1}, dim); + gm_param->add_move[LEFT] = signedLineFromCoord((long int []){-1,0}, dim); + gm_param->add_move[RIGHT] = signedLineFromCoord((long int []){1,0}, dim); + gm_param->add_move[UP] = signedLineFromCoord((long int []){0,1}, dim); +*/ + gm_param->add_move[DOWN] = dim->perm[0]; + gm_param->add_move[LEFT] = -1; + gm_param->add_move[RIGHT] = 1; + gm_param->add_move[UP] = -1*dim->perm[0]; return gm_param; } @@ -57,6 +67,7 @@ struct delay_params * create_delay_params ( } void reset_game_status(struct game_status * status){ + status->rabbitOldRankPosition = status->startRankPosition ; status->rabbitRankPosition = status->startRankPosition ; status->endGame = false; status->count_MOVES = 0; @@ -82,11 +93,11 @@ struct game_status * create_game_status(){ long int generate_game(struct game *gm){ struct game_params *params = gm->params; - struct game_status *status; + struct game_status *status = gm->status; dimension *dim = params->dim; for(long int i=0; i<(params->dim)->rank; ++i){ (gm->cells[i]).rankPosition = i; - (gm->cells[i]).content = 0; + (gm->cells[i]).content = EMPTY; //0 for(long int j=0; j < ACTION_COUNT; ++j) (gm->cells[i]).Q[j] = 0; } @@ -101,6 +112,7 @@ long int generate_game(struct game *gm){ (gm->cells[random]).content = START; (status)->startRankPosition = random; + (status)->rabbitOldRankPosition = random; (status)->rabbitRankPosition = random; return (status->startRankPosition); @@ -117,6 +129,8 @@ struct game * create_game( gm->cells = create_game_cells(params->dim); gm->delay = delay; // create_delay_params(10000000, 2000000); gm->status = create_game_status(); + + generate_game(gm); return gm; @@ -133,10 +147,33 @@ void free_game(struct game *gm){ } free(gm->cells); free_dimension((gm->params)->dim); + free((gm->params)->add_move); free(gm->params); free(gm->qlearnParams); free(gm->delay); free(gm->status); + free(gm); +} + +long int function_add_move(struct game *gm, enum Action action){ + struct game_status * status = gm->status; + dimension *dim = (gm->params)->dim; + + if(action == UP && (status->rabbitRankPosition / dim->perm[0]) == 0){ + return dim->rank * -1; + } + if(action == DOWN && (status->rabbitRankPosition / dim->perm[0]) == dim->perm[1]-1){ + return dim->rank ; + } + if(action == LEFT && (status->rabbitRankPosition % dim->perm[0]) == 0){ + return dim->rank * -1; + } + if(action == RIGHT && (status->rabbitRankPosition % dim->perm[0]) == dim->perm[1]-1){ + return dim->rank; + } + + return (gm->params)->add_move[action]; + } void move_game(struct game *gm, enum Action action){ @@ -145,9 +182,11 @@ void move_game(struct game *gm, enum Action action){ struct cell * cells = gm->cells; ++(status->count_MOVES); - long int newRankPosition = status->rabbitRankPosition + ADD_MOVE[action]; + long int newRankPosition = status->rabbitRankPosition + function_add_move(gm, action); - if (newRankPosition < 0 || newRankPosition >= (params->dim)->rank) status->reward = REWARD_OUT; + if (newRankPosition < 0 || newRankPosition >= (params->dim)->rank) { + status->reward = REWARD_OUT; + } else if (status->count_MOVES > params->limit_MOVE_number) { status->rabbitRankPosition = newRankPosition; status->endGame = true; @@ -252,10 +291,10 @@ void mainQlearning_game(struct game *gm){ double proba_explor; srand(time(NULL)); - for(size_t k=0 ; k < params->limit_game_number; ++k){ - for(size_t episode = 0; episode < params->limit_EPISODES_number; ++episode){ - reset_game_status(status); + for(long int k=0 ; k < params->limit_game_number; ++k){ generate_game(gm); + for(long int episode = 0; episode < params->limit_EPISODES_number; ++episode){ + reset_game_status(status); while(!(status->endGame)){ random = rand() % NUMBER_EPISODE2; @@ -268,19 +307,21 @@ void mainQlearning_game(struct game *gm){ action = ARG_MAX_ARRAY_TYPE_DOUBLE( cells[status->rabbitRankPosition].Q, ACTION_COUNT ); printf("greedy action "); } - + + status->rabbitOldRankPosition = status->rabbitRankPosition; move_game(gm, action); - printf("action = %d rbPos = %ld, rwds = %ld, final_rwrd=%ld\n",action, status->rabbitRankPosition,status->reward, status->final_reward); + printf("ik=%ld, episode = %ld , action = %d rbPos = %ld, rwds = %ld, final_rwrd=%ld\n",k,episode,action, status->rabbitRankPosition,status->reward, status->final_reward); // update Q array on the action of the state - cells[status->rabbitRankPosition].Q[action] = cells[status->rabbitRankPosition].Q[action] + (qlearnParams->learning_rate * (status->reward + qlearnParams->discount_factor * MAX_ARRAY_TYPE_DOUBLE(cells[status->rabbitRankPosition].Q, ACTION_COUNT) - cells[status->rabbitRankPosition].Q[action])); + cells[status->rabbitOldRankPosition].Q[action] = cells[status->rabbitOldRankPosition].Q[action] + (qlearnParams->learning_rate * (status->reward + qlearnParams->discount_factor * MAX_ARRAY_TYPE_DOUBLE(cells[status->rabbitRankPosition].Q, ACTION_COUNT) - cells[status->rabbitOldRankPosition].Q[action])); + //cells[status->rabbitRankPosition].Q[action] = cells[status->rabbitRankPosition].Q[action] + (qlearnParams->learning_rate * (status->reward + qlearnParams->discount_factor * MAX_ARRAY_TYPE_DOUBLE(cells[status->rabbitRankPosition].Q, ACTION_COUNT) - cells[status->rabbitRankPosition].Q[action])); //cells[status->rabbitRankPosition].Q[action] = (1 - qlearnParams->learning_rate) * cells[status->rabbitRankPosition].Q[action] + (qlearnParams->learning_rate * (status->reward + qlearnParams->discount_factor * MAX_ARRAY_TYPE_DOUBLE(cells[status->rabbitRankPosition].Q, ACTION_COUNT ))); status->final_reward += status->reward; print_game_dim2(gm); - usleep((gm->delay)->delay_between_episodes); + // usleep((gm->delay)->delay_between_episodes); } push_back_list_TYPE_L_INT(list_final_rewards, status->final_reward); @@ -290,7 +331,7 @@ void mainQlearning_game(struct game *gm){ printf(" %ld ",(list_final_rewards->current_list)->value); } remove_all_list_in_TYPE_L_INT(list_final_rewards); - usleep((gm->delay)->delay_between_games); + // usleep((gm->delay)->delay_between_games); } free(list_final_rewards); } diff --git a/qlearn_0/src/rabbit_learn.h b/qlearn_0/src/Frozen_Lake.h similarity index 94% rename from qlearn_0/src/rabbit_learn.h rename to qlearn_0/src/Frozen_Lake.h index d77174e..708d0c9 100644 --- a/qlearn_0/src/rabbit_learn.h +++ b/qlearn_0/src/Frozen_Lake.h @@ -11,7 +11,7 @@ #define ACTION_COUNT 4 /* Down(0), Left(1), Right(2), Up(3) */ enum Action { DOWN, LEFT, RIGHT, UP }; -enum MoveAction { MOVE_DOWN = -2, MOVE_LEFT = -1, MOVE_RIGHT = 1, MOVE_UP = 2 }; +//enum MoveAction { MOVE_DOWN = -2, MOVE_LEFT = -1, MOVE_RIGHT = 1, MOVE_UP = 2 }; #define CONTENT_COUNT 5 /* Empty(0), Start(1), Carrot(2), Block(3), Fox(4) */ @@ -22,6 +22,7 @@ enum Reward { REWARD_MOVES_OUT=-10, REWARD_OUT=-1, REWARD_EMPTY=-1, REWARD_CARRO struct game_params { size_t limit_game_number; dimension *dim; + long int *add_move; size_t limit_FOX_number; size_t limit_BLOCK_number; size_t limit_CARROT_number; @@ -37,6 +38,7 @@ struct delay_params { struct game_status { long int startRankPosition; long int rabbitRankPosition; + long int rabbitOldRankPosition; bool endGame; long int reward; long int final_reward; diff --git a/qlearn_0/test/Makefile b/qlearn_0/test/Makefile new file mode 100644 index 0000000..ac9ba4d --- /dev/null +++ b/qlearn_0/test/Makefile @@ -0,0 +1,75 @@ + + + + +NAME_TEST=is_good +CC=gcc +ROOT_DIR=$(PWD) +FROZENLAKEDIR=$(PWD)/.. +YTESTDIR=$(PWD)/../../ytest_t +YPERMDIR=$(PWD)/../../ypermutation_t +DIMDIR=$(PWD)/../../dimension_t +LISTDIR=$(PWD)/../../list_t + +INCLUDE_DIR=$(PWD)/../src +CFLAGS=-I$(INCLUDE_DIR) -I$(YTESTDIR)/include_ytest/include -I$(YPERMDIR)/src -I$(DIMDIR)/src -I$(LISTDIR)/src +LDFLAGS=-L$(YTESTDIR) -lytest #"-D DEBUG=1" + +#SRC_DIR=$(ROOT_DIR)/src +#SRC=$(wildcard */*/*.c) +SRC=$(wildcard **.c) +#HEADS=$(OBJS:.o=.h) +TEST_DIR=$(PWD) +EXECSRC=$(NAME_TEST).c +EXEC=launch_$(NAME_TEST)_m +LISTSRC_O=$(LISTDIR)/src/list_t/list_t.o + +FROZENLAKESRC=$(FROZENLAKEDIR)/src/Frozen_Lake.c +FROZENLAKESRC_O=$(FROZENLAKESRC:.c=.o) + + +PERMSRC_O=$(YPERMDIR)/src/permutation_t/permutation_t.o +DIMSRC_O=$(DIMDIR)/src/dimension_t/dimension_t.o + + +TOPTARGETS := all clean + +DEPS=$(PERMSRC) $(DIMDIR) $(LISTDIR) $(YTESTDIR) + +$(TOPTARGETS): $(DEPS) + +$(DEPS): + $(MAKE) -C $@ $(MAKECMDGOALS) + + +#LISTSRC_O=$(LISTSRC:.c=.o) +#SETTSRC_O=$(PWD)/../src/set_theoric_t/set_theoric_t.o +#SETTSRC_O=$(SETTSRC:.c=.o) +#TOOLSRC=$(TOOLDIR)/src/tools_t/tools_t.c +#TOOLSRC_O=$(TOOLSRC:.c=.o) + +#OBJ=$(SRC:.c=.o) $(FROZENLAKESRC_O) +OBJ=$(FROZENLAKESRC_O) $(LISTSRC_O) $(PERMSRC_O) $(DIMSRC_O) + +LIB_YTEST=$(YTESTDIR)/libytest.so + +all: $(EXEC) $(LIB_YTEST) + +$(EXEC): $(EXECSRC) $(OBJ) + $(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS) + +$(FROZENLAKESRC_O): $(FROZENLAKESRC) $(LISTSRC_O) $(PERMSRC_O) $(DIMSRC_O) + $(CC) -o $@ -c $^ $(CFLAGS) + +#$(LDFLAGS) + +.PHONY: clean mrproper + +clean: + rm -f $(OBJ) + +mrproper: clean + rm -f $(EXEC) + +run: $(EXEC) + $(EXEC) -h diff --git a/qlearn_0/test/compile.sh b/qlearn_0/test/compile.sh new file mode 100644 index 0000000..91c319a --- /dev/null +++ b/qlearn_0/test/compile.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +if [ "$#" -le 0 ] ; then + echo "Usage: $0 is_good.c" >&2 + echo "for example to compile: is_good.c" >&2 + exit 1 +fi +if [ "$#" -le 1 ] ; then + echo "Usage: $0 $1" >&2 + echo " we can add more option for example '-D DEBUG=1' to have debug print, '-D HK' to have gtest like prompt, od '-g' to gbd" >&2 + echo "for example: $0 $1 \"-D DEBUG=1 -D HK -g\"" +fi + +DIR_YTEST=$PWD/../../ytest_t +SRC=../src + +gcc -o launch_is_good_c $1 -L$DIR_YTEST $2 -lytest -I$DIR_YTEST/include_ytest/include $SRC/list_t/list_t.c -I$SRC +#gcc -o launch_is_good_c $1 $2 -lytest -I../include_ytest src/list_t/list_t.o src/set_theoric_t/set_theoric_t.o -I./src + +export LD_LIBRARY_PATH=$DIR_YTEST/:LD_LIBRARY_PATH + + +#gcc $1 src/ftest/ftest.c src/fmock/fmock.c src/tools_t/tools_t.c src/bar_progress/bar_progress.c src/list_t/list_t.c src/set_theoric_t/set_theoric_t.c -I./include $2 -o launch_is_good_c -lpthread diff --git a/qlearn_0/test/is_good.c b/qlearn_0/test/is_good.c new file mode 100644 index 0000000..06362e6 --- /dev/null +++ b/qlearn_0/test/is_good.c @@ -0,0 +1,48 @@ +#include "ftest/ftest.h" +#include "fmock/fmock.h" + + +#include "Frozen_Lake.h" + + +TEST(igameRabbit ){ + size_t array[] = {4,4} ; + dimension *dim = init_copy_dim(array,2); + struct game_params * params = create_game_params(1,dim,3,3,1,200,200); + struct qlearning_params * qlearnParams = create_qlearning_params(0.85,0.99,1); + struct delay_params * delay_game = create_delay_params(1000000, 200000); + struct game * gm = create_game(params, qlearnParams, delay_game); + + printDebug_dimension(gm->params->dim,"dimension game"); + mainQlearning_game(gm); + + free_game(gm); +} + + + +/* +TEST(ARG_MAX_ARRAY){ + + double Q[6] = {0.5,-2.3,-9,3,21,-65}; + size_t mx = ARG_MAX_ARRAY_TYPE_DOUBLE(Q, 6) ; + size_t mn = ARG_MIN_ARRAY_TYPE_DOUBLE(Q, 6) ; + printf("mx = %ld, Q[%ld ] = %lf \n", mx,mx,Q[mx]); + printf("mn = %ld, Q[%ld ] = %lf \n", mn,mn,Q[mn]); + + double a =-5; + double b=4; + + printf(" %lf vs %lf : %d \n ",a,b,COMPARE_N_TYPE_DOUBLE(&a,&b)); + printf(" %lf vs %lf : %d \n ",b,a,COMPARE_N_TYPE_DOUBLE(&b,&a)); + +} +*/ + +int main(int argc, char **argv){ + + + run_all_tests_args(argc, argv); + + return 0; +}