diff --git a/list_t/src/list_t/list_t.c b/list_t/src/list_t/list_t.c index 4a73063..8ae3e3a 100644 --- a/list_t/src/list_t/list_t.c +++ b/list_t/src/list_t/list_t.c @@ -3,7 +3,7 @@ #if 0 -#define GEN_LIST_ALL(type)\ +#define GEN_LIST_ALL_L(type)\ \ struct main_list_##type *create_var_list_##type(){\ struct main_list_##type *ret_var_list = malloc(sizeof(struct main_list_##type));\ @@ -135,6 +135,7 @@ #endif + GEN_LIST_ALL(TYPE_CHAR) GEN_LIST_ALL(TYPE_U_CHAR) GEN_LIST_ALL(TYPE_INT) diff --git a/list_t/src/list_t/list_t.h b/list_t/src/list_t/list_t.h index aa5526b..db32d96 100644 --- a/list_t/src/list_t/list_t.h +++ b/list_t/src/list_t/list_t.h @@ -22,9 +22,12 @@ void push_back_list_##type(struct main_list_##type *var_list, type value);\ void push_front_list_##type(struct main_list_##type *var_list, type value);\ size_t move_current_to_index_list_##type(struct main_list_##type *var_list, size_t index);\ + size_t move_current_to_begin_list_##type(struct main_list_##type *var_list);\ + size_t move_current_to_end_list_##type(struct main_list_##type *var_list);\ void insert_into_list_##type(struct main_list_##type *var_list, size_t index, type value );\ void remove_index_from_list_##type(struct main_list_##type *var_list, size_t index );\ void free_all_var_list_##type(struct main_list_##type *var_list);\ + void remove_all_list_in_##type(struct main_list_##type *var_list);\ void increment_list_##type(struct main_list_##type * var_list);\ void decrement_list_##type(struct main_list_##type * var_list);\ @@ -43,7 +46,11 @@ GENERATE_LIST_ALL(TYPE_STRING) GENERATE_LIST_ALL(TYPE_PTR) +#define FOR_LIST_FORM_BEGIN(type, var_list)\ + for(move_current_to_begin_list_##type(var_list); var_list->current_list; increment_list_##type(var_list)) +#define FOR_LIST_FORM_END(type, var_list)\ + for(move_current_to_end_list_##type(var_list); var_list->current_list; decrement_list_##type(var_list)) #define GEN_LIST_ALL(type)\ @@ -117,6 +124,18 @@ GENERATE_LIST_ALL(TYPE_PTR) 
var_list->current_index = index;\ return index;\ }\ + size_t move_current_to_begin_list_##type(struct main_list_##type *var_list){\ + if(var_list->begin_list == NULL) return 0;\ + var_list->current_list = var_list->begin_list;\ + var_list->current_index = 0;\ + return 0;\ + }\ + size_t move_current_to_end_list_##type(struct main_list_##type *var_list){\ + if(var_list->end_list == NULL) return 0;\ + var_list->current_list = var_list->end_list;\ + var_list->current_index = var_list->size - 1;\ + return var_list->current_index;\ + }\ void insert_into_list_##type(struct main_list_##type *var_list, size_t index, type value ){\ struct list_##type * list_to_add = malloc(sizeof(struct list_##type));\ list_to_add->value = value;\ @@ -158,13 +177,21 @@ GENERATE_LIST_ALL(TYPE_PTR) \ }\ }\ - void free_all_var_list_##type(struct main_list_##type *var_list){\ + void remove_all_list_in_##type(struct main_list_##type *var_list){\ struct list_##type *tmp = var_list->begin_list;\ while(tmp){\ var_list->current_list = tmp;\ tmp = tmp->next;\ free(var_list->current_list);\ }\ + var_list->begin_list = NULL;\ + var_list->current_list = NULL;\ + var_list->end_list = NULL;\ + var_list->size = 0;\ + var_list->current_index = 0;\ + }\ + void free_all_var_list_##type(struct main_list_##type *var_list){\ + remove_all_list_in_##type(var_list);\ free(var_list);\ }\ void increment_list_##type(struct main_list_##type * var_list){\ diff --git a/list_t/test/is_good.c b/list_t/test/is_good.c index 780b277..03797bc 100644 --- a/list_t/test/is_good.c +++ b/list_t/test/is_good.c @@ -49,6 +49,8 @@ TEST(insert){ TEST(remove){ struct main_list_TYPE_INT * var_list_int = create_var_list_TYPE_INT(); +remove_all_list_in_TYPE_INT(var_list_int); + for(int i=0; i<5; ++i) push_back_list_TYPE_INT(var_list_int, i); @@ -74,6 +76,35 @@ TEST(remove){ } +TEST(remove_All){ + struct main_list_TYPE_INT * var_list_int = create_var_list_TYPE_INT(); + + for(int i=0; i<5; ++i) + push_back_list_TYPE_INT(var_list_int, i); + + 
for(int i=0; i<10; ++i) + insert_into_list_TYPE_INT(var_list_int, i, -2*i+1); + + for(int i=var_list_int->size; i< 25; ++i) + insert_into_list_TYPE_INT(var_list_int, i, 3*i+1); + + for(move_current_to_index_list_TYPE_INT(var_list_int, 0); var_list_int->current_list; increment_list_TYPE_INT(var_list_int)) + LOG("cur %ld : %d : size :%ld \n", var_list_int->current_index, (var_list_int->current_list)->value, var_list_int->size); + + remove_all_list_in_TYPE_INT(var_list_int); + for(int i=0; i<5; ++i) + push_back_list_TYPE_INT(var_list_int, 10*i); + + + + LOG("%s"," =============== \n"); + + for(move_current_to_index_list_TYPE_INT(var_list_int, 0); var_list_int->current_list; increment_list_TYPE_INT(var_list_int)) + LOG("cur %ld : %d : size :%ld \n", var_list_int->current_index, (var_list_int->current_list)->value, var_list_int->size); + + free_all_var_list_TYPE_INT(var_list_int); + +} /* struct test_c { int value; @@ -115,6 +146,9 @@ TEST(list_TYPE_PTR){ free_all_var_list_TYPE_PTR(var_list_ptr); + free(t0); + free(t1); + free(t2); } diff --git a/qlearn_0/src/rabbit_learn.c b/qlearn_0/src/rabbit_learn.c index 8ec01c9..3ea69f9 100644 --- a/qlearn_0/src/rabbit_learn.c +++ b/qlearn_0/src/rabbit_learn.c @@ -1 +1,296 @@ + #include "rabbit_learn.h" + +int ADD_MOVE[ACTION_COUNT]={ MOVE_DOWN, MOVE_LEFT, MOVE_RIGHT, MOVE_UP }; +char * action_name ="DLRU"; +char * content_name ="ESCBF"; + +struct qlearning_params * create_qlearning_params( + double learning_rate, + double discount_factor, + double exploration_factor +){ + struct qlearning_params * q_param = malloc(sizeof(struct qlearning_params)); + q_param->learning_rate = learning_rate; + q_param->discount_factor = discount_factor; + q_param->exploration_factor = exploration_factor; + +return q_param; +} +struct game_params * create_game_params ( + size_t limit_game_number, + dimension *dim, + size_t limit_FOX_number, + size_t limit_BLOCK_number, + size_t limit_CARROT_number, + size_t limit_EPISODES_number, + size_t 
limit_MOVE_number +){ + struct game_params * gm_param = malloc(sizeof(struct game_params)); + gm_param->limit_game_number = limit_game_number; + gm_param->dim = dim; + gm_param->limit_FOX_number = limit_FOX_number; + gm_param->limit_BLOCK_number = limit_BLOCK_number; + gm_param->limit_CARROT_number = limit_CARROT_number; + gm_param->limit_EPISODES_number = limit_EPISODES_number; + gm_param->limit_MOVE_number = limit_MOVE_number; + + return gm_param; +} + +struct cell * create_game_cells(dimension *dim){ + struct cell * cells = malloc((dim)->rank * sizeof(struct cell)); + for(long int i=0; i<(dim)->rank; ++i) + (cells[i]).Q = malloc(ACTION_COUNT * sizeof(double)); + + return cells; +} + +struct delay_params * create_delay_params ( + size_t delay_episodes, + size_t delay_game +){ + struct delay_params *delay = malloc(sizeof(struct delay_params)); + delay->delay_between_episodes = delay_episodes; + delay->delay_between_games = delay_game; + return delay; +} + +void reset_game_status(struct game_status * status){ + status->rabbitRankPosition = status->startRankPosition ; + status->endGame = false; + status->count_MOVES = 0; + + status->final_reward = 0; + +} + +struct game_status * create_game_status(){ + struct game_status *gm_stat = calloc(1, sizeof(struct game_status)); /* zero-filled: reset_game_status() reads startRankPosition */ + reset_game_status(gm_stat); + return gm_stat; +} + +#define GENERATE_RANDOM_PLACES(CONTENT) \ + int CONTENT##_Number = rand() % (params->limit_##CONTENT##_number) + 1;\ + for(int i=0; i< CONTENT##_Number; ++i) { \ + do{\ + random = rand() % (dim->rank);\ + }while((gm->cells[random]).content != EMPTY);\ + (gm->cells[random]).content = CONTENT;\ + } + +long int generate_game(struct game *gm){ + struct game_params *params = gm->params; + struct game_status *status = gm->status; /* start/rabbit positions chosen below are stored in the game's own status */ + dimension *dim = params->dim; + for(long int i=0; i<(params->dim)->rank; ++i){ + (gm->cells[i]).rankPosition = i; + (gm->cells[i]).content = 0; + for(long int j=0; j < ACTION_COUNT; ++j) + (gm->cells[i]).Q[j] = 0; + } + srand(time(NULL)); +
int random; + GENERATE_RANDOM_PLACES(FOX) + GENERATE_RANDOM_PLACES(CARROT) + GENERATE_RANDOM_PLACES(BLOCK) + do{ + random = rand() % (dim->rank); + }while((gm->cells[random]).content != EMPTY); + (gm->cells[random]).content = START; + + (status)->startRankPosition = random; + (status)->rabbitRankPosition = random; + + return (status->startRankPosition); +} + +struct game * create_game( + struct game_params *params, + struct qlearning_params *qlearnParams, + struct delay_params * delay +){ + struct game *gm = malloc(sizeof(struct game)); + gm->params = params; + gm->qlearnParams = qlearnParams; + gm->cells = create_game_cells(params->dim); + gm->delay = delay; // create_delay_params(10000000, 2000000); + gm->status = create_game_status(); + generate_game(gm); + + return gm; +} + + +//void free_qlearning_params(struct qlearning_params * qlearn_params); +//void free_game_params(struct game_params *game_prms); +//void free_cells(struct cell * cells); +//void free_game_status(struct game_status *status); +void free_game(struct game *gm){ + for(long int i = 0; i < ((gm->params)->dim)->rank; ++i){ + free((gm->cells[i]).Q); + } + free(gm->cells); + free_dimension((gm->params)->dim); + free(gm->params); + free(gm->qlearnParams); + free(gm->delay); + free(gm->status); +} + +void move_game(struct game *gm, enum Action action){ + struct game_params *params = gm->params; + struct game_status *status = gm->status; + struct cell * cells = gm->cells; + + ++(status->count_MOVES); + long int newRankPosition = status->rabbitRankPosition + ADD_MOVE[action]; + + if (newRankPosition < 0 || newRankPosition >= (params->dim)->rank) status->reward = REWARD_OUT; + else if (status->count_MOVES > params->limit_MOVE_number) { + status->rabbitRankPosition = newRankPosition; + status->endGame = true; + status->reward = REWARD_MOVES_OUT; + } + else if (cells[newRankPosition].content == BLOCK) { + status->reward = REWARD_BLOCK; + } + else if (cells[newRankPosition].content == FOX){ + 
status->rabbitRankPosition = newRankPosition; + status->endGame = true; + status->reward = REWARD_FOX; + } + else if (cells[newRankPosition].content == CARROT) { + status->rabbitRankPosition = newRankPosition; + status->endGame = true; + status->reward = REWARD_CARROT; + } + else { + status->rabbitRankPosition = newRankPosition; + status->reward = REWARD_EMPTY; + } +} + + +void printLine(char c, int l, bool prec) { + //printf("\033[00m"); + printf("\033[%d;0%dm", 0, 1); // noir // vide + if (prec) printf("%*c\n", 10, ' '); + for (int i = 0; i < l; i++) printf("%c", c); + printf("%*c\n", 10, ' '); +} +void printLinec(char c, int l) { + printLine(c, l, true); +} + +void code2dCoul(dimension *color, enum Content content) { + if( color && color->size > 1 ){ + if (content) { + color->perm[0] = content + 31; color->perm[1] = 1; + } + else { + color->perm[0] = 0; color->perm[1] = 1; + } + } +} + +void gotoxy(int x, int y) +{ + printf("%c[%d;%df", 0x1B, y, x); +} + +void print_game_dim2(struct game *gm) { + struct cell *cells = gm->cells; + dimension *dim = ((gm->params)->dim); + + if(dim->size == 2){ + dimension *coul = create_dim(2); + long int *coord = malloc(2*sizeof(long int)); + int mult = dim->perm[0] * 14; + char sep = '-'; + gotoxy(0, 10); + for (long int j = 0; j < dim->perm[1]; j++) { + for (long int i = 0; i < dim->perm[0]; i++) { + long int cur = i + j * dim->perm[0]; + code2dCoul(coul, cells[cur].content); + printf("\033[%ld;0%ldm", coul->perm[0], coul->perm[0]); + //printf("s:%2d,(%d,%d),%2c|", cells[cur].rankPosition, cells[cur].pos.x, cells[cur].pos.y, cont_name[cells[cur].c]); + printf("s:%2ld,(%ld,%ld),%2c|", cur, i , j , content_name[cells[cur].content]); + } + printLinec(sep, mult); + for (long int k = 0; k < ACTION_COUNT; k++) { + for (long int i = 0; i < dim->perm[0]; i++) { + long int cur = i + j * dim->perm[0]; + code2dCoul(coul, cells[cur].content); + printf("\033[%ld;%ldm", coul->perm[0], coul->perm[1]); + if ( (gm->status)->rabbitRankPosition == 
cur ){ /*lapin_pos.x == i && lapin_pos.y == j */ + printf("\033[37;01m"); + } + printf("%2c: %8.4f |", action_name[k], cells[cur].Q[k]); + } + printf("%*c\n", 10, ' '); + } + printLine(sep, mult, false); + } + free_dimension(coul); + free(coord); + } +} + + +void mainQlearning_game(struct game *gm){ + struct main_list_TYPE_L_INT * list_final_rewards = create_var_list_TYPE_L_INT(); + + struct game_params *params = gm->params; + struct game_status *status = gm->status; + struct qlearning_params * qlearnParams = gm->qlearnParams; + struct cell * cells = gm->cells; + enum Action action; + int random; + long int NUMBER_EPISODE2 = (params->limit_EPISODES_number) * (params->limit_EPISODES_number); + double proba_explor; + srand(time(NULL)); + + for(size_t k=0 ; k < params->limit_game_number; ++k){ + for(size_t episode = 0; episode < params->limit_EPISODES_number; ++episode){ + reset_game_status(status); + generate_game(gm); + + while(!(status->endGame)){ + random = rand() % NUMBER_EPISODE2; + proba_explor = (double)random / NUMBER_EPISODE2; + if( proba_explor < qlearnParams->exploration_factor * (1. 
/ ((episode / 10 ) + 1))){ + action = rand() % ACTION_COUNT; + printf("exploration action "); + } + else{ + action = ARG_MAX_ARRAY_TYPE_DOUBLE( cells[status->rabbitRankPosition].Q, ACTION_COUNT ); + printf("greedy action "); + } + + long int prevRankPosition = status->rabbitRankPosition; move_game(gm, action); /* keep the state the action was taken from; move_game() may move the rabbit */ + + printf("action = %d rbPos = %ld, rwds = %ld, final_rwrd=%ld\n",action, status->rabbitRankPosition,status->reward, status->final_reward); + // Q-learning update: Q(s,a) += learning_rate * (reward + discount_factor * max Q(s',.) - Q(s,a)), with s = position before the move and s' = position after it + cells[prevRankPosition].Q[action] = cells[prevRankPosition].Q[action] + (qlearnParams->learning_rate * (status->reward + qlearnParams->discount_factor * MAX_ARRAY_TYPE_DOUBLE(cells[status->rabbitRankPosition].Q, ACTION_COUNT) - cells[prevRankPosition].Q[action])); + //cells[status->rabbitRankPosition].Q[action] = (1 - qlearnParams->learning_rate) * cells[status->rabbitRankPosition].Q[action] + (qlearnParams->learning_rate * (status->reward + qlearnParams->discount_factor * MAX_ARRAY_TYPE_DOUBLE(cells[status->rabbitRankPosition].Q, ACTION_COUNT ))); + + status->final_reward += status->reward; + + print_game_dim2(gm); + + usleep((gm->delay)->delay_between_episodes); + + } + push_back_list_TYPE_L_INT(list_final_rewards, status->final_reward); + } + + FOR_LIST_FORM_BEGIN(TYPE_L_INT, list_final_rewards){ + printf(" %ld ",(list_final_rewards->current_list)->value); + } + remove_all_list_in_TYPE_L_INT(list_final_rewards); + usleep((gm->delay)->delay_between_games); + } + free(list_final_rewards); +} diff --git a/qlearn_0/src/rabbit_learn.h b/qlearn_0/src/rabbit_learn.h index 02f4a42..d77174e 100644 --- a/qlearn_0/src/rabbit_learn.h +++ b/qlearn_0/src/rabbit_learn.h @@ -1,37 +1,111 @@ #ifndef __RABBIT_LEARN__H_C_ #define __RABBIT_LEARN__H_C_ +#include +#include + +//#include "tools_t/tools_t.h" #include "dimension_t/dimension_t.h" #include "list_t/list_t.h" +#define ACTION_COUNT 4 +/* Down(0), Left(1), Right(2), Up(3) */ +enum Action { DOWN, LEFT, RIGHT, UP }; +enum MoveAction { MOVE_DOWN = -2, MOVE_LEFT =
-1, MOVE_RIGHT = 1, MOVE_UP = 2 }; + +#define CONTENT_COUNT 5 +/* Empty(0), Start(1), Carrot(2), Block(3), Fox(4) */ +enum Content { EMPTY, START, CARROT, BLOCK, FOX }; + +enum Reward { REWARD_MOVES_OUT=-10, REWARD_OUT=-1, REWARD_EMPTY=-1, REWARD_CARROT=10, REWARD_BLOCK=-1, REWARD_FOX=-10 }; + struct game_params { - size_t num_episods; + size_t limit_game_number; dimension *dim; - size_t max_fox_number; - size_t max_block_number; - size_t max_carrot_number; + size_t limit_FOX_number; + size_t limit_BLOCK_number; + size_t limit_CARROT_number; + size_t limit_EPISODES_number; + size_t limit_MOVE_number; +}; + +struct delay_params { + size_t delay_between_episodes; + size_t delay_between_games; +}; + +struct game_status { + long int startRankPosition; + long int rabbitRankPosition; + bool endGame; + long int reward; + long int final_reward; + size_t count_MOVES; }; struct qlearning_params { double learning_rate; double discount_factor; double exploration_factor; - }; struct cell { long int rankPosition; - int state; + int content; double *Q; }; -struct action { - int value; - int addMove; +struct game { + struct game_params *params; + struct delay_params *delay; + struct qlearning_params *qlearnParams; + struct cell *cells; + struct game_status *status; }; -struct states * create_states(dimension *dim); -void free_states(struct states *etat); +struct qlearning_params * create_qlearning_params( + double learning_rate, + double discount_factor, + double exploration_factor +); + +struct game_params * create_game_params ( + size_t limit_game_number, + dimension *dim, + size_t limit_FOX_number, + size_t limit_BLOCK_number, + size_t limit_CARROT_number, + size_t limit_EPISODES_number, + size_t limit_MOVE_number +); + +struct delay_params * create_delay_params ( + size_t delay_episodes, + size_t delay_game +); + +struct game_status * create_game_status(); + +struct game * create_game( + struct game_params *params, + struct qlearning_params *qlearnParams, + struct delay_params 
*delay +); + +void reset_game(struct game *gm); +long int generate_game(struct game *gm); + +void move_game(struct game *gm, enum Action action); + +//void free_qlearning_params(struct qlearning_params * qlearn_params); +//void free_game_params(struct game_params *game_prms); +//void free_cells(struct cell * cells); +//void free_game_status(struct game_status *status); +void free_game(struct game *gm); + + +void mainQlearning_game(struct game *gm); + #endif /* __RABBIT_LEARN__H_C_ */ diff --git a/ytest_t/libytest.so b/ytest_t/libytest.so index 4a6c779..91b8db2 100644 Binary files a/ytest_t/libytest.so and b/ytest_t/libytest.so differ diff --git a/ytools_t/src/tools_t/tools_t.c b/ytools_t/src/tools_t/tools_t.c index f5c29cb..09bd460 100644 --- a/ytools_t/src/tools_t/tools_t.c +++ b/ytools_t/src/tools_t/tools_t.c @@ -120,7 +120,8 @@ long int PRECISION_TYPE_L_DOUBLE = 100000000000000; PRINT_DEBUG_(" diff = %s a=%s b=%s PRECISION : %ld\n",str_diff, str_a, str_b, PRECISION_##type);\ free(str_diff); free(str_a); free(str_b);\ */if ((diff < 1) && (diff > -1) ) return 0; \ - return diff; \ + if (diff <= -1) return -1; \ + return 1; \ } \ \ void COPY_ARRAY_##type(type *dst, const type *src, size_t size){ \