rename rabbit_learn to Frozen_Lake
This commit is contained in:
@@ -1,7 +1,6 @@
|
|||||||
|
|
||||||
#include "rabbit_learn.h"
|
#include "Frozen_Lake.h"
|
||||||
|
|
||||||
int ADD_MOVE[ACTION_COUNT]={ MOVE_DOWN, MOVE_LEFT, MOVE_RIGHT, MOVE_UP };
|
|
||||||
char * action_name ="DLRU";
|
char * action_name ="DLRU";
|
||||||
char * content_name ="ESCBF";
|
char * content_name ="ESCBF";
|
||||||
|
|
||||||
@@ -35,6 +34,17 @@ struct game_params * create_game_params (
|
|||||||
gm_param->limit_EPISODES_number = limit_EPISODES_number;
|
gm_param->limit_EPISODES_number = limit_EPISODES_number;
|
||||||
gm_param->limit_MOVE_number = limit_MOVE_number;
|
gm_param->limit_MOVE_number = limit_MOVE_number;
|
||||||
|
|
||||||
|
gm_param->add_move = malloc(ACTION_COUNT * sizeof(long int));
|
||||||
|
/*
|
||||||
|
gm_param->add_move[DOWN] = signedLineFromCoord((long int []){0,-1}, dim);
|
||||||
|
gm_param->add_move[LEFT] = signedLineFromCoord((long int []){-1,0}, dim);
|
||||||
|
gm_param->add_move[RIGHT] = signedLineFromCoord((long int []){1,0}, dim);
|
||||||
|
gm_param->add_move[UP] = signedLineFromCoord((long int []){0,1}, dim);
|
||||||
|
*/
|
||||||
|
gm_param->add_move[DOWN] = dim->perm[0];
|
||||||
|
gm_param->add_move[LEFT] = -1;
|
||||||
|
gm_param->add_move[RIGHT] = 1;
|
||||||
|
gm_param->add_move[UP] = -1*dim->perm[0];
|
||||||
return gm_param;
|
return gm_param;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -57,6 +67,7 @@ struct delay_params * create_delay_params (
|
|||||||
}
|
}
|
||||||
|
|
||||||
void reset_game_status(struct game_status * status){
|
void reset_game_status(struct game_status * status){
|
||||||
|
status->rabbitOldRankPosition = status->startRankPosition ;
|
||||||
status->rabbitRankPosition = status->startRankPosition ;
|
status->rabbitRankPosition = status->startRankPosition ;
|
||||||
status->endGame = false;
|
status->endGame = false;
|
||||||
status->count_MOVES = 0;
|
status->count_MOVES = 0;
|
||||||
@@ -82,11 +93,11 @@ struct game_status * create_game_status(){
|
|||||||
|
|
||||||
long int generate_game(struct game *gm){
|
long int generate_game(struct game *gm){
|
||||||
struct game_params *params = gm->params;
|
struct game_params *params = gm->params;
|
||||||
struct game_status *status;
|
struct game_status *status = gm->status;
|
||||||
dimension *dim = params->dim;
|
dimension *dim = params->dim;
|
||||||
for(long int i=0; i<(params->dim)->rank; ++i){
|
for(long int i=0; i<(params->dim)->rank; ++i){
|
||||||
(gm->cells[i]).rankPosition = i;
|
(gm->cells[i]).rankPosition = i;
|
||||||
(gm->cells[i]).content = 0;
|
(gm->cells[i]).content = EMPTY; //0
|
||||||
for(long int j=0; j < ACTION_COUNT; ++j)
|
for(long int j=0; j < ACTION_COUNT; ++j)
|
||||||
(gm->cells[i]).Q[j] = 0;
|
(gm->cells[i]).Q[j] = 0;
|
||||||
}
|
}
|
||||||
@@ -101,6 +112,7 @@ long int generate_game(struct game *gm){
|
|||||||
(gm->cells[random]).content = START;
|
(gm->cells[random]).content = START;
|
||||||
|
|
||||||
(status)->startRankPosition = random;
|
(status)->startRankPosition = random;
|
||||||
|
(status)->rabbitOldRankPosition = random;
|
||||||
(status)->rabbitRankPosition = random;
|
(status)->rabbitRankPosition = random;
|
||||||
|
|
||||||
return (status->startRankPosition);
|
return (status->startRankPosition);
|
||||||
@@ -117,6 +129,8 @@ struct game * create_game(
|
|||||||
gm->cells = create_game_cells(params->dim);
|
gm->cells = create_game_cells(params->dim);
|
||||||
gm->delay = delay; // create_delay_params(10000000, 2000000);
|
gm->delay = delay; // create_delay_params(10000000, 2000000);
|
||||||
gm->status = create_game_status();
|
gm->status = create_game_status();
|
||||||
|
|
||||||
|
|
||||||
generate_game(gm);
|
generate_game(gm);
|
||||||
|
|
||||||
return gm;
|
return gm;
|
||||||
@@ -133,10 +147,33 @@ void free_game(struct game *gm){
|
|||||||
}
|
}
|
||||||
free(gm->cells);
|
free(gm->cells);
|
||||||
free_dimension((gm->params)->dim);
|
free_dimension((gm->params)->dim);
|
||||||
|
free((gm->params)->add_move);
|
||||||
free(gm->params);
|
free(gm->params);
|
||||||
free(gm->qlearnParams);
|
free(gm->qlearnParams);
|
||||||
free(gm->delay);
|
free(gm->delay);
|
||||||
free(gm->status);
|
free(gm->status);
|
||||||
|
free(gm);
|
||||||
|
}
|
||||||
|
|
||||||
|
long int function_add_move(struct game *gm, enum Action action){
|
||||||
|
struct game_status * status = gm->status;
|
||||||
|
dimension *dim = (gm->params)->dim;
|
||||||
|
|
||||||
|
if(action == UP && (status->rabbitRankPosition / dim->perm[0]) == 0){
|
||||||
|
return dim->rank * -1;
|
||||||
|
}
|
||||||
|
if(action == DOWN && (status->rabbitRankPosition / dim->perm[0]) == dim->perm[1]-1){
|
||||||
|
return dim->rank ;
|
||||||
|
}
|
||||||
|
if(action == LEFT && (status->rabbitRankPosition % dim->perm[0]) == 0){
|
||||||
|
return dim->rank * -1;
|
||||||
|
}
|
||||||
|
if(action == RIGHT && (status->rabbitRankPosition % dim->perm[0]) == dim->perm[1]-1){
|
||||||
|
return dim->rank;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (gm->params)->add_move[action];
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void move_game(struct game *gm, enum Action action){
|
void move_game(struct game *gm, enum Action action){
|
||||||
@@ -145,9 +182,11 @@ void move_game(struct game *gm, enum Action action){
|
|||||||
struct cell * cells = gm->cells;
|
struct cell * cells = gm->cells;
|
||||||
|
|
||||||
++(status->count_MOVES);
|
++(status->count_MOVES);
|
||||||
long int newRankPosition = status->rabbitRankPosition + ADD_MOVE[action];
|
long int newRankPosition = status->rabbitRankPosition + function_add_move(gm, action);
|
||||||
|
|
||||||
if (newRankPosition < 0 || newRankPosition >= (params->dim)->rank) status->reward = REWARD_OUT;
|
if (newRankPosition < 0 || newRankPosition >= (params->dim)->rank) {
|
||||||
|
status->reward = REWARD_OUT;
|
||||||
|
}
|
||||||
else if (status->count_MOVES > params->limit_MOVE_number) {
|
else if (status->count_MOVES > params->limit_MOVE_number) {
|
||||||
status->rabbitRankPosition = newRankPosition;
|
status->rabbitRankPosition = newRankPosition;
|
||||||
status->endGame = true;
|
status->endGame = true;
|
||||||
@@ -252,10 +291,10 @@ void mainQlearning_game(struct game *gm){
|
|||||||
double proba_explor;
|
double proba_explor;
|
||||||
srand(time(NULL));
|
srand(time(NULL));
|
||||||
|
|
||||||
for(size_t k=0 ; k < params->limit_game_number; ++k){
|
for(long int k=0 ; k < params->limit_game_number; ++k){
|
||||||
for(size_t episode = 0; episode < params->limit_EPISODES_number; ++episode){
|
|
||||||
reset_game_status(status);
|
|
||||||
generate_game(gm);
|
generate_game(gm);
|
||||||
|
for(long int episode = 0; episode < params->limit_EPISODES_number; ++episode){
|
||||||
|
reset_game_status(status);
|
||||||
|
|
||||||
while(!(status->endGame)){
|
while(!(status->endGame)){
|
||||||
random = rand() % NUMBER_EPISODE2;
|
random = rand() % NUMBER_EPISODE2;
|
||||||
@@ -269,18 +308,20 @@ void mainQlearning_game(struct game *gm){
|
|||||||
printf("greedy action ");
|
printf("greedy action ");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
status->rabbitOldRankPosition = status->rabbitRankPosition;
|
||||||
move_game(gm, action);
|
move_game(gm, action);
|
||||||
|
|
||||||
printf("action = %d rbPos = %ld, rwds = %ld, final_rwrd=%ld\n",action, status->rabbitRankPosition,status->reward, status->final_reward);
|
printf("ik=%ld, episode = %ld , action = %d rbPos = %ld, rwds = %ld, final_rwrd=%ld\n",k,episode,action, status->rabbitRankPosition,status->reward, status->final_reward);
|
||||||
// update Q array on the action of the state
|
// update Q array on the action of the state
|
||||||
cells[status->rabbitRankPosition].Q[action] = cells[status->rabbitRankPosition].Q[action] + (qlearnParams->learning_rate * (status->reward + qlearnParams->discount_factor * MAX_ARRAY_TYPE_DOUBLE(cells[status->rabbitRankPosition].Q, ACTION_COUNT) - cells[status->rabbitRankPosition].Q[action]));
|
cells[status->rabbitOldRankPosition].Q[action] = cells[status->rabbitOldRankPosition].Q[action] + (qlearnParams->learning_rate * (status->reward + qlearnParams->discount_factor * MAX_ARRAY_TYPE_DOUBLE(cells[status->rabbitRankPosition].Q, ACTION_COUNT) - cells[status->rabbitOldRankPosition].Q[action]));
|
||||||
|
//cells[status->rabbitRankPosition].Q[action] = cells[status->rabbitRankPosition].Q[action] + (qlearnParams->learning_rate * (status->reward + qlearnParams->discount_factor * MAX_ARRAY_TYPE_DOUBLE(cells[status->rabbitRankPosition].Q, ACTION_COUNT) - cells[status->rabbitRankPosition].Q[action]));
|
||||||
//cells[status->rabbitRankPosition].Q[action] = (1 - qlearnParams->learning_rate) * cells[status->rabbitRankPosition].Q[action] + (qlearnParams->learning_rate * (status->reward + qlearnParams->discount_factor * MAX_ARRAY_TYPE_DOUBLE(cells[status->rabbitRankPosition].Q, ACTION_COUNT )));
|
//cells[status->rabbitRankPosition].Q[action] = (1 - qlearnParams->learning_rate) * cells[status->rabbitRankPosition].Q[action] + (qlearnParams->learning_rate * (status->reward + qlearnParams->discount_factor * MAX_ARRAY_TYPE_DOUBLE(cells[status->rabbitRankPosition].Q, ACTION_COUNT )));
|
||||||
|
|
||||||
status->final_reward += status->reward;
|
status->final_reward += status->reward;
|
||||||
|
|
||||||
print_game_dim2(gm);
|
print_game_dim2(gm);
|
||||||
|
|
||||||
usleep((gm->delay)->delay_between_episodes);
|
// usleep((gm->delay)->delay_between_episodes);
|
||||||
|
|
||||||
}
|
}
|
||||||
push_back_list_TYPE_L_INT(list_final_rewards, status->final_reward);
|
push_back_list_TYPE_L_INT(list_final_rewards, status->final_reward);
|
||||||
@@ -290,7 +331,7 @@ void mainQlearning_game(struct game *gm){
|
|||||||
printf(" %ld ",(list_final_rewards->current_list)->value);
|
printf(" %ld ",(list_final_rewards->current_list)->value);
|
||||||
}
|
}
|
||||||
remove_all_list_in_TYPE_L_INT(list_final_rewards);
|
remove_all_list_in_TYPE_L_INT(list_final_rewards);
|
||||||
usleep((gm->delay)->delay_between_games);
|
// usleep((gm->delay)->delay_between_games);
|
||||||
}
|
}
|
||||||
free(list_final_rewards);
|
free(list_final_rewards);
|
||||||
}
|
}
|
||||||
@@ -11,7 +11,7 @@
|
|||||||
#define ACTION_COUNT 4
|
#define ACTION_COUNT 4
|
||||||
/* Down(0), Left(1), Right(2), Up(3) */
|
/* Down(0), Left(1), Right(2), Up(3) */
|
||||||
enum Action { DOWN, LEFT, RIGHT, UP };
|
enum Action { DOWN, LEFT, RIGHT, UP };
|
||||||
enum MoveAction { MOVE_DOWN = -2, MOVE_LEFT = -1, MOVE_RIGHT = 1, MOVE_UP = 2 };
|
//enum MoveAction { MOVE_DOWN = -2, MOVE_LEFT = -1, MOVE_RIGHT = 1, MOVE_UP = 2 };
|
||||||
|
|
||||||
#define CONTENT_COUNT 5
|
#define CONTENT_COUNT 5
|
||||||
/* Empty(0), Start(1), Carrot(2), Block(3), Fox(4) */
|
/* Empty(0), Start(1), Carrot(2), Block(3), Fox(4) */
|
||||||
@@ -22,6 +22,7 @@ enum Reward { REWARD_MOVES_OUT=-10, REWARD_OUT=-1, REWARD_EMPTY=-1, REWARD_CARRO
|
|||||||
struct game_params {
|
struct game_params {
|
||||||
size_t limit_game_number;
|
size_t limit_game_number;
|
||||||
dimension *dim;
|
dimension *dim;
|
||||||
|
long int *add_move;
|
||||||
size_t limit_FOX_number;
|
size_t limit_FOX_number;
|
||||||
size_t limit_BLOCK_number;
|
size_t limit_BLOCK_number;
|
||||||
size_t limit_CARROT_number;
|
size_t limit_CARROT_number;
|
||||||
@@ -37,6 +38,7 @@ struct delay_params {
|
|||||||
struct game_status {
|
struct game_status {
|
||||||
long int startRankPosition;
|
long int startRankPosition;
|
||||||
long int rabbitRankPosition;
|
long int rabbitRankPosition;
|
||||||
|
long int rabbitOldRankPosition;
|
||||||
bool endGame;
|
bool endGame;
|
||||||
long int reward;
|
long int reward;
|
||||||
long int final_reward;
|
long int final_reward;
|
||||||
@@ -0,0 +1,75 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
NAME_TEST=is_good
|
||||||
|
CC=gcc
|
||||||
|
ROOT_DIR=$(PWD)
|
||||||
|
FROZENLAKEDIR=$(PWD)/..
|
||||||
|
YTESTDIR=$(PWD)/../../ytest_t
|
||||||
|
YPERMDIR=$(PWD)/../../ypermutation_t
|
||||||
|
DIMDIR=$(PWD)/../../dimension_t
|
||||||
|
LISTDIR=$(PWD)/../../list_t
|
||||||
|
|
||||||
|
INCLUDE_DIR=$(PWD)/../src
|
||||||
|
CFLAGS=-I$(INCLUDE_DIR) -I$(YTESTDIR)/include_ytest/include -I$(YPERMDIR)/src -I$(DIMDIR)/src -I$(LISTDIR)/src
|
||||||
|
LDFLAGS=-L$(YTESTDIR) -lytest #"-D DEBUG=1"
|
||||||
|
|
||||||
|
#SRC_DIR=$(ROOT_DIR)/src
|
||||||
|
#SRC=$(wildcard */*/*.c)
|
||||||
|
SRC=$(wildcard **.c)
|
||||||
|
#HEADS=$(OBJS:.o=.h)
|
||||||
|
TEST_DIR=$(PWD)
|
||||||
|
EXECSRC=$(NAME_TEST).c
|
||||||
|
EXEC=launch_$(NAME_TEST)_m
|
||||||
|
LISTSRC_O=$(LISTDIR)/src/list_t/list_t.o
|
||||||
|
|
||||||
|
FROZENLAKESRC=$(FROZENLAKEDIR)/src/Frozen_Lake.c
|
||||||
|
FROZENLAKESRC_O=$(FROZENLAKESRC:.c=.o)
|
||||||
|
|
||||||
|
|
||||||
|
PERMSRC_O=$(YPERMDIR)/src/permutation_t/permutation_t.o
|
||||||
|
DIMSRC_O=$(DIMDIR)/src/dimension_t/dimension_t.o
|
||||||
|
|
||||||
|
|
||||||
|
TOPTARGETS := all clean
|
||||||
|
|
||||||
|
# Directories of the sibling projects this test depends on; each one is
# handed to "$(MAKE) -C". $(PERMSRC) was never defined (it expanded to
# nothing), so the permutation project directory $(YPERMDIR) is listed.
DEPS=$(YPERMDIR) $(DIMDIR) $(LISTDIR) $(YTESTDIR)
|
||||||
|
|
||||||
|
$(TOPTARGETS): $(DEPS)
|
||||||
|
|
||||||
|
$(DEPS):
|
||||||
|
$(MAKE) -C $@ $(MAKECMDGOALS)
|
||||||
|
|
||||||
|
|
||||||
|
#LISTSRC_O=$(LISTSRC:.c=.o)
|
||||||
|
#SETTSRC_O=$(PWD)/../src/set_theoric_t/set_theoric_t.o
|
||||||
|
#SETTSRC_O=$(SETTSRC:.c=.o)
|
||||||
|
#TOOLSRC=$(TOOLDIR)/src/tools_t/tools_t.c
|
||||||
|
#TOOLSRC_O=$(TOOLSRC:.c=.o)
|
||||||
|
|
||||||
|
#OBJ=$(SRC:.c=.o) $(FROZENLAKESRC_O)
|
||||||
|
OBJ=$(FROZENLAKESRC_O) $(LISTSRC_O) $(PERMSRC_O) $(DIMSRC_O)
|
||||||
|
|
||||||
|
LIB_YTEST=$(YTESTDIR)/libytest.so
|
||||||
|
|
||||||
|
all: $(EXEC) $(LIB_YTEST)
|
||||||
|
|
||||||
|
$(EXEC): $(EXECSRC) $(OBJ)
|
||||||
|
$(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS)
|
||||||
|
|
||||||
|
# Compile Frozen_Lake.c to an object file. The other objects stay as
# prerequisites so they are built first, but only the first prerequisite
# ($<, the .c file) is passed to the compile-only step; $^ would also
# pass the .o files, which are linker inputs and are not used with -c.
$(FROZENLAKESRC_O): $(FROZENLAKESRC) $(LISTSRC_O) $(PERMSRC_O) $(DIMSRC_O)
	$(CC) -o $@ -c $< $(CFLAGS)
|
||||||
|
|
||||||
|
#$(LDFLAGS)
|
||||||
|
|
||||||
|
.PHONY: clean mrproper
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f $(OBJ)
|
||||||
|
|
||||||
|
mrproper: clean
|
||||||
|
rm -f $(EXEC)
|
||||||
|
|
||||||
|
# Build the test executable if needed, then run it with -h.
# $(EXEC) is a bare file name in the current directory, so it needs the
# ./ prefix to be found when "." is not on PATH.
run: $(EXEC)
	./$(EXEC) -h
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
#!/bin/bash
# Compile one ytest-based test source into ./launch_is_good_c.
#   $1 : test source file to compile (required)
#   $2 : optional extra compiler options, given as ONE quoted argument;
#        it is deliberately expanded unquoted so it splits into several flags.

# No argument at all: print the usage and stop.
if [ "$#" -le 0 ] ; then
    echo "Usage: $0 is_good.c" >&2
    echo "for example to compile: is_good.c" >&2
    exit 1
fi
# Only the source file was given: remind the user of the optional flags,
# then carry on with the build.
if [ "$#" -le 1 ] ; then
    echo "Usage: $0 $1" >&2
    echo " we can add more option for example '-D DEBUG=1' to have debug print, '-D HK' to have gtest like prompt, od '-g' to gbd" >&2
    echo "for example: $0 $1 \"-D DEBUG=1 -D HK -g\""
fi

DIR_YTEST=$PWD/../../ytest_t
SRC=../src

gcc -o launch_is_good_c $1 -L$DIR_YTEST $2 -lytest -I$DIR_YTEST/include_ytest/include $SRC/list_t/list_t.c -I$SRC
#gcc -o launch_is_good_c $1 $2 -lytest -I../include_ytest src/list_t/list_t.o src/set_theoric_t/set_theoric_t.o -I./src

# Put the ytest library directory in front of the loader search path.
# The previous value must be expanded with '$' (the original appended the
# literal word LD_LIBRARY_PATH), and the ':' separator is added only when
# a previous value exists, so no empty path element is created.
# NOTE(review): this export only affects the caller if the script is
# sourced; when run as a child process the caller's environment is unchanged.
export LD_LIBRARY_PATH=$DIR_YTEST/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}


#gcc $1 src/ftest/ftest.c src/fmock/fmock.c src/tools_t/tools_t.c src/bar_progress/bar_progress.c src/list_t/list_t.c src/set_theoric_t/set_theoric_t.c -I./include $2 -o launch_is_good_c -lpthread
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
#include "ftest/ftest.h"
|
||||||
|
#include "fmock/fmock.h"
|
||||||
|
|
||||||
|
|
||||||
|
#include "Frozen_Lake.h"
|
||||||
|
|
||||||
|
|
||||||
|
TEST(igameRabbit ){
|
||||||
|
size_t array[] = {4,4} ;
|
||||||
|
dimension *dim = init_copy_dim(array,2);
|
||||||
|
struct game_params * params = create_game_params(1,dim,3,3,1,200,200);
|
||||||
|
struct qlearning_params * qlearnParams = create_qlearning_params(0.85,0.99,1);
|
||||||
|
struct delay_params * delay_game = create_delay_params(1000000, 200000);
|
||||||
|
struct game * gm = create_game(params, qlearnParams, delay_game);
|
||||||
|
|
||||||
|
printDebug_dimension(gm->params->dim,"dimension game");
|
||||||
|
mainQlearning_game(gm);
|
||||||
|
|
||||||
|
free_game(gm);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
TEST(ARG_MAX_ARRAY){
|
||||||
|
|
||||||
|
double Q[6] = {0.5,-2.3,-9,3,21,-65};
|
||||||
|
size_t mx = ARG_MAX_ARRAY_TYPE_DOUBLE(Q, 6) ;
|
||||||
|
size_t mn = ARG_MIN_ARRAY_TYPE_DOUBLE(Q, 6) ;
|
||||||
|
printf("mx = %ld, Q[%ld ] = %lf \n", mx,mx,Q[mx]);
|
||||||
|
printf("mn = %ld, Q[%ld ] = %lf \n", mn,mn,Q[mn]);
|
||||||
|
|
||||||
|
double a =-5;
|
||||||
|
double b=4;
|
||||||
|
|
||||||
|
printf(" %lf vs %lf : %d \n ",a,b,COMPARE_N_TYPE_DOUBLE(&a,&b));
|
||||||
|
printf(" %lf vs %lf : %d \n ",b,a,COMPARE_N_TYPE_DOUBLE(&b,&a));
|
||||||
|
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Test program entry point: forwards the command line to the test runner.
 * The runner's result is not inspected; the process always exits with 0.
 */
int main(int argc, char **argv)
{
    run_all_tests_args(argc, argv);

    return 0;
}
|
||||||
Reference in New Issue
Block a user