learning: add ending flag and fix memory leak

This commit is contained in:
2025-05-12 15:16:19 +02:00
parent d2a84f4a82
commit 52e6353469
8 changed files with 229 additions and 28 deletions
@@ -90,6 +90,10 @@ struct status_qlearning * create_status_qlearning (){
status_ql->list_target_cumul = create_var_list_TYPE_L_INT(); status_ql->list_target_cumul = create_var_list_TYPE_L_INT();
status_ql->progress_best_cumul = create_var_list_TYPE_L_INT(); status_ql->progress_best_cumul = create_var_list_TYPE_L_INT();
status_ql->ending = 0;
status_ql->mut_ending=malloc(sizeof(pthread_mutex_t));
pthread_mutex_init(status_ql->mut_ending, NULL);
//push_back_list_TYPE_L_INT(status_ql->list_main_cumul, 0); //push_back_list_TYPE_L_INT(status_ql->list_main_cumul, 0);
//push_back_list_TYPE_L_INT(status_ql->list_target_cumul, 0); //push_back_list_TYPE_L_INT(status_ql->list_target_cumul, 0);
push_back_list_TYPE_L_INT(status_ql->progress_best_cumul, -10000); push_back_list_TYPE_L_INT(status_ql->progress_best_cumul, -10000);
@@ -192,6 +196,8 @@ void free_status_qlearning(struct status_qlearning *status_ql){
free_all_var_list_TYPE_L_INT(status_ql->list_main_cumul); free_all_var_list_TYPE_L_INT(status_ql->list_main_cumul);
free_all_var_list_TYPE_L_INT(status_ql->list_target_cumul); free_all_var_list_TYPE_L_INT(status_ql->list_target_cumul);
free_all_var_list_TYPE_L_INT(status_ql->progress_best_cumul); free_all_var_list_TYPE_L_INT(status_ql->progress_best_cumul);
pthread_mutex_destroy(status_ql->mut_ending);
free(status_ql->mut_ending);
free(status_ql); free(status_ql);
} }
void free_delay_params (struct delay_params *dly_p){ void free_delay_params (struct delay_params *dly_p){
@@ -258,6 +264,7 @@ void train_qlearning(struct RL_agent * rlAgent,
// free_tensor_TYPE_FLOAT(action_value); // free_tensor_TYPE_FLOAT(action_value);
// free_tensor_TYPE_FLOAT(next_action_value); // free_tensor_TYPE_FLOAT(next_action_value);
free_tensor_TYPE_FLOAT(experimental_values);
} }
@@ -276,7 +283,7 @@ int select_action(struct RL_agent * rlAgent){
//init =false; //init =false;
//} //}
//int random = xrand() % randRange; //int random = xrand() % randRange;
float proba_explor = (float) (rand() % (1<<17 -1))/ (1<<17 -1); //frand(); //(float)(random ) / randRange; float proba_explor = (float) (xrand() % (1<<17 -1))/ (1<<17 -1); //frand(); //(float)(random ) / randRange;
if(proba_explor > rlAgent->qlearnParams->exploration_factor ){ if(proba_explor > rlAgent->qlearnParams->exploration_factor ){
action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank ); action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank );
//if(action == ARG_MIN_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank )) //if(action == ARG_MIN_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank ))
@@ -307,13 +314,21 @@ int select_action(struct RL_agent * rlAgent){
return action; return action;
} }
/**
 * Thread-safe read of the `ending` flag of a Q-learning status object.
 *
 * The flag is copied while holding qlStatus->mut_ending, so the read is
 * serialized against any writer that takes the same mutex.
 *
 * @param qlStatus status object whose `mut_ending` mutex has been initialised.
 * @return the value of qlStatus->ending observed under the lock.
 */
int is_ending(struct status_qlearning *qlStatus){
    pthread_mutex_t *guard = qlStatus->mut_ending;

    pthread_mutex_lock(guard);
    const int ending_now = qlStatus->ending;
    pthread_mutex_unlock(guard);

    return ending_now;
}
void* runPrint(void *arg){ void* runPrint(void *arg){
struct RL_agent *rlAgent = (struct RL_agent*)arg; struct RL_agent *rlAgent = (struct RL_agent*)arg;
struct status_qlearning *qlStatus = rlAgent->status; struct status_qlearning *qlStatus = rlAgent->status;
struct print_params * pprint = rlAgent->pprint; struct print_params * pprint = rlAgent->pprint;
struct vehicle *car = rlAgent->car; struct vehicle *car = rlAgent->car;
size_t count_print = 0; size_t count_print = 0;
while(1){ while(!is_ending(qlStatus)){
if(/*(qlStatus->nb_episodes %125 == 0) &&*/ pprint->printed){ if(/*(qlStatus->nb_episodes %125 == 0) &&*/ pprint->printed){
//pthread_mutex_lock(&(pprint->mut_printed)); //pthread_mutex_lock(&(pprint->mut_printed));
pthread_mutex_lock(&(car->mut_coord)); pthread_mutex_lock(&(car->mut_coord));
@@ -350,6 +365,7 @@ if(/*(qlStatus->nb_episodes %125 == 0) &&*/ pprint->printed){
clear_screen(); clear_screen();
} }
} }
return NULL;
} }
char *fileNameDateScore(char * pre, char* post,size_t score){ char *fileNameDateScore(char * pre, char* post,size_t score){
@@ -375,7 +391,7 @@ void learn_to_drive(struct RL_agent * rlAgent){
pthread_t threadPrint; pthread_t threadPrint;
pthread_create(&threadPrint, NULL, runPrint, (void*)rlAgent); pthread_create(&threadPrint, NULL, runPrint, (void*)rlAgent);
while(true){ // while(true){
for(size_t index_episode = 0; index_episode < qlParams->number_episodes; ++index_episode){ for(size_t index_episode = 0; index_episode < qlParams->number_episodes; ++index_episode){
reset(car); reset(car);
qlStatus->nb_training_after_updated_weight_in_target = 0; qlStatus->nb_training_after_updated_weight_in_target = 0;
@@ -414,7 +430,10 @@ void learn_to_drive(struct RL_agent * rlAgent){
// Sleep(pprint->delay->delay_between_episodes); // Sleep(pprint->delay->delay_between_episodes);
//} //}
} }
} pthread_mutex_lock(qlStatus->mut_ending);
qlStatus->ending = 1;
pthread_mutex_unlock(qlStatus->mut_ending);
// }
pthread_join(threadPrint, NULL); pthread_join(threadPrint, NULL);
} }
@@ -50,7 +50,8 @@ struct status_qlearning {
size_t nb_episodes; size_t nb_episodes;
size_t index_episode; size_t index_episode;
int action; int action;
// int last_action; int ending;
pthread_mutex_t *mut_ending;
// size_t count_last_action; // size_t count_last_action;
}; };
+1 -1
View File
@@ -29,7 +29,7 @@ TEST_DIR=$(PWD)
EXECSRC=$(NAME_TEST).c EXECSRC=$(NAME_TEST).c
#EXECSRC=openF.c #EXECSRC=openF.c
EXEC=launch_$(NAME_TEST)_m EXEC=l1aunch_$(NAME_TEST)_m
NEUROSRC=$(NEURODIR)/src/neuron_t/neuron_t.c NEUROSRC=$(NEURODIR)/src/neuron_t/neuron_t.c
NEUROSRC_O=$(NEUROSRC:.c=.o) NEUROSRC_O=$(NEUROSRC:.c=.o)
+194 -16
View File
@@ -511,7 +511,7 @@ scanf("%c",&c);
} }
#endif #endif
// **************************************************************
#if 1 #if 1
TEST(first_learn_vehicle_50__9){ TEST(first_learn_vehicle_50__9){
@@ -633,20 +633,20 @@ EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weigh
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_01h42m16s_5300.txt"); EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_01h42m16s_5300.txt");
*/ */
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_20240717_09h11m09s_1700.txt"); //EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_20240717_09h11m09s_1700.txt");
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_09h11m09s_1700.txt"); //EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_09h11m09s_1700.txt");
struct status_qlearning *qlstatus = create_status_qlearning (); struct status_qlearning *qlstatus = create_status_qlearning ();
struct delay_params *dly = create_delay_params ( struct delay_params *dly = create_delay_params (
500/*size_t delay_between_episodes*/, 50000 /*size_t delay_between_episodes*/,
50/*size_t delay_between_games*/ 500/*size_t delay_between_games*/
); );
struct qlearning_params *qlparams = create_qlearning_params ( struct qlearning_params *qlparams = create_qlearning_params (
0.95/*float gamma*/, 0.95/*float gamma*/,
learning_rate, learning_rate,
0 /* (not used!)float discount_factor*/, 0 /* (not used!)float discount_factor*/,
0.0001/*0.99*/ /*float exploration_factor*/, 0.01/*0.99*/ /*float exploration_factor*/,
20/*long int nb_training_before_update_weight_in_target*/, 20/*long int nb_training_before_update_weight_in_target*/,
10000/*size_t number_episodes*/ 10000/*size_t number_episodes*/
); );
@@ -681,12 +681,184 @@ struct status_qlearning *qlstatus = create_status_qlearning ();
// ****************************************************************
#if 1 #if 1
TEST(first_learn_vehicle_50__10){ TEST(first_learn_vehicle_50__10){
size_t nb_block = 7; size_t nb_block = 7;
size_t dim= 2; size_t dim= 2;
struct blocks * path = create_blocks(nb_block, dim); struct blocks * path = create_blocks(nb_block, dim);
#if 1
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
copy_coordinate(path->upper_bound_block[0], (float[]){150,250});
copy_coordinate(path->lower_bound_block[1], (float[]){150,0});
copy_coordinate(path->upper_bound_block[1], (float[]){250,150});
copy_coordinate(path->lower_bound_block[2], (float[]){250,80});
copy_coordinate(path->upper_bound_block[2], (float[]){360,200});
copy_coordinate(path->lower_bound_block[3], (float[]){360,70});
copy_coordinate(path->upper_bound_block[3], (float[]){600,170});
copy_coordinate(path->lower_bound_block[4], (float[]){600,90});
copy_coordinate(path->upper_bound_block[4], (float[]){760,300});
copy_coordinate(path->lower_bound_block[5], (float[]){300,300});
copy_coordinate(path->upper_bound_block[5], (float[]){760,350});
copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
copy_coordinate(path->upper_bound_block[6], (float[]){410,300});
/*
copy_coordinate(path->lower_bound_block[4], (float[]){0,0});
copy_coordinate(path->upper_bound_block[4], (float[]){150,250});
copy_coordinate(path->lower_bound_block[3], (float[]){150,40});
copy_coordinate(path->upper_bound_block[3], (float[]){250,150});
copy_coordinate(path->lower_bound_block[2], (float[]){250,80});
copy_coordinate(path->upper_bound_block[2], (float[]){360,200});
copy_coordinate(path->lower_bound_block[1], (float[]){360,70});
copy_coordinate(path->upper_bound_block[1], (float[]){600,150});
copy_coordinate(path->lower_bound_block[0], (float[]){600,90});
copy_coordinate(path->upper_bound_block[0], (float[]){760,300});
copy_coordinate(path->lower_bound_block[6], (float[]){260,300});
copy_coordinate(path->upper_bound_block[6], (float[]){760,360});
copy_coordinate(path->lower_bound_block[5], (float[]){0,250});
copy_coordinate(path->upper_bound_block[5], (float[]){410,300});
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
copy_coordinate(path->upper_bound_block[0], (float[]){100,250});
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
copy_coordinate(path->upper_bound_block[1], (float[]){250,80});
copy_coordinate(path->lower_bound_block[2], (float[]){250,0});
copy_coordinate(path->upper_bound_block[2], (float[]){360,140});
copy_coordinate(path->lower_bound_block[3], (float[]){360,70});
copy_coordinate(path->upper_bound_block[3], (float[]){600,140});
copy_coordinate(path->lower_bound_block[4], (float[]){600,90});
copy_coordinate(path->upper_bound_block[4], (float[]){720,300});
copy_coordinate(path->lower_bound_block[5], (float[]){300,300});
copy_coordinate(path->upper_bound_block[5], (float[]){720,350});
copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
copy_coordinate(path->upper_bound_block[6], (float[]){410,300});
copy_coordinate(path->lower_bound_block[0], (float[]){0,300});
copy_coordinate(path->upper_bound_block[0], (float[]){400,700});
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
copy_coordinate(path->upper_bound_block[1], (float[]){1000,300});
copy_coordinate(path->lower_bound_block[2], (float[]){1000,50});
copy_coordinate(path->upper_bound_block[2], (float[]){1400,500});
copy_coordinate(path->lower_bound_block[3], (float[]){1400,200});
copy_coordinate(path->upper_bound_block[3], (float[]){1800,700});
copy_coordinate(path->lower_bound_block[4], (float[]){1100,700});
copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000});
copy_coordinate(path->lower_bound_block[5], (float[]){800,600});
copy_coordinate(path->upper_bound_block[5], (float[]){1100,975});
copy_coordinate(path->lower_bound_block[6], (float[]){100,700});
copy_coordinate(path->upper_bound_block[6], (float[]){800,975});
*/
#else
copy_coordinate(path->lower_bound_block[0], (float[]){0,3});
copy_coordinate(path->upper_bound_block[0], (float[]){4,7});
copy_coordinate(path->lower_bound_block[1], (float[]){1,0});
copy_coordinate(path->upper_bound_block[1], (float[]){10,3});
copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5});
copy_coordinate(path->upper_bound_block[2], (float[]){14,5});
copy_coordinate(path->lower_bound_block[3], (float[]){14,2});
copy_coordinate(path->upper_bound_block[3], (float[]){18,7});
copy_coordinate(path->lower_bound_block[4], (float[]){11,7});
copy_coordinate(path->upper_bound_block[4], (float[]){17,10});
copy_coordinate(path->lower_bound_block[5], (float[]){8,6});
copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75});
copy_coordinate(path->lower_bound_block[6], (float[]){1,7});
copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75});
#endif
update_bounds_limits_blocks(path);
struct vehicle *car = create_vehicle(path);
config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
//config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
bool randomize=true;
float minR = -0.5, maxR = 0.5;
int randomRange = 500;
size_t nb_prod_thread = 2;
size_t nb_calc_thread = 4;
float learning_rate = 0.00001 /* 0.001*/;
struct networks_qlearning *nnetworks = create_nework_qlearning(
pconf,
randomize, minR, maxR, randomRange,
nb_prod_thread, nb_calc_thread,
learning_rate
);
/*
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_20240717_01h42m16s_5300.txt");
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_01h42m16s_5300.txt");
*/
/*
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_20240717_09h11m09s_1700.txt");
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_09h11m09s_1700.txt");
*/
struct status_qlearning *qlstatus = create_status_qlearning ();
struct delay_params *dly = create_delay_params (
500/*size_t delay_between_episodes*/,
50/*size_t delay_between_games*/
);
struct qlearning_params *qlparams = create_qlearning_params (
0.95/*float gamma*/,
learning_rate,
0 /* (not used!)float discount_factor*/,
0.0001/*0.99*/ /*float exploration_factor*/,
20/*long int nb_training_before_update_weight_in_target*/,
1 /*size_t number_episodes*/
);
/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
*/
struct print_params *pprint = create_print_params(
12/*float scale_x*/,12 /*float scale_y*/,
dly/*struct delay_params * dly_p*/
);
struct RL_agent *rlAgent = create_RL_agent (
nnetworks /*struct networks_qlearning * networks*/,
car /*struct vehicle * car*/,
qlstatus /*struct status_qlearning * status*/,
pprint /*struct print_params * pprint*/,
qlparams/*struct qlearning_params *qlearnParams*/
);
learn_to_drive(rlAgent);
free_RL_agent(rlAgent);
}
#endif
#if 1
TEST(first_learn_vehicle_50__11){
size_t nb_block = 7;
size_t dim= 2;
struct blocks * path = create_blocks(nb_block, dim);
#if 1 #if 1
copy_coordinate(path->lower_bound_block[0], (float[]){0,0}); copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
@@ -791,7 +963,7 @@ copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
int randomRange = 500; int randomRange = 500;
size_t nb_prod_thread = 2; size_t nb_prod_thread = 2;
size_t nb_calc_thread = 4; size_t nb_calc_thread = 4;
float learning_rate = 0.00001 /* 0.001*/; float learning_rate = 0; /* 0.000001*/ /* 0.001*/;
struct networks_qlearning *nnetworks = create_nework_qlearning( struct networks_qlearning *nnetworks = create_nework_qlearning(
pconf, pconf,
randomize, minR, maxR, randomRange, randomize, minR, maxR, randomRange,
@@ -803,9 +975,12 @@ EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weigh
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_01h42m16s_5300.txt"); EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_01h42m16s_5300.txt");
*/ */
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_20240717_09h11m09s_1700.txt"); EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_20250508_17h50m56s_26300.txt");
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_09h11m09s_1700.txt"); EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20250508_17h50m56s_26300.txt");
/*
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_20250508_23h02m40s_29000.txt");
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20250508_23h02m40s_29000.txt");
*/
struct status_qlearning *qlstatus = create_status_qlearning (); struct status_qlearning *qlstatus = create_status_qlearning ();
struct delay_params *dly = create_delay_params ( struct delay_params *dly = create_delay_params (
500/*size_t delay_between_episodes*/, 500/*size_t delay_between_episodes*/,
@@ -852,7 +1027,7 @@ struct status_qlearning *qlstatus = create_status_qlearning ();
#if 1 #if 1
TEST(first_learn_vehicle_50__11){ TEST(first_learn_vehicle_50__12){
size_t nb_block = 10; size_t nb_block = 10;
size_t dim= 2; size_t dim= 2;
struct blocks * path = create_blocks(nb_block, dim); struct blocks * path = create_blocks(nb_block, dim);
@@ -967,7 +1142,7 @@ copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
int randomRange = 500; int randomRange = 500;
size_t nb_prod_thread = 2; size_t nb_prod_thread = 2;
size_t nb_calc_thread = 4; size_t nb_calc_thread = 4;
float learning_rate = 0.00001 /* 0.001*/; float learning_rate = 0.0000001 /* 0.001*/;
struct networks_qlearning *nnetworks = create_nework_qlearning( struct networks_qlearning *nnetworks = create_nework_qlearning(
pconf, pconf,
randomize, minR, maxR, randomRange, randomize, minR, maxR, randomRange,
@@ -976,9 +1151,12 @@ copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
); );
//print_vehicle_n_path(car, 12, 12); //print_vehicle_n_path(car, 12, 12);
/*
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_20240717_09h11m09s_1700.txt"); EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_20240717_09h11m09s_1700.txt");
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_09h11m09s_1700.txt"); EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_09h11m09s_1700.txt");
*/
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_20250508_17h50m56s_26300.txt");
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20250508_17h50m56s_26300.txt");
struct status_qlearning *qlstatus = create_status_qlearning (); struct status_qlearning *qlstatus = create_status_qlearning ();
struct delay_params *dly = create_delay_params ( struct delay_params *dly = create_delay_params (
@@ -990,9 +1168,9 @@ struct status_qlearning *qlstatus = create_status_qlearning ();
0.95/*float gamma*/, 0.95/*float gamma*/,
learning_rate, learning_rate,
0 /* (not used!)float discount_factor*/, 0 /* (not used!)float discount_factor*/,
0.0001/*0.99*/ /*float exploration_factor*/, 0.1/*0.99*/ /*float exploration_factor*/,
20/*long int nb_training_before_update_weight_in_target*/, 20/*long int nb_training_before_update_weight_in_target*/,
10000/*size_t number_episodes*/ 1/*size_t number_episodes*/
); );
/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df ); /* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f ); UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f );
@@ -1029,7 +1207,7 @@ struct status_qlearning *qlstatus = create_status_qlearning ();
#if 1 #if 1
TEST(first_learn_vehicle){ TEST(first_learn_vehicle13){
size_t nb_block = 7; size_t nb_block = 7;
size_t dim= 2; size_t dim= 2;
struct blocks * path = create_blocks(nb_block, dim); struct blocks * path = create_blocks(nb_block, dim);
+1 -1
View File
@@ -635,7 +635,7 @@ void print_weight_in_neurons_##type(neurons_##type *nn, char *msg){\
if(tmp->weight_in){\ if(tmp->weight_in){\
sprintf(vmsg,"%s layer %ld",msg,i++);\ sprintf(vmsg,"%s layer %ld",msg,i++);\
print_tensor_msg_##type(tmp->weight_in, vmsg);\ print_tensor_msg_##type(tmp->weight_in, vmsg);\
}else{printf("weight_in %d NULL\n",i);}\ }else{printf("weight_in %ld NULL\n",i);}\
tmp = tmp->next_layer;\ tmp = tmp->next_layer;\
}\ }\
}\ }\
+1 -1
View File
@@ -330,7 +330,7 @@ void mainQlearning_game(struct game *gm){
print_game_dim2(gm); print_game_dim2(gm);
//getchar(); // getchar();
usleep((gm->delay)->delay_between_episodes); usleep((gm->delay)->delay_between_episodes);
} }
+2 -2
View File
@@ -6,9 +6,9 @@
TEST(igameRabbit ){ TEST(igameRabbit ){
size_t array[] = {6,6} ; size_t array[] = {4,4} ;
dimension *dim = init_copy_dim(array,2); dimension *dim = init_copy_dim(array,2);
struct game_params * params = create_game_params(4,dim,3,3,1,200,200); struct game_params * params = create_game_params(1,dim,3,3,1,200,200);
struct qlearning_params * qlearnParams = create_qlearning_params(0.85,0.99,1); struct qlearning_params * qlearnParams = create_qlearning_params(0.85,0.99,1);
struct delay_params * delay_game = create_delay_params(100000, 20000); struct delay_params * delay_game = create_delay_params(100000, 20000);
struct game * gm = create_game(params, qlearnParams, delay_game); struct game * gm = create_game(params, qlearnParams, delay_game);
+4 -1
View File
@@ -24,9 +24,12 @@ int xrand(){
static bool init = true; static bool init = true;
if(init){ if(init){
init = false; init = false;
struct timespec start_t; srand(time(NULL));
/*
struct timespec start_t;
clock_gettime(CLOCK_REALTIME, &start_t); clock_gettime(CLOCK_REALTIME, &start_t);
srand(start_t.tv_nsec); srand(start_t.tv_nsec);
*/
//srand(start_t.tv_nsec - start_t.tv_sec); //srand(start_t.tv_nsec - start_t.tv_sec);
} }
int ret = rand (); int ret = rand ();