diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.c b/deepQlearn_0/src/deepQlearning/learn_to_drive.c index 33d56f0..85e940c 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.c +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.c @@ -79,7 +79,9 @@ struct networks_qlearning * create_nework_qlearning( qnets->thread_learn = NULL; - + for(int i=0;inb_successive_action[i]=0; + } return qnets; @@ -162,7 +164,7 @@ struct qlearning_params * create_qlearning_params ( qparams->factor_update_learning_rate = 0.995; qparams->minimum_threshold_learning_rate = 0.0001 ; - qparams->factor_update_exploration_factor = 0.995; + qparams->factor_update_exploration_factor = 0.9995 /*0.995*/; qparams->minimum_threshold_exploration_factor = 0.01; // qparams->threshold_number_same_action = 500; @@ -244,6 +246,8 @@ void free_RL_agent(struct RL_agent *rlAgent){ free(rlAgent); } +#define ACCEPTABLE_REWARD 1000 + void train_qlearning(struct RL_agent * rlAgent, int action //, long reward ){ @@ -275,18 +279,19 @@ void train_qlearning(struct RL_agent * rlAgent, ttmp = ttmp->prev_layer; } -// *** - float new_value = ( (net_main->learning_rate < qlParams->minimum_threshold_learning_rate /*0.0001*/) ? net_main->learning_rate :(net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ ); - UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, net_main, learning_rate, new_value); - - qlParams->exploration_factor = (qlParams->exploration_factor < qlParams->minimum_threshold_exploration_factor) ? qlParams->exploration_factor : qlParams->exploration_factor * qlParams->factor_update_exploration_factor ; +// *** + if(car_status->cumulative_reward > ACCEPTABLE_REWARD){ + float new_value = ( (net_main->learning_rate < qlParams->minimum_threshold_learning_rate /*0.0001*/) ? net_main->learning_rate :(net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, net_main, learning_rate, new_value); + qlParams->exploration_factor = (qlParams->exploration_factor < qlParams->minimum_threshold_exploration_factor) ? qlParams->exploration_factor : qlParams->exploration_factor * qlParams->factor_update_exploration_factor ; + } // free_tensor_TYPE_FLOAT(action_value); // free_tensor_TYPE_FLOAT(next_action_value); free_tensor_TYPE_FLOAT(experimental_values); } - +#define MAX_SUCCESSIVE_ACTION 200 int select_action(struct RL_agent * rlAgent){ //static size_t explore = 0; int action; @@ -305,6 +310,17 @@ int select_action(struct RL_agent * rlAgent){ float proba_explor = (float) (xrand() % ((1<<17) -1))/ ((1<<17) -1); //frand(); //(float)(random ) / randRange; if(proba_explor > rlAgent->qlearnParams->exploration_factor ){ action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank ); + //printf(" STRATEGY : action : %d , factor : %f nb_episodes : %ld \n",action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes); + if(rlAgent->networks->nb_successive_action[action]>MAX_SUCCESSIVE_ACTION){ + rlAgent->networks->nb_successive_action[action]=0; + int recAction=action; + while(action==recAction){ + action = xrand() % action_value->dim->rank ; + //printf("debug: action=%d recAction=%d\n",action, recAction); + } + write(1,"#",1); + } + ////else write(1,".",1); //if(action == ARG_MIN_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank )) //action = xrand() % action_value->dim->rank ; } @@ -312,7 +328,15 @@ int select_action(struct RL_agent * rlAgent){ action = xrand() % action_value->dim->rank ; // explore++; //printf(" EXPLORE :%ld, action : %d , factor : %f nb_episodes : %ld \n",explore,action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes); + //printf(" EXPLORE : action : %d , factor : %f nb_episodes : %ld \n",action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes); + ////write(1,"*",1); } + for(int a=0;anetworks->nb_successive_action[a]=0; + } + (rlAgent->networks->nb_successive_action[action])++; + /* if(rlAgent->status->last_action == action){ ++(rlAgent->status->count_last_action); @@ -445,6 +469,9 @@ void* learn_to_drive(void * lrnarg){ //push_back_list_TYPE_L_INT(qlStatus->list_main_cumul, car_status->cumulative_reward); // printf(" cumul : %ld ", car_status->cumulative_reward); if(car_status->cumulative_reward > qlStatus->progress_best_cumul->end_list->value){ + int len_cumul=0; + char cumulSTR[128]; + len_cumul=sprintf(cumulSTR, " %ld ", car_status->cumulative_reward); push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward); char *file = fileNameDateScore(".ff_learnDir/.ff_main_",".txt",car_status->cumulative_reward); @@ -455,6 +482,7 @@ void* learn_to_drive(void * lrnarg){ //fprintf(stderr,"debug: symlink %s with %s. explain:%s \n",main_symlink, file, explain_symlink(file, main_symlink) ); } else write(1,":",1); + write(1,cumulSTR,len_cumul); free(file); file = fileNameDateScore(".ff_learnDir/.ff_target_",".txt",car_status->cumulative_reward); EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->target_net ,weight_in, file); diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.h b/deepQlearn_0/src/deepQlearning/learn_to_drive.h index 380e640..0aad4bd 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.h +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.h @@ -87,6 +87,7 @@ struct networks_qlearning { neurons_TYPE_FLOAT *target_net; neurons_TYPE_FLOAT *best_net; pthread_t *thread_learn; + ssize_t nb_successive_action[COUNT_ACTION]; }; struct RL_agent { diff --git a/deepQlearn_0/src/deepQlearning/vehicle.c b/deepQlearn_0/src/deepQlearning/vehicle.c index 6f2049c..e768aa3 100644 --- a/deepQlearn_0/src/deepQlearning/vehicle.c +++ b/deepQlearn_0/src/deepQlearning/vehicle.c @@ -581,7 +581,7 @@ void step_vehicle(struct vehicle *v, int action){ } } -#define RANDOM 1 +#define RANDOM 0 void reset(struct vehicle *v){ //static bool init = true; @@ -597,28 +597,29 @@ void reset(struct vehicle *v){ //srand(time(NULL)); //init = false; //} - int random; int diff; diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0]; - random = xrand() % (diff/2) ; #if RANDOM + int random; + random = xrand() % (diff/2) ; v->coord->x[0] = path->lower_bound_block[0]->x[0] + random; #else v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2; #endif diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1]; - random = xrand() % (diff/2); #if RANDOM + random = xrand() % (diff/2); v->coord->x[1] = path->lower_bound_block[0]->x[1] + random; #else v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2; #endif - random = xrand() % 50; #if RANDOM - // v->direction = 115 - random ; - v->direction = random - 25 ; + random = xrand() % 50; + v->direction = 80 - random ; + //v->direction = 115 - random ; + // v->direction = random - 25 ; #else - v->direction = -90; + v->direction = 70; //-90; #endif v->speed = SPEED; read_sensor(v); diff --git a/y_network_neural_network_/src/y_net_neur_net/y_nnn_manager.c b/y_network_neural_network_/src/y_net_neur_net/y_nnn_manager.c index 99684df..82e1bc5 100644 --- a/y_network_neural_network_/src/y_net_neur_net/y_nnn_manager.c +++ b/y_network_neural_network_/src/y_net_neur_net/y_nnn_manager.c @@ -137,7 +137,7 @@ void* runBashPrint(void *arg){ ////printf("%s ",pprint->string_space); len_buf=sprintf(buf,"%s ",pprint->string_space); BASH_WRITE_IF_EXIST(bash_arg, buf, len_buf) -#if 0 +#if 1 ////printf("ep: %ld ",qlStatus->index_episode); len_buf=sprintf(buf,"ep: %ld\n",qlStatus->index_episode); BASH_WRITE_IF_EXIST(bash_arg, buf, len_buf) diff --git a/y_network_neural_network_/test/is_good.c b/y_network_neural_network_/test/is_good.c index 5c0263b..7e6430b 100644 --- a/y_network_neural_network_/test/is_good.c +++ b/y_network_neural_network_/test/is_good.c @@ -279,7 +279,7 @@ float df(float x){ // ************************************************************** #if 1 -TEST(_first_learn_vehicle_50__9){ +HIDE_TEST(_first_learn_vehicle_50__9){ size_t nb_block = 7; size_t dim= 2; struct blocks * path = create_blocks(nb_block, dim); @@ -335,7 +335,7 @@ TEST(_first_learn_vehicle_50__9){ int randomRange = 500; size_t nb_prod_thread = 2; size_t nb_calc_thread = 4; - float learning_rate = 0.00001 /* 0.001*/; + float learning_rate = 0.001 /*0.01*/ /* 0.001*/; struct networks_qlearning *nnetworks = create_nework_qlearning( pconf, randomize, minR, maxR, randomRange, @@ -360,7 +360,7 @@ struct status_qlearning *qlstatus = create_status_qlearning (); 0.95/*float gamma*/, learning_rate, 0 /* (not used!)float discount_factor*/, - 0.01/*0.99*/ /*float exploration_factor*/, + 0.78/*0.01*//*0.99*/ /*float exploration_factor*/, 20/*long int nb_training_before_update_weight_in_target*/, 10000/*size_t number_episodes*/ ); @@ -421,7 +421,7 @@ struct status_qlearning *qlstatus = create_status_qlearning (); // **************************************************************** #if 1 -HIDE_TEST(first_learn_vehicle_50__10){ +HIDE_TEST(_first_learn_vehicle_50__10){ size_t nb_block = 7; size_t dim= 2; struct blocks * path = create_blocks(nb_block, dim); @@ -578,9 +578,35 @@ struct status_qlearning *qlstatus = create_status_qlearning (); ); - learn_to_drive(rlAgent); + //learn_to_drive(rlAgent); + //learn_to_drive(rlAgent); + + struct arg_bash *bash_arg= create_arg_bash(); - free_RL_agent(rlAgent); + struct arg_run_qlearn_bprint *argQL_BP = create_arg_run_qlearn_bprint(bash_arg, rlAgent); + + struct arg_var_ * var = create_arg_var_(y_nnn_manager_handle_input, argQL_BP); + struct y_socket_t *argS = y_socket_create("1600", 2, 3, var); + + + pthread_t pollTh; + pthread_create(&pollTh, NULL, y_socket_poll_fds, (void*)argS); + + + + + + + + pthread_join(pollTh, NULL); + //pthread_join(thread_learn, NULL); + + y_socket_free(argS); + free_arg_var_(var); + free_arg_run_qlearn_bprint(argQL_BP); + //free_arg_bash(bash_arg); + + //free_RL_agent(rlAgent); @@ -588,7 +614,7 @@ struct status_qlearning *qlstatus = create_status_qlearning (); } #endif -HIDE_TEST(_first_learn_vehicle_50__11_9){ +TEST(_first_learn_vehicle_50__11_9){ size_t nb_block = 7; size_t dim= 2; struct blocks * path = create_blocks(nb_block, dim); @@ -664,7 +690,7 @@ HIDE_TEST(_first_learn_vehicle_50__11_9){ int randomRange = 500; size_t nb_prod_thread = 2; size_t nb_calc_thread = 4; - float learning_rate = 0.00001 /* 0.001*/; + float learning_rate = 0.001; /* 0.00001*/ /* 0.001*/; struct networks_qlearning *nnetworks = create_nework_qlearning( pconf, randomize, minR, maxR, randomRange, @@ -676,8 +702,8 @@ EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weigh EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_01h42m16s_5300.txt"); */ -EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_.symlink"); -EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_.symlink"); +//EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_.symlink"); +//EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_.symlink"); struct status_qlearning *qlstatus = create_status_qlearning (); struct delay_params *dly = create_delay_params ( @@ -689,7 +715,7 @@ struct status_qlearning *qlstatus = create_status_qlearning (); 0.95/*float gamma*/, learning_rate, 0 /* (not used!)float discount_factor*/, - 0.01/*0.99*/ /*float exploration_factor*/, + 0.99 /*0.99*/ /*float exploration_factor*/, 20/*long int nb_training_before_update_weight_in_target*/, 10000/*size_t number_episodes*/ ); @@ -1149,7 +1175,7 @@ struct status_qlearning *qlstatus = create_status_qlearning (); #if 1 -HIDE_TEST(first_learn_vehicle13){ +HIDE_TEST(__first_learn_vehicle13){ size_t nb_block = 7; size_t dim= 2; struct blocks * path = create_blocks(nb_block, dim); @@ -1216,8 +1242,8 @@ HIDE_TEST(first_learn_vehicle13){ struct status_qlearning *qlstatus = create_status_qlearning (); struct delay_params *dly = create_delay_params ( - 100/*size_t delay_between_episodes*/, - 10/*size_t delay_between_games*/ + 500/*size_t delay_between_episodes*/, + 50/*size_t delay_between_games*/ ); struct qlearning_params *qlparams = create_qlearning_params ( @@ -1246,9 +1272,37 @@ HIDE_TEST(first_learn_vehicle13){ qlparams/*struct qlearning_params *qlearnParams*/ ); - learn_to_drive(rlAgent); + //learn_to_drive(rlAgent); - free_RL_agent(rlAgent); + //learn_to_drive(rlAgent); + + struct arg_bash *bash_arg= create_arg_bash(); + + struct arg_run_qlearn_bprint *argQL_BP = create_arg_run_qlearn_bprint(bash_arg, rlAgent); + + struct arg_var_ * var = create_arg_var_(y_nnn_manager_handle_input, argQL_BP); + struct y_socket_t *argS = y_socket_create("1600", 2, 3, var); + + + pthread_t pollTh; + pthread_create(&pollTh, NULL, y_socket_poll_fds, (void*)argS); + + + + + + + + pthread_join(pollTh, NULL); + //pthread_join(thread_learn, NULL); + + y_socket_free(argS); + free_arg_var_(var); + free_arg_run_qlearn_bprint(argQL_BP); + + + //free_RL_agent(rlAgent); + //free_RL_agent(rlAgent); @@ -2701,7 +2755,7 @@ struct status_qlearning *qlstatus = create_status_qlearning (); #if 1 -HIDE_TEST(first_learn_vehicle13){ +HIDE_TEST(_first_learn_vehicle13){ size_t nb_block = 7; size_t dim= 2; struct blocks * path = create_blocks(nb_block, dim); @@ -2798,9 +2852,37 @@ HIDE_TEST(first_learn_vehicle13){ qlparams/*struct qlearning_params *qlearnParams*/ ); - learn_to_drive(rlAgent); + //learn_to_drive(rlAgent); - free_RL_agent(rlAgent); + //learn_to_drive(rlAgent); + + struct arg_bash *bash_arg= create_arg_bash(); + + struct arg_run_qlearn_bprint *argQL_BP = create_arg_run_qlearn_bprint(bash_arg, rlAgent); + + struct arg_var_ * var = create_arg_var_(y_nnn_manager_handle_input, argQL_BP); + struct y_socket_t *argS = y_socket_create("1600", 2, 3, var); + + + pthread_t pollTh; + pthread_create(&pollTh, NULL, y_socket_poll_fds, (void*)argS); + + + + + + + + pthread_join(pollTh, NULL); + //pthread_join(thread_learn, NULL); + + y_socket_free(argS); + free_arg_var_(var); + free_arg_run_qlearn_bprint(argQL_BP); + + + //free_RL_agent(rlAgent); + //free_RL_agent(rlAgent);