diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.c b/deepQlearn_0/src/deepQlearning/learn_to_drive.c index 2381ccc..debedff 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.c +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.c @@ -7,9 +7,11 @@ char *action_name[8] = {"LEFT", "CENTER", "RIGHT"}; #define USE_THRESHOLD 0 float reLU(float x){ +#if CHECK_NAN if(x!=x){// nan printf("nan relu "); } +#endif #if USE_THRESHOLD if(x>UPPER_THRESHOLD) return UPPER_THRESHOLD; #endif @@ -191,6 +193,7 @@ struct qlearning_params * create_qlearning_params ( qparams->minimum_threshold_exploration_factor = 0.0001; // qparams->threshold_number_same_action = 500; + qparams->caller_func_name=NULL; return qparams; } @@ -257,6 +260,7 @@ void free_print_params (struct print_params *pprint){ } void free_qlearning_params(struct qlearning_params *q_params){ + if(q_params->caller_func_name) free(q_params->caller_func_name); free(q_params); } void free_RL_agent(struct RL_agent *rlAgent){ @@ -464,8 +468,8 @@ char *fileNameDateScore(char * pre, char* post,size_t score){ return filename; } -const char* target_symlink = ".ff_target_.symlink"; -const char* main_symlink = ".ff_main_.symlink"; +//const char* target_symlink = ".ff_target_.symlink"; +//const char* main_symlink = ".ff_main_.symlink"; const char* dest_folder=".ff_learnDir"; void* learn_to_drive(void * lrnarg){ @@ -521,27 +525,58 @@ void* learn_to_drive(void * lrnarg){ int len_cumul=0; char cumulSTR[128]; len_cumul=sprintf(cumulSTR, " %ld ", car_status->cumulative_reward); + char *mainfuncCaller=malloc(128); + char *targetfuncCaller=malloc(128); + char *mainSymlinkCaller=malloc(256); + char *targetSymlinkCaller=malloc(256); + if(qlParams->caller_func_name){ + sprintf(mainfuncCaller,".ff_learnDir/.ff_main_%s",qlParams->caller_func_name); + sprintf(targetfuncCaller,".ff_learnDir/.ff_target_%s",qlParams->caller_func_name); + sprintf(mainSymlinkCaller,".ff_main_%s.symlink",qlParams->caller_func_name); + sprintf(targetSymlinkCaller,".ff_target_%s.symlink",qlParams->caller_func_name); + }else{ + strcpy(mainfuncCaller,".ff_learnDir/.ff_main_"); + strcpy(targetfuncCaller,".ff_learnDir/.ff_target_"); + strcpy(mainSymlinkCaller,".ff_main_.symlink"); + strcpy(targetSymlinkCaller,".ff_target_.symlink"); + } push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward); - char *file = fileNameDateScore(".ff_learnDir/.ff_main_",".txt",car_status->cumulative_reward); + //char *file = fileNameDateScore(".ff_learnDir/.ff_main_",".txt",car_status->cumulative_reward); + char *file = fileNameDateScore(mainfuncCaller,"",car_status->cumulative_reward); EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->main_net ,weight_in, file); - unlink(main_symlink); - if(symlink(file, main_symlink)==-1){ - fprintf(stderr,"debug: symlink %s with %s.\n",main_symlink, file); + + + //unlink(main_symlink); + unlink(mainSymlinkCaller); + //if(symlink(file, main_symlink)==-1) + if(symlink(file, mainSymlinkCaller)==-1) + { + //fprintf(stderr,"debug: symlink %s with %s.\n",main_symlink, file); + fprintf(stderr,"debug: symlink %s with %s.\n",mainSymlinkCaller, file); //fprintf(stderr,"debug: symlink %s with %s. explain:%s \n",main_symlink, file, explain_symlink(file, main_symlink) ); } else write(1,":",1); write(1,cumulSTR,len_cumul); free(file); - file = fileNameDateScore(".ff_learnDir/.ff_target_",".txt",car_status->cumulative_reward); + //file = fileNameDateScore(".ff_learnDir/.ff_target_",".txt",car_status->cumulative_reward); + file = fileNameDateScore(targetfuncCaller,"",car_status->cumulative_reward); EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->target_net ,weight_in, file); - unlink(target_symlink); - if(symlink(file, target_symlink)==-1){ - fprintf(stderr,"debug: symlink %s with %s\n",target_symlink,file ); + //unlink(target_symlink); + //if(symlink(file, target_symlink)==-1) + unlink(targetSymlinkCaller); + if(symlink(file, targetSymlinkCaller)==-1) + { + //fprintf(stderr,"debug: symlink %s with %s\n",target_symlink,file ); + fprintf(stderr,"debug: symlink %s with %s\n",targetSymlinkCaller,file ); //fprintf(stderr,"debug: symlink %s with %s explain:%s\n",target_symlink,file,explain_symlink(file, target_symlink) ); } else write(1,"-",1); free(file); + free(mainfuncCaller); + free(targetfuncCaller); + free(mainSymlinkCaller); + free(targetSymlinkCaller); } break; } @@ -551,6 +586,66 @@ void* learn_to_drive(void * lrnarg){ // Sleep(pprint->delay->delay_between_episodes); //} } +// UPDATE IF ENDING AND BETTER REWARD + + if(car_status->cumulative_reward > qlStatus->progress_best_cumul->end_list->value) + { + int len_cumul=0; + char cumulSTR[128]; + len_cumul=sprintf(cumulSTR, " %ld ", car_status->cumulative_reward); + char *funcCaller_extension=malloc(128); + char *mainSymlinkCaller=malloc(256); + char *targetSymlinkCaller=malloc(256); + if(qlParams->caller_func_name){ + sprintf(funcCaller_extension,"%s.txt",qlParams->caller_func_name); + sprintf(mainSymlinkCaller,".ff_main_%s.symlink",qlParams->caller_func_name); + sprintf(targetSymlinkCaller,".ff_target_%s.symlink",qlParams->caller_func_name); + }else{ + sprintf(funcCaller_extension,".%s","txt"); + strcpy(mainSymlinkCaller,".ff_main_.symlink"); + strcpy(targetSymlinkCaller,".ff_target_.symlink"); + + } + push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward); + //char *file = fileNameDateScore(".ff_learnDir/.ff_main_",".txt",car_status->cumulative_reward); + char *file = fileNameDateScore(".ff_learnDir/.ff_main_",funcCaller_extension,car_status->cumulative_reward); + EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->main_net ,weight_in, file); + + + //unlink(main_symlink); + unlink(mainSymlinkCaller); + //if(symlink(file, main_symlink)==-1) + if(symlink(file, mainSymlinkCaller)==-1) + { + //fprintf(stderr,"debug: symlink %s with %s.\n",main_symlink, file); + fprintf(stderr,"debug: symlink %s with %s.\n",mainSymlinkCaller, file); + //fprintf(stderr,"debug: symlink %s with %s. explain:%s \n",main_symlink, file, explain_symlink(file, main_symlink) ); + } + else write(1,":",1); + write(1,cumulSTR,len_cumul); + free(file); + //file = fileNameDateScore(".ff_learnDir/.ff_target_",".txt",car_status->cumulative_reward); + file = fileNameDateScore(".ff_learnDir/.ff_target_",funcCaller_extension,car_status->cumulative_reward); + EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->target_net ,weight_in, file); + //unlink(target_symlink); + //if(symlink(file, target_symlink)==-1) + unlink(targetSymlinkCaller); + if(symlink(file, targetSymlinkCaller)==-1) + { + //fprintf(stderr,"debug: symlink %s with %s\n",target_symlink,file ); + fprintf(stderr,"debug: symlink %s with %s\n",targetSymlinkCaller,file ); + //fprintf(stderr,"debug: symlink %s with %s explain:%s\n",target_symlink,file,explain_symlink(file, target_symlink) ); + } + else write(1,"-",1); + free(file); + free(funcCaller_extension); + free(mainSymlinkCaller); + free(targetSymlinkCaller); + } + +// END UPDATE + + pthread_mutex_lock(qlStatus->mut_ending); qlStatus->ending = 1; pthread_mutex_unlock(qlStatus->mut_ending); diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.h b/deepQlearn_0/src/deepQlearning/learn_to_drive.h index 7f9b25b..b8e0e3b 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.h +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.h @@ -22,6 +22,10 @@ #include "vehicle.h" +#ifndef CHECK_NAN +#define CHECK_NAN 0 +#endif + //float reLU(float x); //float d_reLU(float x); @@ -48,6 +52,7 @@ struct qlearning_params { long int nb_training_before_update_weight_in_target; size_t number_episodes; // size_t threshold_number_same_action; + char *caller_func_name; }; diff --git a/y_network_neural_network_/test/is_good.c b/y_network_neural_network_/test/is_good.c index 74ef01d..8b89992 100644 --- a/y_network_neural_network_/test/is_good.c +++ b/y_network_neural_network_/test/is_good.c @@ -778,6 +778,8 @@ TEST(_first_learn_vehicle_50__11){ size_t dim= 2; struct blocks * path = create_blocks(nb_block, dim); + LOG("debug: f_name = %s\n", __func__); + #if 1 copy_coordinate(path->lower_bound_block[0], (float[]){0,0}); @@ -922,6 +924,10 @@ struct status_qlearning *qlstatus = create_status_qlearning (); UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df ); UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f ); */ + qlparams->caller_func_name=malloc(strlen(__func__)+1); + strcpy(qlparams->caller_func_name, __func__); + + struct print_params *pprint = create_print_params( 12/*float scale_x*/,12 /*float scale_y*/, dly/*struct delay_params * dly_p*/ @@ -1088,7 +1094,7 @@ copy_coordinate(path->lower_bound_block[0], (float[]){0,0}); int randomRange = 500; size_t nb_prod_thread = 2; size_t nb_calc_thread = 4; - float learning_rate = 0.0000001 /* 0.001*/; + float learning_rate = 0.00001 /* 0.001*/; struct networks_qlearning *nnetworks = create_network_qlearning( pconf, randomize, minR, maxR, randomRange, @@ -1235,7 +1241,7 @@ HIDE_TEST(__first_learn_vehicle13){ int randomRange = 5000; size_t nb_prod_thread = 2; size_t nb_calc_thread = 4; - float learning_rate = 0.1; + float learning_rate = 0.00001; struct networks_qlearning *nnetworks = create_network_qlearning( pconf, randomize, minR, maxR, randomRange, @@ -1243,7 +1249,10 @@ HIDE_TEST(__first_learn_vehicle13){ learning_rate ); - struct status_qlearning *qlstatus = create_status_qlearning (); +EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_.symlink"); +EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_.symlink"); + +struct status_qlearning *qlstatus = create_status_qlearning (); struct delay_params *dly = create_delay_params ( 500/*size_t delay_between_episodes*/, 50/*size_t delay_between_games*/ @@ -1253,7 +1262,7 @@ HIDE_TEST(__first_learn_vehicle13){ 0.95/*float gamma*/, learning_rate, 0 /* (not used!)float discount_factor*/, - 0.85 /*float exploration_factor*/, + 0.085 /*float exploration_factor*/, 20/*long int nb_training_before_update_weight_in_target*/, 10000/*size_t number_episodes*/ ); @@ -1284,7 +1293,7 @@ HIDE_TEST(__first_learn_vehicle13){ struct arg_run_qlearn_bprint *argQL_BP = create_arg_run_qlearn_bprint(bash_arg, rlAgent); struct arg_var_ * var = create_arg_var_(y_nnn_manager_handle_input, argQL_BP); - struct y_socket_t *argS = y_socket_create("1600", 2, 3, var); + struct y_socket_t *argS = y_socket_create("16001", 2, 3, var); pthread_t pollTh;