qlearn: add the caller function name to stored result filenames to make them more descriptive, and record the result if ending before done
This commit is contained in:
@@ -7,9 +7,11 @@ char *action_name[8] = {"LEFT", "CENTER", "RIGHT"};
|
|||||||
#define USE_THRESHOLD 0
|
#define USE_THRESHOLD 0
|
||||||
|
|
||||||
float reLU(float x){
|
float reLU(float x){
|
||||||
|
#if CHECK_NAN
|
||||||
if(x!=x){// nan
|
if(x!=x){// nan
|
||||||
printf("nan relu ");
|
printf("nan relu ");
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
#if USE_THRESHOLD
|
#if USE_THRESHOLD
|
||||||
if(x>UPPER_THRESHOLD) return UPPER_THRESHOLD;
|
if(x>UPPER_THRESHOLD) return UPPER_THRESHOLD;
|
||||||
#endif
|
#endif
|
||||||
@@ -191,6 +193,7 @@ struct qlearning_params * create_qlearning_params (
|
|||||||
qparams->minimum_threshold_exploration_factor = 0.0001;
|
qparams->minimum_threshold_exploration_factor = 0.0001;
|
||||||
|
|
||||||
// qparams->threshold_number_same_action = 500;
|
// qparams->threshold_number_same_action = 500;
|
||||||
|
qparams->caller_func_name=NULL;
|
||||||
|
|
||||||
return qparams;
|
return qparams;
|
||||||
}
|
}
|
||||||
@@ -257,6 +260,7 @@ void free_print_params (struct print_params *pprint){
|
|||||||
}
|
}
|
||||||
|
|
||||||
void free_qlearning_params(struct qlearning_params *q_params){
|
void free_qlearning_params(struct qlearning_params *q_params){
|
||||||
|
if(q_params->caller_func_name) free(q_params->caller_func_name);
|
||||||
free(q_params);
|
free(q_params);
|
||||||
}
|
}
|
||||||
void free_RL_agent(struct RL_agent *rlAgent){
|
void free_RL_agent(struct RL_agent *rlAgent){
|
||||||
@@ -464,8 +468,8 @@ char *fileNameDateScore(char * pre, char* post,size_t score){
|
|||||||
return filename;
|
return filename;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* target_symlink = ".ff_target_.symlink";
|
//const char* target_symlink = ".ff_target_.symlink";
|
||||||
const char* main_symlink = ".ff_main_.symlink";
|
//const char* main_symlink = ".ff_main_.symlink";
|
||||||
const char* dest_folder=".ff_learnDir";
|
const char* dest_folder=".ff_learnDir";
|
||||||
|
|
||||||
void* learn_to_drive(void * lrnarg){
|
void* learn_to_drive(void * lrnarg){
|
||||||
@@ -521,27 +525,58 @@ void* learn_to_drive(void * lrnarg){
|
|||||||
int len_cumul=0;
|
int len_cumul=0;
|
||||||
char cumulSTR[128];
|
char cumulSTR[128];
|
||||||
len_cumul=sprintf(cumulSTR, " %ld ", car_status->cumulative_reward);
|
len_cumul=sprintf(cumulSTR, " %ld ", car_status->cumulative_reward);
|
||||||
|
char *mainfuncCaller=malloc(128);
|
||||||
|
char *targetfuncCaller=malloc(128);
|
||||||
|
char *mainSymlinkCaller=malloc(256);
|
||||||
|
char *targetSymlinkCaller=malloc(256);
|
||||||
|
if(qlParams->caller_func_name){
|
||||||
|
sprintf(mainfuncCaller,".ff_learnDir/.ff_main_%s",qlParams->caller_func_name);
|
||||||
|
sprintf(targetfuncCaller,".ff_learnDir/.ff_target_%s",qlParams->caller_func_name);
|
||||||
|
sprintf(mainSymlinkCaller,".ff_main_%s.symlink",qlParams->caller_func_name);
|
||||||
|
sprintf(targetSymlinkCaller,".ff_target_%s.symlink",qlParams->caller_func_name);
|
||||||
|
}else{
|
||||||
|
strcpy(mainfuncCaller,".ff_learnDir/.ff_main_");
|
||||||
|
strcpy(targetfuncCaller,".ff_learnDir/.ff_target_");
|
||||||
|
strcpy(mainSymlinkCaller,".ff_main_.symlink");
|
||||||
|
strcpy(targetSymlinkCaller,".ff_target_.symlink");
|
||||||
|
|
||||||
|
}
|
||||||
push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward);
|
push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward);
|
||||||
char *file = fileNameDateScore(".ff_learnDir/.ff_main_",".txt",car_status->cumulative_reward);
|
//char *file = fileNameDateScore(".ff_learnDir/.ff_main_",".txt",car_status->cumulative_reward);
|
||||||
|
char *file = fileNameDateScore(mainfuncCaller,"",car_status->cumulative_reward);
|
||||||
EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->main_net ,weight_in, file);
|
EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->main_net ,weight_in, file);
|
||||||
unlink(main_symlink);
|
|
||||||
if(symlink(file, main_symlink)==-1){
|
|
||||||
fprintf(stderr,"debug: symlink %s with %s.\n",main_symlink, file);
|
//unlink(main_symlink);
|
||||||
|
unlink(mainSymlinkCaller);
|
||||||
|
//if(symlink(file, main_symlink)==-1)
|
||||||
|
if(symlink(file, mainSymlinkCaller)==-1)
|
||||||
|
{
|
||||||
|
//fprintf(stderr,"debug: symlink %s with %s.\n",main_symlink, file);
|
||||||
|
fprintf(stderr,"debug: symlink %s with %s.\n",mainSymlinkCaller, file);
|
||||||
//fprintf(stderr,"debug: symlink %s with %s. explain:%s \n",main_symlink, file, explain_symlink(file, main_symlink) );
|
//fprintf(stderr,"debug: symlink %s with %s. explain:%s \n",main_symlink, file, explain_symlink(file, main_symlink) );
|
||||||
}
|
}
|
||||||
else write(1,":",1);
|
else write(1,":",1);
|
||||||
write(1,cumulSTR,len_cumul);
|
write(1,cumulSTR,len_cumul);
|
||||||
free(file);
|
free(file);
|
||||||
file = fileNameDateScore(".ff_learnDir/.ff_target_",".txt",car_status->cumulative_reward);
|
//file = fileNameDateScore(".ff_learnDir/.ff_target_",".txt",car_status->cumulative_reward);
|
||||||
|
file = fileNameDateScore(targetfuncCaller,"",car_status->cumulative_reward);
|
||||||
EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->target_net ,weight_in, file);
|
EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->target_net ,weight_in, file);
|
||||||
unlink(target_symlink);
|
//unlink(target_symlink);
|
||||||
if(symlink(file, target_symlink)==-1){
|
//if(symlink(file, target_symlink)==-1)
|
||||||
fprintf(stderr,"debug: symlink %s with %s\n",target_symlink,file );
|
unlink(targetSymlinkCaller);
|
||||||
|
if(symlink(file, targetSymlinkCaller)==-1)
|
||||||
|
{
|
||||||
|
//fprintf(stderr,"debug: symlink %s with %s\n",target_symlink,file );
|
||||||
|
fprintf(stderr,"debug: symlink %s with %s\n",targetSymlinkCaller,file );
|
||||||
//fprintf(stderr,"debug: symlink %s with %s explain:%s\n",target_symlink,file,explain_symlink(file, target_symlink) );
|
//fprintf(stderr,"debug: symlink %s with %s explain:%s\n",target_symlink,file,explain_symlink(file, target_symlink) );
|
||||||
}
|
}
|
||||||
else write(1,"-",1);
|
else write(1,"-",1);
|
||||||
free(file);
|
free(file);
|
||||||
|
free(mainfuncCaller);
|
||||||
|
free(targetfuncCaller);
|
||||||
|
free(mainSymlinkCaller);
|
||||||
|
free(targetSymlinkCaller);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -551,6 +586,66 @@ void* learn_to_drive(void * lrnarg){
|
|||||||
// Sleep(pprint->delay->delay_between_episodes);
|
// Sleep(pprint->delay->delay_between_episodes);
|
||||||
//}
|
//}
|
||||||
}
|
}
|
||||||
|
// UPDATE IF ENDING AND BETTER REWARD
|
||||||
|
|
||||||
|
if(car_status->cumulative_reward > qlStatus->progress_best_cumul->end_list->value)
|
||||||
|
{
|
||||||
|
int len_cumul=0;
|
||||||
|
char cumulSTR[128];
|
||||||
|
len_cumul=sprintf(cumulSTR, " %ld ", car_status->cumulative_reward);
|
||||||
|
char *funcCaller_extension=malloc(128);
|
||||||
|
char *mainSymlinkCaller=malloc(256);
|
||||||
|
char *targetSymlinkCaller=malloc(256);
|
||||||
|
if(qlParams->caller_func_name){
|
||||||
|
sprintf(funcCaller_extension,"%s.txt",qlParams->caller_func_name);
|
||||||
|
sprintf(mainSymlinkCaller,".ff_main_%s.symlink",qlParams->caller_func_name);
|
||||||
|
sprintf(targetSymlinkCaller,".ff_target_%s.symlink",qlParams->caller_func_name);
|
||||||
|
}else{
|
||||||
|
sprintf(funcCaller_extension,".%s","txt");
|
||||||
|
strcpy(mainSymlinkCaller,".ff_main_.symlink");
|
||||||
|
strcpy(targetSymlinkCaller,".ff_target_.symlink");
|
||||||
|
|
||||||
|
}
|
||||||
|
push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward);
|
||||||
|
//char *file = fileNameDateScore(".ff_learnDir/.ff_main_",".txt",car_status->cumulative_reward);
|
||||||
|
char *file = fileNameDateScore(".ff_learnDir/.ff_main_",funcCaller_extension,car_status->cumulative_reward);
|
||||||
|
EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->main_net ,weight_in, file);
|
||||||
|
|
||||||
|
|
||||||
|
//unlink(main_symlink);
|
||||||
|
unlink(mainSymlinkCaller);
|
||||||
|
//if(symlink(file, main_symlink)==-1)
|
||||||
|
if(symlink(file, mainSymlinkCaller)==-1)
|
||||||
|
{
|
||||||
|
//fprintf(stderr,"debug: symlink %s with %s.\n",main_symlink, file);
|
||||||
|
fprintf(stderr,"debug: symlink %s with %s.\n",mainSymlinkCaller, file);
|
||||||
|
//fprintf(stderr,"debug: symlink %s with %s. explain:%s \n",main_symlink, file, explain_symlink(file, main_symlink) );
|
||||||
|
}
|
||||||
|
else write(1,":",1);
|
||||||
|
write(1,cumulSTR,len_cumul);
|
||||||
|
free(file);
|
||||||
|
//file = fileNameDateScore(".ff_learnDir/.ff_target_",".txt",car_status->cumulative_reward);
|
||||||
|
file = fileNameDateScore(".ff_learnDir/.ff_target_",funcCaller_extension,car_status->cumulative_reward);
|
||||||
|
EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->target_net ,weight_in, file);
|
||||||
|
//unlink(target_symlink);
|
||||||
|
//if(symlink(file, target_symlink)==-1)
|
||||||
|
unlink(targetSymlinkCaller);
|
||||||
|
if(symlink(file, targetSymlinkCaller)==-1)
|
||||||
|
{
|
||||||
|
//fprintf(stderr,"debug: symlink %s with %s\n",target_symlink,file );
|
||||||
|
fprintf(stderr,"debug: symlink %s with %s\n",targetSymlinkCaller,file );
|
||||||
|
//fprintf(stderr,"debug: symlink %s with %s explain:%s\n",target_symlink,file,explain_symlink(file, target_symlink) );
|
||||||
|
}
|
||||||
|
else write(1,"-",1);
|
||||||
|
free(file);
|
||||||
|
free(funcCaller_extension);
|
||||||
|
free(mainSymlinkCaller);
|
||||||
|
free(targetSymlinkCaller);
|
||||||
|
}
|
||||||
|
|
||||||
|
// END UPDATE
|
||||||
|
|
||||||
|
|
||||||
pthread_mutex_lock(qlStatus->mut_ending);
|
pthread_mutex_lock(qlStatus->mut_ending);
|
||||||
qlStatus->ending = 1;
|
qlStatus->ending = 1;
|
||||||
pthread_mutex_unlock(qlStatus->mut_ending);
|
pthread_mutex_unlock(qlStatus->mut_ending);
|
||||||
|
|||||||
@@ -22,6 +22,10 @@
|
|||||||
|
|
||||||
#include "vehicle.h"
|
#include "vehicle.h"
|
||||||
|
|
||||||
|
#ifndef CHECK_NAN
|
||||||
|
#define CHECK_NAN 0
|
||||||
|
#endif
|
||||||
|
|
||||||
//float reLU(float x);
|
//float reLU(float x);
|
||||||
|
|
||||||
//float d_reLU(float x);
|
//float d_reLU(float x);
|
||||||
@@ -48,6 +52,7 @@ struct qlearning_params {
|
|||||||
long int nb_training_before_update_weight_in_target;
|
long int nb_training_before_update_weight_in_target;
|
||||||
size_t number_episodes;
|
size_t number_episodes;
|
||||||
// size_t threshold_number_same_action;
|
// size_t threshold_number_same_action;
|
||||||
|
char *caller_func_name;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -778,6 +778,8 @@ TEST(_first_learn_vehicle_50__11){
|
|||||||
size_t dim= 2;
|
size_t dim= 2;
|
||||||
struct blocks * path = create_blocks(nb_block, dim);
|
struct blocks * path = create_blocks(nb_block, dim);
|
||||||
|
|
||||||
|
LOG("debug: f_name = %s\n", __func__);
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
|
|
||||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
|
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
|
||||||
@@ -922,6 +924,10 @@ struct status_qlearning *qlstatus = create_status_qlearning ();
|
|||||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
|
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
|
||||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
|
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
|
||||||
*/
|
*/
|
||||||
|
qlparams->caller_func_name=malloc(strlen(__func__)+1);
|
||||||
|
strcpy(qlparams->caller_func_name, __func__);
|
||||||
|
|
||||||
|
|
||||||
struct print_params *pprint = create_print_params(
|
struct print_params *pprint = create_print_params(
|
||||||
12/*float scale_x*/,12 /*float scale_y*/,
|
12/*float scale_x*/,12 /*float scale_y*/,
|
||||||
dly/*struct delay_params * dly_p*/
|
dly/*struct delay_params * dly_p*/
|
||||||
@@ -1088,7 +1094,7 @@ copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
|
|||||||
int randomRange = 500;
|
int randomRange = 500;
|
||||||
size_t nb_prod_thread = 2;
|
size_t nb_prod_thread = 2;
|
||||||
size_t nb_calc_thread = 4;
|
size_t nb_calc_thread = 4;
|
||||||
float learning_rate = 0.0000001 /* 0.001*/;
|
float learning_rate = 0.00001 /* 0.001*/;
|
||||||
struct networks_qlearning *nnetworks = create_network_qlearning(
|
struct networks_qlearning *nnetworks = create_network_qlearning(
|
||||||
pconf,
|
pconf,
|
||||||
randomize, minR, maxR, randomRange,
|
randomize, minR, maxR, randomRange,
|
||||||
@@ -1235,7 +1241,7 @@ HIDE_TEST(__first_learn_vehicle13){
|
|||||||
int randomRange = 5000;
|
int randomRange = 5000;
|
||||||
size_t nb_prod_thread = 2;
|
size_t nb_prod_thread = 2;
|
||||||
size_t nb_calc_thread = 4;
|
size_t nb_calc_thread = 4;
|
||||||
float learning_rate = 0.1;
|
float learning_rate = 0.00001;
|
||||||
struct networks_qlearning *nnetworks = create_network_qlearning(
|
struct networks_qlearning *nnetworks = create_network_qlearning(
|
||||||
pconf,
|
pconf,
|
||||||
randomize, minR, maxR, randomRange,
|
randomize, minR, maxR, randomRange,
|
||||||
@@ -1243,7 +1249,10 @@ HIDE_TEST(__first_learn_vehicle13){
|
|||||||
learning_rate
|
learning_rate
|
||||||
);
|
);
|
||||||
|
|
||||||
struct status_qlearning *qlstatus = create_status_qlearning ();
|
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_.symlink");
|
||||||
|
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_.symlink");
|
||||||
|
|
||||||
|
struct status_qlearning *qlstatus = create_status_qlearning ();
|
||||||
struct delay_params *dly = create_delay_params (
|
struct delay_params *dly = create_delay_params (
|
||||||
500/*size_t delay_between_episodes*/,
|
500/*size_t delay_between_episodes*/,
|
||||||
50/*size_t delay_between_games*/
|
50/*size_t delay_between_games*/
|
||||||
@@ -1253,7 +1262,7 @@ HIDE_TEST(__first_learn_vehicle13){
|
|||||||
0.95/*float gamma*/,
|
0.95/*float gamma*/,
|
||||||
learning_rate,
|
learning_rate,
|
||||||
0 /* (not used!)float discount_factor*/,
|
0 /* (not used!)float discount_factor*/,
|
||||||
0.85 /*float exploration_factor*/,
|
0.085 /*float exploration_factor*/,
|
||||||
20/*long int nb_training_before_update_weight_in_target*/,
|
20/*long int nb_training_before_update_weight_in_target*/,
|
||||||
10000/*size_t number_episodes*/
|
10000/*size_t number_episodes*/
|
||||||
);
|
);
|
||||||
@@ -1284,7 +1293,7 @@ HIDE_TEST(__first_learn_vehicle13){
|
|||||||
struct arg_run_qlearn_bprint *argQL_BP = create_arg_run_qlearn_bprint(bash_arg, rlAgent);
|
struct arg_run_qlearn_bprint *argQL_BP = create_arg_run_qlearn_bprint(bash_arg, rlAgent);
|
||||||
|
|
||||||
struct arg_var_ * var = create_arg_var_(y_nnn_manager_handle_input, argQL_BP);
|
struct arg_var_ * var = create_arg_var_(y_nnn_manager_handle_input, argQL_BP);
|
||||||
struct y_socket_t *argS = y_socket_create("1600", 2, 3, var);
|
struct y_socket_t *argS = y_socket_create("16001", 2, 3, var);
|
||||||
|
|
||||||
|
|
||||||
pthread_t pollTh;
|
pthread_t pollTh;
|
||||||
|
|||||||
Reference in New Issue
Block a user