y_nnn learn_to_drive: try to minimise long runs of the same unchanged action; search for optimal params
This commit is contained in:
@@ -79,7 +79,9 @@ struct networks_qlearning * create_nework_qlearning(
|
|||||||
|
|
||||||
qnets->thread_learn = NULL;
|
qnets->thread_learn = NULL;
|
||||||
|
|
||||||
|
for(int i=0;i<COUNT_ACTION;++i){
|
||||||
|
qnets->nb_successive_action[i]=0;
|
||||||
|
}
|
||||||
|
|
||||||
return qnets;
|
return qnets;
|
||||||
|
|
||||||
@@ -162,7 +164,7 @@ struct qlearning_params * create_qlearning_params (
|
|||||||
|
|
||||||
qparams->factor_update_learning_rate = 0.995;
|
qparams->factor_update_learning_rate = 0.995;
|
||||||
qparams->minimum_threshold_learning_rate = 0.0001 ;
|
qparams->minimum_threshold_learning_rate = 0.0001 ;
|
||||||
qparams->factor_update_exploration_factor = 0.995;
|
qparams->factor_update_exploration_factor = 0.9995 /*0.995*/;
|
||||||
qparams->minimum_threshold_exploration_factor = 0.01;
|
qparams->minimum_threshold_exploration_factor = 0.01;
|
||||||
|
|
||||||
// qparams->threshold_number_same_action = 500;
|
// qparams->threshold_number_same_action = 500;
|
||||||
@@ -244,6 +246,8 @@ void free_RL_agent(struct RL_agent *rlAgent){
|
|||||||
free(rlAgent);
|
free(rlAgent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define ACCEPTABLE_REWARD 1000
|
||||||
|
|
||||||
void train_qlearning(struct RL_agent * rlAgent,
|
void train_qlearning(struct RL_agent * rlAgent,
|
||||||
int action //, long reward
|
int action //, long reward
|
||||||
){
|
){
|
||||||
@@ -276,17 +280,18 @@ void train_qlearning(struct RL_agent * rlAgent,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ***
|
// ***
|
||||||
|
if(car_status->cumulative_reward > ACCEPTABLE_REWARD){
|
||||||
float new_value = ( (net_main->learning_rate < qlParams->minimum_threshold_learning_rate /*0.0001*/) ? net_main->learning_rate :(net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ );
|
float new_value = ( (net_main->learning_rate < qlParams->minimum_threshold_learning_rate /*0.0001*/) ? net_main->learning_rate :(net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ );
|
||||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, net_main, learning_rate, new_value);
|
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, net_main, learning_rate, new_value);
|
||||||
|
|
||||||
qlParams->exploration_factor = (qlParams->exploration_factor < qlParams->minimum_threshold_exploration_factor) ? qlParams->exploration_factor : qlParams->exploration_factor * qlParams->factor_update_exploration_factor ;
|
qlParams->exploration_factor = (qlParams->exploration_factor < qlParams->minimum_threshold_exploration_factor) ? qlParams->exploration_factor : qlParams->exploration_factor * qlParams->factor_update_exploration_factor ;
|
||||||
|
}
|
||||||
// free_tensor_TYPE_FLOAT(action_value);
|
// free_tensor_TYPE_FLOAT(action_value);
|
||||||
// free_tensor_TYPE_FLOAT(next_action_value);
|
// free_tensor_TYPE_FLOAT(next_action_value);
|
||||||
free_tensor_TYPE_FLOAT(experimental_values);
|
free_tensor_TYPE_FLOAT(experimental_values);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
#define MAX_SUCCESSIVE_ACTION 200
|
||||||
int select_action(struct RL_agent * rlAgent){
|
int select_action(struct RL_agent * rlAgent){
|
||||||
//static size_t explore = 0;
|
//static size_t explore = 0;
|
||||||
int action;
|
int action;
|
||||||
@@ -305,6 +310,17 @@ int select_action(struct RL_agent * rlAgent){
|
|||||||
float proba_explor = (float) (xrand() % ((1<<17) -1))/ ((1<<17) -1); //frand(); //(float)(random ) / randRange;
|
float proba_explor = (float) (xrand() % ((1<<17) -1))/ ((1<<17) -1); //frand(); //(float)(random ) / randRange;
|
||||||
if(proba_explor > rlAgent->qlearnParams->exploration_factor ){
|
if(proba_explor > rlAgent->qlearnParams->exploration_factor ){
|
||||||
action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank );
|
action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank );
|
||||||
|
//printf(" STRATEGY : action : %d , factor : %f nb_episodes : %ld \n",action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
|
||||||
|
if(rlAgent->networks->nb_successive_action[action]>MAX_SUCCESSIVE_ACTION){
|
||||||
|
rlAgent->networks->nb_successive_action[action]=0;
|
||||||
|
int recAction=action;
|
||||||
|
while(action==recAction){
|
||||||
|
action = xrand() % action_value->dim->rank ;
|
||||||
|
//printf("debug: action=%d recAction=%d\n",action, recAction);
|
||||||
|
}
|
||||||
|
write(1,"#",1);
|
||||||
|
}
|
||||||
|
////else write(1,".",1);
|
||||||
//if(action == ARG_MIN_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank ))
|
//if(action == ARG_MIN_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank ))
|
||||||
//action = xrand() % action_value->dim->rank ;
|
//action = xrand() % action_value->dim->rank ;
|
||||||
}
|
}
|
||||||
@@ -312,7 +328,15 @@ int select_action(struct RL_agent * rlAgent){
|
|||||||
action = xrand() % action_value->dim->rank ;
|
action = xrand() % action_value->dim->rank ;
|
||||||
// explore++;
|
// explore++;
|
||||||
//printf(" EXPLORE :%ld, action : %d , factor : %f nb_episodes : %ld \n",explore,action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
|
//printf(" EXPLORE :%ld, action : %d , factor : %f nb_episodes : %ld \n",explore,action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
|
||||||
|
//printf(" EXPLORE : action : %d , factor : %f nb_episodes : %ld \n",action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
|
||||||
|
////write(1,"*",1);
|
||||||
}
|
}
|
||||||
|
for(int a=0;a<COUNT_ACTION;++a){
|
||||||
|
if(a!=action)
|
||||||
|
rlAgent->networks->nb_successive_action[a]=0;
|
||||||
|
}
|
||||||
|
(rlAgent->networks->nb_successive_action[action])++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
if(rlAgent->status->last_action == action){
|
if(rlAgent->status->last_action == action){
|
||||||
++(rlAgent->status->count_last_action);
|
++(rlAgent->status->count_last_action);
|
||||||
@@ -445,6 +469,9 @@ void* learn_to_drive(void * lrnarg){
|
|||||||
//push_back_list_TYPE_L_INT(qlStatus->list_main_cumul, car_status->cumulative_reward);
|
//push_back_list_TYPE_L_INT(qlStatus->list_main_cumul, car_status->cumulative_reward);
|
||||||
// printf(" cumul : %ld ", car_status->cumulative_reward);
|
// printf(" cumul : %ld ", car_status->cumulative_reward);
|
||||||
if(car_status->cumulative_reward > qlStatus->progress_best_cumul->end_list->value){
|
if(car_status->cumulative_reward > qlStatus->progress_best_cumul->end_list->value){
|
||||||
|
int len_cumul=0;
|
||||||
|
char cumulSTR[128];
|
||||||
|
len_cumul=sprintf(cumulSTR, " %ld ", car_status->cumulative_reward);
|
||||||
|
|
||||||
push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward);
|
push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward);
|
||||||
char *file = fileNameDateScore(".ff_learnDir/.ff_main_",".txt",car_status->cumulative_reward);
|
char *file = fileNameDateScore(".ff_learnDir/.ff_main_",".txt",car_status->cumulative_reward);
|
||||||
@@ -455,6 +482,7 @@ void* learn_to_drive(void * lrnarg){
|
|||||||
//fprintf(stderr,"debug: symlink %s with %s. explain:%s \n",main_symlink, file, explain_symlink(file, main_symlink) );
|
//fprintf(stderr,"debug: symlink %s with %s. explain:%s \n",main_symlink, file, explain_symlink(file, main_symlink) );
|
||||||
}
|
}
|
||||||
else write(1,":",1);
|
else write(1,":",1);
|
||||||
|
write(1,cumulSTR,len_cumul);
|
||||||
free(file);
|
free(file);
|
||||||
file = fileNameDateScore(".ff_learnDir/.ff_target_",".txt",car_status->cumulative_reward);
|
file = fileNameDateScore(".ff_learnDir/.ff_target_",".txt",car_status->cumulative_reward);
|
||||||
EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->target_net ,weight_in, file);
|
EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, rlAgent->networks->target_net ,weight_in, file);
|
||||||
|
|||||||
@@ -87,6 +87,7 @@ struct networks_qlearning {
|
|||||||
neurons_TYPE_FLOAT *target_net;
|
neurons_TYPE_FLOAT *target_net;
|
||||||
neurons_TYPE_FLOAT *best_net;
|
neurons_TYPE_FLOAT *best_net;
|
||||||
pthread_t *thread_learn;
|
pthread_t *thread_learn;
|
||||||
|
ssize_t nb_successive_action[COUNT_ACTION];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct RL_agent {
|
struct RL_agent {
|
||||||
|
|||||||
@@ -581,7 +581,7 @@ void step_vehicle(struct vehicle *v, int action){
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define RANDOM 1
|
#define RANDOM 0
|
||||||
|
|
||||||
void reset(struct vehicle *v){
|
void reset(struct vehicle *v){
|
||||||
//static bool init = true;
|
//static bool init = true;
|
||||||
@@ -597,28 +597,29 @@ void reset(struct vehicle *v){
|
|||||||
//srand(time(NULL));
|
//srand(time(NULL));
|
||||||
//init = false;
|
//init = false;
|
||||||
//}
|
//}
|
||||||
int random;
|
|
||||||
int diff;
|
int diff;
|
||||||
diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0];
|
diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0];
|
||||||
random = xrand() % (diff/2) ;
|
|
||||||
#if RANDOM
|
#if RANDOM
|
||||||
|
int random;
|
||||||
|
random = xrand() % (diff/2) ;
|
||||||
v->coord->x[0] = path->lower_bound_block[0]->x[0] + random;
|
v->coord->x[0] = path->lower_bound_block[0]->x[0] + random;
|
||||||
#else
|
#else
|
||||||
v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2;
|
v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2;
|
||||||
#endif
|
#endif
|
||||||
diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1];
|
diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1];
|
||||||
random = xrand() % (diff/2);
|
|
||||||
#if RANDOM
|
#if RANDOM
|
||||||
|
random = xrand() % (diff/2);
|
||||||
v->coord->x[1] = path->lower_bound_block[0]->x[1] + random;
|
v->coord->x[1] = path->lower_bound_block[0]->x[1] + random;
|
||||||
#else
|
#else
|
||||||
v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2;
|
v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2;
|
||||||
#endif
|
#endif
|
||||||
random = xrand() % 50;
|
|
||||||
#if RANDOM
|
#if RANDOM
|
||||||
// v->direction = 115 - random ;
|
random = xrand() % 50;
|
||||||
v->direction = random - 25 ;
|
v->direction = 80 - random ;
|
||||||
|
//v->direction = 115 - random ;
|
||||||
|
// v->direction = random - 25 ;
|
||||||
#else
|
#else
|
||||||
v->direction = -90;
|
v->direction = 70; //-90;
|
||||||
#endif
|
#endif
|
||||||
v->speed = SPEED;
|
v->speed = SPEED;
|
||||||
read_sensor(v);
|
read_sensor(v);
|
||||||
|
|||||||
@@ -137,7 +137,7 @@ void* runBashPrint(void *arg){
|
|||||||
////printf("%s ",pprint->string_space);
|
////printf("%s ",pprint->string_space);
|
||||||
len_buf=sprintf(buf,"%s ",pprint->string_space);
|
len_buf=sprintf(buf,"%s ",pprint->string_space);
|
||||||
BASH_WRITE_IF_EXIST(bash_arg, buf, len_buf)
|
BASH_WRITE_IF_EXIST(bash_arg, buf, len_buf)
|
||||||
#if 0
|
#if 1
|
||||||
////printf("ep: %ld ",qlStatus->index_episode);
|
////printf("ep: %ld ",qlStatus->index_episode);
|
||||||
len_buf=sprintf(buf,"ep: %ld\n",qlStatus->index_episode);
|
len_buf=sprintf(buf,"ep: %ld\n",qlStatus->index_episode);
|
||||||
BASH_WRITE_IF_EXIST(bash_arg, buf, len_buf)
|
BASH_WRITE_IF_EXIST(bash_arg, buf, len_buf)
|
||||||
|
|||||||
@@ -279,7 +279,7 @@ float df(float x){
|
|||||||
// **************************************************************
|
// **************************************************************
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
TEST(_first_learn_vehicle_50__9){
|
HIDE_TEST(_first_learn_vehicle_50__9){
|
||||||
size_t nb_block = 7;
|
size_t nb_block = 7;
|
||||||
size_t dim= 2;
|
size_t dim= 2;
|
||||||
struct blocks * path = create_blocks(nb_block, dim);
|
struct blocks * path = create_blocks(nb_block, dim);
|
||||||
@@ -335,7 +335,7 @@ TEST(_first_learn_vehicle_50__9){
|
|||||||
int randomRange = 500;
|
int randomRange = 500;
|
||||||
size_t nb_prod_thread = 2;
|
size_t nb_prod_thread = 2;
|
||||||
size_t nb_calc_thread = 4;
|
size_t nb_calc_thread = 4;
|
||||||
float learning_rate = 0.00001 /* 0.001*/;
|
float learning_rate = 0.001 /*0.01*/ /* 0.001*/;
|
||||||
struct networks_qlearning *nnetworks = create_nework_qlearning(
|
struct networks_qlearning *nnetworks = create_nework_qlearning(
|
||||||
pconf,
|
pconf,
|
||||||
randomize, minR, maxR, randomRange,
|
randomize, minR, maxR, randomRange,
|
||||||
@@ -360,7 +360,7 @@ struct status_qlearning *qlstatus = create_status_qlearning ();
|
|||||||
0.95/*float gamma*/,
|
0.95/*float gamma*/,
|
||||||
learning_rate,
|
learning_rate,
|
||||||
0 /* (not used!)float discount_factor*/,
|
0 /* (not used!)float discount_factor*/,
|
||||||
0.01/*0.99*/ /*float exploration_factor*/,
|
0.78/*0.01*//*0.99*/ /*float exploration_factor*/,
|
||||||
20/*long int nb_training_before_update_weight_in_target*/,
|
20/*long int nb_training_before_update_weight_in_target*/,
|
||||||
10000/*size_t number_episodes*/
|
10000/*size_t number_episodes*/
|
||||||
);
|
);
|
||||||
@@ -421,7 +421,7 @@ struct status_qlearning *qlstatus = create_status_qlearning ();
|
|||||||
// ****************************************************************
|
// ****************************************************************
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
HIDE_TEST(first_learn_vehicle_50__10){
|
HIDE_TEST(_first_learn_vehicle_50__10){
|
||||||
size_t nb_block = 7;
|
size_t nb_block = 7;
|
||||||
size_t dim= 2;
|
size_t dim= 2;
|
||||||
struct blocks * path = create_blocks(nb_block, dim);
|
struct blocks * path = create_blocks(nb_block, dim);
|
||||||
@@ -578,9 +578,35 @@ struct status_qlearning *qlstatus = create_status_qlearning ();
|
|||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
learn_to_drive(rlAgent);
|
//learn_to_drive(rlAgent);
|
||||||
|
//learn_to_drive(rlAgent);
|
||||||
|
|
||||||
free_RL_agent(rlAgent);
|
struct arg_bash *bash_arg= create_arg_bash();
|
||||||
|
|
||||||
|
struct arg_run_qlearn_bprint *argQL_BP = create_arg_run_qlearn_bprint(bash_arg, rlAgent);
|
||||||
|
|
||||||
|
struct arg_var_ * var = create_arg_var_(y_nnn_manager_handle_input, argQL_BP);
|
||||||
|
struct y_socket_t *argS = y_socket_create("1600", 2, 3, var);
|
||||||
|
|
||||||
|
|
||||||
|
pthread_t pollTh;
|
||||||
|
pthread_create(&pollTh, NULL, y_socket_poll_fds, (void*)argS);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
pthread_join(pollTh, NULL);
|
||||||
|
//pthread_join(thread_learn, NULL);
|
||||||
|
|
||||||
|
y_socket_free(argS);
|
||||||
|
free_arg_var_(var);
|
||||||
|
free_arg_run_qlearn_bprint(argQL_BP);
|
||||||
|
//free_arg_bash(bash_arg);
|
||||||
|
|
||||||
|
//free_RL_agent(rlAgent);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -588,7 +614,7 @@ struct status_qlearning *qlstatus = create_status_qlearning ();
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
HIDE_TEST(_first_learn_vehicle_50__11_9){
|
TEST(_first_learn_vehicle_50__11_9){
|
||||||
size_t nb_block = 7;
|
size_t nb_block = 7;
|
||||||
size_t dim= 2;
|
size_t dim= 2;
|
||||||
struct blocks * path = create_blocks(nb_block, dim);
|
struct blocks * path = create_blocks(nb_block, dim);
|
||||||
@@ -664,7 +690,7 @@ HIDE_TEST(_first_learn_vehicle_50__11_9){
|
|||||||
int randomRange = 500;
|
int randomRange = 500;
|
||||||
size_t nb_prod_thread = 2;
|
size_t nb_prod_thread = 2;
|
||||||
size_t nb_calc_thread = 4;
|
size_t nb_calc_thread = 4;
|
||||||
float learning_rate = 0.00001 /* 0.001*/;
|
float learning_rate = 0.001; /* 0.00001*/ /* 0.001*/;
|
||||||
struct networks_qlearning *nnetworks = create_nework_qlearning(
|
struct networks_qlearning *nnetworks = create_nework_qlearning(
|
||||||
pconf,
|
pconf,
|
||||||
randomize, minR, maxR, randomRange,
|
randomize, minR, maxR, randomRange,
|
||||||
@@ -676,8 +702,8 @@ EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weigh
|
|||||||
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_01h42m16s_5300.txt");
|
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_20240717_01h42m16s_5300.txt");
|
||||||
*/
|
*/
|
||||||
|
|
||||||
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_.symlink");
|
//EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->main_net, weight_in, ".ff_main_.symlink");
|
||||||
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_.symlink");
|
//EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, nnetworks->target_net, weight_in, ".ff_target_.symlink");
|
||||||
|
|
||||||
struct status_qlearning *qlstatus = create_status_qlearning ();
|
struct status_qlearning *qlstatus = create_status_qlearning ();
|
||||||
struct delay_params *dly = create_delay_params (
|
struct delay_params *dly = create_delay_params (
|
||||||
@@ -689,7 +715,7 @@ struct status_qlearning *qlstatus = create_status_qlearning ();
|
|||||||
0.95/*float gamma*/,
|
0.95/*float gamma*/,
|
||||||
learning_rate,
|
learning_rate,
|
||||||
0 /* (not used!)float discount_factor*/,
|
0 /* (not used!)float discount_factor*/,
|
||||||
0.01/*0.99*/ /*float exploration_factor*/,
|
0.99 /*0.99*/ /*float exploration_factor*/,
|
||||||
20/*long int nb_training_before_update_weight_in_target*/,
|
20/*long int nb_training_before_update_weight_in_target*/,
|
||||||
10000/*size_t number_episodes*/
|
10000/*size_t number_episodes*/
|
||||||
);
|
);
|
||||||
@@ -1149,7 +1175,7 @@ struct status_qlearning *qlstatus = create_status_qlearning ();
|
|||||||
|
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
HIDE_TEST(first_learn_vehicle13){
|
HIDE_TEST(__first_learn_vehicle13){
|
||||||
size_t nb_block = 7;
|
size_t nb_block = 7;
|
||||||
size_t dim= 2;
|
size_t dim= 2;
|
||||||
struct blocks * path = create_blocks(nb_block, dim);
|
struct blocks * path = create_blocks(nb_block, dim);
|
||||||
@@ -1216,8 +1242,8 @@ HIDE_TEST(first_learn_vehicle13){
|
|||||||
|
|
||||||
struct status_qlearning *qlstatus = create_status_qlearning ();
|
struct status_qlearning *qlstatus = create_status_qlearning ();
|
||||||
struct delay_params *dly = create_delay_params (
|
struct delay_params *dly = create_delay_params (
|
||||||
100/*size_t delay_between_episodes*/,
|
500/*size_t delay_between_episodes*/,
|
||||||
10/*size_t delay_between_games*/
|
50/*size_t delay_between_games*/
|
||||||
);
|
);
|
||||||
|
|
||||||
struct qlearning_params *qlparams = create_qlearning_params (
|
struct qlearning_params *qlparams = create_qlearning_params (
|
||||||
@@ -1246,9 +1272,37 @@ HIDE_TEST(first_learn_vehicle13){
|
|||||||
qlparams/*struct qlearning_params *qlearnParams*/
|
qlparams/*struct qlearning_params *qlearnParams*/
|
||||||
);
|
);
|
||||||
|
|
||||||
learn_to_drive(rlAgent);
|
//learn_to_drive(rlAgent);
|
||||||
|
|
||||||
free_RL_agent(rlAgent);
|
//learn_to_drive(rlAgent);
|
||||||
|
|
||||||
|
struct arg_bash *bash_arg= create_arg_bash();
|
||||||
|
|
||||||
|
struct arg_run_qlearn_bprint *argQL_BP = create_arg_run_qlearn_bprint(bash_arg, rlAgent);
|
||||||
|
|
||||||
|
struct arg_var_ * var = create_arg_var_(y_nnn_manager_handle_input, argQL_BP);
|
||||||
|
struct y_socket_t *argS = y_socket_create("1600", 2, 3, var);
|
||||||
|
|
||||||
|
|
||||||
|
pthread_t pollTh;
|
||||||
|
pthread_create(&pollTh, NULL, y_socket_poll_fds, (void*)argS);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
pthread_join(pollTh, NULL);
|
||||||
|
//pthread_join(thread_learn, NULL);
|
||||||
|
|
||||||
|
y_socket_free(argS);
|
||||||
|
free_arg_var_(var);
|
||||||
|
free_arg_run_qlearn_bprint(argQL_BP);
|
||||||
|
|
||||||
|
|
||||||
|
//free_RL_agent(rlAgent);
|
||||||
|
//free_RL_agent(rlAgent);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -2701,7 +2755,7 @@ struct status_qlearning *qlstatus = create_status_qlearning ();
|
|||||||
|
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
HIDE_TEST(first_learn_vehicle13){
|
HIDE_TEST(_first_learn_vehicle13){
|
||||||
size_t nb_block = 7;
|
size_t nb_block = 7;
|
||||||
size_t dim= 2;
|
size_t dim= 2;
|
||||||
struct blocks * path = create_blocks(nb_block, dim);
|
struct blocks * path = create_blocks(nb_block, dim);
|
||||||
@@ -2798,9 +2852,37 @@ HIDE_TEST(first_learn_vehicle13){
|
|||||||
qlparams/*struct qlearning_params *qlearnParams*/
|
qlparams/*struct qlearning_params *qlearnParams*/
|
||||||
);
|
);
|
||||||
|
|
||||||
learn_to_drive(rlAgent);
|
//learn_to_drive(rlAgent);
|
||||||
|
|
||||||
free_RL_agent(rlAgent);
|
//learn_to_drive(rlAgent);
|
||||||
|
|
||||||
|
struct arg_bash *bash_arg= create_arg_bash();
|
||||||
|
|
||||||
|
struct arg_run_qlearn_bprint *argQL_BP = create_arg_run_qlearn_bprint(bash_arg, rlAgent);
|
||||||
|
|
||||||
|
struct arg_var_ * var = create_arg_var_(y_nnn_manager_handle_input, argQL_BP);
|
||||||
|
struct y_socket_t *argS = y_socket_create("1600", 2, 3, var);
|
||||||
|
|
||||||
|
|
||||||
|
pthread_t pollTh;
|
||||||
|
pthread_create(&pollTh, NULL, y_socket_poll_fds, (void*)argS);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
pthread_join(pollTh, NULL);
|
||||||
|
//pthread_join(thread_learn, NULL);
|
||||||
|
|
||||||
|
y_socket_free(argS);
|
||||||
|
free_arg_var_(var);
|
||||||
|
free_arg_run_qlearn_bprint(argQL_BP);
|
||||||
|
|
||||||
|
|
||||||
|
//free_RL_agent(rlAgent);
|
||||||
|
//free_RL_agent(rlAgent);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user