diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.c b/deepQlearn_0/src/deepQlearning/learn_to_drive.c index d83c9f1..c64f6c9 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.c +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.c @@ -1,5 +1,6 @@ #include "learn_to_drive.h" +char *action_name[8] = {"LEFT", "CENTER", "RIGHT"}; float reLU(float x){ if(x>0) return x; @@ -28,21 +29,30 @@ void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * netwo struct networks_qlearning * create_nework_qlearning( struct config_layers * config, - bool randomize, float minR, float maxR, int randomRange + bool randomize, float minR, float maxR, int randomRange, + size_t nb_prod_thread, + size_t nb_calc_thread, + float learning_rate ){ struct networks_qlearning *qnets = malloc(sizeof(struct networks_qlearning)); qnets->config = config; - setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->main_net), config, - random, minR, maxR, randomRange); - setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->target_net), config, - false, minR, maxR, randomRange); + setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->main_net), config, random, minR, maxR, randomRange); + setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->target_net), config, false, minR, maxR, randomRange); copy_weight_in_networks_from_main_to_target(qnets); - setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->best_net), config, - false, minR, maxR, randomRange); + setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->best_net), config, false, minR, maxR, randomRange); copy_weight_in_networks_from_main_to_best(qnets); + + setup_all_layers_functions_TYPE_FLOAT(qnets->main_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU); + setup_all_layers_params_TYPE_FLOAT(qnets->main_net, nb_prod_thread, nb_calc_thread, learning_rate); + setup_all_layers_functions_TYPE_FLOAT(qnets->target_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU); + setup_all_layers_params_TYPE_FLOAT(qnets->target_net, nb_prod_thread, nb_calc_thread, learning_rate); + setup_all_layers_functions_TYPE_FLOAT(qnets->best_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU); + setup_all_layers_params_TYPE_FLOAT(qnets->best_net, nb_prod_thread, nb_calc_thread, learning_rate); + + return qnets; } @@ -53,7 +63,11 @@ struct status_qlearning * create_status_qlearning (){ status_ql->list_main_cumul = create_var_list_TYPE_L_INT(); status_ql->list_target_cumul = create_var_list_TYPE_L_INT(); status_ql->progress_best_cumul = create_var_list_TYPE_L_INT(); - + + //push_back_list_TYPE_L_INT(status_ql->list_main_cumul, 0); + //push_back_list_TYPE_L_INT(status_ql->list_target_cumul, 0); + push_back_list_TYPE_L_INT(status_ql->progress_best_cumul, -10000); + status_ql->nb_training_after_updated_weight_in_target = 0; return status_ql; @@ -66,16 +80,34 @@ struct delay_params * create_delay_params ( struct delay_params * delay = malloc(sizeof(struct delay_params)); delay->delay_between_episodes = delay_between_episodes; delay->delay_between_games = delay_between_games; + return delay; } +struct print_params * create_print_params(float scale_x, float scale_y, struct delay_params * delay){ + struct print_params * pprint = malloc(sizeof(struct print_params)); + pprint->printed = true; + pprint->scale_x = scale_x; + pprint->scale_y = scale_y; + pprint->delay = delay; + pthread_mutex_init(&(pprint->mut_printed), NULL); + + int i; + for( i=0; istring_space[i]=' '; + pprint->string_space[i]='\0'; + + return pprint; +} + struct qlearning_params * create_qlearning_params ( - double gamma, - double learning_rate, - double discount_factor, - double exploration_factor, - long int nb_training_before_update_weight_in_target + float gamma, + float learning_rate, + float discount_factor, + float exploration_factor, + long int nb_training_before_update_weight_in_target, + size_t number_episodes ){ struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params)); @@ -85,6 +117,13 @@ struct qlearning_params * create_qlearning_params ( qparams->exploration_factor = exploration_factor ; qparams->nb_training_before_update_weight_in_target = nb_training_before_update_weight_in_target; + qparams->number_episodes = number_episodes; + + qparams->factor_update_learning_rate = 0.995; + qparams->minimum_threshold_learning_rate = 0.0001 ; + qparams->factor_update_exploration_factor = 0.995; + qparams->minimum_threshold_exploration_factor = 0.01; + return qparams; } @@ -93,7 +132,7 @@ struct RL_agent * create_RL_agent ( struct networks_qlearning * networks, struct vehicle * car, struct status_qlearning * status, - struct delay_params * delay, + struct print_params * pprint, struct qlearning_params *qlearnParams ){ struct RL_agent * rlagent = malloc(sizeof(struct RL_agent)); @@ -101,7 +140,7 @@ struct RL_agent * create_RL_agent ( rlagent->networks = networks ; rlagent->car = car ; rlagent->status = status ; - rlagent->delay = delay ; + rlagent->pprint = pprint ; rlagent->qlearnParams = qlearnParams ; return rlagent; @@ -123,12 +162,20 @@ void free_status_qlearning(struct status_qlearning *status_ql){ void free_delay_params (struct delay_params *dly_p){ free(dly_p); } + +void free_print_params (struct print_params *pprint){ + pthread_mutex_destroy(&(pprint->mut_printed)); + free_delay_params(pprint->delay); + free(pprint); + +} + void free_qlearning_params(struct qlearning_params *q_params){ free(q_params); } void free_RL_agent(struct RL_agent *rlAgent){ free(rlAgent->qlearnParams); - free(rlAgent->delay); + free_print_params(rlAgent->pprint); free_status_qlearning(rlAgent->status); free_networks_qlearning(rlAgent->networks); free_vehicle(rlAgent->car); @@ -137,25 +184,105 @@ void free_RL_agent(struct RL_agent *rlAgent){ } void train_qlearning(struct RL_agent * rlAgent, - int action /* */, - tensor_TYPE_FLOAT * new_state /*input*/, - tensor_TYPE_FLOAT * state /*input*/, - long reward){ + int action //, long reward + ){ tensor_TYPE_FLOAT * action_value = NULL; tensor_TYPE_FLOAT * next_action_value = NULL; neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net; neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net; + tensor_TYPE_FLOAT * new_state = rlAgent->car->sensor /*input*/; + tensor_TYPE_FLOAT * state = rlAgent->car->old_sensor /*input*/; calculate_output_by_network_neurons_TYPE_FLOAT(net_main, state, &action_value); - calculate_output_by_network_neurons_TYPE_FLOAT(net_target, state, &next_action_value); + calculate_output_by_network_neurons_TYPE_FLOAT(net_target, new_state, &next_action_value); tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim); struct game_status * car_status = rlAgent->car->status; + struct qlearning_params * qlParams = rlAgent->qlearnParams; copy_tensor_TYPE_FLOAT(experimental_values, action_value) ; // experimental_values === Q-tab learning if(car_status->done){ experimental_values->x[action] = -100; }else { - experimental_values->x[action] = reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ; + experimental_values->x[action] = car_status->reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ; + } + float new_value = ( (net_main->learning_rate < qlParams->minimum_threshold_learning_rate /*0.0001*/) ? net_main->learning_rate :(net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, net_main, learning_rate, new_value); + + qlParams->exploration_factor = (qlParams->exploration_factor < qlParams->minimum_threshold_exploration_factor) ? qlParams->exploration_factor : qlParams->exploration_factor * qlParams->factor_update_exploration_factor ; + + +} + +int select_action(struct RL_agent * rlAgent){ + int action; + tensor_TYPE_FLOAT * action_value = NULL; + calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->old_sensor, &action_value); + long int NUMBER_EPISODE2 = (rlAgent->qlearnParams->number_episodes); + NUMBER_EPISODE2 = NUMBER_EPISODE2 * NUMBER_EPISODE2; + srand(time(NULL)); + int random = rand() % NUMBER_EPISODE2; + float proba_explor = (float)random / NUMBER_EPISODE2; + if(proba_explor <= rlAgent->qlearnParams->exploration_factor ){ + action = rand() % action_value->dim->rank ; + } + else{ + action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank ); + } + return action; +} + +void learn_to_drive(struct RL_agent * rlAgent){ + int action; + struct vehicle * car = rlAgent->car; + struct game_status * car_status = car->status; + struct qlearning_params * qlParams = rlAgent->qlearnParams; + struct status_qlearning * qlStatus = rlAgent->status; + struct print_params * pprint = rlAgent->pprint; + char msg[100]; + + while(true){ + for(size_t index_episode = 0; index_episode < qlParams->number_episodes; ++index_episode){ + reset(car); + qlStatus->nb_training_after_updated_weight_in_target = 0; + while(true){ + ++(qlStatus->nb_training_after_updated_weight_in_target); + action = select_action(rlAgent); + sprintf(msg," dir:%.0f : %s, ", car->direction ,action_name[action]); + add_string_log_M(car_status,msg); + step_vehicle(car, action); + train_qlearning(rlAgent, action); + if(pprint->printed){ + pthread_mutex_lock(&(pprint->mut_printed)); + print_vehicle_n_path(car, pprint->scale_x, pprint->scale_y); + pthread_mutex_unlock(&(pprint->mut_printed)); + printf("%s ",pprint->string_space); + printf("ep: %ld ",index_episode); + neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net; + for(size_t i=0; ioutput->dim->rank; ++i) printf("{sensro[%s]:%f }",action_name[i%COUNT_ACTION],net_main->output->x[i]); + Sleep(pprint->delay->delay_between_games); + } + //done in step ... copy_tensor_TYPE_FLOAT(car->old_sensor, car->sensor); + if( qlStatus->nb_training_after_updated_weight_in_target > qlParams->nb_training_before_update_weight_in_target ){ + qlStatus->nb_training_after_updated_weight_in_target = 0; + copy_weight_in_networks_from_main_to_target(rlAgent->networks); + } + if(car_status->done == true){ + //push_back_list_TYPE_L_INT(qlStatus->list_main_cumul, car_status->cumulative_reward); + printf(" cumul : %ld ", car_status->cumulative_reward); + if(car_status->cumulative_reward > qlStatus->progress_best_cumul->end_list->value){ + push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward); + FOR_LIST_FORM_BEGIN(TYPE_L_INT, qlStatus->progress_best_cumul){ + printf(" | %ld |,",(qlStatus->progress_best_cumul)->current_list->value); + } + printf("%s ",pprint->string_space); + } + break; + } + } + + if(pprint->printed){ + Sleep(pprint->delay->delay_between_episodes); + } + } } - } diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.h b/deepQlearn_0/src/deepQlearning/learn_to_drive.h index fb25376..61ff770 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.h +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.h @@ -1,7 +1,17 @@ #ifndef __LEARNING_VEHICLE__C_H____ #define __LEARNING_VEHICLE__C_H____ -//#include +#include +#include + /* for Sleep : milliseconds */ +#ifdef WINDOWS +#include +//#ifdef LINUX +#else +#include +#define Sleep(x) usleep((x)*1000) +#endif + #include "neuron_t/neuron_t.h" @@ -16,15 +26,17 @@ struct qlearning_params { - double gamma; - double learning_rate; - double factor_update_learning_rate; - double minimum_threshold_learning_rate; - double discount_factor; - double exploration_factor; - double factor_update_exploration_factor; - double minimum_threshold_exploration_factor; + float gamma; + float learning_rate; + float factor_update_learning_rate; + // float epsilon; + float minimum_threshold_learning_rate; + float discount_factor; + float exploration_factor; + float factor_update_exploration_factor; + float minimum_threshold_exploration_factor; long int nb_training_before_update_weight_in_target; + size_t number_episodes; }; @@ -40,6 +52,15 @@ struct delay_params { size_t delay_between_games; }; +struct print_params { + bool printed; + pthread_mutex_t mut_printed; + float scale_x; + float scale_y; + struct delay_params *delay; + char string_space[LOG_LENTH]; +}; + struct networks_qlearning { config_layers *config; neurons_TYPE_FLOAT *main_net; @@ -51,39 +72,48 @@ struct RL_agent { struct networks_qlearning * networks; struct vehicle * car; struct status_qlearning * status; - struct delay_params * delay; + struct print_params * pprint; struct qlearning_params *qlearnParams; }; struct networks_qlearning * create_nework_qlearning( struct config_layers * config, - bool randomize, float minR, float maxR, int randomRange + bool randomize, float minR, float maxR, int randomRange, + size_t nb_prod_thread, + size_t nb_calc_thread, + float learning_rate ); struct status_qlearning * create_status_qlearning (); struct delay_params * create_delay_params ( size_t delay_between_episodes, size_t delay_between_games ); +struct print_params * create_print_params( + float scale_x, float scale_y, + struct delay_params * dly_p +); struct qlearning_params * create_qlearning_params ( - double gamma, - double learning_rate, - double discount_factor, - double exploration_factor, - long int nb_training_before_update_weight_in_target + float gamma, + float learning_rate, + float discount_factor, + float exploration_factor, + long int nb_training_before_update_weight_in_target, + size_t number_episodes ); struct RL_agent * create_RL_agent ( struct networks_qlearning * networks, struct vehicle * car, struct status_qlearning * status, - struct delay_params * delay, + struct print_params * pprint, struct qlearning_params *qlearnParams ); void free_networks_qlearning (struct networks_qlearning * networks); void free_status_qlearning(struct status_qlearning *status_ql); +void free_print_params (struct print_params *pprint); void free_delay_params (struct delay_params *dly_p); void free_qlearning_params(struct qlearning_params *q_params); void free_RL_agent(struct RL_agent *rlAgent); @@ -92,9 +122,11 @@ void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * net void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks); void train_qlearning(struct RL_agent * rlAgent, - int action , - tensor_TYPE_FLOAT * new_state /*input*/, - tensor_TYPE_FLOAT * state /*input*/, - long reward); + int action); +// tensor_TYPE_FLOAT * new_state /*input*/, +// tensor_TYPE_FLOAT * state /*input*/, +// long reward; + +void learn_to_drive(struct RL_agent * rlAgent); #endif /* __LEARNING_VEHICLE__C_H____ */ diff --git a/deepQlearn_0/src/deepQlearning/vehicle.c b/deepQlearn_0/src/deepQlearning/vehicle.c index a1286d5..7abe99f 100644 --- a/deepQlearn_0/src/deepQlearning/vehicle.c +++ b/deepQlearn_0/src/deepQlearning/vehicle.c @@ -67,6 +67,7 @@ struct vehicle * create_vehicle(struct blocks *path){ ret_vehicle->coord = create_coordinate(2); ret_vehicle->sensor = create_sensors(NB_SENSORS); + ret_vehicle->old_sensor = create_sensors(NB_SENSORS); ret_vehicle->path = path; ret_vehicle->status = create_game_status(); @@ -113,6 +114,7 @@ void free_vehicle(struct vehicle * vhcl){ free_coordinate(vhcl->coord); free_blocks(vhcl->path); free_sensors(vhcl->sensor); + free_sensors(vhcl->old_sensor); free_game_status(vhcl->status); free(vhcl); @@ -404,11 +406,13 @@ float distance2_coordinate(coordinate *c0, coordinate *c1){ diStep_sensor->x[0] += step_sensor * cos(direction_radian);\ diStep_sensor->x[1] += step_sensor * sin(direction_radian);\ }\ - v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)))) / 50;\ + v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)/5))) ;\ + //v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)))) / 50;\ //v->sensor->x[position] = (MIN(49,(int)(distance2_coordinate(diStep_sensor, v->coord)/10))) / 50;\ void read_sensor(struct vehicle *v){ + copy_tensor_TYPE_FLOAT(v->old_sensor, v->sensor); float step_sensor = ((float)1)/SUBDIVISION; coordinate * diStep_sensor = create_coordinate(2); copy_coordinate(diStep_sensor, v->coord->x); @@ -475,10 +479,11 @@ void add_string_log(struct game_status *status, char *str ){ } -void step(struct vehicle *v, int action){ +void step_vehicle(struct vehicle *v, int action){ + //float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT] float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT] v->direction = v->direction + action_x[action % 3]; - v->speed = ((float)1)/2; + v->speed = ((float)1)/5; move_vehicle(v); read_sensor(v); struct game_status *status = v->status; @@ -494,14 +499,14 @@ void step(struct vehicle *v, int action){ status->done = true; } else{ - bool breaked = false; + bool broken = false; long prec, next; char msg[48]; for(long i=0; i< path->nb_blocks; ++i){ //prec = (i-1)%(path->nb_blocks); prec = (i + path->nb_blocks - 1 )%(path->nb_blocks); next = (i + 1)%(path->nb_blocks); - printf("i:%ld, prec:%ld, next:%ld: maker %d, prec marker %d\n",i,prec,next, path->marker[i], path->marker[prec]); + //printf("i:%ld, prec:%ld, next:%ld: maker %d, prec marker %d\n",i,prec,next, path->marker[i], path->marker[prec]); if(is_in_block_index(path, i, v->coord)){ if(path->marker[i] == false && path->marker[prec] == true){ path->marker[i]=true; @@ -516,11 +521,11 @@ void step(struct vehicle *v, int action){ status->done = true; add_string_log(status, "| reverse |"); } - breaked = true; + broken = true; break; } } - if(breaked == false){ + if(broken == false){ if(status->cumulative_reward > THRESHOLD_REWARD){ status->reward = REWARD_CONTINUE; status->done = true; @@ -547,12 +552,14 @@ void reset(struct vehicle *v){ int diff; diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0]; random = rand() % diff; - v->coord->x[0] = path->lower_bound_block[0]->x[0] + random; + //v->coord->x[0] = path->lower_bound_block[0]->x[0] + random; + v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2; diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1]; random = rand() % diff; - v->coord->x[1] = path->lower_bound_block[0]->x[1] + random; + //v->coord->x[1] = path->lower_bound_block[0]->x[1] + random; + v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2; random = rand() % 50; - v->direction = random - 25; - //v->direction = 15; + //v->direction = random - 25; + v->direction = -90; v->speed = 1; } diff --git a/deepQlearn_0/src/deepQlearning/vehicle.h b/deepQlearn_0/src/deepQlearning/vehicle.h index b4b979e..7715777 100644 --- a/deepQlearn_0/src/deepQlearning/vehicle.h +++ b/deepQlearn_0/src/deepQlearning/vehicle.h @@ -24,7 +24,9 @@ #define CENTER 1 #define RIGHT 2 -#define SUBDIVISION 10 +#define COUNT_ACTION 3 + +#define SUBDIVISION 5 //10 struct game_status { @@ -81,6 +83,7 @@ struct vehicle { float direction; float speed; sensors *sensor; + sensors *old_sensor; struct blocks *path; struct game_status *status; }; @@ -110,10 +113,11 @@ void copy_coordinate(coordinate *coord, float *x); void move_vehicle(struct vehicle *v); void read_sensor(struct vehicle *v); -void step(struct vehicle *v, int action); - +void step_vehicle(struct vehicle *v, int action); void reset(struct vehicle *v); +void add_string_log_M(struct game_status *status, char *str ); + void print2D_blocks_indexOne_withPoint(struct blocks *blk, float scale_x, float scale_y, coordinate *coordPoint); void print_vehicle_n_path(struct vehicle *v, float scale_x, float scale_y); diff --git a/deepQlearn_0/test/is_good.c b/deepQlearn_0/test/is_good.c index 989f0a0..7c4db73 100644 --- a/deepQlearn_0/test/is_good.c +++ b/deepQlearn_0/test/is_good.c @@ -5,11 +5,13 @@ #include // for sleep ! +/* #ifdef __linux__ #include #elif _WIN32 #include #endif +*/ #include "ftest/ftest.h" #include "ftest/ftest_array.h" @@ -155,6 +157,7 @@ TEST(print_blocks_withPoint){ } +#if 0 TEST(first_vehicle){ size_t nb_block = 7; @@ -182,20 +185,69 @@ TEST(first_vehicle){ print_vehicle_n_path(vhcl, 0.2,0.4); - step(vhcl, CENTER); - sleep(2); + step_vehicle(vhcl, CENTER); + Sleep(200); print_vehicle_n_path(vhcl, 0.2,0.4); - step(vhcl, CENTER); - sleep(2); + step_vehicle(vhcl, CENTER); + Sleep(200); print_vehicle_n_path(vhcl, 0.2,0.4); - step(vhcl, CENTER); - sleep(2); + step_vehicle(vhcl, CENTER); + Sleep(200); print_vehicle_n_path(vhcl, 0.2,0.4); - step(vhcl, CENTER); - sleep(2); + step_vehicle(vhcl, CENTER); + Sleep(200); + print_vehicle_n_path(vhcl, 0.2,0.4); + + free_vehicle(vhcl); + + +} + +#endif + +TEST(circle_path_vehicle){ + size_t nb_block = 7; + size_t dim= 2; + struct blocks * path = create_blocks(nb_block, dim); + + copy_coordinate(path->lower_bound_block[0], (float[]){0,3}); + copy_coordinate(path->upper_bound_block[0], (float[]){4,7}); + copy_coordinate(path->lower_bound_block[1], (float[]){1,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){10,3}); + copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5}); + copy_coordinate(path->upper_bound_block[2], (float[]){14,5}); + copy_coordinate(path->lower_bound_block[3], (float[]){14,2}); + copy_coordinate(path->upper_bound_block[3], (float[]){18,7}); + copy_coordinate(path->lower_bound_block[4], (float[]){11,7}); + copy_coordinate(path->upper_bound_block[4], (float[]){17,10}); + copy_coordinate(path->lower_bound_block[5], (float[]){8,6}); + copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75}); + copy_coordinate(path->lower_bound_block[6], (float[]){1,7}); + copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75}); + + update_bounds_limits_blocks(path); + + struct vehicle *vhcl = create_vehicle(path); + + print_vehicle_n_path(vhcl, 0.2,0.4); + + step_vehicle(vhcl, CENTER); + Sleep(200); + print_vehicle_n_path(vhcl, 0.2,0.4); + + step_vehicle(vhcl, CENTER); + Sleep(200); + print_vehicle_n_path(vhcl, 0.2,0.4); + + step_vehicle(vhcl, CENTER); + Sleep(200); + print_vehicle_n_path(vhcl, 0.2,0.4); + + step_vehicle(vhcl, CENTER); + Sleep(200); print_vehicle_n_path(vhcl, 0.2,0.4); free_vehicle(vhcl); @@ -209,6 +261,106 @@ TEST(reward_list){ free_status_qlearning(l_reward); } +#if 1 +TEST(first_learn_vehicle){ + size_t nb_block = 7; + size_t dim= 2; + struct blocks * path = create_blocks(nb_block, dim); + + + +#if 1 + copy_coordinate(path->lower_bound_block[0], (float[]){0,3}); + copy_coordinate(path->upper_bound_block[0], (float[]){4,7}); + copy_coordinate(path->lower_bound_block[1], (float[]){1,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){10,3}); + copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5}); + copy_coordinate(path->upper_bound_block[2], (float[]){14,5}); + copy_coordinate(path->lower_bound_block[3], (float[]){14,2}); + copy_coordinate(path->upper_bound_block[3], (float[]){18,7}); + copy_coordinate(path->lower_bound_block[4], (float[]){11,7}); + copy_coordinate(path->upper_bound_block[4], (float[]){17,10}); + copy_coordinate(path->lower_bound_block[5], (float[]){8,6}); + copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75}); + copy_coordinate(path->lower_bound_block[6], (float[]){1,7}); + copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75}); + +#else + + + copy_coordinate(path->lower_bound_block[0], (float[]){0,0}); + copy_coordinate(path->upper_bound_block[0], (float[]){2,7}); + copy_coordinate(path->lower_bound_block[1], (float[]){2,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){4,2}); + copy_coordinate(path->lower_bound_block[2], (float[]){4,0.5}); + copy_coordinate(path->upper_bound_block[2], (float[]){8,3}); + copy_coordinate(path->lower_bound_block[3], (float[]){8,0}); + copy_coordinate(path->upper_bound_block[3], (float[]){16,2}); + copy_coordinate(path->lower_bound_block[4], (float[]){16,0}); + copy_coordinate(path->upper_bound_block[4], (float[]){18,7}); + copy_coordinate(path->lower_bound_block[5], (float[]){6,7}); + copy_coordinate(path->upper_bound_block[5], (float[]){18,9}); + copy_coordinate(path->lower_bound_block[6], (float[]){2,6}); + copy_coordinate(path->upper_bound_block[6], (float[]){6,8}); +#endif + + update_bounds_limits_blocks(path); + + struct vehicle *car = create_vehicle(path); + + config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */ + + bool randomize=true; + float minR = 0, maxR = 1; + int randomRange = 500; + size_t nb_prod_thread = 2; + size_t nb_calc_thread = 4; + float learning_rate = 0.001; + struct networks_qlearning *nnetworks = create_nework_qlearning( + pconf, + randomize, minR, maxR, randomRange, + nb_prod_thread, nb_calc_thread, + learning_rate + ); + + struct status_qlearning *qlstatus = create_status_qlearning (); + struct delay_params *dly = create_delay_params ( + 200/*size_t delay_between_episodes*/, + 20/*size_t delay_between_games*/ + ); + + struct qlearning_params *qlparams = create_qlearning_params ( + 0.95/*float gamma*/, + learning_rate, + 0 /* (not used!)float discount_factor*/, + 0.99/*float exploration_factor*/, + 20/*long int nb_training_before_update_weight_in_target*/, + 10000/*size_t number_episodes*/ + ); + struct print_params *pprint = create_print_params( + 0.2/*float scale_x*/,0.4 /*float scale_y*/, + dly/*struct delay_params * dly_p*/ + ); + + struct RL_agent *rlAgent = create_RL_agent ( + nnetworks /*struct networks_qlearning * networks*/, + car /*struct vehicle * car*/, + qlstatus /*struct status_qlearning * status*/, + pprint /*struct print_params * pprint*/, + qlparams/*struct qlearning_params *qlearnParams*/ + ); + + learn_to_drive(rlAgent); + + free_RL_agent(rlAgent); + + + + +} +#endif + + int main(int argc, char **argv){ diff --git a/neuron_t/src/neuron_t/neuron_t.h b/neuron_t/src/neuron_t/neuron_t.h index 5b7b3ac..c2dc48f 100644 --- a/neuron_t/src/neuron_t/neuron_t.h +++ b/neuron_t/src/neuron_t/neuron_t.h @@ -125,4 +125,13 @@ GEN_NEURON_(TYPE_FLOAT) GEN_NEURON_(TYPE_DOUBLE) +#define UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(type, neuronVar, attribute, value) \ + do{\ + neurons_##type *tmpn = neuronVar;\ + while(tmpn){\ + tmpn->attribute = value;\ + tmpn = tmpn->next_layer;\ + }\ + }while(0);\ + #endif /*__NEURON_T_C__H*/ diff --git a/tensor_t/src/tensor_t/tensor_t.c b/tensor_t/src/tensor_t/tensor_t.c index d1b4e9d..c6ab1c8 100644 --- a/tensor_t/src/tensor_t/tensor_t.c +++ b/tensor_t/src/tensor_t/tensor_t.c @@ -1356,7 +1356,7 @@ void parse_file_InputOutput_withDim_to_tensors_##type(tensor_##type **Tpart1, te fprintf( stderr, "Cannot open file: %s for reading\n",file_name_input );\ exit( -1 );\ }\ - bool size_unknown=false, breaked=false; \ + bool size_unknown=false, broken=false; \ bool Done=false;\ int retfread = 0, curIn=0;\ while(!Done){\ @@ -1372,13 +1372,13 @@ void parse_file_InputOutput_withDim_to_tensors_##type(tensor_##type **Tpart1, te iinput[curIn]='\0';\ size_t len = strlen(iinput);\ for(size_t i=0; i