From 0c9813becadd139c5358eaeb3e832ab0cb798870 Mon Sep 17 00:00:00 2001 From: fanasina Date: Tue, 9 Jul 2024 17:38:58 +0200 Subject: [PATCH] Add some MACRO in neuron_t and debug deepQlearning --- .../src/deepQlearning/learn_to_drive.c | 77 +- .../src/deepQlearning/learn_to_drive.h | 3 +- deepQlearn_0/src/deepQlearning/vehicle.c | 80 ++- deepQlearn_0/src/deepQlearning/vehicle.h | 5 +- deepQlearn_0/test/is_good.c | 668 +++++++++++++++++- neuron_t/src/neuron_t/neuron_t.c | 16 +- neuron_t/src/neuron_t/neuron_t.h | 33 +- tensor_t/src/tensor_t/tensor_t.c | 13 +- 8 files changed, 810 insertions(+), 85 deletions(-) diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.c b/deepQlearn_0/src/deepQlearning/learn_to_drive.c index c0cdaec..b3d7071 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.c +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.c @@ -21,10 +21,12 @@ float D_L2(float t, float o){ } void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * networks){ - copy_weight_in_neurons_TYPE_FLOAT(networks->target_net, networks->main_net); + //copy_weight_in_neurons_TYPE_FLOAT(networks->target_net, networks->main_net); + COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(TYPE_FLOAT,weight_in, networks->target_net, networks->main_net); } void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks){ - copy_weight_in_neurons_TYPE_FLOAT(networks->best_net, networks->main_net); + //copy_weight_in_neurons_TYPE_FLOAT(networks->best_net, networks->main_net); + COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(TYPE_FLOAT,weight_in, networks->best_net, networks->main_net); } struct networks_qlearning * create_nework_qlearning( @@ -70,6 +72,8 @@ struct status_qlearning * create_status_qlearning (){ status_ql->nb_training_after_updated_weight_in_target = 0; + status_ql->nb_episodes = 0; + return status_ql; } @@ -91,6 +95,7 @@ struct print_params * create_print_params(float scale_x, float scale_y, struct pprint->scale_x = scale_x; pprint->scale_y = scale_y; pprint->delay = delay; + pprint->string_space = malloc(LOG_LENTH+1); pthread_mutex_init(&(pprint->mut_printed), NULL); int i; @@ -164,6 +169,7 @@ void free_delay_params (struct delay_params *dly_p){ } void free_print_params (struct print_params *pprint){ + free(pprint->string_space); pthread_mutex_destroy(&(pprint->mut_printed)); free_delay_params(pprint->delay); free(pprint); @@ -192,13 +198,14 @@ void train_qlearning(struct RL_agent * rlAgent, neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net; tensor_TYPE_FLOAT * new_state = rlAgent->car->sensor /*input*/; tensor_TYPE_FLOAT * state = rlAgent->car->old_sensor /*input*/; - calculate_output_by_network_neurons_TYPE_FLOAT(net_main, state, &action_value); + neurons_TYPE_FLOAT *ttmp = calculate_output_by_network_neurons_TYPE_FLOAT(net_main, state, &action_value); calculate_output_by_network_neurons_TYPE_FLOAT(net_target, new_state, &next_action_value); tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim); struct game_status * car_status = rlAgent->car->status; struct qlearning_params * qlParams = rlAgent->qlearnParams; copy_tensor_TYPE_FLOAT(experimental_values, action_value) ; + //copy_tensor_TYPE_FLOAT(experimental_values, next_action_value) ; // experimental_values === Q-tab learning if(car_status->done){ experimental_values->x[action] = -100; @@ -206,19 +213,12 @@ void train_qlearning(struct RL_agent * rlAgent, experimental_values->x[action] = car_status->reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ; } // *** - neurons_TYPE_FLOAT *tmp=NULL, *ttmp=NULL, *base = net_main; - init_copy_in_out_networks_from_tensors_TYPE_FLOAT(base,base->output , experimental_values );\ - tmp=net_main->next_layer;\ - while(tmp){\ - calc_out_neurons_TYPE_FLOAT(tmp);\ - ttmp = tmp;\ - tmp = tmp->next_layer;\ - }\ - while(ttmp != base){\ - calc_delta_neurons_TYPE_FLOAT(ttmp);\ - update_weight_neurons_TYPE_FLOAT(ttmp);\ - ttmp = ttmp->prev_layer;\ - }\ + copy_tensor_TYPE_FLOAT(ttmp->target, experimental_values); + while(ttmp != net_main){ + calc_delta_neurons_TYPE_FLOAT(ttmp); + update_weight_neurons_TYPE_FLOAT(ttmp); + ttmp = ttmp->prev_layer; + } // *** float new_value = ( (net_main->learning_rate < qlParams->minimum_threshold_learning_rate /*0.0001*/) ? net_main->learning_rate :(net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ ); @@ -230,19 +230,28 @@ void train_qlearning(struct RL_agent * rlAgent, } int select_action(struct RL_agent * rlAgent){ + //static size_t explore = 0; int action; tensor_TYPE_FLOAT * action_value = NULL; - calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->old_sensor, &action_value); - long int NUMBER_EPISODE2 = (rlAgent->qlearnParams->number_episodes); - NUMBER_EPISODE2 = NUMBER_EPISODE2 * NUMBER_EPISODE2; - srand(time(NULL)); + //calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->old_sensor, &action_value); + calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->sensor, &action_value); + //long int NUMBER_EPISODE2 = (rlAgent->qlearnParams->number_episodes)*100; + int NUMBER_EPISODE2 = 3000; + //NUMBER_EPISODE2 = NUMBER_EPISODE2 * NUMBER_EPISODE2; +// static bool init = true ; +// if(init){ + srand(time(NULL)); +// init =false; +// } int random = rand() % NUMBER_EPISODE2; - float proba_explor = (float)random / NUMBER_EPISODE2; - if(proba_explor <= rlAgent->qlearnParams->exploration_factor ){ - action = rand() % action_value->dim->rank ; + float proba_explor = (float)(random ) / NUMBER_EPISODE2; + if(proba_explor > rlAgent->qlearnParams->exploration_factor ){ + action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank ); } else{ - action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank ); + action = rand() % action_value->dim->rank ; + // explore++; + //printf(" EXPLORE :%ld, action : %d , factor : %f nb_episodes : %ld \n",explore,action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes); } return action; } @@ -261,20 +270,34 @@ void learn_to_drive(struct RL_agent * rlAgent){ reset(car); qlStatus->nb_training_after_updated_weight_in_target = 0; while(true){ + ++(qlStatus->nb_episodes); ++(qlStatus->nb_training_after_updated_weight_in_target); action = select_action(rlAgent); sprintf(msg," dir:%.0f : %s, ", car->direction ,action_name[action]); add_string_log_M(car_status,msg); step_vehicle(car, action); train_qlearning(rlAgent, action); - if(pprint->printed){ + if(/*(qlStatus->nb_episodes %15 == 0) && */ pprint->printed){ pthread_mutex_lock(&(pprint->mut_printed)); print_vehicle_n_path(car, pprint->scale_x, pprint->scale_y); pthread_mutex_unlock(&(pprint->mut_printed)); printf("%s ",pprint->string_space); - printf("ep: %ld ",index_episode); + printf("ep: %ld\n",index_episode); neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net; - for(size_t i=0; ioutput->dim->rank; ++i) printf("{sensro[%s]:%f }",action_name[i%COUNT_ACTION],net_main->output->x[i]); + neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net; + for(size_t i=0; ioutput->dim->rank; ++i) { + printf("{sensro[%s]:%f "/*vs %f / VS / %f */" vs oldsens[%s]: %f}\n",action_name[i%COUNT_ACTION],net_target->output->x[i], + /*car->sensor->x[i] ,car->old_sensor->x[i], + */action_name[i%COUNT_ACTION],net_main->output->x[i]); + + } + printf("\n< %f > ( %s ) \n", car->direction, action_name[action % COUNT_ACTION]); + //print_weight_in_neurons_TYPE_FLOAT(net_main, "net_main_wei"); + //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, weight_in, "net_main_we_in"); + PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, output, "net_main_out"); + //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_target, output, "net_target_out"); + //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, input, "net_main_input"); + printf("action : %d , factor : %f nb_episodes : %ld \n",action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes); Sleep(pprint->delay->delay_between_games); } //done in step ... copy_tensor_TYPE_FLOAT(car->old_sensor, car->sensor); diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.h b/deepQlearn_0/src/deepQlearning/learn_to_drive.h index 61ff770..e0bb8ba 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.h +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.h @@ -45,6 +45,7 @@ struct status_qlearning { struct main_list_TYPE_L_INT * list_target_cumul; struct main_list_TYPE_L_INT * progress_best_cumul; long int nb_training_after_updated_weight_in_target; + size_t nb_episodes; }; struct delay_params { @@ -58,7 +59,7 @@ struct print_params { float scale_x; float scale_y; struct delay_params *delay; - char string_space[LOG_LENTH]; + char *string_space;//[LOG_LENTH]; }; struct networks_qlearning { diff --git a/deepQlearn_0/src/deepQlearning/vehicle.c b/deepQlearn_0/src/deepQlearning/vehicle.c index 7abe99f..1f0d9c0 100644 --- a/deepQlearn_0/src/deepQlearning/vehicle.c +++ b/deepQlearn_0/src/deepQlearning/vehicle.c @@ -7,7 +7,7 @@ //#define CENTER 1 //#define RIGHT 2 -#define LIMIT_DISTANCE ((float)1)/50 +#define LIMIT_DISTANCE ((float)((SUBDIVISION-1)/10))/SUBDIVISION #define REWARD_STOP -1000 #define REWARD_CONTINUE 100 @@ -68,6 +68,12 @@ struct vehicle * create_vehicle(struct blocks *path){ ret_vehicle->coord = create_coordinate(2); ret_vehicle->sensor = create_sensors(NB_SENSORS); ret_vehicle->old_sensor = create_sensors(NB_SENSORS); + + for(size_t i=0; isensor->x[i]=0; + ret_vehicle->old_sensor->x[i]=0; + } + ret_vehicle->path = path; ret_vehicle->status = create_game_status(); @@ -387,15 +393,15 @@ void print_vehicle_n_path(struct vehicle *v, float scale_x, float scale_y){ void move_vehicle(struct vehicle *v){ v->coord->x[0] += v->speed * cos(v->direction * M_PI / 180); - v->coord->x[1] += v->speed * sin(v->direction * M_PI / 180); + v->coord->x[1] -= v->speed * sin(v->direction * M_PI / 180); } float distance2_coordinate(coordinate *c0, coordinate *c1){ - if(c0->dim->rank != c1->dim->rank) return 0; + if(c0->dim->rank != c1->dim->rank) return -1; float d=0, tmp; for(size_t i=0; idim->rank; ++i){ tmp = (c0->x[i] - c1->x[i]); - d += tmp * tmp; + d += (tmp * tmp); } return sqrt(d); } @@ -404,19 +410,27 @@ float distance2_coordinate(coordinate *c0, coordinate *c1){ direction_radian = (v->direction + deviation) * M_PI / 180;\ while( is_in_blocks(v->path, diStep_sensor )){\ diStep_sensor->x[0] += step_sensor * cos(direction_radian);\ - diStep_sensor->x[1] += step_sensor * sin(direction_radian);\ + diStep_sensor->x[1] -= step_sensor * sin(direction_radian);\ }\ - v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)/5))) ;\ + dist = (distance2_coordinate(diStep_sensor, v->coord)/5);\ + printf("| dist :%f | ",dist);\ + v->sensor->x[position] = (float)(MIN((SUBDIVISION-1),(int)dist))/SUBDIVISION ;\ + + + + + //v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)/5))) ;\ //v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)))) / 50;\ //v->sensor->x[position] = (MIN(49,(int)(distance2_coordinate(diStep_sensor, v->coord)/10))) / 50;\ void read_sensor(struct vehicle *v){ copy_tensor_TYPE_FLOAT(v->old_sensor, v->sensor); - float step_sensor = ((float)1)/SUBDIVISION; + float step_sensor = STEP; // /SUBDIVISION; coordinate * diStep_sensor = create_coordinate(2); copy_coordinate(diStep_sensor, v->coord->x); - + float dist; + printf("\n"); // count the number of step until we go out of the path = distance // center sensor float direction_radian ; @@ -424,11 +438,11 @@ void read_sensor(struct vehicle *v){ copy_coordinate(diStep_sensor, v->coord->x); // right sensor - SENSOR_VALUE_CALCULATE(RIGHT,45); + SENSOR_VALUE_CALCULATE(RIGHT,-45); copy_coordinate(diStep_sensor, v->coord->x); // left sensor - SENSOR_VALUE_CALCULATE(LEFT, -45); + SENSOR_VALUE_CALCULATE(LEFT, 45); free_coordinate(diStep_sensor); @@ -482,8 +496,8 @@ void add_string_log(struct game_status *status, char *str ){ void step_vehicle(struct vehicle *v, int action){ //float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT] float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT] - v->direction = v->direction + action_x[action % 3]; - v->speed = ((float)1)/5; + v->direction = (float)((int)(v->direction + action_x[action % 3]) % 360) ; + v->speed = SPEED; // /5; move_vehicle(v); read_sensor(v); struct game_status *status = v->status; @@ -495,22 +509,24 @@ void step_vehicle(struct vehicle *v, int action){ struct blocks * path = v->path; //printf(" center : %f vs %f direction: %f\n",v->sensor->x[CENTER], LIMIT_DISTANCE, v->direction); if( v->sensor->x[CENTER]<= LIMIT_DISTANCE ){ + //if( MAX_ARRAY_TYPE_FLOAT(v->sensor->x,v->sensor->dim->rank)<= LIMIT_DISTANCE ){ status->reward = REWARD_STOP; status->done = true; } else{ bool broken = false; - long prec, next; + long pprec, prec, next; char msg[48]; for(long i=0; i< path->nb_blocks; ++i){ //prec = (i-1)%(path->nb_blocks); + pprec = (i + path->nb_blocks - 2 )%(path->nb_blocks); prec = (i + path->nb_blocks - 1 )%(path->nb_blocks); next = (i + 1)%(path->nb_blocks); //printf("i:%ld, prec:%ld, next:%ld: maker %d, prec marker %d\n",i,prec,next, path->marker[i], path->marker[prec]); if(is_in_block_index(path, i, v->coord)){ if(path->marker[i] == false && path->marker[prec] == true){ path->marker[i]=true; - path->marker[prec]=false; + path->marker[pprec]=false; status->reward = REWARD_CONTINUE; status->done = false; sprintf(msg," %ld,",i); @@ -536,9 +552,10 @@ void step_vehicle(struct vehicle *v, int action){ } - +#define RANDOM 1 void reset(struct vehicle *v){ + //static bool init = true; struct blocks * path = v->path; long int i; for(i=0; i<(path->nb_blocks -1); ++i) @@ -547,19 +564,32 @@ void reset(struct vehicle *v){ v->status->cumulative_reward = 0; sprintf(v->status->log,"\n"); v->status->cur_log = 0; - srand(time(NULL)); + //if(init){ + srand(time(NULL)); + // init = false; + //} int random; int diff; diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0]; - random = rand() % diff; - //v->coord->x[0] = path->lower_bound_block[0]->x[0] + random; - v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2; + random = rand() % (diff/2) ; + #if RANDOM + v->coord->x[0] = path->lower_bound_block[0]->x[0] + random; + #else + v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2; + #endif diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1]; - random = rand() % diff; - //v->coord->x[1] = path->lower_bound_block[0]->x[1] + random; - v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2; + random = rand() % (diff/2); + #if RANDOM + v->coord->x[1] = path->lower_bound_block[0]->x[1] + random; + #else + v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2; + #endif random = rand() % 50; - //v->direction = random - 25; - v->direction = -90; - v->speed = 1; + #if RANDOM + v->direction = random - 25; + #else + v->direction = -90; + #endif + v->speed = SPEED; + read_sensor(v); } diff --git a/deepQlearn_0/src/deepQlearning/vehicle.h b/deepQlearn_0/src/deepQlearning/vehicle.h index 7715777..79a3632 100644 --- a/deepQlearn_0/src/deepQlearning/vehicle.h +++ b/deepQlearn_0/src/deepQlearning/vehicle.h @@ -26,7 +26,10 @@ #define COUNT_ACTION 3 -#define SUBDIVISION 5 //10 +#define SUBDIVISION 50 + +#define STEP 1 +#define SPEED 1 struct game_status { diff --git a/deepQlearn_0/test/is_good.c b/deepQlearn_0/test/is_good.c index 7c4db73..3a52822 100644 --- a/deepQlearn_0/test/is_good.c +++ b/deepQlearn_0/test/is_good.c @@ -253,6 +253,137 @@ TEST(circle_path_vehicle){ free_vehicle(vhcl); +} + +TEST(circle_path_vehicle_00){ + size_t nb_block = 7; + size_t dim= 2; + struct blocks * path = create_blocks(nb_block, dim); + + copy_coordinate(path->lower_bound_block[0], (float[]){0,300}); + copy_coordinate(path->upper_bound_block[0], (float[]){400,700}); + copy_coordinate(path->lower_bound_block[1], (float[]){100,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){1000,300}); + copy_coordinate(path->lower_bound_block[2], (float[]){1000,50}); + copy_coordinate(path->upper_bound_block[2], (float[]){1400,500}); + copy_coordinate(path->lower_bound_block[3], (float[]){1400,200}); + copy_coordinate(path->upper_bound_block[3], (float[]){1800,700}); + copy_coordinate(path->lower_bound_block[4], (float[]){1100,700}); + copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000}); + copy_coordinate(path->lower_bound_block[5], (float[]){800,600}); + copy_coordinate(path->upper_bound_block[5], (float[]){1100,975}); + copy_coordinate(path->lower_bound_block[6], (float[]){100,700}); + copy_coordinate(path->upper_bound_block[6], (float[]){800,975}); + + update_bounds_limits_blocks(path); + + struct vehicle *vhcl = create_vehicle(path); + + print_vehicle_n_path(vhcl, 20,40); + + step_vehicle(vhcl, CENTER); + Sleep(200); +/* print_vehicle_n_path(vhcl, 0.2,0.4); + + step_vehicle(vhcl, CENTER); + Sleep(200); + print_vehicle_n_path(vhcl, 0.2,0.4); + + step_vehicle(vhcl, CENTER); + Sleep(200); + print_vehicle_n_path(vhcl, 0.2,0.4); + + step_vehicle(vhcl, CENTER); + Sleep(200); + print_vehicle_n_path(vhcl, 0.2,0.4); +*/ + free_vehicle(vhcl); + + +} + + + +TEST(circle_path_vehicle_50){ + size_t nb_block = 7; + size_t dim= 2; + struct blocks * path = create_blocks(nb_block, dim); +#if 1 + copy_coordinate(path->lower_bound_block[6], (float[]){0,30}); + copy_coordinate(path->upper_bound_block[6], (float[]){150,250}); + copy_coordinate(path->lower_bound_block[5], (float[]){150,0}); + copy_coordinate(path->upper_bound_block[5], (float[]){250,80}); + copy_coordinate(path->lower_bound_block[4], (float[]){250,20}); + copy_coordinate(path->upper_bound_block[4], (float[]){360,120}); + copy_coordinate(path->lower_bound_block[3], (float[]){360,80}); + copy_coordinate(path->upper_bound_block[3], (float[]){600,150}); + copy_coordinate(path->lower_bound_block[2], (float[]){600,90}); + copy_coordinate(path->upper_bound_block[2], (float[]){760,300}); + copy_coordinate(path->lower_bound_block[1], (float[]){300,300}); + copy_coordinate(path->upper_bound_block[1], (float[]){760,350}); + copy_coordinate(path->lower_bound_block[0], (float[]){0,250}); + copy_coordinate(path->upper_bound_block[0], (float[]){410,300}); + + +#else + copy_coordinate(path->lower_bound_block[0], (float[]){0,0}); + copy_coordinate(path->upper_bound_block[0], (float[]){150,250}); + copy_coordinate(path->lower_bound_block[1], (float[]){150,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){250,150}); + copy_coordinate(path->lower_bound_block[2], (float[]){250,80}); + copy_coordinate(path->upper_bound_block[2], (float[]){360,200}); + copy_coordinate(path->lower_bound_block[3], (float[]){360,70}); + copy_coordinate(path->upper_bound_block[3], (float[]){600,170}); + copy_coordinate(path->lower_bound_block[4], (float[]){600,90}); + copy_coordinate(path->upper_bound_block[4], (float[]){760,300}); + copy_coordinate(path->lower_bound_block[5], (float[]){300,300}); + copy_coordinate(path->upper_bound_block[5], (float[]){760,350}); + copy_coordinate(path->lower_bound_block[6], (float[]){0,250}); + copy_coordinate(path->upper_bound_block[6], (float[]){410,300}); + + +copy_coordinate(path->lower_bound_block[0], (float[]){0,0}); + copy_coordinate(path->upper_bound_block[0], (float[]){100,250}); + copy_coordinate(path->lower_bound_block[1], (float[]){100,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){250,80}); + copy_coordinate(path->lower_bound_block[2], (float[]){250,0}); + copy_coordinate(path->upper_bound_block[2], (float[]){360,140}); + copy_coordinate(path->lower_bound_block[3], (float[]){360,70}); + copy_coordinate(path->upper_bound_block[3], (float[]){600,140}); + copy_coordinate(path->lower_bound_block[4], (float[]){600,90}); + copy_coordinate(path->upper_bound_block[4], (float[]){720,300}); + copy_coordinate(path->lower_bound_block[5], (float[]){300,300}); + copy_coordinate(path->upper_bound_block[5], (float[]){720,350}); + copy_coordinate(path->lower_bound_block[6], (float[]){0,250}); + copy_coordinate(path->upper_bound_block[6], (float[]){410,300}); + +#endif + + update_bounds_limits_blocks(path); + + struct vehicle *vhcl = create_vehicle(path); + + print_vehicle_n_path(vhcl, 10,10); + + step_vehicle(vhcl, CENTER); + Sleep(200); +/* print_vehicle_n_path(vhcl, 0.2,0.4); + + step_vehicle(vhcl, CENTER); + Sleep(200); + print_vehicle_n_path(vhcl, 0.2,0.4); + + step_vehicle(vhcl, CENTER); + Sleep(200); + print_vehicle_n_path(vhcl, 0.2,0.4); + + step_vehicle(vhcl, CENTER); + Sleep(200); + print_vehicle_n_path(vhcl, 0.2,0.4); +*/ + free_vehicle(vhcl); + + } TEST(reward_list){ @@ -261,8 +392,125 @@ TEST(reward_list){ free_status_qlearning(l_reward); } + +float f(float x){ + return 1/(1+exp((double)(-x))); +} + +float df(float x){ + return exp(-x)/ ((1+exp(-x)) * (1+exp(-x))); +} #if 1 -TEST(first_learn_vehicle){ +TEST(first_learn_vehicle_rev50){ + size_t nb_block = 7; + size_t dim= 2; + struct blocks * path = create_blocks(nb_block, dim); + + + +#if 0 + copy_coordinate(path->lower_bound_block[6], (float[]){0,30}); + copy_coordinate(path->upper_bound_block[6], (float[]){150,250}); + copy_coordinate(path->lower_bound_block[5], (float[]){150,0}); + copy_coordinate(path->upper_bound_block[5], (float[]){250,80}); + copy_coordinate(path->lower_bound_block[4], (float[]){250,20}); + copy_coordinate(path->upper_bound_block[4], (float[]){360,120}); + copy_coordinate(path->lower_bound_block[3], (float[]){360,80}); + copy_coordinate(path->upper_bound_block[3], (float[]){600,150}); + copy_coordinate(path->lower_bound_block[2], (float[]){600,90}); + copy_coordinate(path->upper_bound_block[2], (float[]){760,300}); + copy_coordinate(path->lower_bound_block[1], (float[]){300,300}); + copy_coordinate(path->upper_bound_block[1], (float[]){760,350}); + copy_coordinate(path->lower_bound_block[0], (float[]){0,250}); + copy_coordinate(path->upper_bound_block[0], (float[]){410,300}); + +#else + + copy_coordinate(path->lower_bound_block[4], (float[]){0,0}); + copy_coordinate(path->upper_bound_block[4], (float[]){150,250}); + copy_coordinate(path->lower_bound_block[5], (float[]){150,40}); + copy_coordinate(path->upper_bound_block[5], (float[]){250,150}); + copy_coordinate(path->lower_bound_block[6], (float[]){250,80}); + copy_coordinate(path->upper_bound_block[6], (float[]){360,200}); + copy_coordinate(path->lower_bound_block[0], (float[]){360,70}); + copy_coordinate(path->upper_bound_block[0], (float[]){600,150}); + copy_coordinate(path->lower_bound_block[1], (float[]){600,90}); + copy_coordinate(path->upper_bound_block[1], (float[]){760,300}); + copy_coordinate(path->lower_bound_block[2], (float[]){260,300}); + copy_coordinate(path->upper_bound_block[2], (float[]){760,360}); + copy_coordinate(path->lower_bound_block[3], (float[]){0,250}); + copy_coordinate(path->upper_bound_block[3], (float[]){410,300}); + + + +#endif + + update_bounds_limits_blocks(path); + + struct vehicle *car = create_vehicle(path); + + config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */ + //config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */ + + bool randomize=true; + float minR = -0.5, maxR = 0.5; + int randomRange = 500; + size_t nb_prod_thread = 2; + size_t nb_calc_thread = 4; + float learning_rate = 0.001; + struct networks_qlearning *nnetworks = create_nework_qlearning( + pconf, + randomize, minR, maxR, randomRange, + nb_prod_thread, nb_calc_thread, + learning_rate + ); + + struct status_qlearning *qlstatus = create_status_qlearning (); + struct delay_params *dly = create_delay_params ( + 100/*size_t delay_between_episodes*/, + 10/*size_t delay_between_games*/ + ); + + struct qlearning_params *qlparams = create_qlearning_params ( + 0.95/*float gamma*/, + learning_rate, + 0 /* (not used!)float discount_factor*/, + 0.99 /*float exploration_factor*/, + 20/*long int nb_training_before_update_weight_in_target*/, + 10000/*size_t number_episodes*/ + ); +/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f ); +*/ + struct print_params *pprint = create_print_params( + 12/*float scale_x*/,12 /*float scale_y*/, + dly/*struct delay_params * dly_p*/ + ); + + struct RL_agent *rlAgent = create_RL_agent ( + nnetworks /*struct networks_qlearning * networks*/, + car /*struct vehicle * car*/, + qlstatus /*struct status_qlearning * status*/, + pprint /*struct print_params * pprint*/, + qlparams/*struct qlearning_params *qlearnParams*/ + ); + + learn_to_drive(rlAgent); + + free_RL_agent(rlAgent); + + + + +} +#endif + + + +#if 1 +TEST(first_learn_vehicle_50){ size_t nb_block = 7; size_t dim= 2; struct blocks * path = create_blocks(nb_block, dim); @@ -270,6 +518,59 @@ TEST(first_learn_vehicle){ #if 1 + + copy_coordinate(path->lower_bound_block[0], (float[]){0,0}); + copy_coordinate(path->upper_bound_block[0], (float[]){150,250}); + copy_coordinate(path->lower_bound_block[1], (float[]){150,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){250,150}); + copy_coordinate(path->lower_bound_block[2], (float[]){250,80}); + copy_coordinate(path->upper_bound_block[2], (float[]){360,200}); + copy_coordinate(path->lower_bound_block[3], (float[]){360,70}); + copy_coordinate(path->upper_bound_block[3], (float[]){600,170}); + copy_coordinate(path->lower_bound_block[4], (float[]){600,90}); + copy_coordinate(path->upper_bound_block[4], (float[]){760,300}); + copy_coordinate(path->lower_bound_block[5], (float[]){300,300}); + copy_coordinate(path->upper_bound_block[5], (float[]){760,350}); + copy_coordinate(path->lower_bound_block[6], (float[]){0,250}); + copy_coordinate(path->upper_bound_block[6], (float[]){410,300}); + +/* + copy_coordinate(path->lower_bound_block[0], (float[]){0,0}); + copy_coordinate(path->upper_bound_block[0], (float[]){100,250}); + copy_coordinate(path->lower_bound_block[1], (float[]){100,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){250,80}); + copy_coordinate(path->lower_bound_block[2], (float[]){250,0}); + copy_coordinate(path->upper_bound_block[2], (float[]){360,140}); + copy_coordinate(path->lower_bound_block[3], (float[]){360,70}); + copy_coordinate(path->upper_bound_block[3], (float[]){600,140}); + copy_coordinate(path->lower_bound_block[4], (float[]){600,90}); + copy_coordinate(path->upper_bound_block[4], (float[]){720,300}); + copy_coordinate(path->lower_bound_block[5], (float[]){300,300}); + copy_coordinate(path->upper_bound_block[5], (float[]){720,350}); + copy_coordinate(path->lower_bound_block[6], (float[]){0,250}); + copy_coordinate(path->upper_bound_block[6], (float[]){410,300}); + + + + + copy_coordinate(path->lower_bound_block[0], (float[]){0,300}); + copy_coordinate(path->upper_bound_block[0], (float[]){400,700}); + copy_coordinate(path->lower_bound_block[1], (float[]){100,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){1000,300}); + copy_coordinate(path->lower_bound_block[2], (float[]){1000,50}); + copy_coordinate(path->upper_bound_block[2], (float[]){1400,500}); + copy_coordinate(path->lower_bound_block[3], (float[]){1400,200}); + copy_coordinate(path->upper_bound_block[3], (float[]){1800,700}); + copy_coordinate(path->lower_bound_block[4], (float[]){1100,700}); + copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000}); + copy_coordinate(path->lower_bound_block[5], (float[]){800,600}); + copy_coordinate(path->upper_bound_block[5], (float[]){1100,975}); + copy_coordinate(path->lower_bound_block[6], (float[]){100,700}); + copy_coordinate(path->upper_bound_block[6], (float[]){800,975}); + */ + +#else + copy_coordinate(path->lower_bound_block[0], (float[]){0,3}); copy_coordinate(path->upper_bound_block[0], (float[]){4,7}); copy_coordinate(path->lower_bound_block[1], (float[]){1,0}); @@ -285,23 +586,7 @@ TEST(first_learn_vehicle){ copy_coordinate(path->lower_bound_block[6], (float[]){1,7}); copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75}); -#else - - - copy_coordinate(path->lower_bound_block[0], (float[]){0,0}); - copy_coordinate(path->upper_bound_block[0], (float[]){2,7}); - copy_coordinate(path->lower_bound_block[1], (float[]){2,0}); - copy_coordinate(path->upper_bound_block[1], (float[]){4,2}); - copy_coordinate(path->lower_bound_block[2], (float[]){4,0.5}); - copy_coordinate(path->upper_bound_block[2], (float[]){8,3}); - copy_coordinate(path->lower_bound_block[3], (float[]){8,0}); - copy_coordinate(path->upper_bound_block[3], (float[]){16,2}); - copy_coordinate(path->lower_bound_block[4], (float[]){16,0}); - copy_coordinate(path->upper_bound_block[4], (float[]){18,7}); - copy_coordinate(path->lower_bound_block[5], (float[]){6,7}); - copy_coordinate(path->upper_bound_block[5], (float[]){18,9}); - copy_coordinate(path->lower_bound_block[6], (float[]){2,6}); - copy_coordinate(path->upper_bound_block[6], (float[]){6,8}); + #endif update_bounds_limits_blocks(path); @@ -309,9 +594,10 @@ TEST(first_learn_vehicle){ struct vehicle *car = create_vehicle(path); config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */ + //config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */ bool randomize=true; - float minR = 0, maxR = 1; + float minR = -0.5, maxR = 0.5; int randomRange = 500; size_t nb_prod_thread = 2; size_t nb_calc_thread = 4; @@ -325,20 +611,356 @@ TEST(first_learn_vehicle){ struct status_qlearning *qlstatus = create_status_qlearning (); struct delay_params *dly = create_delay_params ( - 200/*size_t delay_between_episodes*/, - 20/*size_t delay_between_games*/ + 100/*size_t delay_between_episodes*/, + 10/*size_t delay_between_games*/ ); struct qlearning_params *qlparams = create_qlearning_params ( 0.95/*float gamma*/, learning_rate, 0 /* (not used!)float discount_factor*/, - 0.99/*float exploration_factor*/, + 0.99 /*float exploration_factor*/, 20/*long int nb_training_before_update_weight_in_target*/, 10000/*size_t number_episodes*/ ); +/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f ); +*/ struct print_params *pprint = create_print_params( - 0.2/*float scale_x*/,0.4 /*float scale_y*/, + 12/*float scale_x*/,12 /*float scale_y*/, + dly/*struct delay_params * dly_p*/ + ); + + struct RL_agent *rlAgent = create_RL_agent ( + nnetworks /*struct networks_qlearning * networks*/, + car /*struct vehicle * car*/, + qlstatus /*struct status_qlearning * status*/, + pprint /*struct print_params * pprint*/, + qlparams/*struct qlearning_params *qlearnParams*/ + ); + + learn_to_drive(rlAgent); + + free_RL_agent(rlAgent); + + + + +} +#endif + + + +#if 0 +TEST(first_learn_vehicle){ + size_t nb_block = 7; + size_t dim= 2; + struct blocks * path = create_blocks(nb_block, dim); + + + +#if 1 + + copy_coordinate(path->lower_bound_block[0], (float[]){0,300}); + copy_coordinate(path->upper_bound_block[0], (float[]){400,700}); + copy_coordinate(path->lower_bound_block[1], (float[]){100,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){1000,300}); + copy_coordinate(path->lower_bound_block[2], (float[]){1000,50}); + copy_coordinate(path->upper_bound_block[2], (float[]){1400,500}); + copy_coordinate(path->lower_bound_block[3], (float[]){1400,200}); + copy_coordinate(path->upper_bound_block[3], (float[]){1800,700}); + copy_coordinate(path->lower_bound_block[4], (float[]){1100,700}); + copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000}); + copy_coordinate(path->lower_bound_block[5], (float[]){800,600}); + copy_coordinate(path->upper_bound_block[5], (float[]){1100,975}); + copy_coordinate(path->lower_bound_block[6], (float[]){100,700}); + copy_coordinate(path->upper_bound_block[6], (float[]){800,975}); + + +#else + + copy_coordinate(path->lower_bound_block[0], (float[]){0,3}); + copy_coordinate(path->upper_bound_block[0], (float[]){4,7}); + copy_coordinate(path->lower_bound_block[1], (float[]){1,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){10,3}); + copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5}); + copy_coordinate(path->upper_bound_block[2], (float[]){14,5}); + copy_coordinate(path->lower_bound_block[3], (float[]){14,2}); + copy_coordinate(path->upper_bound_block[3], (float[]){18,7}); + copy_coordinate(path->lower_bound_block[4], (float[]){11,7}); + copy_coordinate(path->upper_bound_block[4], (float[]){17,10}); + copy_coordinate(path->lower_bound_block[5], (float[]){8,6}); + copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75}); + copy_coordinate(path->lower_bound_block[6], (float[]){1,7}); + copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75}); + + +#endif + + update_bounds_limits_blocks(path); + + struct vehicle *car = create_vehicle(path); + + config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */ + //config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */ + + bool randomize=true; + float minR = -0.5, maxR = 0.5; + int randomRange = 5000; + size_t nb_prod_thread = 2; + size_t nb_calc_thread = 4; + float learning_rate = 0.1; + struct networks_qlearning *nnetworks = create_nework_qlearning( + pconf, + randomize, minR, maxR, randomRange, + nb_prod_thread, nb_calc_thread, + learning_rate + ); + + struct status_qlearning *qlstatus = create_status_qlearning (); + struct delay_params *dly = create_delay_params ( + 100/*size_t delay_between_episodes*/, + 10/*size_t delay_between_games*/ + ); + + struct qlearning_params *qlparams = create_qlearning_params ( + 0.95/*float gamma*/, + learning_rate, + 0 /* (not used!)float discount_factor*/, + 0.85 /*float exploration_factor*/, + 20/*long int nb_training_before_update_weight_in_target*/, + 10000/*size_t number_episodes*/ + ); +/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f ); +*/ + struct print_params *pprint = create_print_params( + 20/*float scale_x*/,40 /*float scale_y*/, + dly/*struct delay_params * dly_p*/ + ); + + struct RL_agent *rlAgent = create_RL_agent ( + nnetworks /*struct networks_qlearning * networks*/, + car /*struct vehicle * car*/, + qlstatus /*struct status_qlearning * status*/, + pprint /*struct print_params * pprint*/, + qlparams/*struct qlearning_params *qlearnParams*/ + ); + + learn_to_drive(rlAgent); + + free_RL_agent(rlAgent); + + + + +} +#endif + + + +#if 0 +TEST(first_learn_vehicle){ + size_t nb_block = 7; + size_t dim= 2; + struct blocks * path = create_blocks(nb_block, dim); + + + +#if 1 + + copy_coordinate(path->lower_bound_block[0], (float[]){0,300}); + copy_coordinate(path->upper_bound_block[0], (float[]){400,700}); + copy_coordinate(path->lower_bound_block[1], (float[]){100,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){1000,300}); + copy_coordinate(path->lower_bound_block[2], (float[]){1000,50}); + copy_coordinate(path->upper_bound_block[2], (float[]){1400,500}); + copy_coordinate(path->lower_bound_block[3], (float[]){1400,200}); + copy_coordinate(path->upper_bound_block[3], (float[]){1800,700}); + copy_coordinate(path->lower_bound_block[4], (float[]){1100,700}); + copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000}); + copy_coordinate(path->lower_bound_block[5], (float[]){800,600}); + copy_coordinate(path->upper_bound_block[5], (float[]){1100,975}); + copy_coordinate(path->lower_bound_block[6], (float[]){100,700}); + copy_coordinate(path->upper_bound_block[6], (float[]){800,975}); + + +#else + + copy_coordinate(path->lower_bound_block[0], (float[]){0,3}); + copy_coordinate(path->upper_bound_block[0], (float[]){4,7}); + copy_coordinate(path->lower_bound_block[1], (float[]){1,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){10,3}); + copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5}); + copy_coordinate(path->upper_bound_block[2], (float[]){14,5}); + copy_coordinate(path->lower_bound_block[3], (float[]){14,2}); + copy_coordinate(path->upper_bound_block[3], (float[]){18,7}); + copy_coordinate(path->lower_bound_block[4], (float[]){11,7}); + copy_coordinate(path->upper_bound_block[4], (float[]){17,10}); + copy_coordinate(path->lower_bound_block[5], (float[]){8,6}); + copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75}); + copy_coordinate(path->lower_bound_block[6], (float[]){1,7}); + copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75}); + + +#endif + + update_bounds_limits_blocks(path); + + struct vehicle *car = create_vehicle(path); + + config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */ + //config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */ + + bool randomize=true; + float minR = -0.5, maxR = 0.5; + int randomRange = 5000; + size_t nb_prod_thread = 2; + size_t nb_calc_thread = 4; + float learning_rate = 0.1; + struct networks_qlearning *nnetworks = create_nework_qlearning( + pconf, + randomize, minR, maxR, randomRange, + nb_prod_thread, nb_calc_thread, + learning_rate + ); + + struct status_qlearning *qlstatus = create_status_qlearning (); + struct delay_params *dly = create_delay_params ( + 100/*size_t delay_between_episodes*/, + 10/*size_t delay_between_games*/ + ); + + struct qlearning_params *qlparams = create_qlearning_params ( + 0.95/*float gamma*/, + learning_rate, + 0 /* (not used!)float discount_factor*/, + 0.85 /*float exploration_factor*/, + 20/*long int nb_training_before_update_weight_in_target*/, + 10000/*size_t number_episodes*/ + ); +/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f ); +*/ + struct print_params *pprint = create_print_params( + 20/*float scale_x*/,40 /*float scale_y*/, + dly/*struct delay_params * dly_p*/ + ); + + struct RL_agent *rlAgent = create_RL_agent ( + nnetworks /*struct networks_qlearning * networks*/, + car /*struct vehicle * car*/, + qlstatus /*struct status_qlearning * status*/, + pprint /*struct print_params * pprint*/, + qlparams/*struct qlearning_params *qlearnParams*/ + ); + + learn_to_drive(rlAgent); + + free_RL_agent(rlAgent); + + + + +} +#endif + + + + +#if 0 +TEST(first_learn_vehicle){ + size_t nb_block = 7; + size_t dim= 2; + struct blocks * path = create_blocks(nb_block, dim); + + + +#if 1 + + copy_coordinate(path->lower_bound_block[0], (float[]){0,300}); + copy_coordinate(path->upper_bound_block[0], (float[]){400,700}); + copy_coordinate(path->lower_bound_block[1], (float[]){100,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){1000,300}); + copy_coordinate(path->lower_bound_block[2], (float[]){1000,50}); + copy_coordinate(path->upper_bound_block[2], (float[]){1400,500}); + copy_coordinate(path->lower_bound_block[3], (float[]){1400,200}); + copy_coordinate(path->upper_bound_block[3], (float[]){1800,700}); + copy_coordinate(path->lower_bound_block[4], (float[]){1100,700}); + copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000}); + copy_coordinate(path->lower_bound_block[5], (float[]){800,600}); + copy_coordinate(path->upper_bound_block[5], (float[]){1100,975}); + copy_coordinate(path->lower_bound_block[6], (float[]){100,700}); + copy_coordinate(path->upper_bound_block[6], (float[]){800,975}); + + +#else + + copy_coordinate(path->lower_bound_block[0], (float[]){0,3}); + copy_coordinate(path->upper_bound_block[0], (float[]){4,7}); + copy_coordinate(path->lower_bound_block[1], (float[]){1,0}); + copy_coordinate(path->upper_bound_block[1], (float[]){10,3}); + copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5}); + copy_coordinate(path->upper_bound_block[2], (float[]){14,5}); + copy_coordinate(path->lower_bound_block[3], (float[]){14,2}); + copy_coordinate(path->upper_bound_block[3], (float[]){18,7}); + copy_coordinate(path->lower_bound_block[4], (float[]){11,7}); + copy_coordinate(path->upper_bound_block[4], (float[]){17,10}); + copy_coordinate(path->lower_bound_block[5], (float[]){8,6}); + copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75}); + copy_coordinate(path->lower_bound_block[6], (float[]){1,7}); + copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75}); + + +#endif + + update_bounds_limits_blocks(path); + + struct vehicle *car = create_vehicle(path); + + config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */ + //config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */ + + bool randomize=true; + float minR = -0.5, maxR = 0.5; + int randomRange = 5000; + size_t nb_prod_thread = 2; + size_t nb_calc_thread = 4; + float learning_rate = 0.1; + struct networks_qlearning *nnetworks = create_nework_qlearning( + pconf, + randomize, minR, maxR, randomRange, + nb_prod_thread, nb_calc_thread, + learning_rate + ); + + struct status_qlearning *qlstatus = create_status_qlearning (); + struct delay_params *dly = create_delay_params ( + 100/*size_t delay_between_episodes*/, + 10/*size_t delay_between_games*/ + ); + + struct qlearning_params *qlparams = create_qlearning_params ( + 0.95/*float gamma*/, + learning_rate, + 0 /* (not used!)float discount_factor*/, + 0.85 /*float exploration_factor*/, + 20/*long int nb_training_before_update_weight_in_target*/, + 10000/*size_t number_episodes*/ + ); +/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df ); + UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f ); +*/ + struct print_params *pprint = create_print_params( + 20/*float scale_x*/,40 /*float scale_y*/, dly/*struct delay_params * dly_p*/ ); diff --git a/neuron_t/src/neuron_t/neuron_t.c b/neuron_t/src/neuron_t/neuron_t.c index 96e6fe5..f166ad0 100644 --- a/neuron_t/src/neuron_t/neuron_t.c +++ b/neuron_t/src/neuron_t/neuron_t.c @@ -627,6 +627,19 @@ void print_neurons_msg_##type(neurons_##type *nr, char *msg){\ }\ }\ \ +void print_weight_in_neurons_##type(neurons_##type *nn, char *msg){\ + neurons_##type *tmp = nn;\ + size_t i = 0;\ + char vmsg[250];\ + while(tmp){\ + if(tmp->weight_in){\ + sprintf(vmsg,"%s layer %ld",msg,i++);\ + print_tensor_msg_##type(tmp->weight_in, vmsg);\ + }\ + tmp = tmp->next_layer;\ + }\ +}\ +\ void free_neurons_##type(neurons_##type *base){\ neurons_##type *temp = base, *ttemp;\ while(temp){\ @@ -747,7 +760,7 @@ size_t learning_online2_neurons_##type(neurons_##type *base, data_set_##type *da return nbreps;\ }\ \ -void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link){\ +neurons_##type * calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link){\ for(size_t i=0; i<(input->dim)->rank; ++i) (base->output)->x[i]=input->x[i];\ neurons_##type * tmp=base->next_layer;\ while(tmp){\ @@ -755,6 +768,7 @@ void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##t if(tmp->next_layer==NULL){\ /*print_tensor_msg_##type(tmp->output,"retult");*/\ *output_link = tmp->output;\ + return tmp;\ }\ tmp = tmp->next_layer;\ }\ diff --git a/neuron_t/src/neuron_t/neuron_t.h b/neuron_t/src/neuron_t/neuron_t.h index c2dc48f..fc65bfb 100644 --- a/neuron_t/src/neuron_t/neuron_t.h +++ b/neuron_t/src/neuron_t/neuron_t.h @@ -77,7 +77,9 @@ void setup_networks_layers_without_weights_##type(neurons_##type **base_nr, size void setup_networks_layers_without_weights_from_config_##type(neurons_##type **base, config_layers *pconf);\ void setup_networks_OneD_##type(neurons_##type **base_nr, size_t *array_dim_in_layers, size_t nb_layers, bool randomize, type minR, type maxR, int randomRange);\ void init_in_out_all_networks_OneD_##type(neurons_##type *nr, type *in, size_t sz_in, type *out, size_t sz_out);\ +\ void print_neurons_msg_##type(neurons_##type *nr, char * msg);\ +void print_weight_in_neurons_##type(neurons_##type *nn, char *msg);\ \ void free_neurons_##type(neurons_##type *base);\ \ @@ -106,7 +108,7 @@ void print_data_set_msg_##type(data_set_##type *ds, char *msg);\ \ size_t learning_online_neurons_##type(neurons_##type *base, data_set_##type *dataset, bool (*condition)(type, size_t));\ size_t learning_online2_neurons_##type(neurons_##type *base, data_set_##type *dataset, bool (*condition)(type, size_t));\ -void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link);\ +neurons_##type * calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link);\ void print_predict_by_network_neurons_##type(neurons_##type *base, tensor_##type *input);\ void print_predict_by_network_with_error_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type *target);\ \ @@ -134,4 +136,33 @@ GEN_NEURON_(TYPE_DOUBLE) }\ }while(0);\ +#define COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(type, attribute, dstNN ,sourceNN)\ + do{\ + neurons_##type *tmpn = dstNN;\ + neurons_##type *srcNN = sourceNN;\ + while(tmpn){\ + if(tmpn->attribute)\ + copy_tensor_##type(tmpn->attribute, srcNN->attribute);\ + tmpn = tmpn->next_layer;\ + srcNN = srcNN->next_layer;\ + }\ + }while(0);\ + + + +#define PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(type, neuronVar, attribute, msg)\ + do{\ + neurons_##type *tmpn = neuronVar;\ + char *vmsg=malloc(strlen(msg)+70);\ + size_t i=0;\ + while(tmpn){\ + sprintf(vmsg,"%s layer %ld",msg,i++);\ + if(tmpn->attribute)\ + print_tensor_msg_##type(tmpn->attribute, vmsg);\ + tmpn = tmpn->next_layer;\ + }\ + free(vmsg);\ + }while(0);\ + + #endif /*__NEURON_T_C__H*/ diff --git a/tensor_t/src/tensor_t/tensor_t.c b/tensor_t/src/tensor_t/tensor_t.c index c6ab1c8..7b82190 100644 --- a/tensor_t/src/tensor_t/tensor_t.c +++ b/tensor_t/src/tensor_t/tensor_t.c @@ -368,10 +368,10 @@ void print_tensor_msg_##type(tensor_##type *T,char *msg) {\ /*size_t j=0 ,k=0*/;\ size_t *coord = malloc(sizeof(long int)*(T->dim)->size); \ char *val=NULL;\ - char *dimsg=malloc(512);\ + /*char *dimsg=malloc(512);\ sprintf(dimsg,"(%s)->dim",msg);\ printDebug_dimension(T->dim,dimsg);\ - printf("%s\n",msg);\ + */printf("%s\n",msg);\ long int begin , end /*, beginIter , endIter*/ ;\ long int (*iter)(long int) ;\ bool (*cond)(long int, long int) ; \ @@ -392,10 +392,11 @@ void print_tensor_msg_##type(tensor_##type *T,char *msg) {\ else break;\ }\ }\ - printf(" [");\ - for(size_t k=0; k<(T->dim)->size;++k) printf(" %ld,",coord[k]);\ - val=type##_TO_STR(T->x[i]);\ + /*printf(" [");\ + for(size_t k=0; k<(T->dim)->size;++k) printf(" %ld",coord[k]);\ + */val=type##_TO_STR(T->x[i]);\ printf(" |#%ld]: %s, ",i,val);\ + /*printf(" %s, ",val);*/\ free(val); val=NULL;\ if(coord[begin]==(T->dim)->perm[begin]-1){\ size_t count=0;\ @@ -411,7 +412,7 @@ void print_tensor_msg_##type(tensor_##type *T,char *msg) {\ \ free(coord);\ printf("\n");\ - free(dimsg);\ + /*free(dimsg);*/\ }\ \ \