diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.c b/deepQlearn_0/src/deepQlearning/learn_to_drive.c index b3d7071..234d99f 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.c +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.c @@ -3,11 +3,13 @@ char *action_name[8] = {"LEFT", "CENTER", "RIGHT"}; float reLU(float x){ + if(x>10) return 10; if(x>0) return x; return 0; } float d_reLU(float x){ + if (x>10) return 0; if (x>0) return 1; return 0; } @@ -29,6 +31,10 @@ void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * netwo COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(TYPE_FLOAT,weight_in, networks->best_net, networks->main_net); } +float id(float x){ return x;} + +float constOne(float x){return 1;} + struct networks_qlearning * create_nework_qlearning( struct config_layers * config, bool randomize, float minR, float maxR, int randomRange, @@ -46,7 +52,6 @@ struct networks_qlearning * create_nework_qlearning( setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->best_net), config, false, minR, maxR, randomRange); copy_weight_in_networks_from_main_to_best(qnets); - setup_all_layers_functions_TYPE_FLOAT(qnets->main_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU); setup_all_layers_params_TYPE_FLOAT(qnets->main_net, nb_prod_thread, nb_calc_thread, learning_rate); setup_all_layers_functions_TYPE_FLOAT(qnets->target_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU); @@ -54,6 +59,25 @@ struct networks_qlearning * create_nework_qlearning( setup_all_layers_functions_TYPE_FLOAT(qnets->best_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU); setup_all_layers_params_TYPE_FLOAT(qnets->best_net, nb_prod_thread, nb_calc_thread, learning_rate); +// ne pas mettre fonction d'activation à la sortie , i.e: fonction identité : f(x) = x: + neurons_TYPE_FLOAT *tmpMain = qnets->main_net; + neurons_TYPE_FLOAT *tmpTarget = qnets->target_net; + neurons_TYPE_FLOAT *tmpBest = qnets->best_net; + while(tmpMain){ + if(tmpMain->next_layer == NULL){ + tmpMain->f_act = id; + tmpMain->d_f_act = constOne; + tmpTarget->f_act = id; + tmpTarget->d_f_act = constOne; + tmpBest->f_act = id; + tmpBest->d_f_act = constOne; + } + tmpMain = tmpMain->next_layer; + tmpTarget= tmpTarget->next_layer; + tmpBest = tmpBest->next_layer; + } + + return qnets; @@ -73,6 +97,11 @@ struct status_qlearning * create_status_qlearning (){ status_ql->nb_training_after_updated_weight_in_target = 0; status_ql->nb_episodes = 0; + status_ql->index_episode= 0; + status_ql->action=1; + +// status_ql->last_action=-1; +// status_ql->count_last_action=0; return status_ql; } @@ -129,6 +158,7 @@ struct qlearning_params * create_qlearning_params ( qparams->factor_update_exploration_factor = 0.995; qparams->minimum_threshold_exploration_factor = 0.01; +// qparams->threshold_number_same_action = 500; return qparams; } @@ -226,6 +256,8 @@ void train_qlearning(struct RL_agent * rlAgent, qlParams->exploration_factor = (qlParams->exploration_factor < qlParams->minimum_threshold_exploration_factor) ? qlParams->exploration_factor : qlParams->exploration_factor * qlParams->factor_update_exploration_factor ; +// free_tensor_TYPE_FLOAT(action_value); +// free_tensor_TYPE_FLOAT(next_action_value); } @@ -236,26 +268,91 @@ int select_action(struct RL_agent * rlAgent){ //calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->old_sensor, &action_value); calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->sensor, &action_value); //long int NUMBER_EPISODE2 = (rlAgent->qlearnParams->number_episodes)*100; - int NUMBER_EPISODE2 = 3000; + //int randRange = 10000; //NUMBER_EPISODE2 = NUMBER_EPISODE2 * NUMBER_EPISODE2; -// static bool init = true ; -// if(init){ - srand(time(NULL)); -// init =false; -// } - int random = rand() % NUMBER_EPISODE2; - float proba_explor = (float)(random ) / NUMBER_EPISODE2; + //static bool init = true ; + //if(init){ + //srand(time(NULL)); + //init =false; + //} + //int random = xrand() % randRange; + float proba_explor = (float) (rand() % (1<<17 -1))/ (1<<17 -1); //frand(); //(float)(random ) / randRange; if(proba_explor > rlAgent->qlearnParams->exploration_factor ){ action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank ); + //if(action == ARG_MIN_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank )) + //action = xrand() % action_value->dim->rank ; } else{ - action = rand() % action_value->dim->rank ; + action = xrand() % action_value->dim->rank ; // explore++; //printf(" EXPLORE :%ld, action : %d , factor : %f nb_episodes : %ld \n",explore,action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes); } + /* + if(rlAgent->status->last_action == action){ + ++(rlAgent->status->count_last_action); + if(rlAgent->status->count_last_action > rlAgent->qlearnParams->threshold_number_same_action ){ + while(rlAgent->status->last_action == action) + action = xrand() % action_value->dim->rank ; + + rlAgent->status->last_action = action; + rlAgent->status->count_last_action = 0; + } + } + else{ + rlAgent->status->last_action = action; + rlAgent->status->count_last_action = 0; + } + */ + rlAgent->status->action = action; return action; } +void* runPrint(void *arg){ + struct RL_agent *rlAgent = (struct RL_agent*)arg; + struct status_qlearning *qlStatus = rlAgent->status; + struct print_params * pprint = rlAgent->pprint; + struct vehicle *car = rlAgent->car; + size_t count_print = 0; + while(1){ +if(/*(qlStatus->nb_episodes %125 == 0) &&*/ pprint->printed){ + //pthread_mutex_lock(&(pprint->mut_printed)); + pthread_mutex_lock(&(car->mut_coord)); + print_vehicle_n_path(car, pprint->scale_x, pprint->scale_y); + pthread_mutex_unlock(&(car->mut_coord)); + //pthread_mutex_unlock(&(pprint->mut_printed)); + printf("%s ",pprint->string_space); + printf("ep: %ld\n",qlStatus->index_episode); + neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net; + neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net; + for(size_t i=0; ioutput->dim->rank; ++i) { + printf("{sensro[%s]:%f "" vs oldsens[%s]: %f}\n",action_name[i%COUNT_ACTION],net_target->output->x[i], + action_name[i%COUNT_ACTION],net_main->output->x[i]); + + } + printf("\n< %5.2f > ( %s ) \n", car->direction, action_name[qlStatus->action % COUNT_ACTION]); + //print_weight_in_neurons_TYPE_FLOAT(net_main, "net_main_wei"); + //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, weight_in, "net_main_we_in"); + PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, output, "net_main_out"); + //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_target, output, "net_target_out"); + //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, input, "net_main_input"); + printf(" action : %d , factor : %f nb_episodes : %ld \n",qlStatus->action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes); + + FOR_LIST_FORM_BEGIN(TYPE_L_INT, qlStatus->progress_best_cumul){ + printf(" | %ld |,",(qlStatus->progress_best_cumul)->current_list->value); + } + printf("[%ld] %s ", rlAgent->car->status->cumulative_reward, pprint->string_space); + + } + Sleep(pprint->delay->delay_between_games); + ++count_print; + if(count_print > 20){ + count_print = 0; + clear_screen(); + } + } +} + + void learn_to_drive(struct RL_agent * rlAgent){ int action; struct vehicle * car = rlAgent->car; @@ -264,11 +361,15 @@ void learn_to_drive(struct RL_agent * rlAgent){ struct status_qlearning * qlStatus = rlAgent->status; struct print_params * pprint = rlAgent->pprint; char msg[100]; - + + pthread_t threadPrint; + pthread_create(&threadPrint, NULL, runPrint, (void*)rlAgent); + while(true){ for(size_t index_episode = 0; index_episode < qlParams->number_episodes; ++index_episode){ reset(car); qlStatus->nb_training_after_updated_weight_in_target = 0; + qlStatus->index_episode = index_episode; while(true){ ++(qlStatus->nb_episodes); ++(qlStatus->nb_training_after_updated_weight_in_target); @@ -277,51 +378,27 @@ void learn_to_drive(struct RL_agent * rlAgent){ add_string_log_M(car_status,msg); step_vehicle(car, action); train_qlearning(rlAgent, action); - if(/*(qlStatus->nb_episodes %15 == 0) && */ pprint->printed){ - pthread_mutex_lock(&(pprint->mut_printed)); - print_vehicle_n_path(car, pprint->scale_x, pprint->scale_y); - pthread_mutex_unlock(&(pprint->mut_printed)); - printf("%s ",pprint->string_space); - printf("ep: %ld\n",index_episode); - neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net; - neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net; - for(size_t i=0; ioutput->dim->rank; ++i) { - printf("{sensro[%s]:%f "/*vs %f / VS / %f */" vs oldsens[%s]: %f}\n",action_name[i%COUNT_ACTION],net_target->output->x[i], - /*car->sensor->x[i] ,car->old_sensor->x[i], - */action_name[i%COUNT_ACTION],net_main->output->x[i]); - - } - printf("\n< %f > ( %s ) \n", car->direction, action_name[action % COUNT_ACTION]); - //print_weight_in_neurons_TYPE_FLOAT(net_main, "net_main_wei"); - //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, weight_in, "net_main_we_in"); - PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, output, "net_main_out"); - //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_target, output, "net_target_out"); - //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, input, "net_main_input"); - printf("action : %d , factor : %f nb_episodes : %ld \n",action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes); - Sleep(pprint->delay->delay_between_games); - } - //done in step ... copy_tensor_TYPE_FLOAT(car->old_sensor, car->sensor); + //done in step ... copy_tensor_TYPE_FLOAT(car->old_sensor, car->sensor); if( qlStatus->nb_training_after_updated_weight_in_target > qlParams->nb_training_before_update_weight_in_target ){ qlStatus->nb_training_after_updated_weight_in_target = 0; copy_weight_in_networks_from_main_to_target(rlAgent->networks); } if(car_status->done == true){ //push_back_list_TYPE_L_INT(qlStatus->list_main_cumul, car_status->cumulative_reward); - printf(" cumul : %ld ", car_status->cumulative_reward); + // printf(" cumul : %ld ", car_status->cumulative_reward); if(car_status->cumulative_reward > qlStatus->progress_best_cumul->end_list->value){ push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward); - FOR_LIST_FORM_BEGIN(TYPE_L_INT, qlStatus->progress_best_cumul){ - printf(" | %ld |,",(qlStatus->progress_best_cumul)->current_list->value); - } - printf("%s ",pprint->string_space); } break; } } - if(pprint->printed){ - Sleep(pprint->delay->delay_between_episodes); - } + //if(pprint->printed){ + // Sleep(pprint->delay->delay_between_episodes); + //} } } + + pthread_join(threadPrint, NULL); } + diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.h b/deepQlearn_0/src/deepQlearning/learn_to_drive.h index e0bb8ba..5e69793 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.h +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.h @@ -37,6 +37,7 @@ struct qlearning_params { float minimum_threshold_exploration_factor; long int nb_training_before_update_weight_in_target; size_t number_episodes; +// size_t threshold_number_same_action; }; @@ -46,6 +47,10 @@ struct status_qlearning { struct main_list_TYPE_L_INT * progress_best_cumul; long int nb_training_after_updated_weight_in_target; size_t nb_episodes; + size_t index_episode; + int action; +// int last_action; +// size_t count_last_action; }; struct delay_params { diff --git a/deepQlearn_0/src/deepQlearning/vehicle.c b/deepQlearn_0/src/deepQlearning/vehicle.c index 1f0d9c0..519d4e0 100644 --- a/deepQlearn_0/src/deepQlearning/vehicle.c +++ b/deepQlearn_0/src/deepQlearning/vehicle.c @@ -64,7 +64,7 @@ sensors * create_sensors(size_t nb_xs){ struct vehicle * create_vehicle(struct blocks *path){ struct vehicle * ret_vehicle = malloc(sizeof(struct vehicle)); - + pthread_mutex_init(&(ret_vehicle->mut_coord), NULL); ret_vehicle->coord = create_coordinate(2); ret_vehicle->sensor = create_sensors(NB_SENSORS); ret_vehicle->old_sensor = create_sensors(NB_SENSORS); @@ -117,6 +117,7 @@ void free_sensors(sensors *snsr){ } void free_vehicle(struct vehicle * vhcl){ + pthread_mutex_destroy(&(vhcl->mut_coord)); free_coordinate(vhcl->coord); free_blocks(vhcl->path); free_sensors(vhcl->sensor); @@ -392,8 +393,10 @@ void print_vehicle_n_path(struct vehicle *v, float scale_x, float scale_y){ } void move_vehicle(struct vehicle *v){ + pthread_mutex_lock(&(v->mut_coord)); v->coord->x[0] += v->speed * cos(v->direction * M_PI / 180); v->coord->x[1] -= v->speed * sin(v->direction * M_PI / 180); + pthread_mutex_unlock(&(v->mut_coord)); } float distance2_coordinate(coordinate *c0, coordinate *c1){ @@ -413,11 +416,12 @@ float distance2_coordinate(coordinate *c0, coordinate *c1){ diStep_sensor->x[1] -= step_sensor * sin(direction_radian);\ }\ dist = (distance2_coordinate(diStep_sensor, v->coord)/5);\ - printf("| dist :%f | ",dist);\ - v->sensor->x[position] = (float)(MIN((SUBDIVISION-1),(int)dist))/SUBDIVISION ;\ + /*printf("| dist :%f | ",dist);*/\ + v->sensor->x[position] = (float)(MIN((SUBDIVISION-1),dist))/SUBDIVISION ;\ + //v->sensor->x[position] = (float)(MIN((SUBDIVISION-1),(int)dist))/SUBDIVISION ;\ //v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)/5))) ;\ @@ -430,7 +434,7 @@ void read_sensor(struct vehicle *v){ coordinate * diStep_sensor = create_coordinate(2); copy_coordinate(diStep_sensor, v->coord->x); float dist; - printf("\n"); +// printf("\n"); // count the number of step until we go out of the path = distance // center sensor float direction_radian ; @@ -493,11 +497,23 @@ void add_string_log(struct game_status *status, char *str ){ } +float addEpsilonRand(){ + int rangeRand = 500; + int randd = xrand() % rangeRand; + int sign = (-1)*((randd %2)*2) + 1; + float addR = sign * (float)randd/(rangeRand* 10); + + return addR; + + +} + void step_vehicle(struct vehicle *v, int action){ //float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT] float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT] v->direction = (float)((int)(v->direction + action_x[action % 3]) % 360) ; - v->speed = SPEED; // /5; + //v->direction += addEpsilonRand(); + //v->speed = SPEED; // /5; move_vehicle(v); read_sensor(v); struct game_status *status = v->status; @@ -517,6 +533,7 @@ void step_vehicle(struct vehicle *v, int action){ bool broken = false; long pprec, prec, next; char msg[48]; + //size_t count_i[path->nb_blocks]; for(long i=0; i< path->nb_blocks; ++i){ //prec = (i-1)%(path->nb_blocks); pprec = (i + path->nb_blocks - 2 )%(path->nb_blocks); @@ -531,7 +548,12 @@ void step_vehicle(struct vehicle *v, int action){ status->done = false; sprintf(msg," %ld,",i); add_string_log(status, msg); - } + //count_i[i] = 0; + }/*else{ + if(count_i[i]>10000) + status->reward = -10; + ++count_i[i]; + }*/ if(path->marker[next] == true){ status->reward = REWARD_STOP; status->done = true; @@ -550,6 +572,9 @@ void step_vehicle(struct vehicle *v, int action){ } status->cumulative_reward += status->reward; + if(status->cumulative_reward < -25000){ + status->done = true; + } } #define RANDOM 1 @@ -565,28 +590,29 @@ void reset(struct vehicle *v){ sprintf(v->status->log,"\n"); v->status->cur_log = 0; //if(init){ - srand(time(NULL)); - // init = false; + //srand(time(NULL)); + //init = false; //} int random; int diff; diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0]; - random = rand() % (diff/2) ; + random = xrand() % (diff/2) ; #if RANDOM v->coord->x[0] = path->lower_bound_block[0]->x[0] + random; #else v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2; #endif diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1]; - random = rand() % (diff/2); + random = xrand() % (diff/2); #if RANDOM v->coord->x[1] = path->lower_bound_block[0]->x[1] + random; #else v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2; #endif - random = rand() % 50; + random = xrand() % 50; #if RANDOM - v->direction = random - 25; + // v->direction = 115 - random ; + v->direction = random - 25 ; #else v->direction = -90; #endif diff --git a/deepQlearn_0/src/deepQlearning/vehicle.h b/deepQlearn_0/src/deepQlearning/vehicle.h index 79a3632..2657ca5 100644 --- a/deepQlearn_0/src/deepQlearning/vehicle.h +++ b/deepQlearn_0/src/deepQlearning/vehicle.h @@ -82,6 +82,7 @@ struct blocks { typedef tensor_TYPE_FLOAT sensors; struct vehicle { + pthread_mutex_t mut_coord; coordinate *coord; float direction; float speed; diff --git a/deepQlearn_0/test/is_good.c b/deepQlearn_0/test/is_good.c index 3a52822..3469ff4 100644 --- a/deepQlearn_0/test/is_good.c +++ b/deepQlearn_0/test/is_good.c @@ -401,7 +401,7 @@ float df(float x){ return exp(-x)/ ((1+exp(-x)) * (1+exp(-x))); } #if 1 -TEST(first_learn_vehicle_rev50){ +TEST(first_learn_vehicle_rev50_8){ size_t nb_block = 7; size_t dim= 2; struct blocks * path = create_blocks(nb_block, dim); @@ -467,8 +467,8 @@ TEST(first_learn_vehicle_rev50){ struct status_qlearning *qlstatus = create_status_qlearning (); struct delay_params *dly = create_delay_params ( - 100/*size_t delay_between_episodes*/, - 10/*size_t delay_between_games*/ + 500/*size_t delay_between_episodes*/, + 50/*size_t delay_between_games*/ ); struct qlearning_params *qlparams = create_qlearning_params ( @@ -510,7 +510,7 @@ TEST(first_learn_vehicle_rev50){ #if 1 -TEST(first_learn_vehicle_50){ +TEST(first_learn_vehicle_50__9){ size_t nb_block = 7; size_t dim= 2; struct blocks * path = create_blocks(nb_block, dim); @@ -518,7 +518,23 @@ TEST(first_learn_vehicle_50){ #if 1 + copy_coordinate(path->lower_bound_block[4], (float[]){0,0}); + copy_coordinate(path->upper_bound_block[4], (float[]){150,250}); + copy_coordinate(path->lower_bound_block[3], (float[]){150,40}); + copy_coordinate(path->upper_bound_block[3], (float[]){250,150}); + copy_coordinate(path->lower_bound_block[2], (float[]){250,80}); + copy_coordinate(path->upper_bound_block[2], (float[]){360,200}); + copy_coordinate(path->lower_bound_block[1], (float[]){360,70}); + copy_coordinate(path->upper_bound_block[1], (float[]){600,150}); + copy_coordinate(path->lower_bound_block[0], (float[]){600,90}); + copy_coordinate(path->upper_bound_block[0], (float[]){760,300}); + copy_coordinate(path->lower_bound_block[6], (float[]){260,300}); + copy_coordinate(path->upper_bound_block[6], (float[]){760,360}); + copy_coordinate(path->lower_bound_block[5], (float[]){0,250}); + copy_coordinate(path->upper_bound_block[5], (float[]){410,300}); + +/* copy_coordinate(path->lower_bound_block[0], (float[]){0,0}); copy_coordinate(path->upper_bound_block[0], (float[]){150,250}); copy_coordinate(path->lower_bound_block[1], (float[]){150,0}); @@ -534,7 +550,6 @@ TEST(first_learn_vehicle_50){ copy_coordinate(path->lower_bound_block[6], (float[]){0,250}); copy_coordinate(path->upper_bound_block[6], (float[]){410,300}); -/* copy_coordinate(path->lower_bound_block[0], (float[]){0,0}); copy_coordinate(path->upper_bound_block[0], (float[]){100,250}); copy_coordinate(path->lower_bound_block[1], (float[]){100,0}); @@ -611,8 +626,8 @@ TEST(first_learn_vehicle_50){ struct status_qlearning *qlstatus = create_status_qlearning (); struct delay_params *dly = create_delay_params ( - 100/*size_t delay_between_episodes*/, - 10/*size_t delay_between_games*/ + 500/*size_t delay_between_episodes*/, + 50/*size_t delay_between_games*/ ); struct qlearning_params *qlparams = create_qlearning_params ( @@ -653,7 +668,7 @@ TEST(first_learn_vehicle_50){ -#if 0 +#if 1 TEST(first_learn_vehicle){ size_t nb_block = 7; size_t dim= 2; @@ -763,7 +778,7 @@ TEST(first_learn_vehicle){ -#if 0 +#if 1 TEST(first_learn_vehicle){ size_t nb_block = 7; size_t dim= 2; diff --git a/neuron_t/src/neuron_t/neuron_t.c b/neuron_t/src/neuron_t/neuron_t.c index f166ad0..28506d7 100644 --- a/neuron_t/src/neuron_t/neuron_t.c +++ b/neuron_t/src/neuron_t/neuron_t.c @@ -768,6 +768,7 @@ neurons_##type * calculate_output_by_network_neurons_##type(neurons_##type *base if(tmp->next_layer==NULL){\ /*print_tensor_msg_##type(tmp->output,"retult");*/\ *output_link = tmp->output;\ + \ return tmp;\ }\ tmp = tmp->next_layer;\ diff --git a/neuron_t/test/is_good.c b/neuron_t/test/is_good.c index 4bfd506..8b08f18 100644 --- a/neuron_t/test/is_good.c +++ b/neuron_t/test/is_good.c @@ -18,6 +18,7 @@ //#include "permutation_t/permutation_t.h" #include "neuron_t/neuron_t.h" +#include "neuron_t/nneuron_t_file.h" #define VALGRIND_ 1 @@ -135,7 +136,6 @@ TEST(learning_first){ */ } - free_data_set_TYPE_FLOAT(ds); free_neurons_TYPE_FLOAT(bn); @@ -145,7 +145,7 @@ TEST(learning_first){ -TEST(learning_second){ +TEST(learning_second_PRINT){ bool rec_randomizeInitWeight = randomizeInitWeight; randomizeInitWeight =false; @@ -184,6 +184,9 @@ TEST(learning_second){ } + PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, bn, input, " bn input"); + PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, bn, output, " bn output"); + PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, bn, bias, " bn bias"); free_data_set_TYPE_FLOAT(ds); free_neurons_TYPE_FLOAT(bn); @@ -382,6 +385,8 @@ TEST(copy_weight_in_neurons){ sprintf(msg," output copy %ld ",i); print_tensor_msg_TYPE_FLOAT(linked_tens,msg); } + + EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, bn, weight_in, ".ff_bn_weight_in.txt") @@ -395,6 +400,73 @@ TEST(copy_weight_in_neurons){ +TEST(Extract_weight_in_neurons){ + bool rec_randomizeInitWeight = randomizeInitWeight; + randomizeInitWeight =false; + + data_set_TYPE_FLOAT *ds= fill_data_set_from_file_TYPE_FLOAT("xor.txt",1); +// print_data_set_msg_TYPE_FLOAT(ds,"data"); + config_layers *pconf = create_config_layers_from_OneD(3,(size_t[]){2,4,1}); /* 2 input , 1 target; 1 hidden layer with 5 neurons */ + neurons_TYPE_FLOAT *bn=NULL, *tmp ; + neurons_TYPE_FLOAT *cpyn=NULL; + //setup_networks_alloutputs_config_GLOBAL_rdm01_TYPE_FLOAT(setup_networks_alloutputs_config_TYPE_FLOAT(&bn,pconf);bn,pconf); + setup_networks_alloutputs_config_TYPE_FLOAT(&bn,pconf,false,0,1,5000); + setup_networks_alloutputs_config_TYPE_FLOAT(&cpyn, pconf,false,0,1,5000); + + setup_all_layers_functions_TYPE_FLOAT(bn, + tensorContractnProdThread_TYPE_FLOAT, + tensorProdThread_TYPE_FLOAT, + DL, + L, + f, + df); + + setup_all_layers_params_TYPE_FLOAT(bn, 5, 1 , 0.1); + + + size_t reps = learning_online2_neurons_TYPE_FLOAT(bn,ds,cond); + EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, bn, weight_in, ".ff_bn_weight_in__toExtract.txt") + + setup_all_layers_functions_TYPE_FLOAT(cpyn, + tensorContractnProdThread_TYPE_FLOAT, + tensorProdThread_TYPE_FLOAT, + DL, + L, + f, + df); + + setup_all_layers_params_TYPE_FLOAT(cpyn, 5, 1 , 0.1); + + EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, cpyn, weight_in, ".ff_bn_weight_in__toExtract.txt") +// copy_weight_in_neurons_TYPE_FLOAT(cpyn, bn); + + char msg[256]; + tensor_TYPE_FLOAT * linked_tens = NULL; + for(size_t i=0; isize; ++i){ +// print_predict_by_network_with_error_neurons_TYPE_FLOAT(bn,ds->input[i],ds->target[i]); + // print_predict_by_network_with_error_neurons_TYPE_FLOAT(cpyn,ds->input[i],ds->target[i]); + calculate_output_by_network_neurons_TYPE_FLOAT(bn,ds->input[i],&linked_tens); + sprintf(msg," output base %ld ",i); + print_tensor_msg_TYPE_FLOAT(linked_tens,msg); + calculate_output_by_network_neurons_TYPE_FLOAT(cpyn,ds->input[i],&linked_tens); + sprintf(msg," output copy %ld ",i); + print_tensor_msg_TYPE_FLOAT(linked_tens,msg); + } + + + EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, cpyn, weight_in, ".ff_bn_weight_in__exportedCPYfromExtract.txt") + + + free_data_set_TYPE_FLOAT(ds); + free_neurons_TYPE_FLOAT(bn); + free_neurons_TYPE_FLOAT(cpyn); + + LOG("reps = %ld\n",reps); + randomizeInitWeight = rec_randomizeInitWeight; +} + + + diff --git a/qlearn_0/src/Frozen_Lake.c b/qlearn_0/src/Frozen_Lake.c index bd1aad0..244613c 100644 --- a/qlearn_0/src/Frozen_Lake.c +++ b/qlearn_0/src/Frozen_Lake.c @@ -83,10 +83,10 @@ struct game_status * create_game_status(){ } #define GENERATE_RANDOM_PLACES(CONTENT) \ - int CONTENT##_Number = rand() % (params->limit_##CONTENT##_number) + 1;\ + int CONTENT##_Number = xrand() % (params->limit_##CONTENT##_number) + 1;\ for(int i=0; i< CONTENT##_Number; ++i) { \ do{\ - random = rand() % (dim->rank);\ + random = xrand() % (dim->rank);\ }while((gm->cells[random]).content != EMPTY);\ (gm->cells[random]).content = CONTENT;\ } @@ -101,13 +101,13 @@ long int generate_game(struct game *gm){ for(long int j=0; j < ACTION_COUNT; ++j) (gm->cells[i]).Q[j] = 0; } - srand(time(NULL)); + //srand(time(NULL)); int random; GENERATE_RANDOM_PLACES(FOX) GENERATE_RANDOM_PLACES(CARROT) GENERATE_RANDOM_PLACES(BLOCK) do{ - random = rand() % (dim->rank); + random = xrand() % (dim->rank); }while((gm->cells[random]).content != EMPTY); (gm->cells[random]).content = START; @@ -289,7 +289,7 @@ void mainQlearning_game(struct game *gm){ int random; long int NUMBER_EPISODE2 = (params->limit_EPISODES_number) * (params->limit_EPISODES_number); double proba_explor; - srand(time(NULL)); + //srand(time(NULL)); for(long int k=0 ; k < params->limit_game_number; ++k){ generate_game(gm); @@ -297,10 +297,10 @@ void mainQlearning_game(struct game *gm){ reset_game_status(status); while(!(status->endGame)){ - random = rand() % NUMBER_EPISODE2; + random = xrand() % NUMBER_EPISODE2; proba_explor = (double)random / NUMBER_EPISODE2; if( proba_explor < qlearnParams->exploration_factor * (1. / ((episode / 10 ) + 1))){ - action = rand() % ACTION_COUNT; + action = xrand() % ACTION_COUNT; printf("exploration action "); } else{ diff --git a/tensor_t/src/tensor_t/tensor_t.c b/tensor_t/src/tensor_t/tensor_t.c index 7b82190..d7ee726 100644 --- a/tensor_t/src/tensor_t/tensor_t.c +++ b/tensor_t/src/tensor_t/tensor_t.c @@ -150,10 +150,11 @@ int copy_tensor_##type(tensor_##type * dst, tensor_##type * src){\ }\ }\ void init_random_x_##type(tensor_##type *M, type minR, type maxR, int randomRange){\ - srand(time(NULL));\ + /*static bool initRandomFirst = true;\ + if(initRandomFirst){ srand(time(NULL)); initRandomFirst = false;}*/\ int randVal;\ for(size_t i =0; i<(M->dim)->rank;++i){\ - randVal = rand() % randomRange;\ + randVal = xrand() % randomRange;\ M->x[i]=minR + (maxR-minR)*randVal / randomRange ;\ \ }\ diff --git a/ytest_t/include_ytest/include/tools_t/tools_t.h b/ytest_t/include_ytest/include/tools_t/tools_t.h index 57c6ee8..5d13ff6 100644 --- a/ytest_t/include_ytest/include/tools_t/tools_t.h +++ b/ytest_t/include_ytest/include/tools_t/tools_t.h @@ -7,6 +7,8 @@ #include #include +#include + // to define DEBUG in gcc cli do: gcc -D DEBUG=1 or 0 if need! #ifndef DEBUG @@ -44,7 +46,13 @@ extern long int PRECISION_TYPE_L_DOUBLE ; #endif +int xrand(); +float frand(); + void gotoxy(int x, int y); + +void clear_screen(); + //void get_cursor_position(int *col, int *rows); #if DEBUG diff --git a/ytest_t/libytest.so b/ytest_t/libytest.so index 4995260..1242cd3 100644 Binary files a/ytest_t/libytest.so and b/ytest_t/libytest.so differ diff --git a/ytools_t/include/tools_t/tools_t.h b/ytools_t/include/tools_t/tools_t.h index 57c6ee8..5d13ff6 100644 --- a/ytools_t/include/tools_t/tools_t.h +++ b/ytools_t/include/tools_t/tools_t.h @@ -7,6 +7,8 @@ #include #include +#include + // to define DEBUG in gcc cli do: gcc -D DEBUG=1 or 0 if need! #ifndef DEBUG @@ -44,7 +46,13 @@ extern long int PRECISION_TYPE_L_DOUBLE ; #endif +int xrand(); +float frand(); + void gotoxy(int x, int y); + +void clear_screen(); + //void get_cursor_position(int *col, int *rows); #if DEBUG diff --git a/ytools_t/src/tools_t/tools_t.c b/ytools_t/src/tools_t/tools_t.c index 879dc82..532cd4a 100644 --- a/ytools_t/src/tools_t/tools_t.c +++ b/ytools_t/src/tools_t/tools_t.c @@ -1,6 +1,107 @@ #include "tools_t/tools_t.h" +#define POW 17 +#define MMOD ((1 << (POW)) - 1) +#define SUBA 5 +#define SUBB 8 +int xrand(){ + int mod = MMOD; // (1 << 17) - 1; + int a = (1<<(POW-(SUBA))) - 1; + int b = (1 << (POW-(SUBB))) - 1; + static int xi = 0; + int xii = (a * xi + b)%mod; + //float ret = (float) xii / mod; + //printf("[a:%d * xi:%6d + b:%d ] %% %d = %6d :: %.7f | ",a,xi,b,mod,xii,ret); + + xi = xii; + + return xii; + /* + static bool init = true; + if(init){ + init = false; + struct timespec start_t; + clock_gettime(CLOCK_REALTIME, &start_t); + srand(start_t.tv_nsec); + //srand(start_t.tv_nsec - start_t.tv_sec); + } + int ret = rand (); + return ret ; +*/ +} +int irand(){ + int mod = MMOD; // (1 << 17) - 1; + int a = (1<<(POW-(SUBA))) - 1; + int b = (1 << (POW-(SUBB))) - 1; + static int xi = 0; + int xii = (a * xi + b)%mod; + //float ret = (float) xii / mod; + + //printf("[a:%d * xi:%6d + b:%d ] %% %d = %6d :: %.7f | ",a,xi,b,mod,xii,ret); + + xi = xii; + + return xii; + +} + + +float frand(){ + /* int mod = MMOD; // (1 << 17) - 1; + int a = (1<<13) - 1; + int b = (1 << 7) - 1; + static int xi = 0; + + int xii = (a * xi + b)%mod; + float ret = (float) xii / mod; + + printf("[a:%d * xi:%6d + b:%d ] %% %d = %6d :: %.7f | ",a,xi,b,mod,xii,ret); + + xi = xii; + + return ret; +*/ +/* +int xii = irand(); + float ret = (float) xii / (MMOD); +*/ + int xii = rand(); + float ret = (float) xii / RAND_MAX; + +// printf("[%6d / %6d = %.6f | ", (xii), MMOD, ret); + + return ret; +} + + +/* +int xrand(){ + static int randMod = 1; + static bool init = true; + if(init){ + init = false; + struct timespec start_t; + clock_gettime(CLOCK_REALTIME, &start_t); + srand(start_t.tv_nsec); + //srand(start_t.tv_nsec - start_t.tv_sec); + } + int ret = rand (); + if(ret % 7 == randMod % 11){ + init = true; + randMod = ret + 1; + } + return ret ; +} + +float frand(){ + int max = 50000; + static int rnd = 0; + rnd = (xrand())%max; + printf("[%6d / %6d = %.6f | ", (rnd), max,(float)(rnd)/max); + return (float)(rnd)/max; + } +*/ void gotoxy(int x, int y) { @@ -8,6 +109,21 @@ void gotoxy(int x, int y) } + +void clear_screen(){ + struct winsize w; + + ioctl(1, TIOCGWINSZ, &w); + char pad[w.ws_col+1]; + int i=0; + for(i=0; i