Trying to fix NaN output of the RL network by using a ReLU with an upper bound

2024-07-16 12:13:05 +02:00
parent 0c9813beca
commit aac7434346
13 changed files with 405 additions and 75 deletions
@@ -3,11 +3,13 @@
 char *action_name[8] = {"LEFT", "CENTER", "RIGHT"};

 float reLU(float x){ /* clipped ReLU: 0 for x <= 0, x for 0 < x <= 10, 10 for x > 10 */
+  if(x>10) return 10; /* upper bound added by this commit as an attempt to stop NaN outputs */
  if(x>0) return x;
  return 0; /* also reached for NaN input, since both comparisons above are false */
 }

 float d_reLU(float x){ /* derivative of the clipped reLU: 1 for 0 < x <= 10, 0 everywhere else */
+  if (x>10) return 0; /* saturated region above the cap: zero slope, matching reLU's constant 10 */
  if (x>0) return 1;
  return 0;
 }
@@ -29,6 +31,10 @@ void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * netwo
   COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(TYPE_FLOAT,weight_in, networks->best_net, networks->main_net);
 }

+float id(float x){ return x;} /* identity activation f(x) = x; assigned to f_act of the last layer of each network */
+
+float constOne(float x){return 1;} /* derivative of id(): always 1, the argument is ignored; assigned to d_f_act of the last layer */
+
 struct networks_qlearning * create_nework_qlearning(
  struct config_layers * config,
  bool randomize, float minR, float maxR,  int randomRange,
@@ -46,7 +52,6 @@ struct networks_qlearning * create_nework_qlearning(
  setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->best_net), config, false, minR, maxR, randomRange);  
  copy_weight_in_networks_from_main_to_best(qnets);

-  
  setup_all_layers_functions_TYPE_FLOAT(qnets->main_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU);
  setup_all_layers_params_TYPE_FLOAT(qnets->main_net, nb_prod_thread, nb_calc_thread, learning_rate);
  setup_all_layers_functions_TYPE_FLOAT(qnets->target_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU);
@@ -54,6 +59,25 @@ struct networks_qlearning * create_nework_qlearning(
  setup_all_layers_functions_TYPE_FLOAT(qnets->best_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU);
  setup_all_layers_params_TYPE_FLOAT(qnets->best_net, nb_prod_thread, nb_calc_thread, learning_rate);

+// no activation function on the output layer, i.e. use the identity function f(x) = x:
+  neurons_TYPE_FLOAT *tmpMain = qnets->main_net;
+  neurons_TYPE_FLOAT *tmpTarget = qnets->target_net;
+  neurons_TYPE_FLOAT *tmpBest = qnets->best_net;
+  while(tmpMain){
+    if(tmpMain->next_layer == NULL){
+      tmpMain->f_act = id;
+      tmpMain->d_f_act = constOne;
+      tmpTarget->f_act = id;
+      tmpTarget->d_f_act = constOne;
+      tmpBest->f_act = id;
+      tmpBest->d_f_act = constOne;
+    }
+    tmpMain = tmpMain->next_layer;
+    tmpTarget= tmpTarget->next_layer;
+    tmpBest = tmpBest->next_layer;
+  }
+  
+

  return qnets; 

@@ -73,6 +97,11 @@ struct status_qlearning * create_status_qlearning (){
  status_ql->nb_training_after_updated_weight_in_target = 0;

  status_ql->nb_episodes = 0;
+  status_ql->index_episode= 0;
+  status_ql->action=1;
+
+//  status_ql->last_action=-1;
+//  status_ql->count_last_action=0;
  
  return status_ql;
 }
@@ -129,6 +158,7 @@ struct qlearning_params * create_qlearning_params  (
  qparams->factor_update_exploration_factor = 0.995;
  qparams->minimum_threshold_exploration_factor = 0.01;

+//  qparams->threshold_number_same_action = 500;

  return qparams;
 }
@@ -226,6 +256,8 @@ void train_qlearning(struct RL_agent * rlAgent,

  qlParams->exploration_factor = (qlParams->exploration_factor < qlParams->minimum_threshold_exploration_factor) ? qlParams->exploration_factor : qlParams->exploration_factor * qlParams->factor_update_exploration_factor ;

+//  free_tensor_TYPE_FLOAT(action_value);
+//  free_tensor_TYPE_FLOAT(next_action_value);

 }

@@ -236,26 +268,91 @@ int select_action(struct RL_agent * rlAgent){
  //calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->old_sensor, &action_value);
  calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->sensor, &action_value);
  //long int NUMBER_EPISODE2 = (rlAgent->qlearnParams->number_episodes)*100;
-  int NUMBER_EPISODE2 = 3000;
+  //int randRange = 10000;
  //NUMBER_EPISODE2 = NUMBER_EPISODE2 * NUMBER_EPISODE2;
-//  static bool init = true ;
-//  if(init){
-    srand(time(NULL));
-//    init =false;
-//  }
-  int random = rand() % NUMBER_EPISODE2;
-  float proba_explor = (float)(random ) / NUMBER_EPISODE2;
+  //static bool init = true ;
+  //if(init){
+    //srand(time(NULL));
+    //init =false;
+  //}
+  //int random = xrand() % randRange;
+  float proba_explor =  (float) (rand() % (1<<17 -1))/ (1<<17 -1); //frand(); //(float)(random ) / randRange;
  if(proba_explor > rlAgent->qlearnParams->exploration_factor ){
    action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank  );
+    //if(action == ARG_MIN_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank  )) 
+      //action = xrand() % action_value->dim->rank ; 
  }
  else{
-    action = rand() % action_value->dim->rank ; 
+    action = xrand() % action_value->dim->rank ; 
   // explore++;
    //printf(" EXPLORE :%ld, action : %d , factor : %f nb_episodes : %ld \n",explore,action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
  }
+  /*
+    if(rlAgent->status->last_action == action){
+      ++(rlAgent->status->count_last_action);
+      if(rlAgent->status->count_last_action > rlAgent->qlearnParams->threshold_number_same_action  ){
+        while(rlAgent->status->last_action == action) 
+          action = xrand() % action_value->dim->rank ;
+        
+        rlAgent->status->last_action = action;
+        rlAgent->status->count_last_action = 0;
+      }
+    }
+    else{
+      rlAgent->status->last_action = action;
+      rlAgent->status->count_last_action = 0;
+    }
+    */
+  rlAgent->status->action = action;
  return action;
 }

+/*
+ * Display thread entry point (started with pthread_create from learn_to_drive).
+ *
+ * arg is the struct RL_agent that the training loop is working on. While
+ * pprint->printed is set, each pass draws the vehicle and its path, the
+ * outputs of the target and main networks, the last selected action, the
+ * exploration factor and the list of best cumulative rewards. Every pass then
+ * sleeps for delay_between_games, and the screen is cleared after more than
+ * 20 passes.
+ *
+ * The loop never terminates, so the function never returns.
+ *
+ * NOTE(review): only the vehicle drawing is guarded (car->mut_coord). The
+ * network outputs, the status counters and progress_best_cumul are read here
+ * without synchronisation while learn_to_drive() updates them - confirm that
+ * this is acceptable for display purposes.
+ */
+void* runPrint(void *arg){
+  struct RL_agent *rlAgent = (struct RL_agent*)arg;
+  struct status_qlearning *qlStatus = rlAgent->status;
+  struct print_params * pprint = rlAgent->pprint;
+  struct vehicle *car = rlAgent->car;
+  size_t count_print = 0;
+  while(1){
+    if(/*(qlStatus->nb_episodes %125 == 0)  &&*/  pprint->printed){
+      //pthread_mutex_lock(&(pprint->mut_printed));
+      /* move_vehicle() updates the coordinates under the same mutex */
+      pthread_mutex_lock(&(car->mut_coord));
+      print_vehicle_n_path(car, pprint->scale_x, pprint->scale_y);
+      pthread_mutex_unlock(&(car->mut_coord));
+      //pthread_mutex_unlock(&(pprint->mut_printed));
+      printf("%s ",pprint->string_space);
+      /* index_episode is a size_t, so it needs %zu rather than %ld */
+      printf("ep: %zu\n",qlStatus->index_episode);
+      neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net;
+      neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net;
+      for(size_t i=0; i<net_main->output->dim->rank; ++i) {
+        printf("{sensro[%s]:%f "" vs oldsens[%s]: %f}\n",action_name[i%COUNT_ACTION],net_target->output->x[i],
+        action_name[i%COUNT_ACTION],net_main->output->x[i]);
+      }
+      printf("\n< %5.2f > ( %s  ) \n", car->direction, action_name[qlStatus->action % COUNT_ACTION]);
+      //print_weight_in_neurons_TYPE_FLOAT(net_main, "net_main_wei");
+      //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, weight_in, "net_main_we_in");
+      PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, output, "net_main_out");
+      //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_target, output, "net_target_out");
+      //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, input, "net_main_input");
+      /* nb_episodes is a size_t as well, hence %zu */
+      printf(" action : %d , factor : %f nb_episodes : %zu \n",qlStatus->action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
+
+      FOR_LIST_FORM_BEGIN(TYPE_L_INT, qlStatus->progress_best_cumul){
+        printf(" | %ld |,",(qlStatus->progress_best_cumul)->current_list->value);
+      }
+      printf("[%ld] %s ", rlAgent->car->status->cumulative_reward, pprint->string_space);
+    }
+    /* the pause and the periodic clear run even when printing is disabled */
+    Sleep(pprint->delay->delay_between_games);
+    ++count_print;
+    if(count_print > 20){
+      count_print = 0;
+      clear_screen();
+    }
+  }
+}
+
+
 void learn_to_drive(struct RL_agent * rlAgent){
  int action;
  struct vehicle * car = rlAgent->car;
@@ -265,10 +362,14 @@ void learn_to_drive(struct RL_agent * rlAgent){
  struct print_params * pprint = rlAgent->pprint;
  char msg[100];
  
+  pthread_t threadPrint;
+  pthread_create(&threadPrint, NULL, runPrint, (void*)rlAgent);
+  
  while(true){
    for(size_t index_episode = 0; index_episode < qlParams->number_episodes; ++index_episode){
      reset(car);
      qlStatus->nb_training_after_updated_weight_in_target = 0;
+      qlStatus->index_episode = index_episode;
      while(true){
        ++(qlStatus->nb_episodes);
        ++(qlStatus->nb_training_after_updated_weight_in_target);
@@ -277,51 +378,27 @@ void learn_to_drive(struct RL_agent * rlAgent){
        add_string_log_M(car_status,msg);
        step_vehicle(car, action);
        train_qlearning(rlAgent, action);
-        if(/*(qlStatus->nb_episodes %15 == 0)  && */ pprint->printed){
-          pthread_mutex_lock(&(pprint->mut_printed));
-          print_vehicle_n_path(car, pprint->scale_x, pprint->scale_y);
-          pthread_mutex_unlock(&(pprint->mut_printed));
-          printf("%s ",pprint->string_space);
-          printf("ep: %ld\n",index_episode);
-          neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net;
-          neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net;
-          for(size_t i=0; i<net_main->output->dim->rank; ++i) {
-            printf("{sensro[%s]:%f "/*vs %f / VS / %f */" vs oldsens[%s]: %f}\n",action_name[i%COUNT_ACTION],net_target->output->x[i], 
-            /*car->sensor->x[i] ,car->old_sensor->x[i],
-            */action_name[i%COUNT_ACTION],net_main->output->x[i]);
-            
-          }
-          printf("\n< %f > ( %s  ) \n", car->direction, action_name[action % COUNT_ACTION]);
-          //print_weight_in_neurons_TYPE_FLOAT(net_main, "net_main_wei");
-          //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, weight_in, "net_main_we_in");
-          PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, output, "net_main_out");
-          //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_target, output, "net_target_out");
-          //PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, input, "net_main_input");
-          printf("action : %d , factor : %f nb_episodes : %ld \n",action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
-          Sleep(pprint->delay->delay_between_games);
-        }
-        //done in step ... copy_tensor_TYPE_FLOAT(car->old_sensor, car->sensor);
+                //done in step ... copy_tensor_TYPE_FLOAT(car->old_sensor, car->sensor);
        if( qlStatus->nb_training_after_updated_weight_in_target > qlParams->nb_training_before_update_weight_in_target ){
          qlStatus->nb_training_after_updated_weight_in_target = 0;
          copy_weight_in_networks_from_main_to_target(rlAgent->networks);
        }
        if(car_status->done == true){
          //push_back_list_TYPE_L_INT(qlStatus->list_main_cumul, car_status->cumulative_reward);
-          printf(" cumul : %ld ", car_status->cumulative_reward);
+          // printf(" cumul : %ld ", car_status->cumulative_reward);
          if(car_status->cumulative_reward > qlStatus->progress_best_cumul->end_list->value){
            push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward);
-            FOR_LIST_FORM_BEGIN(TYPE_L_INT, qlStatus->progress_best_cumul){
-              printf(" | %ld |,",(qlStatus->progress_best_cumul)->current_list->value);
-            }
-            printf("%s ",pprint->string_space);
          }
          break;
        }
      }

-      if(pprint->printed){
-        Sleep(pprint->delay->delay_between_episodes);
-      }
+      //if(pprint->printed){
+      //  Sleep(pprint->delay->delay_between_episodes);
+      //}
    }
  }
+
+  pthread_join(threadPrint, NULL);
 }
+
@@ -37,6 +37,7 @@ struct qlearning_params {
  float minimum_threshold_exploration_factor;
  long int nb_training_before_update_weight_in_target;
  size_t number_episodes;
+//  size_t threshold_number_same_action;
 };


@@ -46,6 +47,10 @@ struct status_qlearning {
  struct main_list_TYPE_L_INT * progress_best_cumul;
  long int nb_training_after_updated_weight_in_target;
  size_t nb_episodes;
+  size_t index_episode;
+  int action;
+//  int last_action;
+//  size_t count_last_action;
 };

 struct delay_params {
@@ -64,7 +64,7 @@ sensors * create_sensors(size_t nb_xs){

 struct vehicle * create_vehicle(struct blocks *path){
  struct vehicle * ret_vehicle = malloc(sizeof(struct vehicle));
-  
+  pthread_mutex_init(&(ret_vehicle->mut_coord), NULL); 
  ret_vehicle->coord = create_coordinate(2);
  ret_vehicle->sensor = create_sensors(NB_SENSORS);
  ret_vehicle->old_sensor = create_sensors(NB_SENSORS);
@@ -117,6 +117,7 @@ void free_sensors(sensors *snsr){
 }

 void free_vehicle(struct vehicle * vhcl){
+  pthread_mutex_destroy(&(vhcl->mut_coord));
  free_coordinate(vhcl->coord);
  free_blocks(vhcl->path);
  free_sensors(vhcl->sensor);
@@ -392,8 +393,10 @@ void print_vehicle_n_path(struct vehicle *v, float scale_x, float scale_y){
 }

 void move_vehicle(struct vehicle *v){ /* advance v->coord by v->speed along v->direction, which is converted from degrees to radians below */
+  pthread_mutex_lock(&(v->mut_coord)); /* runPrint() holds the same mutex while it draws the vehicle */
  v->coord->x[0] += v->speed * cos(v->direction * M_PI / 180);
  v->coord->x[1] -= v->speed * sin(v->direction * M_PI / 180); /* subtracted: a positive sine decreases x[1] */
+  pthread_mutex_unlock(&(v->mut_coord));
 }

 float distance2_coordinate(coordinate *c0, coordinate *c1){
@@ -413,11 +416,12 @@ float distance2_coordinate(coordinate *c0, coordinate *c1){
    diStep_sensor->x[1] -= step_sensor * sin(direction_radian);\
  }\
  dist = (distance2_coordinate(diStep_sensor, v->coord)/5);\
-  printf("| dist :%f | ",dist);\
-  v->sensor->x[position] = (float)(MIN((SUBDIVISION-1),(int)dist))/SUBDIVISION ;\
+  /*printf("| dist :%f | ",dist);*/\
+  v->sensor->x[position] = (float)(MIN((SUBDIVISION-1),dist))/SUBDIVISION ;\
  
  
  
+  //v->sensor->x[position] = (float)(MIN((SUBDIVISION-1),(int)dist))/SUBDIVISION ;\
  
  
  //v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)/5))) ;\
@@ -430,7 +434,7 @@ void read_sensor(struct vehicle *v){
  coordinate * diStep_sensor = create_coordinate(2);
  copy_coordinate(diStep_sensor, v->coord->x);
  float dist;
-  printf("\n"); 
+//  printf("\n"); 
  // count the number of step until we go out of the path = distance
  // center sensor
  float direction_radian ;
@@ -493,11 +497,23 @@ void add_string_log(struct game_status *status, char *str ){

 }

+float addEpsilonRand(){ /* small signed perturbation built from xrand(); its only visible use in step_vehicle() is commented out */
+  int rangeRand = 500;
+  int randd = xrand() % rangeRand; /* 0..499, xrand() never returns a negative value */
+  int sign = (-1)*((randd %2)*2) + 1; /* +1 when randd is even, -1 when it is odd */
+  float addR = sign * (float)randd/(rangeRand* 10); /* magnitude randd/5000, i.e. strictly below 0.1 */
+  
+  return addR;
+  
+
+}
+
 void step_vehicle(struct vehicle *v, int action){
  //float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT]
  float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT]
  v->direction = (float)((int)(v->direction + action_x[action % 3]) % 360) ;
-  v->speed = SPEED; // /5;
+  //v->direction += addEpsilonRand();
+  //v->speed = SPEED; // /5;
  move_vehicle(v);
  read_sensor(v);
  struct game_status *status = v->status;
@@ -517,6 +533,7 @@ void step_vehicle(struct vehicle *v, int action){
    bool broken = false;
    long pprec, prec, next;
    char msg[48];
+    //size_t count_i[path->nb_blocks];
    for(long i=0; i< path->nb_blocks; ++i){
        //prec = (i-1)%(path->nb_blocks);
        pprec = (i + path->nb_blocks - 2 )%(path->nb_blocks);
@@ -531,7 +548,12 @@ void step_vehicle(struct vehicle *v, int action){
          status->done = false;
          sprintf(msg," %ld,",i);
          add_string_log(status, msg);
-        }
+          //count_i[i] = 0;
+        }/*else{
+          if(count_i[i]>10000)
+            status->reward = -10;
+          ++count_i[i];
+        }*/
        if(path->marker[next] == true){
          status->reward = REWARD_STOP;
          status->done = true;
@@ -550,6 +572,9 @@ void step_vehicle(struct vehicle *v, int action){
  }
  status->cumulative_reward += status->reward;

+  if(status->cumulative_reward < -25000){
+    status->done = true;
+  }
 }

 #define RANDOM 1
@@ -565,28 +590,29 @@ void reset(struct vehicle *v){
  sprintf(v->status->log,"\n");
  v->status->cur_log = 0;
  //if(init){
-      srand(time(NULL));
-  //  init = false;
+    //srand(time(NULL));
+    //init = false;
  //}
  int random;
  int diff;
  diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0];
-  random = rand() % (diff/2) ;
+  random = xrand() % (diff/2) ;
  #if RANDOM
    v->coord->x[0] = path->lower_bound_block[0]->x[0] + random;
  #else
    v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2;
  #endif
  diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1];
-  random = rand() % (diff/2);
+  random = xrand() % (diff/2);
  #if RANDOM
    v->coord->x[1] = path->lower_bound_block[0]->x[1] + random;
  #else
    v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2;
  #endif
-  random = rand() % 50;
+  random = xrand() % 50;
  #if RANDOM
-    v->direction = random - 25;
+   // v->direction = 115 - random ;
+    v->direction = random - 25  ;
  #else
    v->direction = -90;
  #endif
@@ -82,6 +82,7 @@ struct blocks {
 typedef tensor_TYPE_FLOAT sensors;

 struct vehicle {
+  pthread_mutex_t mut_coord;
  coordinate *coord;
  float direction;
  float speed;
@@ -401,7 +401,7 @@ float df(float x){
  return exp(-x)/ ((1+exp(-x)) * (1+exp(-x)));
 }
 #if 1
-TEST(first_learn_vehicle_rev50){
+TEST(first_learn_vehicle_rev50_8){
  size_t nb_block = 7;
  size_t dim= 2;
  struct blocks * path = create_blocks(nb_block, dim);
@@ -467,8 +467,8 @@ TEST(first_learn_vehicle_rev50){

  struct status_qlearning *qlstatus = create_status_qlearning ();
  struct delay_params *dly = create_delay_params (
-    100/*size_t delay_between_episodes*/,
-    10/*size_t delay_between_games*/
+    500/*size_t delay_between_episodes*/,
+    50/*size_t delay_between_games*/
  );
  
  struct qlearning_params *qlparams = create_qlearning_params (
@@ -510,7 +510,7 @@ TEST(first_learn_vehicle_rev50){


 #if 1
-TEST(first_learn_vehicle_50){
+TEST(first_learn_vehicle_50__9){
  size_t nb_block = 7;
  size_t dim= 2;
  struct blocks * path = create_blocks(nb_block, dim);
@@ -518,7 +518,23 @@ TEST(first_learn_vehicle_50){


 #if 1
+  copy_coordinate(path->lower_bound_block[4], (float[]){0,0});
+  copy_coordinate(path->upper_bound_block[4], (float[]){150,250});
+  copy_coordinate(path->lower_bound_block[3], (float[]){150,40});
+  copy_coordinate(path->upper_bound_block[3], (float[]){250,150});
+  copy_coordinate(path->lower_bound_block[2], (float[]){250,80});
+  copy_coordinate(path->upper_bound_block[2], (float[]){360,200});
+  copy_coordinate(path->lower_bound_block[1], (float[]){360,70});
+  copy_coordinate(path->upper_bound_block[1], (float[]){600,150});
+  copy_coordinate(path->lower_bound_block[0], (float[]){600,90});
+  copy_coordinate(path->upper_bound_block[0], (float[]){760,300});
+  copy_coordinate(path->lower_bound_block[6], (float[]){260,300});
+  copy_coordinate(path->upper_bound_block[6], (float[]){760,360});
+  copy_coordinate(path->lower_bound_block[5], (float[]){0,250});
+  copy_coordinate(path->upper_bound_block[5], (float[]){410,300});

+
+/*
    copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
  copy_coordinate(path->upper_bound_block[0], (float[]){150,250});
  copy_coordinate(path->lower_bound_block[1], (float[]){150,0});
@@ -534,7 +550,6 @@ TEST(first_learn_vehicle_50){
  copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
  copy_coordinate(path->upper_bound_block[6], (float[]){410,300});

-/*
  copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
  copy_coordinate(path->upper_bound_block[0], (float[]){100,250});
  copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
@@ -611,8 +626,8 @@ TEST(first_learn_vehicle_50){

  struct status_qlearning *qlstatus = create_status_qlearning ();
  struct delay_params *dly = create_delay_params (
-    100/*size_t delay_between_episodes*/,
-    10/*size_t delay_between_games*/
+    500/*size_t delay_between_episodes*/,
+    50/*size_t delay_between_games*/
  );
  
  struct qlearning_params *qlparams = create_qlearning_params (
@@ -653,7 +668,7 @@ TEST(first_learn_vehicle_50){



-#if 0
+#if 1
 TEST(first_learn_vehicle){
  size_t nb_block = 7;
  size_t dim= 2;
@@ -763,7 +778,7 @@ TEST(first_learn_vehicle){



-#if 0
+#if 1
 TEST(first_learn_vehicle){
  size_t nb_block = 7;
  size_t dim= 2;
@@ -768,6 +768,7 @@ neurons_##type * calculate_output_by_network_neurons_##type(neurons_##type *base
    if(tmp->next_layer==NULL){\
      /*print_tensor_msg_##type(tmp->output,"retult");*/\
      *output_link = tmp->output;\
+      \
      return tmp;\
    }\
    tmp = tmp->next_layer;\
@@ -18,6 +18,7 @@

 //#include "permutation_t/permutation_t.h"
 #include "neuron_t/neuron_t.h"
+#include "neuron_t/nneuron_t_file.h"

 #define VALGRIND_ 1

@@ -135,7 +136,6 @@ TEST(learning_first){
    */
  }
  
-
  free_data_set_TYPE_FLOAT(ds);
  free_neurons_TYPE_FLOAT(bn); 

@@ -145,7 +145,7 @@ TEST(learning_first){



-TEST(learning_second){
+TEST(learning_second_PRINT){
   bool rec_randomizeInitWeight = randomizeInitWeight;
   randomizeInitWeight =false;
  
@@ -184,6 +184,9 @@ TEST(learning_second){
  }
 

+  PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, bn, input, " bn input");
+  PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, bn, output, " bn output");
+  PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, bn, bias, " bn bias");

  free_data_set_TYPE_FLOAT(ds);
  free_neurons_TYPE_FLOAT(bn); 
@@ -383,6 +386,75 @@ TEST(copy_weight_in_neurons){
    print_tensor_msg_TYPE_FLOAT(linked_tens,msg);
  }

+  EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, bn, weight_in, ".ff_bn_weight_in.txt")
+ 
+
+
+  free_data_set_TYPE_FLOAT(ds);
+  free_neurons_TYPE_FLOAT(bn); 
+  free_neurons_TYPE_FLOAT(cpyn); 
+
+  LOG("reps = %ld\n",reps);
+  randomizeInitWeight = rec_randomizeInitWeight;
+}
+
+
+
+TEST(Extract_weight_in_neurons){
+   bool rec_randomizeInitWeight = randomizeInitWeight;
+   randomizeInitWeight =false;
+  
+  data_set_TYPE_FLOAT *ds= fill_data_set_from_file_TYPE_FLOAT("xor.txt",1);
+//  print_data_set_msg_TYPE_FLOAT(ds,"data");
+  config_layers *pconf = create_config_layers_from_OneD(3,(size_t[]){2,4,1}); /* 2 input , 1 target; 1 hidden layer with 5 neurons */
+  neurons_TYPE_FLOAT *bn=NULL, *tmp ;
+  neurons_TYPE_FLOAT *cpyn=NULL;
+  //setup_networks_alloutputs_config_GLOBAL_rdm01_TYPE_FLOAT(setup_networks_alloutputs_config_TYPE_FLOAT(&bn,pconf);bn,pconf);
+     setup_networks_alloutputs_config_TYPE_FLOAT(&bn,pconf,false,0,1,5000);
+     setup_networks_alloutputs_config_TYPE_FLOAT(&cpyn, pconf,false,0,1,5000);
+
+  setup_all_layers_functions_TYPE_FLOAT(bn,
+    tensorContractnProdThread_TYPE_FLOAT,
+    tensorProdThread_TYPE_FLOAT,
+    DL,
+    L,
+    f,
+    df);
+
+  setup_all_layers_params_TYPE_FLOAT(bn, 5, 1 ,  0.1);
+
+
+  size_t reps = learning_online2_neurons_TYPE_FLOAT(bn,ds,cond);
+  EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, bn, weight_in, ".ff_bn_weight_in__toExtract.txt")
+  
+  setup_all_layers_functions_TYPE_FLOAT(cpyn,
+    tensorContractnProdThread_TYPE_FLOAT,
+    tensorProdThread_TYPE_FLOAT,
+    DL,
+    L,
+    f,
+    df);
+
+  setup_all_layers_params_TYPE_FLOAT(cpyn, 5, 1 ,  0.1);
+
+  EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, cpyn, weight_in, ".ff_bn_weight_in__toExtract.txt")
+//  copy_weight_in_neurons_TYPE_FLOAT(cpyn, bn);
+ 
+  char msg[256];
+  tensor_TYPE_FLOAT * linked_tens = NULL;
+  for(size_t i=0; i<ds->size; ++i){
+//    print_predict_by_network_with_error_neurons_TYPE_FLOAT(bn,ds->input[i],ds->target[i]);
+ //   print_predict_by_network_with_error_neurons_TYPE_FLOAT(cpyn,ds->input[i],ds->target[i]);
+    calculate_output_by_network_neurons_TYPE_FLOAT(bn,ds->input[i],&linked_tens);
+    sprintf(msg," output base %ld ",i);
+    print_tensor_msg_TYPE_FLOAT(linked_tens,msg);
+    calculate_output_by_network_neurons_TYPE_FLOAT(cpyn,ds->input[i],&linked_tens);
+    sprintf(msg," output copy %ld ",i);
+    print_tensor_msg_TYPE_FLOAT(linked_tens,msg);
+  }
+
+ 
+  EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, cpyn, weight_in, ".ff_bn_weight_in__exportedCPYfromExtract.txt")


  free_data_set_TYPE_FLOAT(ds);
@@ -83,10 +83,10 @@ struct game_status * create_game_status(){
 }

 #define GENERATE_RANDOM_PLACES(CONTENT) \
-  int CONTENT##_Number = rand() % (params->limit_##CONTENT##_number) + 1;\
+  int CONTENT##_Number = xrand() % (params->limit_##CONTENT##_number) + 1;\
  for(int i=0; i< CONTENT##_Number; ++i) { \
    do{\
-      random = rand() % (dim->rank);\
+      random = xrand() % (dim->rank);\
    }while((gm->cells[random]).content != EMPTY);\
    (gm->cells[random]).content = CONTENT;\
  }
@@ -101,13 +101,13 @@ long int generate_game(struct game *gm){
    for(long int j=0; j < ACTION_COUNT; ++j)
      (gm->cells[i]).Q[j] = 0;
  }
-  srand(time(NULL));
+  //srand(time(NULL));
  int random;
  GENERATE_RANDOM_PLACES(FOX)
  GENERATE_RANDOM_PLACES(CARROT)
  GENERATE_RANDOM_PLACES(BLOCK)
  do{
-      random = rand() % (dim->rank);
+      random = xrand() % (dim->rank);
  }while((gm->cells[random]).content != EMPTY);
  (gm->cells[random]).content = START;
  
@@ -289,7 +289,7 @@ void mainQlearning_game(struct game *gm){
  int random;
  long int NUMBER_EPISODE2 = (params->limit_EPISODES_number) * (params->limit_EPISODES_number);
  double proba_explor;
-  srand(time(NULL));
+  //srand(time(NULL));

  for(long int k=0 ; k < params->limit_game_number; ++k){
      generate_game(gm);
@@ -297,10 +297,10 @@ void mainQlearning_game(struct game *gm){
      reset_game_status(status);

      while(!(status->endGame)){
-        random = rand() % NUMBER_EPISODE2;
+        random = xrand() % NUMBER_EPISODE2;
        proba_explor = (double)random / NUMBER_EPISODE2;
        if( proba_explor < qlearnParams->exploration_factor * (1. / ((episode / 10 ) + 1))){ 
-          action = rand() % ACTION_COUNT;
+          action = xrand() % ACTION_COUNT;
          printf("exploration action ");
        }
        else{
@@ -150,10 +150,11 @@ int copy_tensor_##type(tensor_##type * dst, tensor_##type * src){\
    }\
  }\
  void init_random_x_##type(tensor_##type *M, type minR, type maxR,  int randomRange){\
-    srand(time(NULL));\
+    /*static bool initRandomFirst = true;\
+    if(initRandomFirst){ srand(time(NULL)); initRandomFirst = false;}*/\
    int randVal;\
    for(size_t i =0; i<(M->dim)->rank;++i){\
-      randVal = rand() % randomRange;\
+      randVal = xrand() % randomRange;\
      M->x[i]=minR + (maxR-minR)*randVal  / randomRange ;\
    \
    }\
@@ -7,6 +7,8 @@
 #include <stdbool.h>
 #include <time.h>

+#include <sys/ioctl.h>
+

 // to define DEBUG in gcc cli do: gcc -D DEBUG=1 or 0 if need!
 #ifndef DEBUG
@@ -44,7 +46,13 @@ extern long int PRECISION_TYPE_L_DOUBLE ;

 #endif

+int xrand();
+float frand();
+
 void gotoxy(int x, int y);
+
+void clear_screen();
+
 //void get_cursor_position(int *col, int *rows);

 #if DEBUG
@@ -7,6 +7,8 @@
 #include <stdbool.h>
 #include <time.h>

+#include <sys/ioctl.h>
+

 // to define DEBUG in gcc cli do: gcc -D DEBUG=1 or 0 if need!
 #ifndef DEBUG
@@ -44,7 +46,13 @@ extern long int PRECISION_TYPE_L_DOUBLE ;

 #endif

+int xrand();
+float frand();
+
 void gotoxy(int x, int y);
+
+void clear_screen();
+
 //void get_cursor_position(int *col, int *rows);

 #if DEBUG
@@ -1,6 +1,107 @@
 #include "tools_t/tools_t.h"

+#define POW 17 /* exponent of the generator modulus */
+#define MMOD ((1 << (POW)) - 1) /* modulus used by xrand()/irand(): 2^17 - 1 = 131071 */
+#define SUBA 5 /* multiplier is 2^(POW-SUBA) - 1 = 4095 */
+#define SUBB 8 /* increment is 2^(POW-SUBB) - 1 = 511 */
+int xrand(){ /* linear congruential generator: next = (a * previous + b) % MMOD, result in 0..MMOD-1 */
+  int mod = MMOD; // (1 << 17) - 1;
+  int a = (1<<(POW-(SUBA))) - 1; /* 4095 */
+  int b = (1 << (POW-(SUBB))) - 1; /* 511 */
+  static int xi = 0; /* generator state; always starts at 0, so every run produces the same sequence; not protected against concurrent callers */
+  int xii = (a * xi + b)%mod; /* xi < 131071, so a * xi + b stays below 2^30 and cannot overflow a 32-bit int */
+  //float ret = (float) xii / mod;

+  //printf("[a:%d * xi:%6d + b:%d ] %% %d = %6d :: %.7f | ",a,xi,b,mod,xii,ret);
+
+  xi = xii;
+
+  return xii;
+  /*
+  static bool init = true;
+  if(init){
+    init = false;
+    struct timespec start_t;
+    clock_gettime(CLOCK_REALTIME, &start_t);
+    srand(start_t.tv_nsec);
+      //srand(start_t.tv_nsec - start_t.tv_sec);
+  }
+  int ret = rand ();
+  return ret ;
+*/
+}
+int irand(){ /* same recurrence and constants as xrand(), but with its own independent static state */
+  int mod = MMOD; // (1 << 17) - 1;
+  int a = (1<<(POW-(SUBA))) - 1; /* 4095 */
+  int b = (1 << (POW-(SUBB))) - 1; /* 511 */
+  static int xi = 0; /* starts at 0: the sequence is the same on every run */
+  int xii = (a * xi + b)%mod;
+  //float ret = (float) xii / mod;
+
+  //printf("[a:%d * xi:%6d + b:%d ] %% %d = %6d :: %.7f | ",a,xi,b,mod,xii,ret);
+
+  xi = xii;
+
+  return xii; /* value in 0..MMOD-1 */
+
+}
+
+
+float frand(){ /* returns rand() scaled by RAND_MAX; the two generator-based variants below are disabled */
+ /* int mod = MMOD; // (1 << 17) - 1;
+  int a = (1<<13) - 1;
+  int b = (1 << 7) - 1;
+  static int xi = 0;
+  
+  int xii = (a * xi + b)%mod;
+  float ret = (float) xii / mod;
+  
+  printf("[a:%d * xi:%6d + b:%d ] %% %d = %6d :: %.7f | ",a,xi,b,mod,xii,ret);
+
+  xi = xii;
+
+  return ret;
+*/
+/*
+int xii = irand();
+  float ret = (float) xii / (MMOD);
+*/  
+  int xii = rand(); /* NOTE(review): the srand() calls visible in this change are all commented out, so rand() presumably runs with its default seed - verify */
+  float ret = (float) xii / RAND_MAX; /* both 0 and 1 are possible results */
+
+//  printf("[%6d / %6d = %.6f | ", (xii), MMOD, ret);
+
+  return ret; 
+}
+
+
+/*
+int xrand(){
+    static int randMod = 1;
+    static bool init = true;
+    if(init){
+      init = false;
+      struct timespec start_t;
+      clock_gettime(CLOCK_REALTIME, &start_t);
+      srand(start_t.tv_nsec);
+      //srand(start_t.tv_nsec - start_t.tv_sec);
+    }
+    int ret = rand ();
+    if(ret % 7 ==  randMod % 11){
+      init = true;
+      randMod = ret + 1;
+    }
+    return ret ;
+}
+
+float frand(){
+  int max = 50000;
+  static int rnd = 0;
+  rnd = (xrand())%max;
+  printf("[%6d / %6d = %.6f | ", (rnd), max,(float)(rnd)/max);
+  return (float)(rnd)/max;
+  }
+*/

 void gotoxy(int x, int y)
 {
@@ -8,6 +109,21 @@ void gotoxy(int x, int y)
 }


+
+/*
+ * Push the current terminal content out of view by printing blank lines:
+ * (rows - 5) lines, each made of (columns + 1) spaces, where rows and columns
+ * are the window size reported for file descriptor 1 (stdout).
+ *
+ * Nothing is printed when the window size cannot be queried, for example when
+ * stdout is redirected to a file, or when the terminal has 5 rows or fewer.
+ */
+void clear_screen(){
+  struct winsize w;
+
+  /* On failure w is left unset; reading it would use indeterminate sizes. */
+  if(ioctl(1, TIOCGWINSZ, &w) != 0) return;
+
+  /*
+   * "%*s" pads the empty string to the requested width, so no scratch buffer
+   * is needed. The previous buffer of ws_col+1 bytes was filled completely
+   * and then terminated one byte past its end.
+   */
+  int width = w.ws_col + 1;
+  for(int i=0; i<w.ws_row - 5 ; ++i) printf("%*s\n", width, "");
+}
+
+
 /*
 void get_cursor_position(int *col, int *rows)
 {