Trying to fix the NaN output of the RL network by using a ReLU with an upper bound
This commit is contained in:
@@ -3,11 +3,13 @@
|
|||||||
char *action_name[8] = {"LEFT", "CENTER", "RIGHT"};
|
char *action_name[8] = {"LEFT", "CENTER", "RIGHT"};
|
||||||
|
|
||||||
/**
 * Rectified linear unit with an upper bound.
 *
 * Returns 0 for x <= 0, x itself for 0 < x <= 10, and 10 for x > 10.
 * The cap keeps activations from growing without limit. A NaN input
 * fails both comparisons and therefore yields 0.
 *
 * @param x pre-activation value
 * @return  x clamped to the range [0, 10]
 */
float reLU(float x){
    if (x > 10.0f) {
        return 10.0f;   /* saturate at the upper bound */
    }
    if (x > 0.0f) {
        return x;       /* linear region */
    }
    return 0.0f;        /* non-positive inputs are cut off */
}
|
||||||
|
|
||||||
/**
 * Derivative of the upper-bounded rectified linear unit.
 *
 * The slope is 1 on the linear region 0 < x <= 10 and 0 everywhere else
 * (non-positive inputs and the saturated region x > 10).
 *
 * @param x pre-activation value
 * @return  1 when 0 < x <= 10, otherwise 0
 */
float d_reLU(float x){
    if (x > 10.0f) {
        return 0.0f;    /* saturated: output is constant, so no gradient */
    }
    if (x > 0.0f) {
        return 1.0f;    /* linear region */
    }
    return 0.0f;        /* cut-off region */
}
|
||||||
@@ -29,6 +31,10 @@ void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * netwo
|
|||||||
COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(TYPE_FLOAT,weight_in, networks->best_net, networks->main_net);
|
COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(TYPE_FLOAT,weight_in, networks->best_net, networks->main_net);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Identity activation: hands the pre-activation value back unchanged. */
float id(float x)
{
    const float unchanged = x;
    return unchanged;
}
|
||||||
|
|
||||||
|
/* Constant function: ignores its argument and always yields 1. */
float constOne(float x)
{
    (void)x;    /* the argument is intentionally unused */
    return 1.0f;
}
|
||||||
|
|
||||||
struct networks_qlearning * create_nework_qlearning(
|
struct networks_qlearning * create_nework_qlearning(
|
||||||
struct config_layers * config,
|
struct config_layers * config,
|
||||||
bool randomize, float minR, float maxR, int randomRange,
|
bool randomize, float minR, float maxR, int randomRange,
|
||||||
@@ -46,7 +52,6 @@ struct networks_qlearning * create_nework_qlearning(
|
|||||||
setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->best_net), config, false, minR, maxR, randomRange);
|
setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->best_net), config, false, minR, maxR, randomRange);
|
||||||
copy_weight_in_networks_from_main_to_best(qnets);
|
copy_weight_in_networks_from_main_to_best(qnets);
|
||||||
|
|
||||||
|
|
||||||
setup_all_layers_functions_TYPE_FLOAT(qnets->main_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU);
|
setup_all_layers_functions_TYPE_FLOAT(qnets->main_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU);
|
||||||
setup_all_layers_params_TYPE_FLOAT(qnets->main_net, nb_prod_thread, nb_calc_thread, learning_rate);
|
setup_all_layers_params_TYPE_FLOAT(qnets->main_net, nb_prod_thread, nb_calc_thread, learning_rate);
|
||||||
setup_all_layers_functions_TYPE_FLOAT(qnets->target_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU);
|
setup_all_layers_functions_TYPE_FLOAT(qnets->target_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU);
|
||||||
@@ -54,6 +59,25 @@ struct networks_qlearning * create_nework_qlearning(
|
|||||||
setup_all_layers_functions_TYPE_FLOAT(qnets->best_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU);
|
setup_all_layers_functions_TYPE_FLOAT(qnets->best_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU);
|
||||||
setup_all_layers_params_TYPE_FLOAT(qnets->best_net, nb_prod_thread, nb_calc_thread, learning_rate);
|
setup_all_layers_params_TYPE_FLOAT(qnets->best_net, nb_prod_thread, nb_calc_thread, learning_rate);
|
||||||
|
|
||||||
|
// Do not apply an activation function on the output layer, i.e. use the identity function f(x) = x:
|
||||||
|
neurons_TYPE_FLOAT *tmpMain = qnets->main_net;
|
||||||
|
neurons_TYPE_FLOAT *tmpTarget = qnets->target_net;
|
||||||
|
neurons_TYPE_FLOAT *tmpBest = qnets->best_net;
|
||||||
|
while(tmpMain){
|
||||||
|
if(tmpMain->next_layer == NULL){
|
||||||
|
tmpMain->f_act = id;
|
||||||
|
tmpMain->d_f_act = constOne;
|
||||||
|
tmpTarget->f_act = id;
|
||||||
|
tmpTarget->d_f_act = constOne;
|
||||||
|
tmpBest->f_act = id;
|
||||||
|
tmpBest->d_f_act = constOne;
|
||||||
|
}
|
||||||
|
tmpMain = tmpMain->next_layer;
|
||||||
|
tmpTarget= tmpTarget->next_layer;
|
||||||
|
tmpBest = tmpBest->next_layer;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return qnets;
|
return qnets;
|
||||||
|
|
||||||
@@ -73,6 +97,11 @@ struct status_qlearning * create_status_qlearning (){
|
|||||||
status_ql->nb_training_after_updated_weight_in_target = 0;
|
status_ql->nb_training_after_updated_weight_in_target = 0;
|
||||||
|
|
||||||
status_ql->nb_episodes = 0;
|
status_ql->nb_episodes = 0;
|
||||||
|
status_ql->index_episode= 0;
|
||||||
|
status_ql->action=1;
|
||||||
|
|
||||||
|
// status_ql->last_action=-1;
|
||||||
|
// status_ql->count_last_action=0;
|
||||||
|
|
||||||
return status_ql;
|
return status_ql;
|
||||||
}
|
}
|
||||||
@@ -129,6 +158,7 @@ struct qlearning_params * create_qlearning_params (
|
|||||||
qparams->factor_update_exploration_factor = 0.995;
|
qparams->factor_update_exploration_factor = 0.995;
|
||||||
qparams->minimum_threshold_exploration_factor = 0.01;
|
qparams->minimum_threshold_exploration_factor = 0.01;
|
||||||
|
|
||||||
|
// qparams->threshold_number_same_action = 500;
|
||||||
|
|
||||||
return qparams;
|
return qparams;
|
||||||
}
|
}
|
||||||
@@ -226,6 +256,8 @@ void train_qlearning(struct RL_agent * rlAgent,
|
|||||||
|
|
||||||
qlParams->exploration_factor = (qlParams->exploration_factor < qlParams->minimum_threshold_exploration_factor) ? qlParams->exploration_factor : qlParams->exploration_factor * qlParams->factor_update_exploration_factor ;
|
qlParams->exploration_factor = (qlParams->exploration_factor < qlParams->minimum_threshold_exploration_factor) ? qlParams->exploration_factor : qlParams->exploration_factor * qlParams->factor_update_exploration_factor ;
|
||||||
|
|
||||||
|
// free_tensor_TYPE_FLOAT(action_value);
|
||||||
|
// free_tensor_TYPE_FLOAT(next_action_value);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -236,26 +268,91 @@ int select_action(struct RL_agent * rlAgent){
|
|||||||
//calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->old_sensor, &action_value);
|
//calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->old_sensor, &action_value);
|
||||||
calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->sensor, &action_value);
|
calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->sensor, &action_value);
|
||||||
//long int NUMBER_EPISODE2 = (rlAgent->qlearnParams->number_episodes)*100;
|
//long int NUMBER_EPISODE2 = (rlAgent->qlearnParams->number_episodes)*100;
|
||||||
int NUMBER_EPISODE2 = 3000;
|
//int randRange = 10000;
|
||||||
//NUMBER_EPISODE2 = NUMBER_EPISODE2 * NUMBER_EPISODE2;
|
//NUMBER_EPISODE2 = NUMBER_EPISODE2 * NUMBER_EPISODE2;
|
||||||
// static bool init = true ;
|
//static bool init = true ;
|
||||||
// if(init){
|
//if(init){
|
||||||
srand(time(NULL));
|
//srand(time(NULL));
|
||||||
// init =false;
|
//init =false;
|
||||||
// }
|
//}
|
||||||
int random = rand() % NUMBER_EPISODE2;
|
//int random = xrand() % randRange;
|
||||||
float proba_explor = (float)(random ) / NUMBER_EPISODE2;
|
float proba_explor = (float) (rand() % (1<<17 -1))/ (1<<17 -1); //frand(); //(float)(random ) / randRange;
|
||||||
if(proba_explor > rlAgent->qlearnParams->exploration_factor ){
|
if(proba_explor > rlAgent->qlearnParams->exploration_factor ){
|
||||||
action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank );
|
action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank );
|
||||||
|
//if(action == ARG_MIN_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank ))
|
||||||
|
//action = xrand() % action_value->dim->rank ;
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
action = rand() % action_value->dim->rank ;
|
action = xrand() % action_value->dim->rank ;
|
||||||
// explore++;
|
// explore++;
|
||||||
//printf(" EXPLORE :%ld, action : %d , factor : %f nb_episodes : %ld \n",explore,action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
|
//printf(" EXPLORE :%ld, action : %d , factor : %f nb_episodes : %ld \n",explore,action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
if(rlAgent->status->last_action == action){
|
||||||
|
++(rlAgent->status->count_last_action);
|
||||||
|
if(rlAgent->status->count_last_action > rlAgent->qlearnParams->threshold_number_same_action ){
|
||||||
|
while(rlAgent->status->last_action == action)
|
||||||
|
action = xrand() % action_value->dim->rank ;
|
||||||
|
|
||||||
|
rlAgent->status->last_action = action;
|
||||||
|
rlAgent->status->count_last_action = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
rlAgent->status->last_action = action;
|
||||||
|
rlAgent->status->count_last_action = 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
rlAgent->status->action = action;
|
||||||
return action;
|
return action;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void* runPrint(void *arg){
|
||||||
|
struct RL_agent *rlAgent = (struct RL_agent*)arg;
|
||||||
|
struct status_qlearning *qlStatus = rlAgent->status;
|
||||||
|
struct print_params * pprint = rlAgent->pprint;
|
||||||
|
struct vehicle *car = rlAgent->car;
|
||||||
|
size_t count_print = 0;
|
||||||
|
while(1){
|
||||||
|
if(/*(qlStatus->nb_episodes %125 == 0) &&*/ pprint->printed){
|
||||||
|
//pthread_mutex_lock(&(pprint->mut_printed));
|
||||||
|
pthread_mutex_lock(&(car->mut_coord));
|
||||||
|
print_vehicle_n_path(car, pprint->scale_x, pprint->scale_y);
|
||||||
|
pthread_mutex_unlock(&(car->mut_coord));
|
||||||
|
//pthread_mutex_unlock(&(pprint->mut_printed));
|
||||||
|
printf("%s ",pprint->string_space);
|
||||||
|
printf("ep: %ld\n",qlStatus->index_episode);
|
||||||
|
neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net;
|
||||||
|
neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net;
|
||||||
|
for(size_t i=0; i<net_main->output->dim->rank; ++i) {
|
||||||
|
printf("{sensro[%s]:%f "" vs oldsens[%s]: %f}\n",action_name[i%COUNT_ACTION],net_target->output->x[i],
|
||||||
|
action_name[i%COUNT_ACTION],net_main->output->x[i]);
|
||||||
|
|
||||||
|
}
|
||||||
|
printf("\n< %5.2f > ( %s ) \n", car->direction, action_name[qlStatus->action % COUNT_ACTION]);
|
||||||
|
//print_weight_in_neurons_TYPE_FLOAT(net_main, "net_main_wei");
|
||||||
|
//PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, weight_in, "net_main_we_in");
|
||||||
|
PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, output, "net_main_out");
|
||||||
|
//PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_target, output, "net_target_out");
|
||||||
|
//PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, input, "net_main_input");
|
||||||
|
printf(" action : %d , factor : %f nb_episodes : %ld \n",qlStatus->action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
|
||||||
|
|
||||||
|
FOR_LIST_FORM_BEGIN(TYPE_L_INT, qlStatus->progress_best_cumul){
|
||||||
|
printf(" | %ld |,",(qlStatus->progress_best_cumul)->current_list->value);
|
||||||
|
}
|
||||||
|
printf("[%ld] %s ", rlAgent->car->status->cumulative_reward, pprint->string_space);
|
||||||
|
|
||||||
|
}
|
||||||
|
Sleep(pprint->delay->delay_between_games);
|
||||||
|
++count_print;
|
||||||
|
if(count_print > 20){
|
||||||
|
count_print = 0;
|
||||||
|
clear_screen();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void learn_to_drive(struct RL_agent * rlAgent){
|
void learn_to_drive(struct RL_agent * rlAgent){
|
||||||
int action;
|
int action;
|
||||||
struct vehicle * car = rlAgent->car;
|
struct vehicle * car = rlAgent->car;
|
||||||
@@ -265,10 +362,14 @@ void learn_to_drive(struct RL_agent * rlAgent){
|
|||||||
struct print_params * pprint = rlAgent->pprint;
|
struct print_params * pprint = rlAgent->pprint;
|
||||||
char msg[100];
|
char msg[100];
|
||||||
|
|
||||||
|
pthread_t threadPrint;
|
||||||
|
pthread_create(&threadPrint, NULL, runPrint, (void*)rlAgent);
|
||||||
|
|
||||||
while(true){
|
while(true){
|
||||||
for(size_t index_episode = 0; index_episode < qlParams->number_episodes; ++index_episode){
|
for(size_t index_episode = 0; index_episode < qlParams->number_episodes; ++index_episode){
|
||||||
reset(car);
|
reset(car);
|
||||||
qlStatus->nb_training_after_updated_weight_in_target = 0;
|
qlStatus->nb_training_after_updated_weight_in_target = 0;
|
||||||
|
qlStatus->index_episode = index_episode;
|
||||||
while(true){
|
while(true){
|
||||||
++(qlStatus->nb_episodes);
|
++(qlStatus->nb_episodes);
|
||||||
++(qlStatus->nb_training_after_updated_weight_in_target);
|
++(qlStatus->nb_training_after_updated_weight_in_target);
|
||||||
@@ -277,51 +378,27 @@ void learn_to_drive(struct RL_agent * rlAgent){
|
|||||||
add_string_log_M(car_status,msg);
|
add_string_log_M(car_status,msg);
|
||||||
step_vehicle(car, action);
|
step_vehicle(car, action);
|
||||||
train_qlearning(rlAgent, action);
|
train_qlearning(rlAgent, action);
|
||||||
if(/*(qlStatus->nb_episodes %15 == 0) && */ pprint->printed){
|
//done in step ... copy_tensor_TYPE_FLOAT(car->old_sensor, car->sensor);
|
||||||
pthread_mutex_lock(&(pprint->mut_printed));
|
|
||||||
print_vehicle_n_path(car, pprint->scale_x, pprint->scale_y);
|
|
||||||
pthread_mutex_unlock(&(pprint->mut_printed));
|
|
||||||
printf("%s ",pprint->string_space);
|
|
||||||
printf("ep: %ld\n",index_episode);
|
|
||||||
neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net;
|
|
||||||
neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net;
|
|
||||||
for(size_t i=0; i<net_main->output->dim->rank; ++i) {
|
|
||||||
printf("{sensro[%s]:%f "/*vs %f / VS / %f */" vs oldsens[%s]: %f}\n",action_name[i%COUNT_ACTION],net_target->output->x[i],
|
|
||||||
/*car->sensor->x[i] ,car->old_sensor->x[i],
|
|
||||||
*/action_name[i%COUNT_ACTION],net_main->output->x[i]);
|
|
||||||
|
|
||||||
}
|
|
||||||
printf("\n< %f > ( %s ) \n", car->direction, action_name[action % COUNT_ACTION]);
|
|
||||||
//print_weight_in_neurons_TYPE_FLOAT(net_main, "net_main_wei");
|
|
||||||
//PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, weight_in, "net_main_we_in");
|
|
||||||
PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, output, "net_main_out");
|
|
||||||
//PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_target, output, "net_target_out");
|
|
||||||
//PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, input, "net_main_input");
|
|
||||||
printf("action : %d , factor : %f nb_episodes : %ld \n",action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
|
|
||||||
Sleep(pprint->delay->delay_between_games);
|
|
||||||
}
|
|
||||||
//done in step ... copy_tensor_TYPE_FLOAT(car->old_sensor, car->sensor);
|
|
||||||
if( qlStatus->nb_training_after_updated_weight_in_target > qlParams->nb_training_before_update_weight_in_target ){
|
if( qlStatus->nb_training_after_updated_weight_in_target > qlParams->nb_training_before_update_weight_in_target ){
|
||||||
qlStatus->nb_training_after_updated_weight_in_target = 0;
|
qlStatus->nb_training_after_updated_weight_in_target = 0;
|
||||||
copy_weight_in_networks_from_main_to_target(rlAgent->networks);
|
copy_weight_in_networks_from_main_to_target(rlAgent->networks);
|
||||||
}
|
}
|
||||||
if(car_status->done == true){
|
if(car_status->done == true){
|
||||||
//push_back_list_TYPE_L_INT(qlStatus->list_main_cumul, car_status->cumulative_reward);
|
//push_back_list_TYPE_L_INT(qlStatus->list_main_cumul, car_status->cumulative_reward);
|
||||||
printf(" cumul : %ld ", car_status->cumulative_reward);
|
// printf(" cumul : %ld ", car_status->cumulative_reward);
|
||||||
if(car_status->cumulative_reward > qlStatus->progress_best_cumul->end_list->value){
|
if(car_status->cumulative_reward > qlStatus->progress_best_cumul->end_list->value){
|
||||||
push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward);
|
push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward);
|
||||||
FOR_LIST_FORM_BEGIN(TYPE_L_INT, qlStatus->progress_best_cumul){
|
|
||||||
printf(" | %ld |,",(qlStatus->progress_best_cumul)->current_list->value);
|
|
||||||
}
|
|
||||||
printf("%s ",pprint->string_space);
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(pprint->printed){
|
//if(pprint->printed){
|
||||||
Sleep(pprint->delay->delay_between_episodes);
|
// Sleep(pprint->delay->delay_between_episodes);
|
||||||
}
|
//}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pthread_join(threadPrint, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ struct qlearning_params {
|
|||||||
float minimum_threshold_exploration_factor;
|
float minimum_threshold_exploration_factor;
|
||||||
long int nb_training_before_update_weight_in_target;
|
long int nb_training_before_update_weight_in_target;
|
||||||
size_t number_episodes;
|
size_t number_episodes;
|
||||||
|
// size_t threshold_number_same_action;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -46,6 +47,10 @@ struct status_qlearning {
|
|||||||
struct main_list_TYPE_L_INT * progress_best_cumul;
|
struct main_list_TYPE_L_INT * progress_best_cumul;
|
||||||
long int nb_training_after_updated_weight_in_target;
|
long int nb_training_after_updated_weight_in_target;
|
||||||
size_t nb_episodes;
|
size_t nb_episodes;
|
||||||
|
size_t index_episode;
|
||||||
|
int action;
|
||||||
|
// int last_action;
|
||||||
|
// size_t count_last_action;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct delay_params {
|
struct delay_params {
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ sensors * create_sensors(size_t nb_xs){
|
|||||||
|
|
||||||
struct vehicle * create_vehicle(struct blocks *path){
|
struct vehicle * create_vehicle(struct blocks *path){
|
||||||
struct vehicle * ret_vehicle = malloc(sizeof(struct vehicle));
|
struct vehicle * ret_vehicle = malloc(sizeof(struct vehicle));
|
||||||
|
pthread_mutex_init(&(ret_vehicle->mut_coord), NULL);
|
||||||
ret_vehicle->coord = create_coordinate(2);
|
ret_vehicle->coord = create_coordinate(2);
|
||||||
ret_vehicle->sensor = create_sensors(NB_SENSORS);
|
ret_vehicle->sensor = create_sensors(NB_SENSORS);
|
||||||
ret_vehicle->old_sensor = create_sensors(NB_SENSORS);
|
ret_vehicle->old_sensor = create_sensors(NB_SENSORS);
|
||||||
@@ -117,6 +117,7 @@ void free_sensors(sensors *snsr){
|
|||||||
}
|
}
|
||||||
|
|
||||||
void free_vehicle(struct vehicle * vhcl){
|
void free_vehicle(struct vehicle * vhcl){
|
||||||
|
pthread_mutex_destroy(&(vhcl->mut_coord));
|
||||||
free_coordinate(vhcl->coord);
|
free_coordinate(vhcl->coord);
|
||||||
free_blocks(vhcl->path);
|
free_blocks(vhcl->path);
|
||||||
free_sensors(vhcl->sensor);
|
free_sensors(vhcl->sensor);
|
||||||
@@ -392,8 +393,10 @@ void print_vehicle_n_path(struct vehicle *v, float scale_x, float scale_y){
|
|||||||
}
|
}
|
||||||
|
|
||||||
void move_vehicle(struct vehicle *v){
|
void move_vehicle(struct vehicle *v){
|
||||||
|
pthread_mutex_lock(&(v->mut_coord));
|
||||||
v->coord->x[0] += v->speed * cos(v->direction * M_PI / 180);
|
v->coord->x[0] += v->speed * cos(v->direction * M_PI / 180);
|
||||||
v->coord->x[1] -= v->speed * sin(v->direction * M_PI / 180);
|
v->coord->x[1] -= v->speed * sin(v->direction * M_PI / 180);
|
||||||
|
pthread_mutex_unlock(&(v->mut_coord));
|
||||||
}
|
}
|
||||||
|
|
||||||
float distance2_coordinate(coordinate *c0, coordinate *c1){
|
float distance2_coordinate(coordinate *c0, coordinate *c1){
|
||||||
@@ -413,11 +416,12 @@ float distance2_coordinate(coordinate *c0, coordinate *c1){
|
|||||||
diStep_sensor->x[1] -= step_sensor * sin(direction_radian);\
|
diStep_sensor->x[1] -= step_sensor * sin(direction_radian);\
|
||||||
}\
|
}\
|
||||||
dist = (distance2_coordinate(diStep_sensor, v->coord)/5);\
|
dist = (distance2_coordinate(diStep_sensor, v->coord)/5);\
|
||||||
printf("| dist :%f | ",dist);\
|
/*printf("| dist :%f | ",dist);*/\
|
||||||
v->sensor->x[position] = (float)(MIN((SUBDIVISION-1),(int)dist))/SUBDIVISION ;\
|
v->sensor->x[position] = (float)(MIN((SUBDIVISION-1),dist))/SUBDIVISION ;\
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//v->sensor->x[position] = (float)(MIN((SUBDIVISION-1),(int)dist))/SUBDIVISION ;\
|
||||||
|
|
||||||
|
|
||||||
//v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)/5))) ;\
|
//v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)/5))) ;\
|
||||||
@@ -430,7 +434,7 @@ void read_sensor(struct vehicle *v){
|
|||||||
coordinate * diStep_sensor = create_coordinate(2);
|
coordinate * diStep_sensor = create_coordinate(2);
|
||||||
copy_coordinate(diStep_sensor, v->coord->x);
|
copy_coordinate(diStep_sensor, v->coord->x);
|
||||||
float dist;
|
float dist;
|
||||||
printf("\n");
|
// printf("\n");
|
||||||
// count the number of step until we go out of the path = distance
|
// count the number of step until we go out of the path = distance
|
||||||
// center sensor
|
// center sensor
|
||||||
float direction_radian ;
|
float direction_radian ;
|
||||||
@@ -493,11 +497,23 @@ void add_string_log(struct game_status *status, char *str ){
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Produce a small random perturbation.
 *
 * Draws randd = xrand() % 500 and returns randd / 5000 with a sign chosen by
 * the parity of randd: positive when randd is even, negative when it is odd.
 * With a non-negative randd the result lies strictly between -0.1 and 0.1.
 *
 * NOTE(review): assumes xrand() yields a non-negative integer - confirm
 * against its definition. The parity test below still gives a sign of -1 or
 * +1 if the remainder were negative.
 *
 * @return signed random offset
 */
float addEpsilonRand(void){
    const int rangeRand = 500;
    const int randd = xrand() % rangeRand;
    /* odd draw -> negative offset, even draw -> positive offset */
    const int sign = (randd % 2 != 0) ? -1 : 1;

    return sign * (float)randd / (rangeRand * 10);
}
|
||||||
|
|
||||||
void step_vehicle(struct vehicle *v, int action){
|
void step_vehicle(struct vehicle *v, int action){
|
||||||
//float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT]
|
//float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT]
|
||||||
float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT]
|
float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT]
|
||||||
v->direction = (float)((int)(v->direction + action_x[action % 3]) % 360) ;
|
v->direction = (float)((int)(v->direction + action_x[action % 3]) % 360) ;
|
||||||
v->speed = SPEED; // /5;
|
//v->direction += addEpsilonRand();
|
||||||
|
//v->speed = SPEED; // /5;
|
||||||
move_vehicle(v);
|
move_vehicle(v);
|
||||||
read_sensor(v);
|
read_sensor(v);
|
||||||
struct game_status *status = v->status;
|
struct game_status *status = v->status;
|
||||||
@@ -517,6 +533,7 @@ void step_vehicle(struct vehicle *v, int action){
|
|||||||
bool broken = false;
|
bool broken = false;
|
||||||
long pprec, prec, next;
|
long pprec, prec, next;
|
||||||
char msg[48];
|
char msg[48];
|
||||||
|
//size_t count_i[path->nb_blocks];
|
||||||
for(long i=0; i< path->nb_blocks; ++i){
|
for(long i=0; i< path->nb_blocks; ++i){
|
||||||
//prec = (i-1)%(path->nb_blocks);
|
//prec = (i-1)%(path->nb_blocks);
|
||||||
pprec = (i + path->nb_blocks - 2 )%(path->nb_blocks);
|
pprec = (i + path->nb_blocks - 2 )%(path->nb_blocks);
|
||||||
@@ -531,7 +548,12 @@ void step_vehicle(struct vehicle *v, int action){
|
|||||||
status->done = false;
|
status->done = false;
|
||||||
sprintf(msg," %ld,",i);
|
sprintf(msg," %ld,",i);
|
||||||
add_string_log(status, msg);
|
add_string_log(status, msg);
|
||||||
}
|
//count_i[i] = 0;
|
||||||
|
}/*else{
|
||||||
|
if(count_i[i]>10000)
|
||||||
|
status->reward = -10;
|
||||||
|
++count_i[i];
|
||||||
|
}*/
|
||||||
if(path->marker[next] == true){
|
if(path->marker[next] == true){
|
||||||
status->reward = REWARD_STOP;
|
status->reward = REWARD_STOP;
|
||||||
status->done = true;
|
status->done = true;
|
||||||
@@ -550,6 +572,9 @@ void step_vehicle(struct vehicle *v, int action){
|
|||||||
}
|
}
|
||||||
status->cumulative_reward += status->reward;
|
status->cumulative_reward += status->reward;
|
||||||
|
|
||||||
|
if(status->cumulative_reward < -25000){
|
||||||
|
status->done = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define RANDOM 1
|
#define RANDOM 1
|
||||||
@@ -565,28 +590,29 @@ void reset(struct vehicle *v){
|
|||||||
sprintf(v->status->log,"\n");
|
sprintf(v->status->log,"\n");
|
||||||
v->status->cur_log = 0;
|
v->status->cur_log = 0;
|
||||||
//if(init){
|
//if(init){
|
||||||
srand(time(NULL));
|
//srand(time(NULL));
|
||||||
// init = false;
|
//init = false;
|
||||||
//}
|
//}
|
||||||
int random;
|
int random;
|
||||||
int diff;
|
int diff;
|
||||||
diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0];
|
diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0];
|
||||||
random = rand() % (diff/2) ;
|
random = xrand() % (diff/2) ;
|
||||||
#if RANDOM
|
#if RANDOM
|
||||||
v->coord->x[0] = path->lower_bound_block[0]->x[0] + random;
|
v->coord->x[0] = path->lower_bound_block[0]->x[0] + random;
|
||||||
#else
|
#else
|
||||||
v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2;
|
v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2;
|
||||||
#endif
|
#endif
|
||||||
diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1];
|
diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1];
|
||||||
random = rand() % (diff/2);
|
random = xrand() % (diff/2);
|
||||||
#if RANDOM
|
#if RANDOM
|
||||||
v->coord->x[1] = path->lower_bound_block[0]->x[1] + random;
|
v->coord->x[1] = path->lower_bound_block[0]->x[1] + random;
|
||||||
#else
|
#else
|
||||||
v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2;
|
v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2;
|
||||||
#endif
|
#endif
|
||||||
random = rand() % 50;
|
random = xrand() % 50;
|
||||||
#if RANDOM
|
#if RANDOM
|
||||||
v->direction = random - 25;
|
// v->direction = 115 - random ;
|
||||||
|
v->direction = random - 25 ;
|
||||||
#else
|
#else
|
||||||
v->direction = -90;
|
v->direction = -90;
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -82,6 +82,7 @@ struct blocks {
|
|||||||
typedef tensor_TYPE_FLOAT sensors;
|
typedef tensor_TYPE_FLOAT sensors;
|
||||||
|
|
||||||
struct vehicle {
|
struct vehicle {
|
||||||
|
pthread_mutex_t mut_coord;
|
||||||
coordinate *coord;
|
coordinate *coord;
|
||||||
float direction;
|
float direction;
|
||||||
float speed;
|
float speed;
|
||||||
|
|||||||
@@ -401,7 +401,7 @@ float df(float x){
|
|||||||
return exp(-x)/ ((1+exp(-x)) * (1+exp(-x)));
|
return exp(-x)/ ((1+exp(-x)) * (1+exp(-x)));
|
||||||
}
|
}
|
||||||
#if 1
|
#if 1
|
||||||
TEST(first_learn_vehicle_rev50){
|
TEST(first_learn_vehicle_rev50_8){
|
||||||
size_t nb_block = 7;
|
size_t nb_block = 7;
|
||||||
size_t dim= 2;
|
size_t dim= 2;
|
||||||
struct blocks * path = create_blocks(nb_block, dim);
|
struct blocks * path = create_blocks(nb_block, dim);
|
||||||
@@ -467,8 +467,8 @@ TEST(first_learn_vehicle_rev50){
|
|||||||
|
|
||||||
struct status_qlearning *qlstatus = create_status_qlearning ();
|
struct status_qlearning *qlstatus = create_status_qlearning ();
|
||||||
struct delay_params *dly = create_delay_params (
|
struct delay_params *dly = create_delay_params (
|
||||||
100/*size_t delay_between_episodes*/,
|
500/*size_t delay_between_episodes*/,
|
||||||
10/*size_t delay_between_games*/
|
50/*size_t delay_between_games*/
|
||||||
);
|
);
|
||||||
|
|
||||||
struct qlearning_params *qlparams = create_qlearning_params (
|
struct qlearning_params *qlparams = create_qlearning_params (
|
||||||
@@ -510,7 +510,7 @@ TEST(first_learn_vehicle_rev50){
|
|||||||
|
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
TEST(first_learn_vehicle_50){
|
TEST(first_learn_vehicle_50__9){
|
||||||
size_t nb_block = 7;
|
size_t nb_block = 7;
|
||||||
size_t dim= 2;
|
size_t dim= 2;
|
||||||
struct blocks * path = create_blocks(nb_block, dim);
|
struct blocks * path = create_blocks(nb_block, dim);
|
||||||
@@ -518,7 +518,23 @@ TEST(first_learn_vehicle_50){
|
|||||||
|
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
|
copy_coordinate(path->lower_bound_block[4], (float[]){0,0});
|
||||||
|
copy_coordinate(path->upper_bound_block[4], (float[]){150,250});
|
||||||
|
copy_coordinate(path->lower_bound_block[3], (float[]){150,40});
|
||||||
|
copy_coordinate(path->upper_bound_block[3], (float[]){250,150});
|
||||||
|
copy_coordinate(path->lower_bound_block[2], (float[]){250,80});
|
||||||
|
copy_coordinate(path->upper_bound_block[2], (float[]){360,200});
|
||||||
|
copy_coordinate(path->lower_bound_block[1], (float[]){360,70});
|
||||||
|
copy_coordinate(path->upper_bound_block[1], (float[]){600,150});
|
||||||
|
copy_coordinate(path->lower_bound_block[0], (float[]){600,90});
|
||||||
|
copy_coordinate(path->upper_bound_block[0], (float[]){760,300});
|
||||||
|
copy_coordinate(path->lower_bound_block[6], (float[]){260,300});
|
||||||
|
copy_coordinate(path->upper_bound_block[6], (float[]){760,360});
|
||||||
|
copy_coordinate(path->lower_bound_block[5], (float[]){0,250});
|
||||||
|
copy_coordinate(path->upper_bound_block[5], (float[]){410,300});
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
|
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
|
||||||
copy_coordinate(path->upper_bound_block[0], (float[]){150,250});
|
copy_coordinate(path->upper_bound_block[0], (float[]){150,250});
|
||||||
copy_coordinate(path->lower_bound_block[1], (float[]){150,0});
|
copy_coordinate(path->lower_bound_block[1], (float[]){150,0});
|
||||||
@@ -534,7 +550,6 @@ TEST(first_learn_vehicle_50){
|
|||||||
copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
|
copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
|
||||||
copy_coordinate(path->upper_bound_block[6], (float[]){410,300});
|
copy_coordinate(path->upper_bound_block[6], (float[]){410,300});
|
||||||
|
|
||||||
/*
|
|
||||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
|
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
|
||||||
copy_coordinate(path->upper_bound_block[0], (float[]){100,250});
|
copy_coordinate(path->upper_bound_block[0], (float[]){100,250});
|
||||||
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
|
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
|
||||||
@@ -611,8 +626,8 @@ TEST(first_learn_vehicle_50){
|
|||||||
|
|
||||||
struct status_qlearning *qlstatus = create_status_qlearning ();
|
struct status_qlearning *qlstatus = create_status_qlearning ();
|
||||||
struct delay_params *dly = create_delay_params (
|
struct delay_params *dly = create_delay_params (
|
||||||
100/*size_t delay_between_episodes*/,
|
500/*size_t delay_between_episodes*/,
|
||||||
10/*size_t delay_between_games*/
|
50/*size_t delay_between_games*/
|
||||||
);
|
);
|
||||||
|
|
||||||
struct qlearning_params *qlparams = create_qlearning_params (
|
struct qlearning_params *qlparams = create_qlearning_params (
|
||||||
@@ -653,7 +668,7 @@ TEST(first_learn_vehicle_50){
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
#if 1
|
||||||
TEST(first_learn_vehicle){
|
TEST(first_learn_vehicle){
|
||||||
size_t nb_block = 7;
|
size_t nb_block = 7;
|
||||||
size_t dim= 2;
|
size_t dim= 2;
|
||||||
@@ -763,7 +778,7 @@ TEST(first_learn_vehicle){
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
#if 1
|
||||||
TEST(first_learn_vehicle){
|
TEST(first_learn_vehicle){
|
||||||
size_t nb_block = 7;
|
size_t nb_block = 7;
|
||||||
size_t dim= 2;
|
size_t dim= 2;
|
||||||
|
|||||||
@@ -768,6 +768,7 @@ neurons_##type * calculate_output_by_network_neurons_##type(neurons_##type *base
|
|||||||
if(tmp->next_layer==NULL){\
|
if(tmp->next_layer==NULL){\
|
||||||
/*print_tensor_msg_##type(tmp->output,"retult");*/\
|
/*print_tensor_msg_##type(tmp->output,"retult");*/\
|
||||||
*output_link = tmp->output;\
|
*output_link = tmp->output;\
|
||||||
|
\
|
||||||
return tmp;\
|
return tmp;\
|
||||||
}\
|
}\
|
||||||
tmp = tmp->next_layer;\
|
tmp = tmp->next_layer;\
|
||||||
|
|||||||
+74
-2
@@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
//#include "permutation_t/permutation_t.h"
|
//#include "permutation_t/permutation_t.h"
|
||||||
#include "neuron_t/neuron_t.h"
|
#include "neuron_t/neuron_t.h"
|
||||||
|
#include "neuron_t/nneuron_t_file.h"
|
||||||
|
|
||||||
#define VALGRIND_ 1
|
#define VALGRIND_ 1
|
||||||
|
|
||||||
@@ -135,7 +136,6 @@ TEST(learning_first){
|
|||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
free_data_set_TYPE_FLOAT(ds);
|
free_data_set_TYPE_FLOAT(ds);
|
||||||
free_neurons_TYPE_FLOAT(bn);
|
free_neurons_TYPE_FLOAT(bn);
|
||||||
|
|
||||||
@@ -145,7 +145,7 @@ TEST(learning_first){
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
TEST(learning_second){
|
TEST(learning_second_PRINT){
|
||||||
bool rec_randomizeInitWeight = randomizeInitWeight;
|
bool rec_randomizeInitWeight = randomizeInitWeight;
|
||||||
randomizeInitWeight =false;
|
randomizeInitWeight =false;
|
||||||
|
|
||||||
@@ -184,6 +184,9 @@ TEST(learning_second){
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, bn, input, " bn input");
|
||||||
|
PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, bn, output, " bn output");
|
||||||
|
PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, bn, bias, " bn bias");
|
||||||
|
|
||||||
free_data_set_TYPE_FLOAT(ds);
|
free_data_set_TYPE_FLOAT(ds);
|
||||||
free_neurons_TYPE_FLOAT(bn);
|
free_neurons_TYPE_FLOAT(bn);
|
||||||
@@ -383,6 +386,75 @@ TEST(copy_weight_in_neurons){
|
|||||||
print_tensor_msg_TYPE_FLOAT(linked_tens,msg);
|
print_tensor_msg_TYPE_FLOAT(linked_tens,msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, bn, weight_in, ".ff_bn_weight_in.txt")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
free_data_set_TYPE_FLOAT(ds);
|
||||||
|
free_neurons_TYPE_FLOAT(bn);
|
||||||
|
free_neurons_TYPE_FLOAT(cpyn);
|
||||||
|
|
||||||
|
LOG("reps = %ld\n",reps);
|
||||||
|
randomizeInitWeight = rec_randomizeInitWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
TEST(Extract_weight_in_neurons){
|
||||||
|
bool rec_randomizeInitWeight = randomizeInitWeight;
|
||||||
|
randomizeInitWeight =false;
|
||||||
|
|
||||||
|
data_set_TYPE_FLOAT *ds= fill_data_set_from_file_TYPE_FLOAT("xor.txt",1);
|
||||||
|
// print_data_set_msg_TYPE_FLOAT(ds,"data");
|
||||||
|
config_layers *pconf = create_config_layers_from_OneD(3,(size_t[]){2,4,1}); /* 2 input , 1 target; 1 hidden layer with 5 neurons */
|
||||||
|
neurons_TYPE_FLOAT *bn=NULL, *tmp ;
|
||||||
|
neurons_TYPE_FLOAT *cpyn=NULL;
|
||||||
|
//setup_networks_alloutputs_config_GLOBAL_rdm01_TYPE_FLOAT(setup_networks_alloutputs_config_TYPE_FLOAT(&bn,pconf);bn,pconf);
|
||||||
|
setup_networks_alloutputs_config_TYPE_FLOAT(&bn,pconf,false,0,1,5000);
|
||||||
|
setup_networks_alloutputs_config_TYPE_FLOAT(&cpyn, pconf,false,0,1,5000);
|
||||||
|
|
||||||
|
setup_all_layers_functions_TYPE_FLOAT(bn,
|
||||||
|
tensorContractnProdThread_TYPE_FLOAT,
|
||||||
|
tensorProdThread_TYPE_FLOAT,
|
||||||
|
DL,
|
||||||
|
L,
|
||||||
|
f,
|
||||||
|
df);
|
||||||
|
|
||||||
|
setup_all_layers_params_TYPE_FLOAT(bn, 5, 1 , 0.1);
|
||||||
|
|
||||||
|
|
||||||
|
size_t reps = learning_online2_neurons_TYPE_FLOAT(bn,ds,cond);
|
||||||
|
EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, bn, weight_in, ".ff_bn_weight_in__toExtract.txt")
|
||||||
|
|
||||||
|
setup_all_layers_functions_TYPE_FLOAT(cpyn,
|
||||||
|
tensorContractnProdThread_TYPE_FLOAT,
|
||||||
|
tensorProdThread_TYPE_FLOAT,
|
||||||
|
DL,
|
||||||
|
L,
|
||||||
|
f,
|
||||||
|
df);
|
||||||
|
|
||||||
|
setup_all_layers_params_TYPE_FLOAT(cpyn, 5, 1 , 0.1);
|
||||||
|
|
||||||
|
EXTRACT_FILE_TO_TENSOR_ATTRIBUTE_NNEURONS(TYPE_FLOAT, cpyn, weight_in, ".ff_bn_weight_in__toExtract.txt")
|
||||||
|
// copy_weight_in_neurons_TYPE_FLOAT(cpyn, bn);
|
||||||
|
|
||||||
|
char msg[256];
|
||||||
|
tensor_TYPE_FLOAT * linked_tens = NULL;
|
||||||
|
for(size_t i=0; i<ds->size; ++i){
|
||||||
|
// print_predict_by_network_with_error_neurons_TYPE_FLOAT(bn,ds->input[i],ds->target[i]);
|
||||||
|
// print_predict_by_network_with_error_neurons_TYPE_FLOAT(cpyn,ds->input[i],ds->target[i]);
|
||||||
|
calculate_output_by_network_neurons_TYPE_FLOAT(bn,ds->input[i],&linked_tens);
|
||||||
|
sprintf(msg," output base %ld ",i);
|
||||||
|
print_tensor_msg_TYPE_FLOAT(linked_tens,msg);
|
||||||
|
calculate_output_by_network_neurons_TYPE_FLOAT(cpyn,ds->input[i],&linked_tens);
|
||||||
|
sprintf(msg," output copy %ld ",i);
|
||||||
|
print_tensor_msg_TYPE_FLOAT(linked_tens,msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
EXPORT_TO_FILE_TENSOR_ATTRIBUTE_IN_NNEURONS(TYPE_FLOAT, cpyn, weight_in, ".ff_bn_weight_in__exportedCPYfromExtract.txt")
|
||||||
|
|
||||||
|
|
||||||
free_data_set_TYPE_FLOAT(ds);
|
free_data_set_TYPE_FLOAT(ds);
|
||||||
|
|||||||
@@ -83,10 +83,10 @@ struct game_status * create_game_status(){
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define GENERATE_RANDOM_PLACES(CONTENT) \
|
#define GENERATE_RANDOM_PLACES(CONTENT) \
|
||||||
int CONTENT##_Number = rand() % (params->limit_##CONTENT##_number) + 1;\
|
int CONTENT##_Number = xrand() % (params->limit_##CONTENT##_number) + 1;\
|
||||||
for(int i=0; i< CONTENT##_Number; ++i) { \
|
for(int i=0; i< CONTENT##_Number; ++i) { \
|
||||||
do{\
|
do{\
|
||||||
random = rand() % (dim->rank);\
|
random = xrand() % (dim->rank);\
|
||||||
}while((gm->cells[random]).content != EMPTY);\
|
}while((gm->cells[random]).content != EMPTY);\
|
||||||
(gm->cells[random]).content = CONTENT;\
|
(gm->cells[random]).content = CONTENT;\
|
||||||
}
|
}
|
||||||
@@ -101,13 +101,13 @@ long int generate_game(struct game *gm){
|
|||||||
for(long int j=0; j < ACTION_COUNT; ++j)
|
for(long int j=0; j < ACTION_COUNT; ++j)
|
||||||
(gm->cells[i]).Q[j] = 0;
|
(gm->cells[i]).Q[j] = 0;
|
||||||
}
|
}
|
||||||
srand(time(NULL));
|
//srand(time(NULL));
|
||||||
int random;
|
int random;
|
||||||
GENERATE_RANDOM_PLACES(FOX)
|
GENERATE_RANDOM_PLACES(FOX)
|
||||||
GENERATE_RANDOM_PLACES(CARROT)
|
GENERATE_RANDOM_PLACES(CARROT)
|
||||||
GENERATE_RANDOM_PLACES(BLOCK)
|
GENERATE_RANDOM_PLACES(BLOCK)
|
||||||
do{
|
do{
|
||||||
random = rand() % (dim->rank);
|
random = xrand() % (dim->rank);
|
||||||
}while((gm->cells[random]).content != EMPTY);
|
}while((gm->cells[random]).content != EMPTY);
|
||||||
(gm->cells[random]).content = START;
|
(gm->cells[random]).content = START;
|
||||||
|
|
||||||
@@ -289,7 +289,7 @@ void mainQlearning_game(struct game *gm){
|
|||||||
int random;
|
int random;
|
||||||
long int NUMBER_EPISODE2 = (params->limit_EPISODES_number) * (params->limit_EPISODES_number);
|
long int NUMBER_EPISODE2 = (params->limit_EPISODES_number) * (params->limit_EPISODES_number);
|
||||||
double proba_explor;
|
double proba_explor;
|
||||||
srand(time(NULL));
|
//srand(time(NULL));
|
||||||
|
|
||||||
for(long int k=0 ; k < params->limit_game_number; ++k){
|
for(long int k=0 ; k < params->limit_game_number; ++k){
|
||||||
generate_game(gm);
|
generate_game(gm);
|
||||||
@@ -297,10 +297,10 @@ void mainQlearning_game(struct game *gm){
|
|||||||
reset_game_status(status);
|
reset_game_status(status);
|
||||||
|
|
||||||
while(!(status->endGame)){
|
while(!(status->endGame)){
|
||||||
random = rand() % NUMBER_EPISODE2;
|
random = xrand() % NUMBER_EPISODE2;
|
||||||
proba_explor = (double)random / NUMBER_EPISODE2;
|
proba_explor = (double)random / NUMBER_EPISODE2;
|
||||||
if( proba_explor < qlearnParams->exploration_factor * (1. / ((episode / 10 ) + 1))){
|
if( proba_explor < qlearnParams->exploration_factor * (1. / ((episode / 10 ) + 1))){
|
||||||
action = rand() % ACTION_COUNT;
|
action = xrand() % ACTION_COUNT;
|
||||||
printf("exploration action ");
|
printf("exploration action ");
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
|
|||||||
@@ -150,10 +150,11 @@ int copy_tensor_##type(tensor_##type * dst, tensor_##type * src){\
|
|||||||
}\
|
}\
|
||||||
}\
|
}\
|
||||||
void init_random_x_##type(tensor_##type *M, type minR, type maxR, int randomRange){\
|
void init_random_x_##type(tensor_##type *M, type minR, type maxR, int randomRange){\
|
||||||
srand(time(NULL));\
|
/*static bool initRandomFirst = true;\
|
||||||
|
if(initRandomFirst){ srand(time(NULL)); initRandomFirst = false;}*/\
|
||||||
int randVal;\
|
int randVal;\
|
||||||
for(size_t i =0; i<(M->dim)->rank;++i){\
|
for(size_t i =0; i<(M->dim)->rank;++i){\
|
||||||
randVal = rand() % randomRange;\
|
randVal = xrand() % randomRange;\
|
||||||
M->x[i]=minR + (maxR-minR)*randVal / randomRange ;\
|
M->x[i]=minR + (maxR-minR)*randVal / randomRange ;\
|
||||||
\
|
\
|
||||||
}\
|
}\
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
|
||||||
|
|
||||||
// to define DEBUG in gcc cli do: gcc -D DEBUG=1 or 0 if need!
|
// to define DEBUG in gcc cli do: gcc -D DEBUG=1 or 0 if need!
|
||||||
#ifndef DEBUG
|
#ifndef DEBUG
|
||||||
@@ -44,7 +46,13 @@ extern long int PRECISION_TYPE_L_DOUBLE ;
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
int xrand();
|
||||||
|
float frand();
|
||||||
|
|
||||||
void gotoxy(int x, int y);
|
void gotoxy(int x, int y);
|
||||||
|
|
||||||
|
void clear_screen();
|
||||||
|
|
||||||
//void get_cursor_position(int *col, int *rows);
|
//void get_cursor_position(int *col, int *rows);
|
||||||
|
|
||||||
#if DEBUG
|
#if DEBUG
|
||||||
|
|||||||
Binary file not shown.
@@ -7,6 +7,8 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
|
||||||
|
|
||||||
// to define DEBUG in gcc cli do: gcc -D DEBUG=1 or 0 if need!
|
// to define DEBUG in gcc cli do: gcc -D DEBUG=1 or 0 if need!
|
||||||
#ifndef DEBUG
|
#ifndef DEBUG
|
||||||
@@ -44,7 +46,13 @@ extern long int PRECISION_TYPE_L_DOUBLE ;
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
int xrand();
|
||||||
|
float frand();
|
||||||
|
|
||||||
void gotoxy(int x, int y);
|
void gotoxy(int x, int y);
|
||||||
|
|
||||||
|
void clear_screen();
|
||||||
|
|
||||||
//void get_cursor_position(int *col, int *rows);
|
//void get_cursor_position(int *col, int *rows);
|
||||||
|
|
||||||
#if DEBUG
|
#if DEBUG
|
||||||
|
|||||||
@@ -1,6 +1,107 @@
|
|||||||
#include "tools_t/tools_t.h"
|
#include "tools_t/tools_t.h"
|
||||||
|
|
||||||
|
#define POW 17
#define MMOD ((1 << (POW)) - 1)
#define SUBA 5
#define SUBB 8

/*
 * xrand - deterministic pseudo-random integer generator.
 *
 * Advances a linear congruential recurrence
 *     x' = (a * x + b) mod m
 * with m = 2^POW - 1, a = 2^(POW-SUBA) - 1 and b = 2^(POW-SUBB) - 1.
 * The state lives in a function-local static that starts at 0 and is never
 * seeded, so every program run produces the same sequence.
 *
 * Returns the new state, a value in [0, MMOD).
 */
int xrand(){
    static int state = 0;
    const int multiplier = (1 << (POW - (SUBA))) - 1;
    const int increment  = (1 << (POW - (SUBB))) - 1;

    /* multiplier * state stays below INT_MAX for these constants. */
    state = (multiplier * state + increment) % MMOD;
    return state;
}
|
||||||
|
int irand(){
|
||||||
|
int mod = MMOD; // (1 << 17) - 1;
|
||||||
|
int a = (1<<(POW-(SUBA))) - 1;
|
||||||
|
int b = (1 << (POW-(SUBB))) - 1;
|
||||||
|
static int xi = 0;
|
||||||
|
int xii = (a * xi + b)%mod;
|
||||||
|
//float ret = (float) xii / mod;
|
||||||
|
|
||||||
|
//printf("[a:%d * xi:%6d + b:%d ] %% %d = %6d :: %.7f | ",a,xi,b,mod,xii,ret);
|
||||||
|
|
||||||
|
xi = xii;
|
||||||
|
|
||||||
|
return xii;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * frand - pseudo-random float taken from the C library generator.
 *
 * Draws one value from rand() and divides it by RAND_MAX, so the result
 * lies in [0, 1]. Seeding is left to the caller (srand is not called here).
 */
float frand(){
    return (float) rand() / RAND_MAX;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
int xrand(){
|
||||||
|
static int randMod = 1;
|
||||||
|
static bool init = true;
|
||||||
|
if(init){
|
||||||
|
init = false;
|
||||||
|
struct timespec start_t;
|
||||||
|
clock_gettime(CLOCK_REALTIME, &start_t);
|
||||||
|
srand(start_t.tv_nsec);
|
||||||
|
//srand(start_t.tv_nsec - start_t.tv_sec);
|
||||||
|
}
|
||||||
|
int ret = rand ();
|
||||||
|
if(ret % 7 == randMod % 11){
|
||||||
|
init = true;
|
||||||
|
randMod = ret + 1;
|
||||||
|
}
|
||||||
|
return ret ;
|
||||||
|
}
|
||||||
|
|
||||||
|
float frand(){
|
||||||
|
int max = 50000;
|
||||||
|
static int rnd = 0;
|
||||||
|
rnd = (xrand())%max;
|
||||||
|
printf("[%6d / %6d = %.6f | ", (rnd), max,(float)(rnd)/max);
|
||||||
|
return (float)(rnd)/max;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
void gotoxy(int x, int y)
|
void gotoxy(int x, int y)
|
||||||
{
|
{
|
||||||
@@ -8,6 +109,21 @@ void gotoxy(int x, int y)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * clear_screen - blank the terminal by printing rows of spaces.
 *
 * Queries the window size of file descriptor 1 with TIOCGWINSZ and prints
 * (rows - 5) lines, each made of (columns + 1) spaces followed by a newline.
 * If the size query fails (for example fd 1 is not a terminal), nothing is
 * printed.
 */
void clear_screen(){
    struct winsize w;

    /* On failure w is left uninitialised; bail out rather than use garbage sizes. */
    if (ioctl(1, TIOCGWINSZ, &w) == -1) return;

    /* "%*s" with an empty string emits exactly `width` spaces, so no stack
       buffer is needed and no terminator can land past its end. */
    const int width = w.ws_col + 1;
    const int lines = w.ws_row - 5;

    for (int i = 0; i < lines; ++i) printf("%*s\n", width, "");
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
void get_cursor_position(int *col, int *rows)
|
void get_cursor_position(int *col, int *rows)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user