First try at deep Q-learning

This commit is contained in:
2024-06-21 00:28:50 +02:00
parent e96304bee7
commit 582d7a6a70
7 changed files with 401 additions and 70 deletions
+148 -21
View File
@@ -1,5 +1,6 @@
#include "learn_to_drive.h"
char *action_name[8] = {"LEFT", "CENTER", "RIGHT"};
float reLU(float x){
if(x>0) return x;
@@ -28,21 +29,30 @@ void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * netwo
struct networks_qlearning * create_nework_qlearning(
struct config_layers * config,
bool randomize, float minR, float maxR, int randomRange
bool randomize, float minR, float maxR, int randomRange,
size_t nb_prod_thread,
size_t nb_calc_thread,
float learning_rate
){
struct networks_qlearning *qnets = malloc(sizeof(struct networks_qlearning));
qnets->config = config;
setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->main_net), config,
random, minR, maxR, randomRange);
setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->target_net), config,
false, minR, maxR, randomRange);
setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->main_net), config, random, minR, maxR, randomRange);
setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->target_net), config, false, minR, maxR, randomRange);
copy_weight_in_networks_from_main_to_target(qnets);
setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->best_net), config,
false, minR, maxR, randomRange);
setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->best_net), config, false, minR, maxR, randomRange);
copy_weight_in_networks_from_main_to_best(qnets);
setup_all_layers_functions_TYPE_FLOAT(qnets->main_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU);
setup_all_layers_params_TYPE_FLOAT(qnets->main_net, nb_prod_thread, nb_calc_thread, learning_rate);
setup_all_layers_functions_TYPE_FLOAT(qnets->target_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU);
setup_all_layers_params_TYPE_FLOAT(qnets->target_net, nb_prod_thread, nb_calc_thread, learning_rate);
setup_all_layers_functions_TYPE_FLOAT(qnets->best_net, tensorContractnProdThread_TYPE_FLOAT, tensorProdThread_TYPE_FLOAT, D_L2, L2, reLU, d_reLU);
setup_all_layers_params_TYPE_FLOAT(qnets->best_net, nb_prod_thread, nb_calc_thread, learning_rate);
return qnets;
}
@@ -54,6 +64,10 @@ struct status_qlearning * create_status_qlearning (){
status_ql->list_target_cumul = create_var_list_TYPE_L_INT();
status_ql->progress_best_cumul = create_var_list_TYPE_L_INT();
//push_back_list_TYPE_L_INT(status_ql->list_main_cumul, 0);
//push_back_list_TYPE_L_INT(status_ql->list_target_cumul, 0);
push_back_list_TYPE_L_INT(status_ql->progress_best_cumul, -10000);
status_ql->nb_training_after_updated_weight_in_target = 0;
return status_ql;
@@ -67,15 +81,33 @@ struct delay_params * create_delay_params (
delay->delay_between_episodes = delay_between_episodes;
delay->delay_between_games = delay_between_games;
return delay;
}
/*
 * Allocate and initialize the display parameters of the agent.
 *
 * scale_x, scale_y  stored as-is in the returned object.
 * delay             stored as-is; free_print_params() releases it together
 *                   with the returned object.
 *
 * The object starts with `printed` set to true, an initialized mutex, and
 * `string_space` filled with blanks and NUL-terminated.
 *
 * Returns the new object, or NULL if the allocation fails.
 */
struct print_params * create_print_params(float scale_x, float scale_y, struct delay_params * delay){
    struct print_params * pprint = malloc(sizeof *pprint);
    if(pprint == NULL) return NULL;

    pprint->printed = true;
    pprint->scale_x = scale_x;
    pprint->scale_y = scale_y;
    pprint->delay = delay;
    pthread_mutex_init(&(pprint->mut_printed), NULL);

    /* string_space has exactly LOG_LENTH elements, so the terminator must go
     * in the last slot; writing it at index LOG_LENTH would be one byte past
     * the end of the array. */
    const size_t capacity = sizeof pprint->string_space;
    size_t i;
    for(i = 0; i + 1 < capacity; ++i)
        pprint->string_space[i] = ' ';
    pprint->string_space[i] = '\0';

    return pprint;
}
struct qlearning_params * create_qlearning_params (
double gamma,
double learning_rate,
double discount_factor,
double exploration_factor,
long int nb_training_before_update_weight_in_target
float gamma,
float learning_rate,
float discount_factor,
float exploration_factor,
long int nb_training_before_update_weight_in_target,
size_t number_episodes
){
struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params));
@@ -85,6 +117,13 @@ struct qlearning_params * create_qlearning_params (
qparams->exploration_factor = exploration_factor ;
qparams->nb_training_before_update_weight_in_target = nb_training_before_update_weight_in_target;
qparams->number_episodes = number_episodes;
qparams->factor_update_learning_rate = 0.995;
qparams->minimum_threshold_learning_rate = 0.0001 ;
qparams->factor_update_exploration_factor = 0.995;
qparams->minimum_threshold_exploration_factor = 0.01;
return qparams;
}
@@ -93,7 +132,7 @@ struct RL_agent * create_RL_agent (
struct networks_qlearning * networks,
struct vehicle * car,
struct status_qlearning * status,
struct delay_params * delay,
struct print_params * pprint,
struct qlearning_params *qlearnParams
){
struct RL_agent * rlagent = malloc(sizeof(struct RL_agent));
@@ -101,7 +140,7 @@ struct RL_agent * create_RL_agent (
rlagent->networks = networks ;
rlagent->car = car ;
rlagent->status = status ;
rlagent->delay = delay ;
rlagent->pprint = pprint ;
rlagent->qlearnParams = qlearnParams ;
return rlagent;
@@ -123,12 +162,20 @@ void free_status_qlearning(struct status_qlearning *status_ql){
/* Release a delay_params object; a NULL argument is accepted (free() ignores it). */
void free_delay_params (struct delay_params *dly_p)
{
    free(dly_p);
}
/*
 * Release a print_params object: destroys its mutex, frees the delay_params
 * it holds, then frees the object itself.
 * A NULL argument is ignored, like free().
 */
void free_print_params (struct print_params *pprint){
    if(pprint == NULL) return;
    pthread_mutex_destroy(&(pprint->mut_printed));
    free_delay_params(pprint->delay);
    free(pprint);
}
/* Release a qlearning_params object; a NULL argument is accepted (free() ignores it). */
void free_qlearning_params(struct qlearning_params *q_params)
{
    free(q_params);
}
void free_RL_agent(struct RL_agent *rlAgent){
free(rlAgent->qlearnParams);
free(rlAgent->delay);
free_print_params(rlAgent->pprint);
free_status_qlearning(rlAgent->status);
free_networks_qlearning(rlAgent->networks);
free_vehicle(rlAgent->car);
@@ -137,25 +184,105 @@ void free_RL_agent(struct RL_agent *rlAgent){
}
void train_qlearning(struct RL_agent * rlAgent,
int action /* */,
tensor_TYPE_FLOAT * new_state /*input*/,
tensor_TYPE_FLOAT * state /*input*/,
long reward){
int action //, long reward
){
tensor_TYPE_FLOAT * action_value = NULL;
tensor_TYPE_FLOAT * next_action_value = NULL;
neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net;
neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net;
tensor_TYPE_FLOAT * new_state = rlAgent->car->sensor /*input*/;
tensor_TYPE_FLOAT * state = rlAgent->car->old_sensor /*input*/;
calculate_output_by_network_neurons_TYPE_FLOAT(net_main, state, &action_value);
calculate_output_by_network_neurons_TYPE_FLOAT(net_target, state, &next_action_value);
calculate_output_by_network_neurons_TYPE_FLOAT(net_target, new_state, &next_action_value);
tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim);
struct game_status * car_status = rlAgent->car->status;
struct qlearning_params * qlParams = rlAgent->qlearnParams;
copy_tensor_TYPE_FLOAT(experimental_values, action_value) ;
// experimental_values === Q-tab learning
if(car_status->done){
experimental_values->x[action] = -100;
}else {
experimental_values->x[action] = reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ;
experimental_values->x[action] = car_status->reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ;
}
float new_value = ( (net_main->learning_rate < qlParams->minimum_threshold_learning_rate /*0.0001*/) ? net_main->learning_rate :(net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, net_main, learning_rate, new_value);
qlParams->exploration_factor = (qlParams->exploration_factor < qlParams->minimum_threshold_exploration_factor) ? qlParams->exploration_factor : qlParams->exploration_factor * qlParams->factor_update_exploration_factor ;
}
/*
 * Choose the next action with an epsilon-greedy rule.
 *
 * The main network is evaluated on the car's `old_sensor` tensor. With
 * probability `qlearnParams->exploration_factor` a random action index is
 * returned; otherwise the index of the largest network output is returned.
 * As in the rest of this file, `dim->rank` of the output tensor is used as
 * the number of selectable actions.
 *
 * Returns the selected action index.
 */
int select_action(struct RL_agent * rlAgent){
    /* Seed the generator only once. Re-seeding with time(NULL) on every call
     * restarts the same sequence for all calls made within the same second,
     * so every draw in that second would be identical. */
    static bool seeded = false;
    if(!seeded){
        srand((unsigned int)time(NULL));
        seeded = true;
    }

    int action;
    tensor_TYPE_FLOAT * action_value = NULL;
    calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->old_sensor, &action_value);

    /* Uniform draw in [0, 1). Dividing by RAND_MAX + 1 works for any
     * RAND_MAX and does not depend on number_episodes (a modulus of
     * number_episodes squared can exceed RAND_MAX, or be zero). */
    double proba_explor = (double)rand() / ((double)RAND_MAX + 1.0);

    if(proba_explor < rlAgent->qlearnParams->exploration_factor){
        action = rand() % action_value->dim->rank ;
    }
    else{
        action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank );
    }
    return action;
}
/*
 * Training loop of the agent.
 *
 * Repeats, without ever returning, a run of `qlearnParams->number_episodes`
 * episodes. Each episode resets the car and then loops on:
 *   select an action -> log it -> step the vehicle -> train_qlearning(),
 * optionally drawing the vehicle and the main network outputs when
 * `pprint->printed` is set. The main weights are copied to the target
 * network each time the step counter exceeds
 * `nb_training_before_update_weight_in_target`. An episode ends when the
 * car status reports `done`; a cumulative reward higher than the last
 * recorded best is appended to `progress_best_cumul` and the list is printed.
 */
void learn_to_drive(struct RL_agent * rlAgent){
    int action;
    struct vehicle * car = rlAgent->car;
    struct game_status * car_status = car->status;
    struct qlearning_params * qlParams = rlAgent->qlearnParams;
    struct status_qlearning * qlStatus = rlAgent->status;
    struct print_params * pprint = rlAgent->pprint;
    char msg[100];
    while(true){
        for(size_t index_episode = 0; index_episode < qlParams->number_episodes; ++index_episode){
            reset(car);
            qlStatus->nb_training_after_updated_weight_in_target = 0;
            while(true){
                ++(qlStatus->nb_training_after_updated_weight_in_target);
                action = select_action(rlAgent);
                /* Bounded formatting: msg is a fixed-size buffer. */
                snprintf(msg, sizeof msg, " dir:%.0f : %s, ", car->direction ,action_name[action]);
                add_string_log_M(car_status,msg);
                step_vehicle(car, action);
                train_qlearning(rlAgent, action);
                if(pprint->printed){
                    pthread_mutex_lock(&(pprint->mut_printed));
                    print_vehicle_n_path(car, pprint->scale_x, pprint->scale_y);
                    pthread_mutex_unlock(&(pprint->mut_printed));
                    printf("%s ",pprint->string_space);
                    /* index_episode is a size_t: convert explicitly to the
                     * type expected by %ld. */
                    printf("ep: %ld ",(long)index_episode);
                    neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net;
                    for(size_t i=0; i<net_main->output->dim->rank; ++i) printf("{sensro[%s]:%f }",action_name[i%COUNT_ACTION],net_main->output->x[i]);
                    Sleep(pprint->delay->delay_between_games);
                }
                //done in step ... copy_tensor_TYPE_FLOAT(car->old_sensor, car->sensor);
                if( qlStatus->nb_training_after_updated_weight_in_target > qlParams->nb_training_before_update_weight_in_target ){
                    qlStatus->nb_training_after_updated_weight_in_target = 0;
                    copy_weight_in_networks_from_main_to_target(rlAgent->networks);
                }
                if(car_status->done == true){
                    //push_back_list_TYPE_L_INT(qlStatus->list_main_cumul, car_status->cumulative_reward);
                    printf(" cumul : %ld ", car_status->cumulative_reward);
                    if(car_status->cumulative_reward > qlStatus->progress_best_cumul->end_list->value){
                        push_back_list_TYPE_L_INT(qlStatus->progress_best_cumul, car_status->cumulative_reward);
                        FOR_LIST_FORM_BEGIN(TYPE_L_INT, qlStatus->progress_best_cumul){
                            printf(" | %ld |,",(qlStatus->progress_best_cumul)->current_list->value);
                        }
                        printf("%s ",pprint->string_space);
                    }
                    break;
                }
            }
            if(pprint->printed){
                Sleep(pprint->delay->delay_between_episodes);
            }
        }
    }
}
+53 -21
View File
@@ -1,7 +1,17 @@
#ifndef __LEARNING_VEHICLE__C_H____
#define __LEARNING_VEHICLE__C_H____
//#include <pthread.h>
#include <stdlib.h>
#include <pthread.h>
/*
 * Sleep(ms): pause for `ms` milliseconds.
 * On Windows the native Sleep() from <windows.h> is used. `_WIN32` is the
 * macro predefined by Windows compilers; the project-specific `WINDOWS`
 * macro is still honoured for builds that pass it explicitly.
 * Elsewhere Sleep() is mapped onto usleep(), which takes microseconds.
 */
#if defined(_WIN32) || defined(WINDOWS)
#include <windows.h>
#else
#include <unistd.h>
#define Sleep(x) usleep((x)*1000)
#endif
#include "neuron_t/neuron_t.h"
@@ -16,15 +26,17 @@
struct qlearning_params {
double gamma;
double learning_rate;
double factor_update_learning_rate;
double minimum_threshold_learning_rate;
double discount_factor;
double exploration_factor;
double factor_update_exploration_factor;
double minimum_threshold_exploration_factor;
float gamma;
float learning_rate;
float factor_update_learning_rate;
// float epsilon;
float minimum_threshold_learning_rate;
float discount_factor;
float exploration_factor;
float factor_update_exploration_factor;
float minimum_threshold_exploration_factor;
long int nb_training_before_update_weight_in_target;
size_t number_episodes;
};
@@ -40,6 +52,15 @@ struct delay_params {
size_t delay_between_games;
};
/* Display settings of the agent, created by create_print_params() and
 * released by free_print_params(). */
struct print_params {
/* When true, learn_to_drive() draws the vehicle and sleeps between steps. */
bool printed;
/* Locked by learn_to_drive() around print_vehicle_n_path(). */
pthread_mutex_t mut_printed;
/* Scale factors passed to print_vehicle_n_path(). */
float scale_x;
float scale_y;
/* Delays passed to Sleep(); freed by free_print_params(). */
struct delay_params *delay;
/* Blank padding string printed after the drawing; LOG_LENTH chars in total,
 * terminator included. */
char string_space[LOG_LENTH];
};
struct networks_qlearning {
config_layers *config;
neurons_TYPE_FLOAT *main_net;
@@ -51,39 +72,48 @@ struct RL_agent {
struct networks_qlearning * networks;
struct vehicle * car;
struct status_qlearning * status;
struct delay_params * delay;
struct print_params * pprint;
struct qlearning_params *qlearnParams;
};
struct networks_qlearning * create_nework_qlearning(
struct config_layers * config,
bool randomize, float minR, float maxR, int randomRange
bool randomize, float minR, float maxR, int randomRange,
size_t nb_prod_thread,
size_t nb_calc_thread,
float learning_rate
);
struct status_qlearning * create_status_qlearning ();
struct delay_params * create_delay_params (
size_t delay_between_episodes,
size_t delay_between_games
);
struct print_params * create_print_params(
float scale_x, float scale_y,
struct delay_params * dly_p
);
struct qlearning_params * create_qlearning_params (
double gamma,
double learning_rate,
double discount_factor,
double exploration_factor,
long int nb_training_before_update_weight_in_target
float gamma,
float learning_rate,
float discount_factor,
float exploration_factor,
long int nb_training_before_update_weight_in_target,
size_t number_episodes
);
struct RL_agent * create_RL_agent (
struct networks_qlearning * networks,
struct vehicle * car,
struct status_qlearning * status,
struct delay_params * delay,
struct print_params * pprint,
struct qlearning_params *qlearnParams
);
void free_networks_qlearning (struct networks_qlearning * networks);
void free_status_qlearning(struct status_qlearning *status_ql);
void free_print_params (struct print_params *pprint);
void free_delay_params (struct delay_params *dly_p);
void free_qlearning_params(struct qlearning_params *q_params);
void free_RL_agent(struct RL_agent *rlAgent);
@@ -92,9 +122,11 @@ void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * net
void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks);
void train_qlearning(struct RL_agent * rlAgent,
int action ,
tensor_TYPE_FLOAT * new_state /*input*/,
tensor_TYPE_FLOAT * state /*input*/,
long reward);
int action);
// tensor_TYPE_FLOAT * new_state /*input*/,
// tensor_TYPE_FLOAT * state /*input*/,
// long reward;
void learn_to_drive(struct RL_agent * rlAgent);
#endif /* __LEARNING_VEHICLE__C_H____ */
+18 -11
View File
@@ -67,6 +67,7 @@ struct vehicle * create_vehicle(struct blocks *path){
ret_vehicle->coord = create_coordinate(2);
ret_vehicle->sensor = create_sensors(NB_SENSORS);
ret_vehicle->old_sensor = create_sensors(NB_SENSORS);
ret_vehicle->path = path;
ret_vehicle->status = create_game_status();
@@ -113,6 +114,7 @@ void free_vehicle(struct vehicle * vhcl){
free_coordinate(vhcl->coord);
free_blocks(vhcl->path);
free_sensors(vhcl->sensor);
free_sensors(vhcl->old_sensor);
free_game_status(vhcl->status);
free(vhcl);
@@ -404,11 +406,13 @@ float distance2_coordinate(coordinate *c0, coordinate *c1){
diStep_sensor->x[0] += step_sensor * cos(direction_radian);\
diStep_sensor->x[1] += step_sensor * sin(direction_radian);\
}\
v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)))) / 50;\
v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)/5))) ;\
//v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)))) / 50;\
//v->sensor->x[position] = (MIN(49,(int)(distance2_coordinate(diStep_sensor, v->coord)/10))) / 50;\
void read_sensor(struct vehicle *v){
copy_tensor_TYPE_FLOAT(v->old_sensor, v->sensor);
float step_sensor = ((float)1)/SUBDIVISION;
coordinate * diStep_sensor = create_coordinate(2);
copy_coordinate(diStep_sensor, v->coord->x);
@@ -475,10 +479,11 @@ void add_string_log(struct game_status *status, char *str ){
}
void step(struct vehicle *v, int action){
void step_vehicle(struct vehicle *v, int action){
//float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT]
float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT]
v->direction = v->direction + action_x[action % 3];
v->speed = ((float)1)/2;
v->speed = ((float)1)/5;
move_vehicle(v);
read_sensor(v);
struct game_status *status = v->status;
@@ -494,14 +499,14 @@ void step(struct vehicle *v, int action){
status->done = true;
}
else{
bool breaked = false;
bool broken = false;
long prec, next;
char msg[48];
for(long i=0; i< path->nb_blocks; ++i){
//prec = (i-1)%(path->nb_blocks);
prec = (i + path->nb_blocks - 1 )%(path->nb_blocks);
next = (i + 1)%(path->nb_blocks);
printf("i:%ld, prec:%ld, next:%ld: maker %d, prec marker %d\n",i,prec,next, path->marker[i], path->marker[prec]);
//printf("i:%ld, prec:%ld, next:%ld: maker %d, prec marker %d\n",i,prec,next, path->marker[i], path->marker[prec]);
if(is_in_block_index(path, i, v->coord)){
if(path->marker[i] == false && path->marker[prec] == true){
path->marker[i]=true;
@@ -516,11 +521,11 @@ void step(struct vehicle *v, int action){
status->done = true;
add_string_log(status, "| reverse |");
}
breaked = true;
broken = true;
break;
}
}
if(breaked == false){
if(broken == false){
if(status->cumulative_reward > THRESHOLD_REWARD){
status->reward = REWARD_CONTINUE;
status->done = true;
@@ -547,12 +552,14 @@ void reset(struct vehicle *v){
int diff;
diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0];
random = rand() % diff;
v->coord->x[0] = path->lower_bound_block[0]->x[0] + random;
//v->coord->x[0] = path->lower_bound_block[0]->x[0] + random;
v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2;
diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1];
random = rand() % diff;
v->coord->x[1] = path->lower_bound_block[0]->x[1] + random;
//v->coord->x[1] = path->lower_bound_block[0]->x[1] + random;
v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2;
random = rand() % 50;
v->direction = random - 25;
//v->direction = 15;
//v->direction = random - 25;
v->direction = -90;
v->speed = 1;
}
+7 -3
View File
@@ -24,7 +24,9 @@
#define CENTER 1
#define RIGHT 2
#define SUBDIVISION 10
#define COUNT_ACTION 3
#define SUBDIVISION 5 //10
struct game_status {
@@ -81,6 +83,7 @@ struct vehicle {
float direction;
float speed;
sensors *sensor;
sensors *old_sensor;
struct blocks *path;
struct game_status *status;
};
@@ -110,10 +113,11 @@ void copy_coordinate(coordinate *coord, float *x);
void move_vehicle(struct vehicle *v);
void read_sensor(struct vehicle *v);
void step(struct vehicle *v, int action);
void step_vehicle(struct vehicle *v, int action);
void reset(struct vehicle *v);
void add_string_log_M(struct game_status *status, char *str );
void print2D_blocks_indexOne_withPoint(struct blocks *blk, float scale_x, float scale_y, coordinate *coordPoint);
void print_vehicle_n_path(struct vehicle *v, float scale_x, float scale_y);
+160 -8
View File
@@ -5,11 +5,13 @@
#include <math.h>
// for sleep !
/*
#ifdef __linux__
#include <unistd.h>
#elif _WIN32
#include <windows.h>
#endif
*/
#include "ftest/ftest.h"
#include "ftest/ftest_array.h"
@@ -155,6 +157,7 @@ TEST(print_blocks_withPoint){
}
#if 0
TEST(first_vehicle){
size_t nb_block = 7;
@@ -182,20 +185,69 @@ TEST(first_vehicle){
print_vehicle_n_path(vhcl, 0.2,0.4);
step(vhcl, CENTER);
sleep(2);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
step(vhcl, CENTER);
sleep(2);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
step(vhcl, CENTER);
sleep(2);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
step(vhcl, CENTER);
sleep(2);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
free_vehicle(vhcl);
}
#endif
TEST(circle_path_vehicle){
size_t nb_block = 7;
size_t dim= 2;
struct blocks * path = create_blocks(nb_block, dim);
copy_coordinate(path->lower_bound_block[0], (float[]){0,3});
copy_coordinate(path->upper_bound_block[0], (float[]){4,7});
copy_coordinate(path->lower_bound_block[1], (float[]){1,0});
copy_coordinate(path->upper_bound_block[1], (float[]){10,3});
copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5});
copy_coordinate(path->upper_bound_block[2], (float[]){14,5});
copy_coordinate(path->lower_bound_block[3], (float[]){14,2});
copy_coordinate(path->upper_bound_block[3], (float[]){18,7});
copy_coordinate(path->lower_bound_block[4], (float[]){11,7});
copy_coordinate(path->upper_bound_block[4], (float[]){17,10});
copy_coordinate(path->lower_bound_block[5], (float[]){8,6});
copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75});
copy_coordinate(path->lower_bound_block[6], (float[]){1,7});
copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75});
update_bounds_limits_blocks(path);
struct vehicle *vhcl = create_vehicle(path);
print_vehicle_n_path(vhcl, 0.2,0.4);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
free_vehicle(vhcl);
@@ -209,6 +261,106 @@ TEST(reward_list){
free_status_qlearning(l_reward);
}
#if 1
/*
 * End-to-end run: builds a 7-block track, a car, the three Q-learning
 * networks (3 inputs, two hidden layers of 24 neurons, 3 outputs) and all
 * agent parameters, then hands the agent to learn_to_drive().
 */
TEST(first_learn_vehicle){
    /* One {lower corner, upper corner} pair per block, in block order. */
#if 1
    float track[][2][2] = {
        { { 0, 3   }, {  4, 7    } },
        { { 1, 0   }, { 10, 3    } },
        { { 10, 0.5}, { 14, 5    } },
        { { 14, 2  }, { 18, 7    } },
        { { 11, 7  }, { 17, 10   } },
        { { 8, 6   }, { 11, 9.75 } },
        { { 1, 7   }, {  8, 9.75 } },
    };
#else
    float track[][2][2] = {
        { { 0, 0   }, {  2, 7 } },
        { { 2, 0   }, {  4, 2 } },
        { { 4, 0.5 }, {  8, 3 } },
        { { 8, 0   }, { 16, 2 } },
        { { 16, 0  }, { 18, 7 } },
        { { 6, 7   }, { 18, 9 } },
        { { 2, 6   }, {  6, 8 } },
    };
#endif
    size_t nb_block = 7;
    size_t dim = 2;
    struct blocks * path = create_blocks(nb_block, dim);
    for(size_t b = 0; b < nb_block; ++b){
        copy_coordinate(path->lower_bound_block[b], track[b][0]);
        copy_coordinate(path->upper_bound_block[b], track[b][1]);
    }
    update_bounds_limits_blocks(path);

    struct vehicle *car = create_vehicle(path);

    /* 3 inputs, 3 outputs, two hidden layers with 24 neurons each */
    size_t layer_sizes[] = {3, 24, 24, 3};
    config_layers *pconf = create_config_layers_from_OneD(4, layer_sizes);

    /* Network initialisation and threading settings. */
    bool randomize = true;
    float minR = 0, maxR = 1;
    int randomRange = 500;
    size_t nb_prod_thread = 2;
    size_t nb_calc_thread = 4;
    float learning_rate = 0.001;
    struct networks_qlearning *nnetworks = create_nework_qlearning(
        pconf, randomize, minR, maxR, randomRange,
        nb_prod_thread, nb_calc_thread, learning_rate);

    struct status_qlearning *qlstatus = create_status_qlearning ();

    /* delay between episodes, then delay between games */
    struct delay_params *dly = create_delay_params (200, 20);

    struct qlearning_params *qlparams = create_qlearning_params (
        0.95,          /* gamma */
        learning_rate,
        0,             /* discount_factor (not used!) */
        0.99,          /* exploration_factor */
        20,            /* nb_training_before_update_weight_in_target */
        10000          /* number_episodes */
    );

    /* scale_x, scale_y, delays */
    struct print_params *pprint = create_print_params(0.2, 0.4, dly);

    struct RL_agent *rlAgent = create_RL_agent (nnetworks, car, qlstatus, pprint, qlparams);

    learn_to_drive(rlAgent);

    free_RL_agent(rlAgent);
}
#endif
int main(int argc, char **argv){
+9
View File
@@ -125,4 +125,13 @@ GEN_NEURON_(TYPE_FLOAT)
GEN_NEURON_(TYPE_DOUBLE)
/*
 * Assign `value` to the member `attribute` of `neuronVar` and of every
 * following layer reached through `next_layer`, until a NULL link.
 *
 *   type       suffix of the layer type (the iterator is a neurons_<type> *)
 *   neuronVar  first layer to update; NULL means nothing is done
 *   attribute  member name to assign
 *   value      expression assigned to each layer (evaluated once per layer)
 *
 * Expands to a single statement without a trailing semicolon, so it is used
 * like a function call: `UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(...);`
 */
#define UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(type, neuronVar, attribute, value) \
    do{\
        neurons_##type *update_attr_layer_ = (neuronVar);\
        while(update_attr_layer_){\
            update_attr_layer_->attribute = (value);\
            update_attr_layer_ = update_attr_layer_->next_layer;\
        }\
    }while(0)
#endif /*__NEURON_T_C__H*/
+4 -4
View File
@@ -1356,7 +1356,7 @@ void parse_file_InputOutput_withDim_to_tensors_##type(tensor_##type **Tpart1, te
fprintf( stderr, "Cannot open file: %s for reading\n",file_name_input );\
exit( -1 );\
}\
bool size_unknown=false, breaked=false; \
bool size_unknown=false, broken=false; \
bool Done=false;\
int retfread = 0, curIn=0;\
while(!Done){\
@@ -1372,13 +1372,13 @@ void parse_file_InputOutput_withDim_to_tensors_##type(tensor_##type **Tpart1, te
iinput[curIn]='\0';\
size_t len = strlen(iinput);\
for(size_t i=0; i<len ; ++i){\
if(iinput[i]==']') {breaked = true; break;}\
if(iinput[i]==']') {broken = true; break;}\
if((iinput[i]=='*') ||(iinput[i]=='_')){ \
breaked=true; size_unknown =true;\
broken=true; size_unknown =true;\
break;\
}\
}\
Done = breaked;\
Done = broken;\
}\
rewind(f_input);\
list_perm_in_dim *l_p=NULL;\