Add some MACRO in neuron_t and debug deepQlearning

This commit is contained in:
2024-07-09 17:38:58 +02:00
parent 824396f901
commit 0c9813beca
8 changed files with 810 additions and 85 deletions
+50 -27
View File
@@ -21,10 +21,12 @@ float D_L2(float t, float o){
} }
void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * networks){ void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * networks){
copy_weight_in_neurons_TYPE_FLOAT(networks->target_net, networks->main_net); //copy_weight_in_neurons_TYPE_FLOAT(networks->target_net, networks->main_net);
COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(TYPE_FLOAT,weight_in, networks->target_net, networks->main_net);
} }
void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks){ void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks){
copy_weight_in_neurons_TYPE_FLOAT(networks->best_net, networks->main_net); //copy_weight_in_neurons_TYPE_FLOAT(networks->best_net, networks->main_net);
COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(TYPE_FLOAT,weight_in, networks->best_net, networks->main_net);
} }
struct networks_qlearning * create_nework_qlearning( struct networks_qlearning * create_nework_qlearning(
@@ -70,6 +72,8 @@ struct status_qlearning * create_status_qlearning (){
status_ql->nb_training_after_updated_weight_in_target = 0; status_ql->nb_training_after_updated_weight_in_target = 0;
status_ql->nb_episodes = 0;
return status_ql; return status_ql;
} }
@@ -91,6 +95,7 @@ struct print_params * create_print_params(float scale_x, float scale_y, struct
pprint->scale_x = scale_x; pprint->scale_x = scale_x;
pprint->scale_y = scale_y; pprint->scale_y = scale_y;
pprint->delay = delay; pprint->delay = delay;
pprint->string_space = malloc(LOG_LENTH+1);
pthread_mutex_init(&(pprint->mut_printed), NULL); pthread_mutex_init(&(pprint->mut_printed), NULL);
int i; int i;
@@ -164,6 +169,7 @@ void free_delay_params (struct delay_params *dly_p){
} }
void free_print_params (struct print_params *pprint){ void free_print_params (struct print_params *pprint){
free(pprint->string_space);
pthread_mutex_destroy(&(pprint->mut_printed)); pthread_mutex_destroy(&(pprint->mut_printed));
free_delay_params(pprint->delay); free_delay_params(pprint->delay);
free(pprint); free(pprint);
@@ -192,13 +198,14 @@ void train_qlearning(struct RL_agent * rlAgent,
neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net; neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net;
tensor_TYPE_FLOAT * new_state = rlAgent->car->sensor /*input*/; tensor_TYPE_FLOAT * new_state = rlAgent->car->sensor /*input*/;
tensor_TYPE_FLOAT * state = rlAgent->car->old_sensor /*input*/; tensor_TYPE_FLOAT * state = rlAgent->car->old_sensor /*input*/;
calculate_output_by_network_neurons_TYPE_FLOAT(net_main, state, &action_value); neurons_TYPE_FLOAT *ttmp = calculate_output_by_network_neurons_TYPE_FLOAT(net_main, state, &action_value);
calculate_output_by_network_neurons_TYPE_FLOAT(net_target, new_state, &next_action_value); calculate_output_by_network_neurons_TYPE_FLOAT(net_target, new_state, &next_action_value);
tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim); tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim);
struct game_status * car_status = rlAgent->car->status; struct game_status * car_status = rlAgent->car->status;
struct qlearning_params * qlParams = rlAgent->qlearnParams; struct qlearning_params * qlParams = rlAgent->qlearnParams;
copy_tensor_TYPE_FLOAT(experimental_values, action_value) ; copy_tensor_TYPE_FLOAT(experimental_values, action_value) ;
//copy_tensor_TYPE_FLOAT(experimental_values, next_action_value) ;
// experimental_values === Q-tab learning // experimental_values === Q-tab learning
if(car_status->done){ if(car_status->done){
experimental_values->x[action] = -100; experimental_values->x[action] = -100;
@@ -206,19 +213,12 @@ void train_qlearning(struct RL_agent * rlAgent,
experimental_values->x[action] = car_status->reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ; experimental_values->x[action] = car_status->reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ;
} }
// *** // ***
neurons_TYPE_FLOAT *tmp=NULL, *ttmp=NULL, *base = net_main; copy_tensor_TYPE_FLOAT(ttmp->target, experimental_values);
init_copy_in_out_networks_from_tensors_TYPE_FLOAT(base,base->output , experimental_values );\ while(ttmp != net_main){
tmp=net_main->next_layer;\ calc_delta_neurons_TYPE_FLOAT(ttmp);
while(tmp){\ update_weight_neurons_TYPE_FLOAT(ttmp);
calc_out_neurons_TYPE_FLOAT(tmp);\ ttmp = ttmp->prev_layer;
ttmp = tmp;\ }
tmp = tmp->next_layer;\
}\
while(ttmp != base){\
calc_delta_neurons_TYPE_FLOAT(ttmp);\
update_weight_neurons_TYPE_FLOAT(ttmp);\
ttmp = ttmp->prev_layer;\
}\
// *** // ***
float new_value = ( (net_main->learning_rate < qlParams->minimum_threshold_learning_rate /*0.0001*/) ? net_main->learning_rate :(net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ ); float new_value = ( (net_main->learning_rate < qlParams->minimum_threshold_learning_rate /*0.0001*/) ? net_main->learning_rate :(net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ );
@@ -230,19 +230,28 @@ void train_qlearning(struct RL_agent * rlAgent,
} }
int select_action(struct RL_agent * rlAgent){ int select_action(struct RL_agent * rlAgent){
//static size_t explore = 0;
int action; int action;
tensor_TYPE_FLOAT * action_value = NULL; tensor_TYPE_FLOAT * action_value = NULL;
calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->old_sensor, &action_value); //calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->old_sensor, &action_value);
long int NUMBER_EPISODE2 = (rlAgent->qlearnParams->number_episodes); calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->sensor, &action_value);
NUMBER_EPISODE2 = NUMBER_EPISODE2 * NUMBER_EPISODE2; //long int NUMBER_EPISODE2 = (rlAgent->qlearnParams->number_episodes)*100;
srand(time(NULL)); int NUMBER_EPISODE2 = 3000;
//NUMBER_EPISODE2 = NUMBER_EPISODE2 * NUMBER_EPISODE2;
// static bool init = true ;
// if(init){
srand(time(NULL));
// init =false;
// }
int random = rand() % NUMBER_EPISODE2; int random = rand() % NUMBER_EPISODE2;
float proba_explor = (float)random / NUMBER_EPISODE2; float proba_explor = (float)(random ) / NUMBER_EPISODE2;
if(proba_explor <= rlAgent->qlearnParams->exploration_factor ){ if(proba_explor > rlAgent->qlearnParams->exploration_factor ){
action = rand() % action_value->dim->rank ; action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank );
} }
else{ else{
action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank ); action = rand() % action_value->dim->rank ;
// explore++;
//printf(" EXPLORE :%ld, action : %d , factor : %f nb_episodes : %ld \n",explore,action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
} }
return action; return action;
} }
@@ -261,20 +270,34 @@ void learn_to_drive(struct RL_agent * rlAgent){
reset(car); reset(car);
qlStatus->nb_training_after_updated_weight_in_target = 0; qlStatus->nb_training_after_updated_weight_in_target = 0;
while(true){ while(true){
++(qlStatus->nb_episodes);
++(qlStatus->nb_training_after_updated_weight_in_target); ++(qlStatus->nb_training_after_updated_weight_in_target);
action = select_action(rlAgent); action = select_action(rlAgent);
sprintf(msg," dir:%.0f : %s, ", car->direction ,action_name[action]); sprintf(msg," dir:%.0f : %s, ", car->direction ,action_name[action]);
add_string_log_M(car_status,msg); add_string_log_M(car_status,msg);
step_vehicle(car, action); step_vehicle(car, action);
train_qlearning(rlAgent, action); train_qlearning(rlAgent, action);
if(pprint->printed){ if(/*(qlStatus->nb_episodes %15 == 0) && */ pprint->printed){
pthread_mutex_lock(&(pprint->mut_printed)); pthread_mutex_lock(&(pprint->mut_printed));
print_vehicle_n_path(car, pprint->scale_x, pprint->scale_y); print_vehicle_n_path(car, pprint->scale_x, pprint->scale_y);
pthread_mutex_unlock(&(pprint->mut_printed)); pthread_mutex_unlock(&(pprint->mut_printed));
printf("%s ",pprint->string_space); printf("%s ",pprint->string_space);
printf("ep: %ld ",index_episode); printf("ep: %ld\n",index_episode);
neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net; neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net;
for(size_t i=0; i<net_main->output->dim->rank; ++i) printf("{sensro[%s]:%f }",action_name[i%COUNT_ACTION],net_main->output->x[i]); neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net;
for(size_t i=0; i<net_main->output->dim->rank; ++i) {
printf("{sensro[%s]:%f "/*vs %f / VS / %f */" vs oldsens[%s]: %f}\n",action_name[i%COUNT_ACTION],net_target->output->x[i],
/*car->sensor->x[i] ,car->old_sensor->x[i],
*/action_name[i%COUNT_ACTION],net_main->output->x[i]);
}
printf("\n< %f > ( %s ) \n", car->direction, action_name[action % COUNT_ACTION]);
//print_weight_in_neurons_TYPE_FLOAT(net_main, "net_main_wei");
//PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, weight_in, "net_main_we_in");
PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, output, "net_main_out");
//PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_target, output, "net_target_out");
//PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, input, "net_main_input");
printf("action : %d , factor : %f nb_episodes : %ld \n",action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
Sleep(pprint->delay->delay_between_games); Sleep(pprint->delay->delay_between_games);
} }
//done in step ... copy_tensor_TYPE_FLOAT(car->old_sensor, car->sensor); //done in step ... copy_tensor_TYPE_FLOAT(car->old_sensor, car->sensor);
@@ -45,6 +45,7 @@ struct status_qlearning {
struct main_list_TYPE_L_INT * list_target_cumul; struct main_list_TYPE_L_INT * list_target_cumul;
struct main_list_TYPE_L_INT * progress_best_cumul; struct main_list_TYPE_L_INT * progress_best_cumul;
long int nb_training_after_updated_weight_in_target; long int nb_training_after_updated_weight_in_target;
size_t nb_episodes;
}; };
struct delay_params { struct delay_params {
@@ -58,7 +59,7 @@ struct print_params {
float scale_x; float scale_x;
float scale_y; float scale_y;
struct delay_params *delay; struct delay_params *delay;
char string_space[LOG_LENTH]; char *string_space;//[LOG_LENTH];
}; };
struct networks_qlearning { struct networks_qlearning {
+55 -25
View File
@@ -7,7 +7,7 @@
//#define CENTER 1 //#define CENTER 1
//#define RIGHT 2 //#define RIGHT 2
#define LIMIT_DISTANCE ((float)1)/50 #define LIMIT_DISTANCE ((float)((SUBDIVISION-1)/10))/SUBDIVISION
#define REWARD_STOP -1000 #define REWARD_STOP -1000
#define REWARD_CONTINUE 100 #define REWARD_CONTINUE 100
@@ -68,6 +68,12 @@ struct vehicle * create_vehicle(struct blocks *path){
ret_vehicle->coord = create_coordinate(2); ret_vehicle->coord = create_coordinate(2);
ret_vehicle->sensor = create_sensors(NB_SENSORS); ret_vehicle->sensor = create_sensors(NB_SENSORS);
ret_vehicle->old_sensor = create_sensors(NB_SENSORS); ret_vehicle->old_sensor = create_sensors(NB_SENSORS);
for(size_t i=0; i<NB_SENSORS;++i){
ret_vehicle->sensor->x[i]=0;
ret_vehicle->old_sensor->x[i]=0;
}
ret_vehicle->path = path; ret_vehicle->path = path;
ret_vehicle->status = create_game_status(); ret_vehicle->status = create_game_status();
@@ -387,15 +393,15 @@ void print_vehicle_n_path(struct vehicle *v, float scale_x, float scale_y){
void move_vehicle(struct vehicle *v){ void move_vehicle(struct vehicle *v){
v->coord->x[0] += v->speed * cos(v->direction * M_PI / 180); v->coord->x[0] += v->speed * cos(v->direction * M_PI / 180);
v->coord->x[1] += v->speed * sin(v->direction * M_PI / 180); v->coord->x[1] -= v->speed * sin(v->direction * M_PI / 180);
} }
float distance2_coordinate(coordinate *c0, coordinate *c1){ float distance2_coordinate(coordinate *c0, coordinate *c1){
if(c0->dim->rank != c1->dim->rank) return 0; if(c0->dim->rank != c1->dim->rank) return -1;
float d=0, tmp; float d=0, tmp;
for(size_t i=0; i<c0->dim->rank; ++i){ for(size_t i=0; i<c0->dim->rank; ++i){
tmp = (c0->x[i] - c1->x[i]); tmp = (c0->x[i] - c1->x[i]);
d += tmp * tmp; d += (tmp * tmp);
} }
return sqrt(d); return sqrt(d);
} }
@@ -404,19 +410,27 @@ float distance2_coordinate(coordinate *c0, coordinate *c1){
direction_radian = (v->direction + deviation) * M_PI / 180;\ direction_radian = (v->direction + deviation) * M_PI / 180;\
while( is_in_blocks(v->path, diStep_sensor )){\ while( is_in_blocks(v->path, diStep_sensor )){\
diStep_sensor->x[0] += step_sensor * cos(direction_radian);\ diStep_sensor->x[0] += step_sensor * cos(direction_radian);\
diStep_sensor->x[1] += step_sensor * sin(direction_radian);\ diStep_sensor->x[1] -= step_sensor * sin(direction_radian);\
}\ }\
v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)/5))) ;\ dist = (distance2_coordinate(diStep_sensor, v->coord)/5);\
printf("| dist :%f | ",dist);\
v->sensor->x[position] = (float)(MIN((SUBDIVISION-1),(int)dist))/SUBDIVISION ;\
//v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)/5))) ;\
//v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)))) / 50;\ //v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)))) / 50;\
//v->sensor->x[position] = (MIN(49,(int)(distance2_coordinate(diStep_sensor, v->coord)/10))) / 50;\ //v->sensor->x[position] = (MIN(49,(int)(distance2_coordinate(diStep_sensor, v->coord)/10))) / 50;\
void read_sensor(struct vehicle *v){ void read_sensor(struct vehicle *v){
copy_tensor_TYPE_FLOAT(v->old_sensor, v->sensor); copy_tensor_TYPE_FLOAT(v->old_sensor, v->sensor);
float step_sensor = ((float)1)/SUBDIVISION; float step_sensor = STEP; // /SUBDIVISION;
coordinate * diStep_sensor = create_coordinate(2); coordinate * diStep_sensor = create_coordinate(2);
copy_coordinate(diStep_sensor, v->coord->x); copy_coordinate(diStep_sensor, v->coord->x);
float dist;
printf("\n");
// count the number of step until we go out of the path = distance // count the number of step until we go out of the path = distance
// center sensor // center sensor
float direction_radian ; float direction_radian ;
@@ -424,11 +438,11 @@ void read_sensor(struct vehicle *v){
copy_coordinate(diStep_sensor, v->coord->x); copy_coordinate(diStep_sensor, v->coord->x);
// right sensor // right sensor
SENSOR_VALUE_CALCULATE(RIGHT,45); SENSOR_VALUE_CALCULATE(RIGHT,-45);
copy_coordinate(diStep_sensor, v->coord->x); copy_coordinate(diStep_sensor, v->coord->x);
// left sensor // left sensor
SENSOR_VALUE_CALCULATE(LEFT, -45); SENSOR_VALUE_CALCULATE(LEFT, 45);
free_coordinate(diStep_sensor); free_coordinate(diStep_sensor);
@@ -482,8 +496,8 @@ void add_string_log(struct game_status *status, char *str ){
void step_vehicle(struct vehicle *v, int action){ void step_vehicle(struct vehicle *v, int action){
//float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT] //float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT]
float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT] float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT]
v->direction = v->direction + action_x[action % 3]; v->direction = (float)((int)(v->direction + action_x[action % 3]) % 360) ;
v->speed = ((float)1)/5; v->speed = SPEED; // /5;
move_vehicle(v); move_vehicle(v);
read_sensor(v); read_sensor(v);
struct game_status *status = v->status; struct game_status *status = v->status;
@@ -495,22 +509,24 @@ void step_vehicle(struct vehicle *v, int action){
struct blocks * path = v->path; struct blocks * path = v->path;
//printf(" center : %f vs %f direction: %f\n",v->sensor->x[CENTER], LIMIT_DISTANCE, v->direction); //printf(" center : %f vs %f direction: %f\n",v->sensor->x[CENTER], LIMIT_DISTANCE, v->direction);
if( v->sensor->x[CENTER]<= LIMIT_DISTANCE ){ if( v->sensor->x[CENTER]<= LIMIT_DISTANCE ){
//if( MAX_ARRAY_TYPE_FLOAT(v->sensor->x,v->sensor->dim->rank)<= LIMIT_DISTANCE ){
status->reward = REWARD_STOP; status->reward = REWARD_STOP;
status->done = true; status->done = true;
} }
else{ else{
bool broken = false; bool broken = false;
long prec, next; long pprec, prec, next;
char msg[48]; char msg[48];
for(long i=0; i< path->nb_blocks; ++i){ for(long i=0; i< path->nb_blocks; ++i){
//prec = (i-1)%(path->nb_blocks); //prec = (i-1)%(path->nb_blocks);
pprec = (i + path->nb_blocks - 2 )%(path->nb_blocks);
prec = (i + path->nb_blocks - 1 )%(path->nb_blocks); prec = (i + path->nb_blocks - 1 )%(path->nb_blocks);
next = (i + 1)%(path->nb_blocks); next = (i + 1)%(path->nb_blocks);
//printf("i:%ld, prec:%ld, next:%ld: maker %d, prec marker %d\n",i,prec,next, path->marker[i], path->marker[prec]); //printf("i:%ld, prec:%ld, next:%ld: maker %d, prec marker %d\n",i,prec,next, path->marker[i], path->marker[prec]);
if(is_in_block_index(path, i, v->coord)){ if(is_in_block_index(path, i, v->coord)){
if(path->marker[i] == false && path->marker[prec] == true){ if(path->marker[i] == false && path->marker[prec] == true){
path->marker[i]=true; path->marker[i]=true;
path->marker[prec]=false; path->marker[pprec]=false;
status->reward = REWARD_CONTINUE; status->reward = REWARD_CONTINUE;
status->done = false; status->done = false;
sprintf(msg," %ld,",i); sprintf(msg," %ld,",i);
@@ -536,9 +552,10 @@ void step_vehicle(struct vehicle *v, int action){
} }
#define RANDOM 1
void reset(struct vehicle *v){ void reset(struct vehicle *v){
//static bool init = true;
struct blocks * path = v->path; struct blocks * path = v->path;
long int i; long int i;
for(i=0; i<(path->nb_blocks -1); ++i) for(i=0; i<(path->nb_blocks -1); ++i)
@@ -547,19 +564,32 @@ void reset(struct vehicle *v){
v->status->cumulative_reward = 0; v->status->cumulative_reward = 0;
sprintf(v->status->log,"\n"); sprintf(v->status->log,"\n");
v->status->cur_log = 0; v->status->cur_log = 0;
srand(time(NULL)); //if(init){
srand(time(NULL));
// init = false;
//}
int random; int random;
int diff; int diff;
diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0]; diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0];
random = rand() % diff; random = rand() % (diff/2) ;
//v->coord->x[0] = path->lower_bound_block[0]->x[0] + random; #if RANDOM
v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2; v->coord->x[0] = path->lower_bound_block[0]->x[0] + random;
#else
v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2;
#endif
diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1]; diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1];
random = rand() % diff; random = rand() % (diff/2);
//v->coord->x[1] = path->lower_bound_block[0]->x[1] + random; #if RANDOM
v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2; v->coord->x[1] = path->lower_bound_block[0]->x[1] + random;
#else
v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2;
#endif
random = rand() % 50; random = rand() % 50;
//v->direction = random - 25; #if RANDOM
v->direction = -90; v->direction = random - 25;
v->speed = 1; #else
v->direction = -90;
#endif
v->speed = SPEED;
read_sensor(v);
} }
+4 -1
View File
@@ -26,7 +26,10 @@
#define COUNT_ACTION 3 #define COUNT_ACTION 3
#define SUBDIVISION 5 //10 #define SUBDIVISION 50
#define STEP 1
#define SPEED 1
struct game_status { struct game_status {
+645 -23
View File
@@ -253,6 +253,137 @@ TEST(circle_path_vehicle){
free_vehicle(vhcl); free_vehicle(vhcl);
}
TEST(circle_path_vehicle_00){
size_t nb_block = 7;
size_t dim= 2;
struct blocks * path = create_blocks(nb_block, dim);
copy_coordinate(path->lower_bound_block[0], (float[]){0,300});
copy_coordinate(path->upper_bound_block[0], (float[]){400,700});
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
copy_coordinate(path->upper_bound_block[1], (float[]){1000,300});
copy_coordinate(path->lower_bound_block[2], (float[]){1000,50});
copy_coordinate(path->upper_bound_block[2], (float[]){1400,500});
copy_coordinate(path->lower_bound_block[3], (float[]){1400,200});
copy_coordinate(path->upper_bound_block[3], (float[]){1800,700});
copy_coordinate(path->lower_bound_block[4], (float[]){1100,700});
copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000});
copy_coordinate(path->lower_bound_block[5], (float[]){800,600});
copy_coordinate(path->upper_bound_block[5], (float[]){1100,975});
copy_coordinate(path->lower_bound_block[6], (float[]){100,700});
copy_coordinate(path->upper_bound_block[6], (float[]){800,975});
update_bounds_limits_blocks(path);
struct vehicle *vhcl = create_vehicle(path);
print_vehicle_n_path(vhcl, 20,40);
step_vehicle(vhcl, CENTER);
Sleep(200);
/* print_vehicle_n_path(vhcl, 0.2,0.4);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
*/
free_vehicle(vhcl);
}
TEST(circle_path_vehicle_50){
size_t nb_block = 7;
size_t dim= 2;
struct blocks * path = create_blocks(nb_block, dim);
#if 1
copy_coordinate(path->lower_bound_block[6], (float[]){0,30});
copy_coordinate(path->upper_bound_block[6], (float[]){150,250});
copy_coordinate(path->lower_bound_block[5], (float[]){150,0});
copy_coordinate(path->upper_bound_block[5], (float[]){250,80});
copy_coordinate(path->lower_bound_block[4], (float[]){250,20});
copy_coordinate(path->upper_bound_block[4], (float[]){360,120});
copy_coordinate(path->lower_bound_block[3], (float[]){360,80});
copy_coordinate(path->upper_bound_block[3], (float[]){600,150});
copy_coordinate(path->lower_bound_block[2], (float[]){600,90});
copy_coordinate(path->upper_bound_block[2], (float[]){760,300});
copy_coordinate(path->lower_bound_block[1], (float[]){300,300});
copy_coordinate(path->upper_bound_block[1], (float[]){760,350});
copy_coordinate(path->lower_bound_block[0], (float[]){0,250});
copy_coordinate(path->upper_bound_block[0], (float[]){410,300});
#else
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
copy_coordinate(path->upper_bound_block[0], (float[]){150,250});
copy_coordinate(path->lower_bound_block[1], (float[]){150,0});
copy_coordinate(path->upper_bound_block[1], (float[]){250,150});
copy_coordinate(path->lower_bound_block[2], (float[]){250,80});
copy_coordinate(path->upper_bound_block[2], (float[]){360,200});
copy_coordinate(path->lower_bound_block[3], (float[]){360,70});
copy_coordinate(path->upper_bound_block[3], (float[]){600,170});
copy_coordinate(path->lower_bound_block[4], (float[]){600,90});
copy_coordinate(path->upper_bound_block[4], (float[]){760,300});
copy_coordinate(path->lower_bound_block[5], (float[]){300,300});
copy_coordinate(path->upper_bound_block[5], (float[]){760,350});
copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
copy_coordinate(path->upper_bound_block[6], (float[]){410,300});
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
copy_coordinate(path->upper_bound_block[0], (float[]){100,250});
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
copy_coordinate(path->upper_bound_block[1], (float[]){250,80});
copy_coordinate(path->lower_bound_block[2], (float[]){250,0});
copy_coordinate(path->upper_bound_block[2], (float[]){360,140});
copy_coordinate(path->lower_bound_block[3], (float[]){360,70});
copy_coordinate(path->upper_bound_block[3], (float[]){600,140});
copy_coordinate(path->lower_bound_block[4], (float[]){600,90});
copy_coordinate(path->upper_bound_block[4], (float[]){720,300});
copy_coordinate(path->lower_bound_block[5], (float[]){300,300});
copy_coordinate(path->upper_bound_block[5], (float[]){720,350});
copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
copy_coordinate(path->upper_bound_block[6], (float[]){410,300});
#endif
update_bounds_limits_blocks(path);
struct vehicle *vhcl = create_vehicle(path);
print_vehicle_n_path(vhcl, 10,10);
step_vehicle(vhcl, CENTER);
Sleep(200);
/* print_vehicle_n_path(vhcl, 0.2,0.4);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
step_vehicle(vhcl, CENTER);
Sleep(200);
print_vehicle_n_path(vhcl, 0.2,0.4);
*/
free_vehicle(vhcl);
} }
TEST(reward_list){ TEST(reward_list){
@@ -261,8 +392,125 @@ TEST(reward_list){
free_status_qlearning(l_reward); free_status_qlearning(l_reward);
} }
float f(float x){
return 1/(1+exp((double)(-x)));
}
float df(float x){
return exp(-x)/ ((1+exp(-x)) * (1+exp(-x)));
}
#if 1 #if 1
TEST(first_learn_vehicle){ TEST(first_learn_vehicle_rev50){
size_t nb_block = 7;
size_t dim= 2;
struct blocks * path = create_blocks(nb_block, dim);
#if 0
copy_coordinate(path->lower_bound_block[6], (float[]){0,30});
copy_coordinate(path->upper_bound_block[6], (float[]){150,250});
copy_coordinate(path->lower_bound_block[5], (float[]){150,0});
copy_coordinate(path->upper_bound_block[5], (float[]){250,80});
copy_coordinate(path->lower_bound_block[4], (float[]){250,20});
copy_coordinate(path->upper_bound_block[4], (float[]){360,120});
copy_coordinate(path->lower_bound_block[3], (float[]){360,80});
copy_coordinate(path->upper_bound_block[3], (float[]){600,150});
copy_coordinate(path->lower_bound_block[2], (float[]){600,90});
copy_coordinate(path->upper_bound_block[2], (float[]){760,300});
copy_coordinate(path->lower_bound_block[1], (float[]){300,300});
copy_coordinate(path->upper_bound_block[1], (float[]){760,350});
copy_coordinate(path->lower_bound_block[0], (float[]){0,250});
copy_coordinate(path->upper_bound_block[0], (float[]){410,300});
#else
copy_coordinate(path->lower_bound_block[4], (float[]){0,0});
copy_coordinate(path->upper_bound_block[4], (float[]){150,250});
copy_coordinate(path->lower_bound_block[5], (float[]){150,40});
copy_coordinate(path->upper_bound_block[5], (float[]){250,150});
copy_coordinate(path->lower_bound_block[6], (float[]){250,80});
copy_coordinate(path->upper_bound_block[6], (float[]){360,200});
copy_coordinate(path->lower_bound_block[0], (float[]){360,70});
copy_coordinate(path->upper_bound_block[0], (float[]){600,150});
copy_coordinate(path->lower_bound_block[1], (float[]){600,90});
copy_coordinate(path->upper_bound_block[1], (float[]){760,300});
copy_coordinate(path->lower_bound_block[2], (float[]){260,300});
copy_coordinate(path->upper_bound_block[2], (float[]){760,360});
copy_coordinate(path->lower_bound_block[3], (float[]){0,250});
copy_coordinate(path->upper_bound_block[3], (float[]){410,300});
#endif
update_bounds_limits_blocks(path);
struct vehicle *car = create_vehicle(path);
config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
//config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
bool randomize=true;
float minR = -0.5, maxR = 0.5;
int randomRange = 500;
size_t nb_prod_thread = 2;
size_t nb_calc_thread = 4;
float learning_rate = 0.001;
struct networks_qlearning *nnetworks = create_nework_qlearning(
pconf,
randomize, minR, maxR, randomRange,
nb_prod_thread, nb_calc_thread,
learning_rate
);
struct status_qlearning *qlstatus = create_status_qlearning ();
struct delay_params *dly = create_delay_params (
100/*size_t delay_between_episodes*/,
10/*size_t delay_between_games*/
);
struct qlearning_params *qlparams = create_qlearning_params (
0.95/*float gamma*/,
learning_rate,
0 /* (not used!)float discount_factor*/,
0.99 /*float exploration_factor*/,
20/*long int nb_training_before_update_weight_in_target*/,
10000/*size_t number_episodes*/
);
/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
*/
struct print_params *pprint = create_print_params(
12/*float scale_x*/,12 /*float scale_y*/,
dly/*struct delay_params * dly_p*/
);
struct RL_agent *rlAgent = create_RL_agent (
nnetworks /*struct networks_qlearning * networks*/,
car /*struct vehicle * car*/,
qlstatus /*struct status_qlearning * status*/,
pprint /*struct print_params * pprint*/,
qlparams/*struct qlearning_params *qlearnParams*/
);
learn_to_drive(rlAgent);
free_RL_agent(rlAgent);
}
#endif
#if 1
TEST(first_learn_vehicle_50){
size_t nb_block = 7; size_t nb_block = 7;
size_t dim= 2; size_t dim= 2;
struct blocks * path = create_blocks(nb_block, dim); struct blocks * path = create_blocks(nb_block, dim);
@@ -270,6 +518,59 @@ TEST(first_learn_vehicle){
#if 1 #if 1
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
copy_coordinate(path->upper_bound_block[0], (float[]){150,250});
copy_coordinate(path->lower_bound_block[1], (float[]){150,0});
copy_coordinate(path->upper_bound_block[1], (float[]){250,150});
copy_coordinate(path->lower_bound_block[2], (float[]){250,80});
copy_coordinate(path->upper_bound_block[2], (float[]){360,200});
copy_coordinate(path->lower_bound_block[3], (float[]){360,70});
copy_coordinate(path->upper_bound_block[3], (float[]){600,170});
copy_coordinate(path->lower_bound_block[4], (float[]){600,90});
copy_coordinate(path->upper_bound_block[4], (float[]){760,300});
copy_coordinate(path->lower_bound_block[5], (float[]){300,300});
copy_coordinate(path->upper_bound_block[5], (float[]){760,350});
copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
copy_coordinate(path->upper_bound_block[6], (float[]){410,300});
/*
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
copy_coordinate(path->upper_bound_block[0], (float[]){100,250});
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
copy_coordinate(path->upper_bound_block[1], (float[]){250,80});
copy_coordinate(path->lower_bound_block[2], (float[]){250,0});
copy_coordinate(path->upper_bound_block[2], (float[]){360,140});
copy_coordinate(path->lower_bound_block[3], (float[]){360,70});
copy_coordinate(path->upper_bound_block[3], (float[]){600,140});
copy_coordinate(path->lower_bound_block[4], (float[]){600,90});
copy_coordinate(path->upper_bound_block[4], (float[]){720,300});
copy_coordinate(path->lower_bound_block[5], (float[]){300,300});
copy_coordinate(path->upper_bound_block[5], (float[]){720,350});
copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
copy_coordinate(path->upper_bound_block[6], (float[]){410,300});
copy_coordinate(path->lower_bound_block[0], (float[]){0,300});
copy_coordinate(path->upper_bound_block[0], (float[]){400,700});
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
copy_coordinate(path->upper_bound_block[1], (float[]){1000,300});
copy_coordinate(path->lower_bound_block[2], (float[]){1000,50});
copy_coordinate(path->upper_bound_block[2], (float[]){1400,500});
copy_coordinate(path->lower_bound_block[3], (float[]){1400,200});
copy_coordinate(path->upper_bound_block[3], (float[]){1800,700});
copy_coordinate(path->lower_bound_block[4], (float[]){1100,700});
copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000});
copy_coordinate(path->lower_bound_block[5], (float[]){800,600});
copy_coordinate(path->upper_bound_block[5], (float[]){1100,975});
copy_coordinate(path->lower_bound_block[6], (float[]){100,700});
copy_coordinate(path->upper_bound_block[6], (float[]){800,975});
*/
#else
copy_coordinate(path->lower_bound_block[0], (float[]){0,3}); copy_coordinate(path->lower_bound_block[0], (float[]){0,3});
copy_coordinate(path->upper_bound_block[0], (float[]){4,7}); copy_coordinate(path->upper_bound_block[0], (float[]){4,7});
copy_coordinate(path->lower_bound_block[1], (float[]){1,0}); copy_coordinate(path->lower_bound_block[1], (float[]){1,0});
@@ -285,23 +586,7 @@ TEST(first_learn_vehicle){
copy_coordinate(path->lower_bound_block[6], (float[]){1,7}); copy_coordinate(path->lower_bound_block[6], (float[]){1,7});
copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75}); copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75});
#else
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
copy_coordinate(path->upper_bound_block[0], (float[]){2,7});
copy_coordinate(path->lower_bound_block[1], (float[]){2,0});
copy_coordinate(path->upper_bound_block[1], (float[]){4,2});
copy_coordinate(path->lower_bound_block[2], (float[]){4,0.5});
copy_coordinate(path->upper_bound_block[2], (float[]){8,3});
copy_coordinate(path->lower_bound_block[3], (float[]){8,0});
copy_coordinate(path->upper_bound_block[3], (float[]){16,2});
copy_coordinate(path->lower_bound_block[4], (float[]){16,0});
copy_coordinate(path->upper_bound_block[4], (float[]){18,7});
copy_coordinate(path->lower_bound_block[5], (float[]){6,7});
copy_coordinate(path->upper_bound_block[5], (float[]){18,9});
copy_coordinate(path->lower_bound_block[6], (float[]){2,6});
copy_coordinate(path->upper_bound_block[6], (float[]){6,8});
#endif #endif
update_bounds_limits_blocks(path); update_bounds_limits_blocks(path);
@@ -309,9 +594,10 @@ TEST(first_learn_vehicle){
struct vehicle *car = create_vehicle(path); struct vehicle *car = create_vehicle(path);
config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */ config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
//config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
bool randomize=true; bool randomize=true;
float minR = 0, maxR = 1; float minR = -0.5, maxR = 0.5;
int randomRange = 500; int randomRange = 500;
size_t nb_prod_thread = 2; size_t nb_prod_thread = 2;
size_t nb_calc_thread = 4; size_t nb_calc_thread = 4;
@@ -325,20 +611,356 @@ TEST(first_learn_vehicle){
struct status_qlearning *qlstatus = create_status_qlearning (); struct status_qlearning *qlstatus = create_status_qlearning ();
struct delay_params *dly = create_delay_params ( struct delay_params *dly = create_delay_params (
200/*size_t delay_between_episodes*/, 100/*size_t delay_between_episodes*/,
20/*size_t delay_between_games*/ 10/*size_t delay_between_games*/
); );
struct qlearning_params *qlparams = create_qlearning_params ( struct qlearning_params *qlparams = create_qlearning_params (
0.95/*float gamma*/, 0.95/*float gamma*/,
learning_rate, learning_rate,
0 /* (not used!)float discount_factor*/, 0 /* (not used!)float discount_factor*/,
0.99/*float exploration_factor*/, 0.99 /*float exploration_factor*/,
20/*long int nb_training_before_update_weight_in_target*/, 20/*long int nb_training_before_update_weight_in_target*/,
10000/*size_t number_episodes*/ 10000/*size_t number_episodes*/
); );
/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
*/
struct print_params *pprint = create_print_params( struct print_params *pprint = create_print_params(
0.2/*float scale_x*/,0.4 /*float scale_y*/, 12/*float scale_x*/,12 /*float scale_y*/,
dly/*struct delay_params * dly_p*/
);
struct RL_agent *rlAgent = create_RL_agent (
nnetworks /*struct networks_qlearning * networks*/,
car /*struct vehicle * car*/,
qlstatus /*struct status_qlearning * status*/,
pprint /*struct print_params * pprint*/,
qlparams/*struct qlearning_params *qlearnParams*/
);
learn_to_drive(rlAgent);
free_RL_agent(rlAgent);
}
#endif
#if 0
TEST(first_learn_vehicle){
size_t nb_block = 7;
size_t dim= 2;
struct blocks * path = create_blocks(nb_block, dim);
#if 1
copy_coordinate(path->lower_bound_block[0], (float[]){0,300});
copy_coordinate(path->upper_bound_block[0], (float[]){400,700});
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
copy_coordinate(path->upper_bound_block[1], (float[]){1000,300});
copy_coordinate(path->lower_bound_block[2], (float[]){1000,50});
copy_coordinate(path->upper_bound_block[2], (float[]){1400,500});
copy_coordinate(path->lower_bound_block[3], (float[]){1400,200});
copy_coordinate(path->upper_bound_block[3], (float[]){1800,700});
copy_coordinate(path->lower_bound_block[4], (float[]){1100,700});
copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000});
copy_coordinate(path->lower_bound_block[5], (float[]){800,600});
copy_coordinate(path->upper_bound_block[5], (float[]){1100,975});
copy_coordinate(path->lower_bound_block[6], (float[]){100,700});
copy_coordinate(path->upper_bound_block[6], (float[]){800,975});
#else
copy_coordinate(path->lower_bound_block[0], (float[]){0,3});
copy_coordinate(path->upper_bound_block[0], (float[]){4,7});
copy_coordinate(path->lower_bound_block[1], (float[]){1,0});
copy_coordinate(path->upper_bound_block[1], (float[]){10,3});
copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5});
copy_coordinate(path->upper_bound_block[2], (float[]){14,5});
copy_coordinate(path->lower_bound_block[3], (float[]){14,2});
copy_coordinate(path->upper_bound_block[3], (float[]){18,7});
copy_coordinate(path->lower_bound_block[4], (float[]){11,7});
copy_coordinate(path->upper_bound_block[4], (float[]){17,10});
copy_coordinate(path->lower_bound_block[5], (float[]){8,6});
copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75});
copy_coordinate(path->lower_bound_block[6], (float[]){1,7});
copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75});
#endif
update_bounds_limits_blocks(path);
struct vehicle *car = create_vehicle(path);
config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
//config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
bool randomize=true;
float minR = -0.5, maxR = 0.5;
int randomRange = 5000;
size_t nb_prod_thread = 2;
size_t nb_calc_thread = 4;
float learning_rate = 0.1;
struct networks_qlearning *nnetworks = create_nework_qlearning(
pconf,
randomize, minR, maxR, randomRange,
nb_prod_thread, nb_calc_thread,
learning_rate
);
struct status_qlearning *qlstatus = create_status_qlearning ();
struct delay_params *dly = create_delay_params (
100/*size_t delay_between_episodes*/,
10/*size_t delay_between_games*/
);
struct qlearning_params *qlparams = create_qlearning_params (
0.95/*float gamma*/,
learning_rate,
0 /* (not used!)float discount_factor*/,
0.85 /*float exploration_factor*/,
20/*long int nb_training_before_update_weight_in_target*/,
10000/*size_t number_episodes*/
);
/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
*/
struct print_params *pprint = create_print_params(
20/*float scale_x*/,40 /*float scale_y*/,
dly/*struct delay_params * dly_p*/
);
struct RL_agent *rlAgent = create_RL_agent (
nnetworks /*struct networks_qlearning * networks*/,
car /*struct vehicle * car*/,
qlstatus /*struct status_qlearning * status*/,
pprint /*struct print_params * pprint*/,
qlparams/*struct qlearning_params *qlearnParams*/
);
learn_to_drive(rlAgent);
free_RL_agent(rlAgent);
}
#endif
#if 0
TEST(first_learn_vehicle){
size_t nb_block = 7;
size_t dim= 2;
struct blocks * path = create_blocks(nb_block, dim);
#if 1
copy_coordinate(path->lower_bound_block[0], (float[]){0,300});
copy_coordinate(path->upper_bound_block[0], (float[]){400,700});
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
copy_coordinate(path->upper_bound_block[1], (float[]){1000,300});
copy_coordinate(path->lower_bound_block[2], (float[]){1000,50});
copy_coordinate(path->upper_bound_block[2], (float[]){1400,500});
copy_coordinate(path->lower_bound_block[3], (float[]){1400,200});
copy_coordinate(path->upper_bound_block[3], (float[]){1800,700});
copy_coordinate(path->lower_bound_block[4], (float[]){1100,700});
copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000});
copy_coordinate(path->lower_bound_block[5], (float[]){800,600});
copy_coordinate(path->upper_bound_block[5], (float[]){1100,975});
copy_coordinate(path->lower_bound_block[6], (float[]){100,700});
copy_coordinate(path->upper_bound_block[6], (float[]){800,975});
#else
copy_coordinate(path->lower_bound_block[0], (float[]){0,3});
copy_coordinate(path->upper_bound_block[0], (float[]){4,7});
copy_coordinate(path->lower_bound_block[1], (float[]){1,0});
copy_coordinate(path->upper_bound_block[1], (float[]){10,3});
copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5});
copy_coordinate(path->upper_bound_block[2], (float[]){14,5});
copy_coordinate(path->lower_bound_block[3], (float[]){14,2});
copy_coordinate(path->upper_bound_block[3], (float[]){18,7});
copy_coordinate(path->lower_bound_block[4], (float[]){11,7});
copy_coordinate(path->upper_bound_block[4], (float[]){17,10});
copy_coordinate(path->lower_bound_block[5], (float[]){8,6});
copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75});
copy_coordinate(path->lower_bound_block[6], (float[]){1,7});
copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75});
#endif
update_bounds_limits_blocks(path);
struct vehicle *car = create_vehicle(path);
config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
//config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
bool randomize=true;
float minR = -0.5, maxR = 0.5;
int randomRange = 5000;
size_t nb_prod_thread = 2;
size_t nb_calc_thread = 4;
float learning_rate = 0.1;
struct networks_qlearning *nnetworks = create_nework_qlearning(
pconf,
randomize, minR, maxR, randomRange,
nb_prod_thread, nb_calc_thread,
learning_rate
);
struct status_qlearning *qlstatus = create_status_qlearning ();
struct delay_params *dly = create_delay_params (
100/*size_t delay_between_episodes*/,
10/*size_t delay_between_games*/
);
struct qlearning_params *qlparams = create_qlearning_params (
0.95/*float gamma*/,
learning_rate,
0 /* (not used!)float discount_factor*/,
0.85 /*float exploration_factor*/,
20/*long int nb_training_before_update_weight_in_target*/,
10000/*size_t number_episodes*/
);
/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
*/
struct print_params *pprint = create_print_params(
20/*float scale_x*/,40 /*float scale_y*/,
dly/*struct delay_params * dly_p*/
);
struct RL_agent *rlAgent = create_RL_agent (
nnetworks /*struct networks_qlearning * networks*/,
car /*struct vehicle * car*/,
qlstatus /*struct status_qlearning * status*/,
pprint /*struct print_params * pprint*/,
qlparams/*struct qlearning_params *qlearnParams*/
);
learn_to_drive(rlAgent);
free_RL_agent(rlAgent);
}
#endif
#if 0
TEST(first_learn_vehicle){
size_t nb_block = 7;
size_t dim= 2;
struct blocks * path = create_blocks(nb_block, dim);
#if 1
copy_coordinate(path->lower_bound_block[0], (float[]){0,300});
copy_coordinate(path->upper_bound_block[0], (float[]){400,700});
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
copy_coordinate(path->upper_bound_block[1], (float[]){1000,300});
copy_coordinate(path->lower_bound_block[2], (float[]){1000,50});
copy_coordinate(path->upper_bound_block[2], (float[]){1400,500});
copy_coordinate(path->lower_bound_block[3], (float[]){1400,200});
copy_coordinate(path->upper_bound_block[3], (float[]){1800,700});
copy_coordinate(path->lower_bound_block[4], (float[]){1100,700});
copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000});
copy_coordinate(path->lower_bound_block[5], (float[]){800,600});
copy_coordinate(path->upper_bound_block[5], (float[]){1100,975});
copy_coordinate(path->lower_bound_block[6], (float[]){100,700});
copy_coordinate(path->upper_bound_block[6], (float[]){800,975});
#else
copy_coordinate(path->lower_bound_block[0], (float[]){0,3});
copy_coordinate(path->upper_bound_block[0], (float[]){4,7});
copy_coordinate(path->lower_bound_block[1], (float[]){1,0});
copy_coordinate(path->upper_bound_block[1], (float[]){10,3});
copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5});
copy_coordinate(path->upper_bound_block[2], (float[]){14,5});
copy_coordinate(path->lower_bound_block[3], (float[]){14,2});
copy_coordinate(path->upper_bound_block[3], (float[]){18,7});
copy_coordinate(path->lower_bound_block[4], (float[]){11,7});
copy_coordinate(path->upper_bound_block[4], (float[]){17,10});
copy_coordinate(path->lower_bound_block[5], (float[]){8,6});
copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75});
copy_coordinate(path->lower_bound_block[6], (float[]){1,7});
copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75});
#endif
update_bounds_limits_blocks(path);
struct vehicle *car = create_vehicle(path);
config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
//config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
bool randomize=true;
float minR = -0.5, maxR = 0.5;
int randomRange = 5000;
size_t nb_prod_thread = 2;
size_t nb_calc_thread = 4;
float learning_rate = 0.1;
struct networks_qlearning *nnetworks = create_nework_qlearning(
pconf,
randomize, minR, maxR, randomRange,
nb_prod_thread, nb_calc_thread,
learning_rate
);
struct status_qlearning *qlstatus = create_status_qlearning ();
struct delay_params *dly = create_delay_params (
100/*size_t delay_between_episodes*/,
10/*size_t delay_between_games*/
);
struct qlearning_params *qlparams = create_qlearning_params (
0.95/*float gamma*/,
learning_rate,
0 /* (not used!)float discount_factor*/,
0.85 /*float exploration_factor*/,
20/*long int nb_training_before_update_weight_in_target*/,
10000/*size_t number_episodes*/
);
/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
*/
struct print_params *pprint = create_print_params(
20/*float scale_x*/,40 /*float scale_y*/,
dly/*struct delay_params * dly_p*/ dly/*struct delay_params * dly_p*/
); );
+15 -1
View File
@@ -627,6 +627,19 @@ void print_neurons_msg_##type(neurons_##type *nr, char *msg){\
}\ }\
}\ }\
\ \
void print_weight_in_neurons_##type(neurons_##type *nn, char *msg){\
neurons_##type *tmp = nn;\
size_t i = 0;\
char vmsg[250];\
while(tmp){\
if(tmp->weight_in){\
sprintf(vmsg,"%s layer %ld",msg,i++);\
print_tensor_msg_##type(tmp->weight_in, vmsg);\
}\
tmp = tmp->next_layer;\
}\
}\
\
void free_neurons_##type(neurons_##type *base){\ void free_neurons_##type(neurons_##type *base){\
neurons_##type *temp = base, *ttemp;\ neurons_##type *temp = base, *ttemp;\
while(temp){\ while(temp){\
@@ -747,7 +760,7 @@ size_t learning_online2_neurons_##type(neurons_##type *base, data_set_##type *da
return nbreps;\ return nbreps;\
}\ }\
\ \
void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link){\ neurons_##type * calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link){\
for(size_t i=0; i<(input->dim)->rank; ++i) (base->output)->x[i]=input->x[i];\ for(size_t i=0; i<(input->dim)->rank; ++i) (base->output)->x[i]=input->x[i];\
neurons_##type * tmp=base->next_layer;\ neurons_##type * tmp=base->next_layer;\
while(tmp){\ while(tmp){\
@@ -755,6 +768,7 @@ void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##t
if(tmp->next_layer==NULL){\ if(tmp->next_layer==NULL){\
/*print_tensor_msg_##type(tmp->output,"retult");*/\ /*print_tensor_msg_##type(tmp->output,"retult");*/\
*output_link = tmp->output;\ *output_link = tmp->output;\
return tmp;\
}\ }\
tmp = tmp->next_layer;\ tmp = tmp->next_layer;\
}\ }\
+32 -1
View File
@@ -77,7 +77,9 @@ void setup_networks_layers_without_weights_##type(neurons_##type **base_nr, size
void setup_networks_layers_without_weights_from_config_##type(neurons_##type **base, config_layers *pconf);\ void setup_networks_layers_without_weights_from_config_##type(neurons_##type **base, config_layers *pconf);\
void setup_networks_OneD_##type(neurons_##type **base_nr, size_t *array_dim_in_layers, size_t nb_layers, bool randomize, type minR, type maxR, int randomRange);\ void setup_networks_OneD_##type(neurons_##type **base_nr, size_t *array_dim_in_layers, size_t nb_layers, bool randomize, type minR, type maxR, int randomRange);\
void init_in_out_all_networks_OneD_##type(neurons_##type *nr, type *in, size_t sz_in, type *out, size_t sz_out);\ void init_in_out_all_networks_OneD_##type(neurons_##type *nr, type *in, size_t sz_in, type *out, size_t sz_out);\
\
void print_neurons_msg_##type(neurons_##type *nr, char * msg);\ void print_neurons_msg_##type(neurons_##type *nr, char * msg);\
void print_weight_in_neurons_##type(neurons_##type *nn, char *msg);\
\ \
void free_neurons_##type(neurons_##type *base);\ void free_neurons_##type(neurons_##type *base);\
\ \
@@ -106,7 +108,7 @@ void print_data_set_msg_##type(data_set_##type *ds, char *msg);\
\ \
size_t learning_online_neurons_##type(neurons_##type *base, data_set_##type *dataset, bool (*condition)(type, size_t));\ size_t learning_online_neurons_##type(neurons_##type *base, data_set_##type *dataset, bool (*condition)(type, size_t));\
size_t learning_online2_neurons_##type(neurons_##type *base, data_set_##type *dataset, bool (*condition)(type, size_t));\ size_t learning_online2_neurons_##type(neurons_##type *base, data_set_##type *dataset, bool (*condition)(type, size_t));\
void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link);\ neurons_##type * calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link);\
void print_predict_by_network_neurons_##type(neurons_##type *base, tensor_##type *input);\ void print_predict_by_network_neurons_##type(neurons_##type *base, tensor_##type *input);\
void print_predict_by_network_with_error_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type *target);\ void print_predict_by_network_with_error_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type *target);\
\ \
@@ -134,4 +136,33 @@ GEN_NEURON_(TYPE_DOUBLE)
}\ }\
}while(0);\ }while(0);\
#define COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(type, attribute, dstNN ,sourceNN)\
do{\
neurons_##type *tmpn = dstNN;\
neurons_##type *srcNN = sourceNN;\
while(tmpn){\
if(tmpn->attribute)\
copy_tensor_##type(tmpn->attribute, srcNN->attribute);\
tmpn = tmpn->next_layer;\
srcNN = srcNN->next_layer;\
}\
}while(0);\
#define PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(type, neuronVar, attribute, msg)\
do{\
neurons_##type *tmpn = neuronVar;\
char *vmsg=malloc(strlen(msg)+70);\
size_t i=0;\
while(tmpn){\
sprintf(vmsg,"%s layer %ld",msg,i++);\
if(tmpn->attribute)\
print_tensor_msg_##type(tmpn->attribute, vmsg);\
tmpn = tmpn->next_layer;\
}\
free(vmsg);\
}while(0);\
#endif /*__NEURON_T_C__H*/ #endif /*__NEURON_T_C__H*/
+7 -6
View File
@@ -368,10 +368,10 @@ void print_tensor_msg_##type(tensor_##type *T,char *msg) {\
/*size_t j=0 ,k=0*/;\ /*size_t j=0 ,k=0*/;\
size_t *coord = malloc(sizeof(long int)*(T->dim)->size); \ size_t *coord = malloc(sizeof(long int)*(T->dim)->size); \
char *val=NULL;\ char *val=NULL;\
char *dimsg=malloc(512);\ /*char *dimsg=malloc(512);\
sprintf(dimsg,"(%s)->dim",msg);\ sprintf(dimsg,"(%s)->dim",msg);\
printDebug_dimension(T->dim,dimsg);\ printDebug_dimension(T->dim,dimsg);\
printf("%s\n",msg);\ */printf("%s\n",msg);\
long int begin , end /*, beginIter , endIter*/ ;\ long int begin , end /*, beginIter , endIter*/ ;\
long int (*iter)(long int) ;\ long int (*iter)(long int) ;\
bool (*cond)(long int, long int) ; \ bool (*cond)(long int, long int) ; \
@@ -392,10 +392,11 @@ void print_tensor_msg_##type(tensor_##type *T,char *msg) {\
else break;\ else break;\
}\ }\
}\ }\
printf(" [");\ /*printf(" [");\
for(size_t k=0; k<(T->dim)->size;++k) printf(" %ld,",coord[k]);\ for(size_t k=0; k<(T->dim)->size;++k) printf(" %ld",coord[k]);\
val=type##_TO_STR(T->x[i]);\ */val=type##_TO_STR(T->x[i]);\
printf(" |#%ld]: %s, ",i,val);\ printf(" |#%ld]: %s, ",i,val);\
/*printf(" %s, ",val);*/\
free(val); val=NULL;\ free(val); val=NULL;\
if(coord[begin]==(T->dim)->perm[begin]-1){\ if(coord[begin]==(T->dim)->perm[begin]-1){\
size_t count=0;\ size_t count=0;\
@@ -411,7 +412,7 @@ void print_tensor_msg_##type(tensor_##type *T,char *msg) {\
\ \
free(coord);\ free(coord);\
printf("\n");\ printf("\n");\
free(dimsg);\ /*free(dimsg);*/\
}\ }\
\ \
\ \