Add some MACRO in neuron_t and debug deepQlearning
This commit is contained in:
@@ -21,10 +21,12 @@ float D_L2(float t, float o){
|
||||
}
|
||||
|
||||
void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * networks){
|
||||
copy_weight_in_neurons_TYPE_FLOAT(networks->target_net, networks->main_net);
|
||||
//copy_weight_in_neurons_TYPE_FLOAT(networks->target_net, networks->main_net);
|
||||
COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(TYPE_FLOAT,weight_in, networks->target_net, networks->main_net);
|
||||
}
|
||||
void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks){
|
||||
copy_weight_in_neurons_TYPE_FLOAT(networks->best_net, networks->main_net);
|
||||
//copy_weight_in_neurons_TYPE_FLOAT(networks->best_net, networks->main_net);
|
||||
COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(TYPE_FLOAT,weight_in, networks->best_net, networks->main_net);
|
||||
}
|
||||
|
||||
struct networks_qlearning * create_nework_qlearning(
|
||||
@@ -70,6 +72,8 @@ struct status_qlearning * create_status_qlearning (){
|
||||
|
||||
status_ql->nb_training_after_updated_weight_in_target = 0;
|
||||
|
||||
status_ql->nb_episodes = 0;
|
||||
|
||||
return status_ql;
|
||||
}
|
||||
|
||||
@@ -91,6 +95,7 @@ struct print_params * create_print_params(float scale_x, float scale_y, struct
|
||||
pprint->scale_x = scale_x;
|
||||
pprint->scale_y = scale_y;
|
||||
pprint->delay = delay;
|
||||
pprint->string_space = malloc(LOG_LENTH+1);
|
||||
pthread_mutex_init(&(pprint->mut_printed), NULL);
|
||||
|
||||
int i;
|
||||
@@ -164,6 +169,7 @@ void free_delay_params (struct delay_params *dly_p){
|
||||
}
|
||||
|
||||
void free_print_params (struct print_params *pprint){
|
||||
free(pprint->string_space);
|
||||
pthread_mutex_destroy(&(pprint->mut_printed));
|
||||
free_delay_params(pprint->delay);
|
||||
free(pprint);
|
||||
@@ -192,13 +198,14 @@ void train_qlearning(struct RL_agent * rlAgent,
|
||||
neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net;
|
||||
tensor_TYPE_FLOAT * new_state = rlAgent->car->sensor /*input*/;
|
||||
tensor_TYPE_FLOAT * state = rlAgent->car->old_sensor /*input*/;
|
||||
calculate_output_by_network_neurons_TYPE_FLOAT(net_main, state, &action_value);
|
||||
neurons_TYPE_FLOAT *ttmp = calculate_output_by_network_neurons_TYPE_FLOAT(net_main, state, &action_value);
|
||||
calculate_output_by_network_neurons_TYPE_FLOAT(net_target, new_state, &next_action_value);
|
||||
tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim);
|
||||
|
||||
struct game_status * car_status = rlAgent->car->status;
|
||||
struct qlearning_params * qlParams = rlAgent->qlearnParams;
|
||||
copy_tensor_TYPE_FLOAT(experimental_values, action_value) ;
|
||||
//copy_tensor_TYPE_FLOAT(experimental_values, next_action_value) ;
|
||||
// experimental_values === Q-tab learning
|
||||
if(car_status->done){
|
||||
experimental_values->x[action] = -100;
|
||||
@@ -206,19 +213,12 @@ void train_qlearning(struct RL_agent * rlAgent,
|
||||
experimental_values->x[action] = car_status->reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ;
|
||||
}
|
||||
// ***
|
||||
neurons_TYPE_FLOAT *tmp=NULL, *ttmp=NULL, *base = net_main;
|
||||
init_copy_in_out_networks_from_tensors_TYPE_FLOAT(base,base->output , experimental_values );\
|
||||
tmp=net_main->next_layer;\
|
||||
while(tmp){\
|
||||
calc_out_neurons_TYPE_FLOAT(tmp);\
|
||||
ttmp = tmp;\
|
||||
tmp = tmp->next_layer;\
|
||||
}\
|
||||
while(ttmp != base){\
|
||||
calc_delta_neurons_TYPE_FLOAT(ttmp);\
|
||||
update_weight_neurons_TYPE_FLOAT(ttmp);\
|
||||
ttmp = ttmp->prev_layer;\
|
||||
}\
|
||||
copy_tensor_TYPE_FLOAT(ttmp->target, experimental_values);
|
||||
while(ttmp != net_main){
|
||||
calc_delta_neurons_TYPE_FLOAT(ttmp);
|
||||
update_weight_neurons_TYPE_FLOAT(ttmp);
|
||||
ttmp = ttmp->prev_layer;
|
||||
}
|
||||
|
||||
// ***
|
||||
float new_value = ( (net_main->learning_rate < qlParams->minimum_threshold_learning_rate /*0.0001*/) ? net_main->learning_rate :(net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ );
|
||||
@@ -230,19 +230,28 @@ void train_qlearning(struct RL_agent * rlAgent,
|
||||
}
|
||||
|
||||
int select_action(struct RL_agent * rlAgent){
|
||||
//static size_t explore = 0;
|
||||
int action;
|
||||
tensor_TYPE_FLOAT * action_value = NULL;
|
||||
calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->old_sensor, &action_value);
|
||||
long int NUMBER_EPISODE2 = (rlAgent->qlearnParams->number_episodes);
|
||||
NUMBER_EPISODE2 = NUMBER_EPISODE2 * NUMBER_EPISODE2;
|
||||
srand(time(NULL));
|
||||
//calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->old_sensor, &action_value);
|
||||
calculate_output_by_network_neurons_TYPE_FLOAT(rlAgent->networks->main_net, rlAgent->car->sensor, &action_value);
|
||||
//long int NUMBER_EPISODE2 = (rlAgent->qlearnParams->number_episodes)*100;
|
||||
int NUMBER_EPISODE2 = 3000;
|
||||
//NUMBER_EPISODE2 = NUMBER_EPISODE2 * NUMBER_EPISODE2;
|
||||
// static bool init = true ;
|
||||
// if(init){
|
||||
srand(time(NULL));
|
||||
// init =false;
|
||||
// }
|
||||
int random = rand() % NUMBER_EPISODE2;
|
||||
float proba_explor = (float)random / NUMBER_EPISODE2;
|
||||
if(proba_explor <= rlAgent->qlearnParams->exploration_factor ){
|
||||
action = rand() % action_value->dim->rank ;
|
||||
float proba_explor = (float)(random ) / NUMBER_EPISODE2;
|
||||
if(proba_explor > rlAgent->qlearnParams->exploration_factor ){
|
||||
action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank );
|
||||
}
|
||||
else{
|
||||
action = ARG_MAX_ARRAY_TYPE_FLOAT( action_value->x, action_value->dim->rank );
|
||||
action = rand() % action_value->dim->rank ;
|
||||
// explore++;
|
||||
//printf(" EXPLORE :%ld, action : %d , factor : %f nb_episodes : %ld \n",explore,action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
|
||||
}
|
||||
return action;
|
||||
}
|
||||
@@ -261,20 +270,34 @@ void learn_to_drive(struct RL_agent * rlAgent){
|
||||
reset(car);
|
||||
qlStatus->nb_training_after_updated_weight_in_target = 0;
|
||||
while(true){
|
||||
++(qlStatus->nb_episodes);
|
||||
++(qlStatus->nb_training_after_updated_weight_in_target);
|
||||
action = select_action(rlAgent);
|
||||
sprintf(msg," dir:%.0f : %s, ", car->direction ,action_name[action]);
|
||||
add_string_log_M(car_status,msg);
|
||||
step_vehicle(car, action);
|
||||
train_qlearning(rlAgent, action);
|
||||
if(pprint->printed){
|
||||
if(/*(qlStatus->nb_episodes %15 == 0) && */ pprint->printed){
|
||||
pthread_mutex_lock(&(pprint->mut_printed));
|
||||
print_vehicle_n_path(car, pprint->scale_x, pprint->scale_y);
|
||||
pthread_mutex_unlock(&(pprint->mut_printed));
|
||||
printf("%s ",pprint->string_space);
|
||||
printf("ep: %ld ",index_episode);
|
||||
printf("ep: %ld\n",index_episode);
|
||||
neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net;
|
||||
for(size_t i=0; i<net_main->output->dim->rank; ++i) printf("{sensro[%s]:%f }",action_name[i%COUNT_ACTION],net_main->output->x[i]);
|
||||
neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net;
|
||||
for(size_t i=0; i<net_main->output->dim->rank; ++i) {
|
||||
printf("{sensro[%s]:%f "/*vs %f / VS / %f */" vs oldsens[%s]: %f}\n",action_name[i%COUNT_ACTION],net_target->output->x[i],
|
||||
/*car->sensor->x[i] ,car->old_sensor->x[i],
|
||||
*/action_name[i%COUNT_ACTION],net_main->output->x[i]);
|
||||
|
||||
}
|
||||
printf("\n< %f > ( %s ) \n", car->direction, action_name[action % COUNT_ACTION]);
|
||||
//print_weight_in_neurons_TYPE_FLOAT(net_main, "net_main_wei");
|
||||
//PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, weight_in, "net_main_we_in");
|
||||
PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, output, "net_main_out");
|
||||
//PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_target, output, "net_target_out");
|
||||
//PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(TYPE_FLOAT, net_main, input, "net_main_input");
|
||||
printf("action : %d , factor : %f nb_episodes : %ld \n",action,rlAgent->qlearnParams->exploration_factor, rlAgent->status->nb_episodes);
|
||||
Sleep(pprint->delay->delay_between_games);
|
||||
}
|
||||
//done in step ... copy_tensor_TYPE_FLOAT(car->old_sensor, car->sensor);
|
||||
|
||||
@@ -45,6 +45,7 @@ struct status_qlearning {
|
||||
struct main_list_TYPE_L_INT * list_target_cumul;
|
||||
struct main_list_TYPE_L_INT * progress_best_cumul;
|
||||
long int nb_training_after_updated_weight_in_target;
|
||||
size_t nb_episodes;
|
||||
};
|
||||
|
||||
struct delay_params {
|
||||
@@ -58,7 +59,7 @@ struct print_params {
|
||||
float scale_x;
|
||||
float scale_y;
|
||||
struct delay_params *delay;
|
||||
char string_space[LOG_LENTH];
|
||||
char *string_space;//[LOG_LENTH];
|
||||
};
|
||||
|
||||
struct networks_qlearning {
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
//#define CENTER 1
|
||||
//#define RIGHT 2
|
||||
|
||||
#define LIMIT_DISTANCE ((float)1)/50
|
||||
#define LIMIT_DISTANCE ((float)((SUBDIVISION-1)/10))/SUBDIVISION
|
||||
|
||||
#define REWARD_STOP -1000
|
||||
#define REWARD_CONTINUE 100
|
||||
@@ -68,6 +68,12 @@ struct vehicle * create_vehicle(struct blocks *path){
|
||||
ret_vehicle->coord = create_coordinate(2);
|
||||
ret_vehicle->sensor = create_sensors(NB_SENSORS);
|
||||
ret_vehicle->old_sensor = create_sensors(NB_SENSORS);
|
||||
|
||||
for(size_t i=0; i<NB_SENSORS;++i){
|
||||
ret_vehicle->sensor->x[i]=0;
|
||||
ret_vehicle->old_sensor->x[i]=0;
|
||||
}
|
||||
|
||||
ret_vehicle->path = path;
|
||||
|
||||
ret_vehicle->status = create_game_status();
|
||||
@@ -387,15 +393,15 @@ void print_vehicle_n_path(struct vehicle *v, float scale_x, float scale_y){
|
||||
|
||||
void move_vehicle(struct vehicle *v){
|
||||
v->coord->x[0] += v->speed * cos(v->direction * M_PI / 180);
|
||||
v->coord->x[1] += v->speed * sin(v->direction * M_PI / 180);
|
||||
v->coord->x[1] -= v->speed * sin(v->direction * M_PI / 180);
|
||||
}
|
||||
|
||||
float distance2_coordinate(coordinate *c0, coordinate *c1){
|
||||
if(c0->dim->rank != c1->dim->rank) return 0;
|
||||
if(c0->dim->rank != c1->dim->rank) return -1;
|
||||
float d=0, tmp;
|
||||
for(size_t i=0; i<c0->dim->rank; ++i){
|
||||
tmp = (c0->x[i] - c1->x[i]);
|
||||
d += tmp * tmp;
|
||||
d += (tmp * tmp);
|
||||
}
|
||||
return sqrt(d);
|
||||
}
|
||||
@@ -404,19 +410,27 @@ float distance2_coordinate(coordinate *c0, coordinate *c1){
|
||||
direction_radian = (v->direction + deviation) * M_PI / 180;\
|
||||
while( is_in_blocks(v->path, diStep_sensor )){\
|
||||
diStep_sensor->x[0] += step_sensor * cos(direction_radian);\
|
||||
diStep_sensor->x[1] += step_sensor * sin(direction_radian);\
|
||||
diStep_sensor->x[1] -= step_sensor * sin(direction_radian);\
|
||||
}\
|
||||
v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)/5))) ;\
|
||||
dist = (distance2_coordinate(diStep_sensor, v->coord)/5);\
|
||||
printf("| dist :%f | ",dist);\
|
||||
v->sensor->x[position] = (float)(MIN((SUBDIVISION-1),(int)dist))/SUBDIVISION ;\
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)/5))) ;\
|
||||
//v->sensor->x[position] = (MIN(49,(distance2_coordinate(diStep_sensor, v->coord)))) / 50;\
|
||||
//v->sensor->x[position] = (MIN(49,(int)(distance2_coordinate(diStep_sensor, v->coord)/10))) / 50;\
|
||||
|
||||
void read_sensor(struct vehicle *v){
|
||||
copy_tensor_TYPE_FLOAT(v->old_sensor, v->sensor);
|
||||
float step_sensor = ((float)1)/SUBDIVISION;
|
||||
float step_sensor = STEP; // /SUBDIVISION;
|
||||
coordinate * diStep_sensor = create_coordinate(2);
|
||||
copy_coordinate(diStep_sensor, v->coord->x);
|
||||
|
||||
float dist;
|
||||
printf("\n");
|
||||
// count the number of step until we go out of the path = distance
|
||||
// center sensor
|
||||
float direction_radian ;
|
||||
@@ -424,11 +438,11 @@ void read_sensor(struct vehicle *v){
|
||||
|
||||
copy_coordinate(diStep_sensor, v->coord->x);
|
||||
// right sensor
|
||||
SENSOR_VALUE_CALCULATE(RIGHT,45);
|
||||
SENSOR_VALUE_CALCULATE(RIGHT,-45);
|
||||
|
||||
copy_coordinate(diStep_sensor, v->coord->x);
|
||||
// left sensor
|
||||
SENSOR_VALUE_CALCULATE(LEFT, -45);
|
||||
SENSOR_VALUE_CALCULATE(LEFT, 45);
|
||||
|
||||
free_coordinate(diStep_sensor);
|
||||
|
||||
@@ -482,8 +496,8 @@ void add_string_log(struct game_status *status, char *str ){
|
||||
void step_vehicle(struct vehicle *v, int action){
|
||||
//float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT]
|
||||
float action_x[NB_ACTION]={-3,0,3}; // [LEFT, CENTER, RIGHT]
|
||||
v->direction = v->direction + action_x[action % 3];
|
||||
v->speed = ((float)1)/5;
|
||||
v->direction = (float)((int)(v->direction + action_x[action % 3]) % 360) ;
|
||||
v->speed = SPEED; // /5;
|
||||
move_vehicle(v);
|
||||
read_sensor(v);
|
||||
struct game_status *status = v->status;
|
||||
@@ -495,22 +509,24 @@ void step_vehicle(struct vehicle *v, int action){
|
||||
struct blocks * path = v->path;
|
||||
//printf(" center : %f vs %f direction: %f\n",v->sensor->x[CENTER], LIMIT_DISTANCE, v->direction);
|
||||
if( v->sensor->x[CENTER]<= LIMIT_DISTANCE ){
|
||||
//if( MAX_ARRAY_TYPE_FLOAT(v->sensor->x,v->sensor->dim->rank)<= LIMIT_DISTANCE ){
|
||||
status->reward = REWARD_STOP;
|
||||
status->done = true;
|
||||
}
|
||||
else{
|
||||
bool broken = false;
|
||||
long prec, next;
|
||||
long pprec, prec, next;
|
||||
char msg[48];
|
||||
for(long i=0; i< path->nb_blocks; ++i){
|
||||
//prec = (i-1)%(path->nb_blocks);
|
||||
pprec = (i + path->nb_blocks - 2 )%(path->nb_blocks);
|
||||
prec = (i + path->nb_blocks - 1 )%(path->nb_blocks);
|
||||
next = (i + 1)%(path->nb_blocks);
|
||||
//printf("i:%ld, prec:%ld, next:%ld: maker %d, prec marker %d\n",i,prec,next, path->marker[i], path->marker[prec]);
|
||||
if(is_in_block_index(path, i, v->coord)){
|
||||
if(path->marker[i] == false && path->marker[prec] == true){
|
||||
path->marker[i]=true;
|
||||
path->marker[prec]=false;
|
||||
path->marker[pprec]=false;
|
||||
status->reward = REWARD_CONTINUE;
|
||||
status->done = false;
|
||||
sprintf(msg," %ld,",i);
|
||||
@@ -536,9 +552,10 @@ void step_vehicle(struct vehicle *v, int action){
|
||||
|
||||
}
|
||||
|
||||
|
||||
#define RANDOM 1
|
||||
|
||||
void reset(struct vehicle *v){
|
||||
//static bool init = true;
|
||||
struct blocks * path = v->path;
|
||||
long int i;
|
||||
for(i=0; i<(path->nb_blocks -1); ++i)
|
||||
@@ -547,19 +564,32 @@ void reset(struct vehicle *v){
|
||||
v->status->cumulative_reward = 0;
|
||||
sprintf(v->status->log,"\n");
|
||||
v->status->cur_log = 0;
|
||||
srand(time(NULL));
|
||||
//if(init){
|
||||
srand(time(NULL));
|
||||
// init = false;
|
||||
//}
|
||||
int random;
|
||||
int diff;
|
||||
diff = path->upper_bound_block[0]->x[0] - path->lower_bound_block[0]->x[0];
|
||||
random = rand() % diff;
|
||||
//v->coord->x[0] = path->lower_bound_block[0]->x[0] + random;
|
||||
v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2;
|
||||
random = rand() % (diff/2) ;
|
||||
#if RANDOM
|
||||
v->coord->x[0] = path->lower_bound_block[0]->x[0] + random;
|
||||
#else
|
||||
v->coord->x[0] = path->lower_bound_block[0]->x[0] + diff/2;
|
||||
#endif
|
||||
diff = path->upper_bound_block[0]->x[1] - path->lower_bound_block[0]->x[1];
|
||||
random = rand() % diff;
|
||||
//v->coord->x[1] = path->lower_bound_block[0]->x[1] + random;
|
||||
v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2;
|
||||
random = rand() % (diff/2);
|
||||
#if RANDOM
|
||||
v->coord->x[1] = path->lower_bound_block[0]->x[1] + random;
|
||||
#else
|
||||
v->coord->x[1] = path->lower_bound_block[0]->x[1] + diff/2;
|
||||
#endif
|
||||
random = rand() % 50;
|
||||
//v->direction = random - 25;
|
||||
v->direction = -90;
|
||||
v->speed = 1;
|
||||
#if RANDOM
|
||||
v->direction = random - 25;
|
||||
#else
|
||||
v->direction = -90;
|
||||
#endif
|
||||
v->speed = SPEED;
|
||||
read_sensor(v);
|
||||
}
|
||||
|
||||
@@ -26,7 +26,10 @@
|
||||
|
||||
#define COUNT_ACTION 3
|
||||
|
||||
#define SUBDIVISION 5 //10
|
||||
#define SUBDIVISION 50
|
||||
|
||||
#define STEP 1
|
||||
#define SPEED 1
|
||||
|
||||
|
||||
struct game_status {
|
||||
|
||||
+645
-23
@@ -253,6 +253,137 @@ TEST(circle_path_vehicle){
|
||||
free_vehicle(vhcl);
|
||||
|
||||
|
||||
}
|
||||
|
||||
TEST(circle_path_vehicle_00){
|
||||
size_t nb_block = 7;
|
||||
size_t dim= 2;
|
||||
struct blocks * path = create_blocks(nb_block, dim);
|
||||
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,300});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){400,700});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){1000,300});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){1000,50});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){1400,500});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){1400,200});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){1800,700});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){1100,700});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){800,600});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){1100,975});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){100,700});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){800,975});
|
||||
|
||||
update_bounds_limits_blocks(path);
|
||||
|
||||
struct vehicle *vhcl = create_vehicle(path);
|
||||
|
||||
print_vehicle_n_path(vhcl, 20,40);
|
||||
|
||||
step_vehicle(vhcl, CENTER);
|
||||
Sleep(200);
|
||||
/* print_vehicle_n_path(vhcl, 0.2,0.4);
|
||||
|
||||
step_vehicle(vhcl, CENTER);
|
||||
Sleep(200);
|
||||
print_vehicle_n_path(vhcl, 0.2,0.4);
|
||||
|
||||
step_vehicle(vhcl, CENTER);
|
||||
Sleep(200);
|
||||
print_vehicle_n_path(vhcl, 0.2,0.4);
|
||||
|
||||
step_vehicle(vhcl, CENTER);
|
||||
Sleep(200);
|
||||
print_vehicle_n_path(vhcl, 0.2,0.4);
|
||||
*/
|
||||
free_vehicle(vhcl);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
TEST(circle_path_vehicle_50){
|
||||
size_t nb_block = 7;
|
||||
size_t dim= 2;
|
||||
struct blocks * path = create_blocks(nb_block, dim);
|
||||
#if 1
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){0,30});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){150,250});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){150,0});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){250,80});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){250,20});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){360,120});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){360,80});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){600,150});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){600,90});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){760,300});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){300,300});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){760,350});
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,250});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){410,300});
|
||||
|
||||
|
||||
#else
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){150,250});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){150,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){250,150});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){250,80});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){360,200});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){360,70});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){600,170});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){600,90});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){760,300});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){300,300});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){760,350});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){410,300});
|
||||
|
||||
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){100,250});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){250,80});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){250,0});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){360,140});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){360,70});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){600,140});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){600,90});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){720,300});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){300,300});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){720,350});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){410,300});
|
||||
|
||||
#endif
|
||||
|
||||
update_bounds_limits_blocks(path);
|
||||
|
||||
struct vehicle *vhcl = create_vehicle(path);
|
||||
|
||||
print_vehicle_n_path(vhcl, 10,10);
|
||||
|
||||
step_vehicle(vhcl, CENTER);
|
||||
Sleep(200);
|
||||
/* print_vehicle_n_path(vhcl, 0.2,0.4);
|
||||
|
||||
step_vehicle(vhcl, CENTER);
|
||||
Sleep(200);
|
||||
print_vehicle_n_path(vhcl, 0.2,0.4);
|
||||
|
||||
step_vehicle(vhcl, CENTER);
|
||||
Sleep(200);
|
||||
print_vehicle_n_path(vhcl, 0.2,0.4);
|
||||
|
||||
step_vehicle(vhcl, CENTER);
|
||||
Sleep(200);
|
||||
print_vehicle_n_path(vhcl, 0.2,0.4);
|
||||
*/
|
||||
free_vehicle(vhcl);
|
||||
|
||||
|
||||
}
|
||||
|
||||
TEST(reward_list){
|
||||
@@ -261,8 +392,125 @@ TEST(reward_list){
|
||||
free_status_qlearning(l_reward);
|
||||
}
|
||||
|
||||
|
||||
float f(float x){
|
||||
return 1/(1+exp((double)(-x)));
|
||||
}
|
||||
|
||||
float df(float x){
|
||||
return exp(-x)/ ((1+exp(-x)) * (1+exp(-x)));
|
||||
}
|
||||
#if 1
|
||||
TEST(first_learn_vehicle){
|
||||
TEST(first_learn_vehicle_rev50){
|
||||
size_t nb_block = 7;
|
||||
size_t dim= 2;
|
||||
struct blocks * path = create_blocks(nb_block, dim);
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){0,30});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){150,250});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){150,0});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){250,80});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){250,20});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){360,120});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){360,80});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){600,150});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){600,90});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){760,300});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){300,300});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){760,350});
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,250});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){410,300});
|
||||
|
||||
#else
|
||||
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){0,0});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){150,250});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){150,40});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){250,150});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){250,80});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){360,200});
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){360,70});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){600,150});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){600,90});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){760,300});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){260,300});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){760,360});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){0,250});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){410,300});
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
update_bounds_limits_blocks(path);
|
||||
|
||||
struct vehicle *car = create_vehicle(path);
|
||||
|
||||
config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
|
||||
//config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
|
||||
|
||||
bool randomize=true;
|
||||
float minR = -0.5, maxR = 0.5;
|
||||
int randomRange = 500;
|
||||
size_t nb_prod_thread = 2;
|
||||
size_t nb_calc_thread = 4;
|
||||
float learning_rate = 0.001;
|
||||
struct networks_qlearning *nnetworks = create_nework_qlearning(
|
||||
pconf,
|
||||
randomize, minR, maxR, randomRange,
|
||||
nb_prod_thread, nb_calc_thread,
|
||||
learning_rate
|
||||
);
|
||||
|
||||
struct status_qlearning *qlstatus = create_status_qlearning ();
|
||||
struct delay_params *dly = create_delay_params (
|
||||
100/*size_t delay_between_episodes*/,
|
||||
10/*size_t delay_between_games*/
|
||||
);
|
||||
|
||||
struct qlearning_params *qlparams = create_qlearning_params (
|
||||
0.95/*float gamma*/,
|
||||
learning_rate,
|
||||
0 /* (not used!)float discount_factor*/,
|
||||
0.99 /*float exploration_factor*/,
|
||||
20/*long int nb_training_before_update_weight_in_target*/,
|
||||
10000/*size_t number_episodes*/
|
||||
);
|
||||
/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
|
||||
*/
|
||||
struct print_params *pprint = create_print_params(
|
||||
12/*float scale_x*/,12 /*float scale_y*/,
|
||||
dly/*struct delay_params * dly_p*/
|
||||
);
|
||||
|
||||
struct RL_agent *rlAgent = create_RL_agent (
|
||||
nnetworks /*struct networks_qlearning * networks*/,
|
||||
car /*struct vehicle * car*/,
|
||||
qlstatus /*struct status_qlearning * status*/,
|
||||
pprint /*struct print_params * pprint*/,
|
||||
qlparams/*struct qlearning_params *qlearnParams*/
|
||||
);
|
||||
|
||||
learn_to_drive(rlAgent);
|
||||
|
||||
free_RL_agent(rlAgent);
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if 1
|
||||
TEST(first_learn_vehicle_50){
|
||||
size_t nb_block = 7;
|
||||
size_t dim= 2;
|
||||
struct blocks * path = create_blocks(nb_block, dim);
|
||||
@@ -270,6 +518,59 @@ TEST(first_learn_vehicle){
|
||||
|
||||
|
||||
#if 1
|
||||
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){150,250});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){150,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){250,150});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){250,80});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){360,200});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){360,70});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){600,170});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){600,90});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){760,300});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){300,300});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){760,350});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){410,300});
|
||||
|
||||
/*
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){100,250});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){250,80});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){250,0});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){360,140});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){360,70});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){600,140});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){600,90});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){720,300});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){300,300});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){720,350});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){0,250});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){410,300});
|
||||
|
||||
|
||||
|
||||
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,300});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){400,700});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){1000,300});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){1000,50});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){1400,500});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){1400,200});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){1800,700});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){1100,700});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){800,600});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){1100,975});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){100,700});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){800,975});
|
||||
*/
|
||||
|
||||
#else
|
||||
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,3});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){4,7});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){1,0});
|
||||
@@ -285,23 +586,7 @@ TEST(first_learn_vehicle){
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){1,7});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75});
|
||||
|
||||
#else
|
||||
|
||||
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,0});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){2,7});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){2,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){4,2});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){4,0.5});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){8,3});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){8,0});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){16,2});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){16,0});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){18,7});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){6,7});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){18,9});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){2,6});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){6,8});
|
||||
|
||||
#endif
|
||||
|
||||
update_bounds_limits_blocks(path);
|
||||
@@ -309,9 +594,10 @@ TEST(first_learn_vehicle){
|
||||
struct vehicle *car = create_vehicle(path);
|
||||
|
||||
config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
|
||||
//config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
|
||||
|
||||
bool randomize=true;
|
||||
float minR = 0, maxR = 1;
|
||||
float minR = -0.5, maxR = 0.5;
|
||||
int randomRange = 500;
|
||||
size_t nb_prod_thread = 2;
|
||||
size_t nb_calc_thread = 4;
|
||||
@@ -325,20 +611,356 @@ TEST(first_learn_vehicle){
|
||||
|
||||
struct status_qlearning *qlstatus = create_status_qlearning ();
|
||||
struct delay_params *dly = create_delay_params (
|
||||
200/*size_t delay_between_episodes*/,
|
||||
20/*size_t delay_between_games*/
|
||||
100/*size_t delay_between_episodes*/,
|
||||
10/*size_t delay_between_games*/
|
||||
);
|
||||
|
||||
struct qlearning_params *qlparams = create_qlearning_params (
|
||||
0.95/*float gamma*/,
|
||||
learning_rate,
|
||||
0 /* (not used!)float discount_factor*/,
|
||||
0.99/*float exploration_factor*/,
|
||||
0.99 /*float exploration_factor*/,
|
||||
20/*long int nb_training_before_update_weight_in_target*/,
|
||||
10000/*size_t number_episodes*/
|
||||
);
|
||||
/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
|
||||
*/
|
||||
struct print_params *pprint = create_print_params(
|
||||
0.2/*float scale_x*/,0.4 /*float scale_y*/,
|
||||
12/*float scale_x*/,12 /*float scale_y*/,
|
||||
dly/*struct delay_params * dly_p*/
|
||||
);
|
||||
|
||||
struct RL_agent *rlAgent = create_RL_agent (
|
||||
nnetworks /*struct networks_qlearning * networks*/,
|
||||
car /*struct vehicle * car*/,
|
||||
qlstatus /*struct status_qlearning * status*/,
|
||||
pprint /*struct print_params * pprint*/,
|
||||
qlparams/*struct qlearning_params *qlearnParams*/
|
||||
);
|
||||
|
||||
learn_to_drive(rlAgent);
|
||||
|
||||
free_RL_agent(rlAgent);
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
TEST(first_learn_vehicle){
|
||||
size_t nb_block = 7;
|
||||
size_t dim= 2;
|
||||
struct blocks * path = create_blocks(nb_block, dim);
|
||||
|
||||
|
||||
|
||||
#if 1
|
||||
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,300});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){400,700});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){1000,300});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){1000,50});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){1400,500});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){1400,200});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){1800,700});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){1100,700});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){800,600});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){1100,975});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){100,700});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){800,975});
|
||||
|
||||
|
||||
#else
|
||||
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,3});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){4,7});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){1,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){10,3});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){14,5});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){14,2});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){18,7});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){11,7});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){17,10});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){8,6});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){1,7});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75});
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
update_bounds_limits_blocks(path);
|
||||
|
||||
struct vehicle *car = create_vehicle(path);
|
||||
|
||||
config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
|
||||
//config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
|
||||
|
||||
bool randomize=true;
|
||||
float minR = -0.5, maxR = 0.5;
|
||||
int randomRange = 5000;
|
||||
size_t nb_prod_thread = 2;
|
||||
size_t nb_calc_thread = 4;
|
||||
float learning_rate = 0.1;
|
||||
struct networks_qlearning *nnetworks = create_nework_qlearning(
|
||||
pconf,
|
||||
randomize, minR, maxR, randomRange,
|
||||
nb_prod_thread, nb_calc_thread,
|
||||
learning_rate
|
||||
);
|
||||
|
||||
struct status_qlearning *qlstatus = create_status_qlearning ();
|
||||
struct delay_params *dly = create_delay_params (
|
||||
100/*size_t delay_between_episodes*/,
|
||||
10/*size_t delay_between_games*/
|
||||
);
|
||||
|
||||
struct qlearning_params *qlparams = create_qlearning_params (
|
||||
0.95/*float gamma*/,
|
||||
learning_rate,
|
||||
0 /* (not used!)float discount_factor*/,
|
||||
0.85 /*float exploration_factor*/,
|
||||
20/*long int nb_training_before_update_weight_in_target*/,
|
||||
10000/*size_t number_episodes*/
|
||||
);
|
||||
/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
|
||||
*/
|
||||
struct print_params *pprint = create_print_params(
|
||||
20/*float scale_x*/,40 /*float scale_y*/,
|
||||
dly/*struct delay_params * dly_p*/
|
||||
);
|
||||
|
||||
struct RL_agent *rlAgent = create_RL_agent (
|
||||
nnetworks /*struct networks_qlearning * networks*/,
|
||||
car /*struct vehicle * car*/,
|
||||
qlstatus /*struct status_qlearning * status*/,
|
||||
pprint /*struct print_params * pprint*/,
|
||||
qlparams/*struct qlearning_params *qlearnParams*/
|
||||
);
|
||||
|
||||
learn_to_drive(rlAgent);
|
||||
|
||||
free_RL_agent(rlAgent);
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
TEST(first_learn_vehicle){
|
||||
size_t nb_block = 7;
|
||||
size_t dim= 2;
|
||||
struct blocks * path = create_blocks(nb_block, dim);
|
||||
|
||||
|
||||
|
||||
#if 1
|
||||
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,300});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){400,700});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){1000,300});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){1000,50});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){1400,500});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){1400,200});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){1800,700});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){1100,700});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){800,600});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){1100,975});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){100,700});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){800,975});
|
||||
|
||||
|
||||
#else
|
||||
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,3});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){4,7});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){1,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){10,3});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){14,5});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){14,2});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){18,7});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){11,7});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){17,10});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){8,6});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){1,7});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75});
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
update_bounds_limits_blocks(path);
|
||||
|
||||
struct vehicle *car = create_vehicle(path);
|
||||
|
||||
config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
|
||||
//config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
|
||||
|
||||
bool randomize=true;
|
||||
float minR = -0.5, maxR = 0.5;
|
||||
int randomRange = 5000;
|
||||
size_t nb_prod_thread = 2;
|
||||
size_t nb_calc_thread = 4;
|
||||
float learning_rate = 0.1;
|
||||
struct networks_qlearning *nnetworks = create_nework_qlearning(
|
||||
pconf,
|
||||
randomize, minR, maxR, randomRange,
|
||||
nb_prod_thread, nb_calc_thread,
|
||||
learning_rate
|
||||
);
|
||||
|
||||
struct status_qlearning *qlstatus = create_status_qlearning ();
|
||||
struct delay_params *dly = create_delay_params (
|
||||
100/*size_t delay_between_episodes*/,
|
||||
10/*size_t delay_between_games*/
|
||||
);
|
||||
|
||||
struct qlearning_params *qlparams = create_qlearning_params (
|
||||
0.95/*float gamma*/,
|
||||
learning_rate,
|
||||
0 /* (not used!)float discount_factor*/,
|
||||
0.85 /*float exploration_factor*/,
|
||||
20/*long int nb_training_before_update_weight_in_target*/,
|
||||
10000/*size_t number_episodes*/
|
||||
);
|
||||
/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
|
||||
*/
|
||||
struct print_params *pprint = create_print_params(
|
||||
20/*float scale_x*/,40 /*float scale_y*/,
|
||||
dly/*struct delay_params * dly_p*/
|
||||
);
|
||||
|
||||
struct RL_agent *rlAgent = create_RL_agent (
|
||||
nnetworks /*struct networks_qlearning * networks*/,
|
||||
car /*struct vehicle * car*/,
|
||||
qlstatus /*struct status_qlearning * status*/,
|
||||
pprint /*struct print_params * pprint*/,
|
||||
qlparams/*struct qlearning_params *qlearnParams*/
|
||||
);
|
||||
|
||||
learn_to_drive(rlAgent);
|
||||
|
||||
free_RL_agent(rlAgent);
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
TEST(first_learn_vehicle){
|
||||
size_t nb_block = 7;
|
||||
size_t dim= 2;
|
||||
struct blocks * path = create_blocks(nb_block, dim);
|
||||
|
||||
|
||||
|
||||
#if 1
|
||||
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,300});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){400,700});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){100,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){1000,300});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){1000,50});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){1400,500});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){1400,200});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){1800,700});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){1100,700});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){1700,1000});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){800,600});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){1100,975});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){100,700});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){800,975});
|
||||
|
||||
|
||||
#else
|
||||
|
||||
copy_coordinate(path->lower_bound_block[0], (float[]){0,3});
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){4,7});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){1,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){10,3});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){10,0.5});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){14,5});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){14,2});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){18,7});
|
||||
copy_coordinate(path->lower_bound_block[4], (float[]){11,7});
|
||||
copy_coordinate(path->upper_bound_block[4], (float[]){17,10});
|
||||
copy_coordinate(path->lower_bound_block[5], (float[]){8,6});
|
||||
copy_coordinate(path->upper_bound_block[5], (float[]){11,9.75});
|
||||
copy_coordinate(path->lower_bound_block[6], (float[]){1,7});
|
||||
copy_coordinate(path->upper_bound_block[6], (float[]){8,9.75});
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
update_bounds_limits_blocks(path);
|
||||
|
||||
struct vehicle *car = create_vehicle(path);
|
||||
|
||||
config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,24,24,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
|
||||
//config_layers *pconf = create_config_layers_from_OneD(4,(size_t[]){3,14,14,3}); /* 3 input , 3 target; 2 hidden layer with 24 neurons each */
|
||||
|
||||
bool randomize=true;
|
||||
float minR = -0.5, maxR = 0.5;
|
||||
int randomRange = 5000;
|
||||
size_t nb_prod_thread = 2;
|
||||
size_t nb_calc_thread = 4;
|
||||
float learning_rate = 0.1;
|
||||
struct networks_qlearning *nnetworks = create_nework_qlearning(
|
||||
pconf,
|
||||
randomize, minR, maxR, randomRange,
|
||||
nb_prod_thread, nb_calc_thread,
|
||||
learning_rate
|
||||
);
|
||||
|
||||
struct status_qlearning *qlstatus = create_status_qlearning ();
|
||||
struct delay_params *dly = create_delay_params (
|
||||
100/*size_t delay_between_episodes*/,
|
||||
10/*size_t delay_between_games*/
|
||||
);
|
||||
|
||||
struct qlearning_params *qlparams = create_qlearning_params (
|
||||
0.95/*float gamma*/,
|
||||
learning_rate,
|
||||
0 /* (not used!)float discount_factor*/,
|
||||
0.85 /*float exploration_factor*/,
|
||||
20/*long int nb_training_before_update_weight_in_target*/,
|
||||
10000/*size_t number_episodes*/
|
||||
);
|
||||
/* UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, d_f_act , df );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->main_net, f_act, f );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, d_f_act , df );
|
||||
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, nnetworks->target_net, f_act , f );
|
||||
*/
|
||||
struct print_params *pprint = create_print_params(
|
||||
20/*float scale_x*/,40 /*float scale_y*/,
|
||||
dly/*struct delay_params * dly_p*/
|
||||
);
|
||||
|
||||
|
||||
@@ -627,6 +627,19 @@ void print_neurons_msg_##type(neurons_##type *nr, char *msg){\
|
||||
}\
|
||||
}\
|
||||
\
|
||||
void print_weight_in_neurons_##type(neurons_##type *nn, char *msg){\
|
||||
neurons_##type *tmp = nn;\
|
||||
size_t i = 0;\
|
||||
char vmsg[250];\
|
||||
while(tmp){\
|
||||
if(tmp->weight_in){\
|
||||
sprintf(vmsg,"%s layer %ld",msg,i++);\
|
||||
print_tensor_msg_##type(tmp->weight_in, vmsg);\
|
||||
}\
|
||||
tmp = tmp->next_layer;\
|
||||
}\
|
||||
}\
|
||||
\
|
||||
void free_neurons_##type(neurons_##type *base){\
|
||||
neurons_##type *temp = base, *ttemp;\
|
||||
while(temp){\
|
||||
@@ -747,7 +760,7 @@ size_t learning_online2_neurons_##type(neurons_##type *base, data_set_##type *da
|
||||
return nbreps;\
|
||||
}\
|
||||
\
|
||||
void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link){\
|
||||
neurons_##type * calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link){\
|
||||
for(size_t i=0; i<(input->dim)->rank; ++i) (base->output)->x[i]=input->x[i];\
|
||||
neurons_##type * tmp=base->next_layer;\
|
||||
while(tmp){\
|
||||
@@ -755,6 +768,7 @@ void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##t
|
||||
if(tmp->next_layer==NULL){\
|
||||
/*print_tensor_msg_##type(tmp->output,"retult");*/\
|
||||
*output_link = tmp->output;\
|
||||
return tmp;\
|
||||
}\
|
||||
tmp = tmp->next_layer;\
|
||||
}\
|
||||
|
||||
@@ -77,7 +77,9 @@ void setup_networks_layers_without_weights_##type(neurons_##type **base_nr, size
|
||||
void setup_networks_layers_without_weights_from_config_##type(neurons_##type **base, config_layers *pconf);\
|
||||
void setup_networks_OneD_##type(neurons_##type **base_nr, size_t *array_dim_in_layers, size_t nb_layers, bool randomize, type minR, type maxR, int randomRange);\
|
||||
void init_in_out_all_networks_OneD_##type(neurons_##type *nr, type *in, size_t sz_in, type *out, size_t sz_out);\
|
||||
\
|
||||
void print_neurons_msg_##type(neurons_##type *nr, char * msg);\
|
||||
void print_weight_in_neurons_##type(neurons_##type *nn, char *msg);\
|
||||
\
|
||||
void free_neurons_##type(neurons_##type *base);\
|
||||
\
|
||||
@@ -106,7 +108,7 @@ void print_data_set_msg_##type(data_set_##type *ds, char *msg);\
|
||||
\
|
||||
size_t learning_online_neurons_##type(neurons_##type *base, data_set_##type *dataset, bool (*condition)(type, size_t));\
|
||||
size_t learning_online2_neurons_##type(neurons_##type *base, data_set_##type *dataset, bool (*condition)(type, size_t));\
|
||||
void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link);\
|
||||
neurons_##type * calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link);\
|
||||
void print_predict_by_network_neurons_##type(neurons_##type *base, tensor_##type *input);\
|
||||
void print_predict_by_network_with_error_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type *target);\
|
||||
\
|
||||
@@ -134,4 +136,33 @@ GEN_NEURON_(TYPE_DOUBLE)
|
||||
}\
|
||||
}while(0);\
|
||||
|
||||
#define COPY_NN_ATTRIBUTE_IN_ALL_LAYERS(type, attribute, dstNN ,sourceNN)\
|
||||
do{\
|
||||
neurons_##type *tmpn = dstNN;\
|
||||
neurons_##type *srcNN = sourceNN;\
|
||||
while(tmpn){\
|
||||
if(tmpn->attribute)\
|
||||
copy_tensor_##type(tmpn->attribute, srcNN->attribute);\
|
||||
tmpn = tmpn->next_layer;\
|
||||
srcNN = srcNN->next_layer;\
|
||||
}\
|
||||
}while(0);\
|
||||
|
||||
|
||||
|
||||
#define PRINT_ATTRIBUTE_TENS_IN_ALL_LAYERS(type, neuronVar, attribute, msg)\
|
||||
do{\
|
||||
neurons_##type *tmpn = neuronVar;\
|
||||
char *vmsg=malloc(strlen(msg)+70);\
|
||||
size_t i=0;\
|
||||
while(tmpn){\
|
||||
sprintf(vmsg,"%s layer %ld",msg,i++);\
|
||||
if(tmpn->attribute)\
|
||||
print_tensor_msg_##type(tmpn->attribute, vmsg);\
|
||||
tmpn = tmpn->next_layer;\
|
||||
}\
|
||||
free(vmsg);\
|
||||
}while(0);\
|
||||
|
||||
|
||||
#endif /*__NEURON_T_C__H*/
|
||||
|
||||
@@ -368,10 +368,10 @@ void print_tensor_msg_##type(tensor_##type *T,char *msg) {\
|
||||
/*size_t j=0 ,k=0*/;\
|
||||
size_t *coord = malloc(sizeof(long int)*(T->dim)->size); \
|
||||
char *val=NULL;\
|
||||
char *dimsg=malloc(512);\
|
||||
/*char *dimsg=malloc(512);\
|
||||
sprintf(dimsg,"(%s)->dim",msg);\
|
||||
printDebug_dimension(T->dim,dimsg);\
|
||||
printf("%s\n",msg);\
|
||||
*/printf("%s\n",msg);\
|
||||
long int begin , end /*, beginIter , endIter*/ ;\
|
||||
long int (*iter)(long int) ;\
|
||||
bool (*cond)(long int, long int) ; \
|
||||
@@ -392,10 +392,11 @@ void print_tensor_msg_##type(tensor_##type *T,char *msg) {\
|
||||
else break;\
|
||||
}\
|
||||
}\
|
||||
printf(" [");\
|
||||
for(size_t k=0; k<(T->dim)->size;++k) printf(" %ld,",coord[k]);\
|
||||
val=type##_TO_STR(T->x[i]);\
|
||||
/*printf(" [");\
|
||||
for(size_t k=0; k<(T->dim)->size;++k) printf(" %ld",coord[k]);\
|
||||
*/val=type##_TO_STR(T->x[i]);\
|
||||
printf(" |#%ld]: %s, ",i,val);\
|
||||
/*printf(" %s, ",val);*/\
|
||||
free(val); val=NULL;\
|
||||
if(coord[begin]==(T->dim)->perm[begin]-1){\
|
||||
size_t count=0;\
|
||||
@@ -411,7 +412,7 @@ void print_tensor_msg_##type(tensor_##type *T,char *msg) {\
|
||||
\
|
||||
free(coord);\
|
||||
printf("\n");\
|
||||
free(dimsg);\
|
||||
/*free(dimsg);*/\
|
||||
}\
|
||||
\
|
||||
\
|
||||
|
||||
Reference in New Issue
Block a user