modify COMPARE_N in tool, modify attribute of vehicle by using tensor
This commit is contained in:
@@ -47,14 +47,16 @@ struct networks_qlearning * create_nework_qlearning(
|
||||
|
||||
}
|
||||
|
||||
struct reward_lists * create_reward_lists (){
|
||||
struct reward_lists * rwrd_l = malloc(sizeof(struct reward_lists));
|
||||
struct status_qlearning * create_status_qlearning (){
|
||||
struct status_qlearning * status_ql = malloc(sizeof(struct status_qlearning));
|
||||
|
||||
rwrd_l->list_main_cumul = create_var_list_TYPE_L_INT();
|
||||
rwrd_l->list_target_cumul = create_var_list_TYPE_L_INT();
|
||||
rwrd_l->progress_best_cumul = create_var_list_TYPE_L_INT();
|
||||
status_ql->list_main_cumul = create_var_list_TYPE_L_INT();
|
||||
status_ql->list_target_cumul = create_var_list_TYPE_L_INT();
|
||||
status_ql->progress_best_cumul = create_var_list_TYPE_L_INT();
|
||||
|
||||
status_ql->nb_training_after_updated_weight_in_target = 0;
|
||||
|
||||
return rwrd_l;
|
||||
return status_ql;
|
||||
}
|
||||
|
||||
struct delay_params * create_delay_params (
|
||||
@@ -71,13 +73,16 @@ struct delay_params * create_delay_params (
|
||||
struct qlearning_params * create_qlearning_params (
|
||||
double learning_rate,
|
||||
double discount_factor,
|
||||
double exploration_factor
|
||||
double exploration_factor,
|
||||
long int nb_training_before_update_weight_in_target
|
||||
){
|
||||
struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params));
|
||||
|
||||
qparams->learning_rate = learning_rate ;
|
||||
qparams->discount_factor = discount_factor ;
|
||||
qparams->exploration_factor = exploration_factor ;
|
||||
|
||||
qparams->nb_training_before_update_weight_in_target = nb_training_before_update_weight_in_target;
|
||||
|
||||
return qparams;
|
||||
}
|
||||
@@ -85,7 +90,7 @@ struct qlearning_params * create_qlearning_params (
|
||||
struct RL_agent * create_RL_agent (
|
||||
struct networks_qlearning * networks,
|
||||
struct vehicle * car,
|
||||
struct reward_lists * rewards,
|
||||
struct status_qlearning * status,
|
||||
struct delay_params * delay,
|
||||
struct qlearning_params *qlearnParams
|
||||
){
|
||||
@@ -93,7 +98,7 @@ struct RL_agent * create_RL_agent (
|
||||
|
||||
rlagent->networks = networks ;
|
||||
rlagent->car = car ;
|
||||
rlagent->rewards = rewards ;
|
||||
rlagent->status = status ;
|
||||
rlagent->delay = delay ;
|
||||
rlagent->qlearnParams = qlearnParams ;
|
||||
|
||||
@@ -101,18 +106,52 @@ struct RL_agent * create_RL_agent (
|
||||
}
|
||||
|
||||
void free_networks_qlearning (struct networks_qlearning * networks){
|
||||
|
||||
free_neurons_TYPE_FLOAT(networks->main_net);
|
||||
free_neurons_TYPE_FLOAT(networks->target_net);
|
||||
free_neurons_TYPE_FLOAT(networks->best_net);
|
||||
free_config_layers(networks->config);
|
||||
free(networks);
|
||||
}
|
||||
void free_reward_lists(struct reward_lists *rwd_l){
|
||||
|
||||
void free_status_qlearning(struct status_qlearning *status_ql){
|
||||
free_all_var_list_TYPE_L_INT(status_ql->list_main_cumul);
|
||||
free_all_var_list_TYPE_L_INT(status_ql->list_target_cumul);
|
||||
free_all_var_list_TYPE_L_INT(status_ql->progress_best_cumul);
|
||||
free(status_ql);
|
||||
}
|
||||
/*
 * Release a delay_params record.
 * The record is handed straight to free(); nothing it may reference is
 * released here.
 */
void free_delay_params (struct delay_params *dly_p)
{
    free(dly_p);
}
|
||||
/*
 * Release a qlearning_params record.
 * The record is handed straight to free(); nothing it may reference is
 * released here.
 */
void free_qlearning_params(struct qlearning_params *q_params)
{
    free(q_params);
}
|
||||
void free_RL_agent(struct RL_agent *rlAgent){
|
||||
free(rlAgent->qlearnParams);
|
||||
free(rlAgent->delay);
|
||||
free_status_qlearning(rlAgent->status);
|
||||
free_networks_qlearning(rlAgent->networks);
|
||||
free_vehicle(rlAgent->car);
|
||||
|
||||
free(rlAgent);
|
||||
}
|
||||
|
||||
void train_qlearning(struct RL_agent * rlAgent,
|
||||
int action /* */,
|
||||
tensor_TYPE_FLOAT * new_state /*input*/,
|
||||
tensor_TYPE_FLOAT * state /*input*/,
|
||||
long reward){
|
||||
tensor_TYPE_FLOAT * action_value = NULL;
|
||||
tensor_TYPE_FLOAT * next_action_value = NULL;
|
||||
neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net;
|
||||
neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net;
|
||||
calculate_output_by_network_neurons_TYPE_FLOAT(net_main, state, &action_value);
|
||||
calculate_output_by_network_neurons_TYPE_FLOAT(net_target, state, &next_action_value);
|
||||
tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim);
|
||||
|
||||
struct game_status * car_status = rlAgent->car->status;
|
||||
if( copy_tensor_TYPE_FLOAT(experimental_values, action_value) == 0 /* done */){
|
||||
if(status->done){
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -17,15 +17,21 @@
|
||||
|
||||
/*
 * Tunable parameters of the Q-learning procedure, owned by an RL_agent
 * through its qlearnParams pointer.
 *
 * create_qlearning_params() fills learning_rate, discount_factor,
 * exploration_factor and nb_training_before_update_weight_in_target from its
 * arguments; the remaining fields are not assigned by that constructor.
 */
struct qlearning_params {
|
||||
double learning_rate;
|
||||
double factor_update_learning_rate; /* NOTE(review): presumably the factor applied when learning_rate is updated - confirm */
|
||||
double minimum_threshold_learning_rate; /* NOTE(review): presumably the lower bound for learning_rate - confirm */
|
||||
double discount_factor;
|
||||
double exploration_factor;
|
||||
double factor_update_exploration_factor; /* NOTE(review): presumably the factor applied when exploration_factor is updated - confirm */
|
||||
double minimum_threshold_exploration_factor; /* NOTE(review): presumably the lower bound for exploration_factor - confirm */
|
||||
long int nb_training_before_update_weight_in_target; /* NOTE(review): presumably the number of trainings between weight updates of the target network - confirm */
|
||||
};
|
||||
|
||||
|
||||
struct reward_lists {
|
||||
/*
 * Running state of a Q-learning session, owned by an RL_agent through its
 * status pointer.
 *
 * create_status_qlearning() creates the three lists with
 * create_var_list_TYPE_L_INT() and sets the counter to 0;
 * free_status_qlearning() releases the lists and the record.
 */
struct status_qlearning {
|
||||
struct main_list_TYPE_L_INT * list_main_cumul;
|
||||
struct main_list_TYPE_L_INT * list_target_cumul;
|
||||
struct main_list_TYPE_L_INT * progress_best_cumul;
|
||||
long int nb_training_after_updated_weight_in_target; /* starts at 0; NOTE(review): presumably compared against qlearning_params.nb_training_before_update_weight_in_target - confirm */
|
||||
};
|
||||
|
||||
struct delay_params {
|
||||
@@ -43,7 +49,7 @@ struct networks_qlearning {
|
||||
struct RL_agent {
|
||||
struct networks_qlearning * networks;
|
||||
struct vehicle * car;
|
||||
struct reward_lists * rewards;
|
||||
struct status_qlearning * status;
|
||||
struct delay_params * delay;
|
||||
struct qlearning_params *qlearnParams;
|
||||
|
||||
@@ -53,7 +59,7 @@ struct networks_qlearning * create_nework_qlearning(
|
||||
struct config_layers * config,
|
||||
bool randomize, float minR, float maxR, int randomRange
|
||||
);
|
||||
struct reward_lists * create_reward_lists ();
|
||||
struct status_qlearning * create_status_qlearning ();
|
||||
struct delay_params * create_delay_params (
|
||||
size_t delay_between_episodes,
|
||||
size_t delay_between_games
|
||||
@@ -62,19 +68,20 @@ struct delay_params * create_delay_params (
|
||||
struct qlearning_params * create_qlearning_params (
|
||||
double learning_rate,
|
||||
double discount_factor,
|
||||
double exploration_factor
|
||||
double exploration_factor,
|
||||
long int nb_training_before_update_weight_in_target
|
||||
);
|
||||
|
||||
struct RL_agent * create_RL_agent (
|
||||
struct networks_qlearning * networks,
|
||||
struct vehicle * car,
|
||||
struct reward_lists * rewards,
|
||||
struct status_qlearning * status,
|
||||
struct delay_params * delay,
|
||||
struct qlearning_params *qlearnParams
|
||||
);
|
||||
|
||||
void free_networks_qlearning (struct networks_qlearning * networks);
|
||||
void free_reward_lists(struct reward_lists *rwd_l);
|
||||
void free_status_qlearning(struct status_qlearning *status_ql);
|
||||
void free_delay_params (struct delay_params *dly_p);
|
||||
void free_qlearning_params(struct qlearning_params *q_params);
|
||||
void free_RL_agent(struct RL_agent *rlAgent);
|
||||
@@ -82,5 +89,10 @@ void free_RL_agent(struct RL_agent *rlAgent);
|
||||
void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * networks);
|
||||
void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks);
|
||||
|
||||
void train_qlearning(struct RL_agent * rlAgent,
|
||||
int action ,
|
||||
tensor_TYPE_FLOAT * new_state /*input*/,
|
||||
tensor_TYPE_FLOAT * state /*input*/,
|
||||
long reward);
|
||||
|
||||
#endif /* __LEARNING_VEHICLE__C_H____ */
|
||||
|
||||
@@ -326,7 +326,7 @@ void print2D_blocks_indexOne_withPoint(struct blocks *blk, float scale_x, float
|
||||
if(in)
|
||||
printf("%d",in);
|
||||
else
|
||||
printf(" ");
|
||||
printf("."); //printf(" ");
|
||||
printf("\033[0;37m"); // white
|
||||
}
|
||||
printf("\n");
|
||||
@@ -478,7 +478,7 @@ void step(struct vehicle *v, int action){
|
||||
status->reward = 0;
|
||||
status->done =false;
|
||||
struct blocks * path = v->path;
|
||||
printf(" center : %f vs %f direction: %f\n",v->sensor->value[CENTER], LIMIT_DISTANCE, v->direction);
|
||||
//printf(" center : %f vs %f direction: %f\n",v->sensor->value[CENTER], LIMIT_DISTANCE, v->direction);
|
||||
if( v->sensor->value[CENTER]<= LIMIT_DISTANCE ){
|
||||
status->reward = REWARD_STOP;
|
||||
status->done = true;
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
#include "tools_t/tools_t.h"
|
||||
#include "dimension_t/dimension_t.h"
|
||||
#include "tensor_t/tensor_t.h"
|
||||
|
||||
#define LOG_LENTH 128
|
||||
|
||||
@@ -35,10 +36,12 @@ struct game_status {
|
||||
int cur_log;
|
||||
};
|
||||
|
||||
struct coordinate {
|
||||
size_t dimension_size;
|
||||
float *x;
|
||||
};
|
||||
//struct coordinate {
|
||||
// size_t dimension_size;
|
||||
// float *x;
|
||||
//};
|
||||
|
||||
typedef tensor_TYPE_FLOAT coordinate;
|
||||
|
||||
/*
|
||||
+-----------------------+ <-- upper_bound_block (coordinate (6,5) for example)
|
||||
@@ -55,32 +58,34 @@ struct coordinate {
|
||||
*/
|
||||
struct blocks {
|
||||
size_t nb_blocks;
|
||||
struct coordinate **lower_bound_block;
|
||||
struct coordinate **upper_bound_block;
|
||||
struct coordinate **bounds_all_blocks;
|
||||
coordinate **lower_bound_block;
|
||||
coordinate **upper_bound_block;
|
||||
coordinate **bounds_all_blocks;
|
||||
bool all_updated;
|
||||
size_t dimension_size;
|
||||
dimension *dim;
|
||||
bool *marker;
|
||||
//float step: // size of subdivision of the lowest large
|
||||
};
|
||||
|
||||
|
||||
struct sensors {
|
||||
size_t nb_values;
|
||||
float *value;
|
||||
};
|
||||
//struct sensors {
|
||||
// size_t nb_values;
|
||||
// float *value;
|
||||
// tensor_TYPE_FLOAT * sensor;
|
||||
//};
|
||||
typedef tensor_TYPE_FLOAT sensors;
|
||||
|
||||
struct vehicle {
|
||||
struct coordinate *coord;
|
||||
coordinate *coord;
|
||||
float direction;
|
||||
float speed;
|
||||
struct sensors *sensor;
|
||||
sensors *sensor;
|
||||
struct blocks *path;
|
||||
struct game_status *status;
|
||||
};
|
||||
|
||||
struct game_status * greate_game_status();
|
||||
struct coordinate * create_coordinate(size_t dim_size);
|
||||
coordinate * create_coordinate(size_t dim_size);
|
||||
struct blocks * create_blocks(size_t nb_blocks, size_t dim_size);
|
||||
|
||||
struct sensors * create_sensors(size_t nb_values);
|
||||
@@ -89,18 +94,18 @@ struct vehicle * create_vehicle(
|
||||
);
|
||||
|
||||
void free_game_status(struct game_status *status);
|
||||
void free_coordinate(struct coordinate *coord);
|
||||
void free_coordinate(coordinate *coord);
|
||||
void free_blocks(struct blocks *blk);
|
||||
|
||||
void free_sensors(struct sensors *snsr);
|
||||
void free_sensors(sensors *snsr);
|
||||
|
||||
void free_vehicle(struct vehicle * vhcl);
|
||||
|
||||
void update_bounds_limits_blocks(struct blocks * blk);
|
||||
|
||||
int is_in_blocks(struct blocks *blk, struct coordinate *coord);
|
||||
int is_in_blocks(struct blocks *blk, coordinate *coord);
|
||||
|
||||
void copy_coordinate(struct coordinate *coord, float *x);
|
||||
void copy_coordinate(coordinate *coord, float *x);
|
||||
|
||||
void move_vehicle(struct vehicle *v);
|
||||
void read_sensor(struct vehicle *v);
|
||||
@@ -111,9 +116,9 @@ void reset(struct vehicle *v);
|
||||
void print2D_blocks_indexOne_withPoint(struct blocks *blk, float scale_x, float scale_y, struct coordinate *coordPoint);
|
||||
void print_vehicle_n_path(struct vehicle *v, float scale_x, float scale_y);
|
||||
|
||||
float distance2_coordinate(struct coordinate *c0, struct coordinate *c1);
|
||||
float distance2_coordinate(coordinate *c0, coordinate *c1);
|
||||
|
||||
void print2D_blocks(struct blocks *blk, float scale_x, float scale_y, char pad);
|
||||
void print2D_blocks_withPoint(struct blocks *blk, float scale_x, float scale_y, char pad, struct coordinate *coordPoint);
|
||||
void print2D_blocks_withPoint(struct blocks *blk, float scale_x, float scale_y, char pad, coordinate *coordPoint);
|
||||
|
||||
#endif /* __VEHICLE__C_H__ */
|
||||
|
||||
@@ -165,7 +165,7 @@ TEST(first_vehicle){
|
||||
copy_coordinate(path->upper_bound_block[0], (float[]){2,7});
|
||||
copy_coordinate(path->lower_bound_block[1], (float[]){2,0});
|
||||
copy_coordinate(path->upper_bound_block[1], (float[]){4,2});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){4,1});
|
||||
copy_coordinate(path->lower_bound_block[2], (float[]){4,0.5});
|
||||
copy_coordinate(path->upper_bound_block[2], (float[]){8,3});
|
||||
copy_coordinate(path->lower_bound_block[3], (float[]){8,0});
|
||||
copy_coordinate(path->upper_bound_block[3], (float[]){16,2});
|
||||
@@ -204,7 +204,9 @@ TEST(first_vehicle){
|
||||
}
|
||||
|
||||
TEST(reward_list){
|
||||
struct reward_lists * l_reward = create_reward_lists ();
|
||||
struct status_qlearning * l_reward = create_status_qlearning();
|
||||
|
||||
free_status_qlearning(l_reward);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv){
|
||||
|
||||
Reference in New Issue
Block a user