Modify COMPARE_N in tool; modify the vehicle's attributes by using a tensor
This commit is contained in:
@@ -17,15 +17,21 @@
|
||||
|
||||
/*
 * Tunable parameters of the Q-learning agent.
 *
 * NOTE(review): the per-field notes below are read from the field names
 * only; the code that consumes them is not visible in this header, so
 * confirm each against the implementation.
 */
struct qlearning_params {
|
||||
/* Learning rate of the agent. */
double learning_rate;
|
||||
/* Presumably the factor applied to learning_rate each time it is updated - TODO confirm. */
double factor_update_learning_rate;
|
||||
/* Presumably the lower bound learning_rate is not allowed to go below - TODO confirm. */
double minimum_threshold_learning_rate;
|
||||
/* Discount factor applied to future rewards (assumed: the usual Q-learning gamma). */
double discount_factor;
|
||||
/* Exploration factor (assumed: probability of picking an exploratory action). */
double exploration_factor;
|
||||
/* Presumably the factor applied to exploration_factor each time it is updated - TODO confirm. */
double factor_update_exploration_factor;
|
||||
/* Presumably the lower bound exploration_factor is not allowed to go below - TODO confirm. */
double minimum_threshold_exploration_factor;
|
||||
/*
 * Presumably the number of training steps to run before the main network's
 * weights are copied into the target network - TODO confirm.
 */
long int nb_training_before_update_weight_in_target;
|
||||
};
|
||||
|
||||
|
||||
/*
 * Running status of a Q-learning session: three pointers to
 * main_list_TYPE_L_INT lists plus one training counter.
 *
 * NOTE(review): this diff view shows two struct headers in a row with no
 * +/- markers. Presumably `struct reward_lists` is the pre-change name and
 * `struct status_qlearning` the post-change name, so only one of the two
 * header lines belongs in the real header - confirm against the repository.
 */
struct reward_lists {
|
||||
struct status_qlearning {
|
||||
/* Presumably the cumulative rewards obtained with the main network - TODO confirm. */
struct main_list_TYPE_L_INT * list_main_cumul;
|
||||
/* Presumably the cumulative rewards obtained with the target network - TODO confirm. */
struct main_list_TYPE_L_INT * list_target_cumul;
|
||||
/* Presumably the history of the best cumulative reward seen so far - TODO confirm. */
struct main_list_TYPE_L_INT * progress_best_cumul;
|
||||
/*
 * Presumably the number of training steps run since the target network's
 * weights were last updated - TODO confirm.
 */
long int nb_training_after_updated_weight_in_target;
|
||||
};
|
||||
|
||||
struct delay_params {
|
||||
@@ -43,7 +49,7 @@ struct networks_qlearning {
|
||||
struct RL_agent {
|
||||
struct networks_qlearning * networks;
|
||||
struct vehicle * car;
|
||||
struct reward_lists * rewards;
|
||||
struct status_qlearning * status;
|
||||
struct delay_params * delay;
|
||||
struct qlearning_params *qlearnParams;
|
||||
|
||||
@@ -53,7 +59,7 @@ struct networks_qlearning * create_nework_qlearning(
|
||||
struct config_layers * config,
|
||||
bool randomize, float minR, float maxR, int randomRange
|
||||
);
|
||||
/*
 * Constructor for the agent's status object; returns a pointer to it.
 *
 * NOTE(review): the diff view shows two prototypes with no +/- markers.
 * Presumably create_reward_lists() is the pre-change declaration and
 * create_status_qlearning() the post-change one - confirm. Ownership of the
 * returned pointer is not visible here.
 */
struct reward_lists * create_reward_lists ();
|
||||
struct status_qlearning * create_status_qlearning ();
|
||||
struct delay_params * create_delay_params (
|
||||
size_t delay_between_episodes,
|
||||
size_t delay_between_games
|
||||
@@ -62,19 +68,20 @@ struct delay_params * create_delay_params (
|
||||
/*
 * Constructor for struct qlearning_params; returns a pointer to it.
 *
 * Parameters share their names with fields of struct qlearning_params:
 *   learning_rate, discount_factor, exploration_factor,
 *   nb_training_before_update_weight_in_target.
 * The factor_update_* and minimum_threshold_* fields of that struct have no
 * matching parameter here; how they get initialised is not visible.
 *
 * NOTE(review): `double exploration_factor` appears twice below, once
 * without and once with a trailing comma. Presumably the first is the
 * pre-change line and the second the post-change line of the diff, so only
 * one belongs in the real prototype - confirm against the repository.
 */
struct qlearning_params * create_qlearning_params (
|
||||
double learning_rate,
|
||||
double discount_factor,
|
||||
double exploration_factor
|
||||
double exploration_factor,
|
||||
long int nb_training_before_update_weight_in_target
|
||||
);
|
||||
|
||||
/*
 * Constructor for struct RL_agent; returns a pointer to it.
 *
 * Each parameter shares its name with a member of struct RL_agent
 * (networks, car, status, delay, qlearnParams). Whether the agent takes
 * ownership of the pointers it is given is not visible here - TODO confirm
 * against free_RL_agent().
 *
 * NOTE(review): the diff view lists both `rewards` and `status` with no +/-
 * markers. Presumably `struct reward_lists * rewards` is the pre-change
 * parameter and `struct status_qlearning * status` its post-change
 * replacement - confirm against the repository.
 */
struct RL_agent * create_RL_agent (
|
||||
struct networks_qlearning * networks,
|
||||
struct vehicle * car,
|
||||
struct reward_lists * rewards,
|
||||
struct status_qlearning * status,
|
||||
struct delay_params * delay,
|
||||
struct qlearning_params *qlearnParams
|
||||
);
|
||||
|
||||
/*
 * Release functions, one per structure declared in this header. Each takes
 * a pointer to the object and returns nothing.
 *
 * NOTE(review): the definitions are not visible here, so it is unknown
 * whether a NULL argument is accepted or whether free_RL_agent() also
 * releases the objects the agent points to - confirm in the .c file.
 * free_reward_lists() and free_status_qlearning() are shown side by side
 * with no +/- markers; presumably the former is the pre-change declaration
 * and the latter its post-change replacement.
 */
void free_networks_qlearning (struct networks_qlearning * networks);
|
||||
void free_reward_lists(struct reward_lists *rwd_l);
|
||||
void free_status_qlearning(struct status_qlearning *status_ql);
|
||||
void free_delay_params (struct delay_params *dly_p);
|
||||
void free_qlearning_params(struct qlearning_params *q_params);
|
||||
void free_RL_agent(struct RL_agent *rlAgent);
|
||||
@@ -82,5 +89,10 @@ void free_RL_agent(struct RL_agent *rlAgent);
|
||||
/*
 * Weight-copy helpers operating on a struct networks_qlearning.
 *
 * Going by their names, the first copies the main network's weights into
 * the target network and the second copies them into the "best" network.
 * The layout of struct networks_qlearning and the definitions are not
 * visible here - TODO confirm.
 */
void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * networks);
|
||||
void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks);
|
||||
|
||||
/*
 * Training entry point for the Q-learning agent (definition not visible
 * here; presumably performs one training step per call - TODO confirm).
 *
 *   rlAgent   - agent to train.
 *   action    - action identifier; presumably the action taken in `state`.
 *   new_state - input tensor; presumably the state observed after `action`.
 *   state     - input tensor; presumably the state before `action`.
 *   reward    - reward value passed as a long.
 *
 * Returns nothing.
 */
void train_qlearning(struct RL_agent * rlAgent,
|
||||
int action ,
|
||||
tensor_TYPE_FLOAT * new_state /*input*/,
|
||||
tensor_TYPE_FLOAT * state /*input*/,
|
||||
long reward);
|
||||
|
||||
#endif /* __LEARNING_VEHICLE__C_H____ */
|
||||
|
||||
Reference in New Issue
Block a user