modify COMPARE_N in tool, modify attribute of vehicle by using tensor

This commit is contained in:
2024-06-13 23:35:25 +02:00
parent 13f91583bb
commit 9927d6642c
12 changed files with 217 additions and 56 deletions
@@ -17,15 +17,21 @@
/*
 * Tunable parameters of the Q-learning agent.
 *
 * NOTE(review): the code that reads these fields is not visible here; the
 * per-field notes below are inferred from the field names only and must be
 * confirmed against the implementation.
 */
struct qlearning_params {
/* presumably the step size applied when training the network — TODO confirm */
double learning_rate;
/* presumably the factor used to update learning_rate over time — TODO confirm */
double factor_update_learning_rate;
/* presumably the lower bound below which learning_rate is no longer reduced — TODO confirm */
double minimum_threshold_learning_rate;
/* presumably the discount applied to future rewards — TODO confirm */
double discount_factor;
/* presumably the exploration probability/rate of the policy — TODO confirm */
double exploration_factor;
/* presumably the factor used to update exploration_factor over time — TODO confirm */
double factor_update_exploration_factor;
/* presumably the lower bound for exploration_factor — TODO confirm */
double minimum_threshold_exploration_factor;
/*
 * presumably the number of training calls to perform before the main
 * weights are copied into the target network — TODO confirm
 */
long int nb_training_before_update_weight_in_target;
};
struct reward_lists {
/*
 * Running status of a Q-learning session: three lists of long-int values
 * plus a training counter.
 *
 * NOTE(review): list contents and counter semantics are inferred from the
 * field names only — confirm against the code that fills them.
 */
struct status_qlearning {
/* presumably cumulative values recorded for the main network — TODO confirm */
struct main_list_TYPE_L_INT * list_main_cumul;
/* presumably cumulative values recorded for the target network — TODO confirm */
struct main_list_TYPE_L_INT * list_target_cumul;
/* presumably the history of the best cumulative value reached — TODO confirm */
struct main_list_TYPE_L_INT * progress_best_cumul;
/*
 * presumably the number of training calls made since the target weights
 * were last updated — TODO confirm
 */
long int nb_training_after_updated_weight_in_target;
};
struct delay_params {
@@ -43,7 +49,7 @@ struct networks_qlearning {
struct RL_agent {
struct networks_qlearning * networks;
struct vehicle * car;
struct reward_lists * rewards;
struct status_qlearning * status;
struct delay_params * delay;
struct qlearning_params *qlearnParams;
@@ -53,7 +59,7 @@ struct networks_qlearning * create_nework_qlearning(
struct config_layers * config,
bool randomize, float minR, float maxR, int randomRange
);
struct reward_lists * create_reward_lists ();
struct status_qlearning * create_status_qlearning ();
struct delay_params * create_delay_params (
size_t delay_between_episodes,
size_t delay_between_games
@@ -62,19 +68,20 @@ struct delay_params * create_delay_params (
struct qlearning_params * create_qlearning_params (
double learning_rate,
double discount_factor,
double exploration_factor
double exploration_factor,
long int nb_training_before_update_weight_in_target
);
struct RL_agent * create_RL_agent (
struct networks_qlearning * networks,
struct vehicle * car,
struct reward_lists * rewards,
struct status_qlearning * status,
struct delay_params * delay,
struct qlearning_params *qlearnParams
);
/*
 * Presumably releases a struct networks_qlearning.
 * NOTE(review): definition not visible here — confirm whether the contained
 * networks are freed too and whether NULL is accepted.
 */
void free_networks_qlearning (struct networks_qlearning * networks);
void free_reward_lists(struct reward_lists *rwd_l);
void free_status_qlearning(struct status_qlearning *status_ql);
/*
 * Presumably release the corresponding structures.
 * NOTE(review): definitions not visible here — confirm NULL handling, and
 * whether free_RL_agent also frees the members the agent points to
 * (networks, car, status, delay, qlearnParams) or only the agent itself.
 */
void free_delay_params (struct delay_params *dly_p);
void free_qlearning_params(struct qlearning_params *q_params);
void free_RL_agent(struct RL_agent *rlAgent);
@@ -82,5 +89,10 @@ void free_RL_agent(struct RL_agent *rlAgent);
/*
 * Presumably copy the weights of the main network held in `networks` into
 * its target network, respectively into its best network.
 * NOTE(review): definitions not visible here — confirm which members of
 * struct networks_qlearning are read and written.
 */
void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * networks);
void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks);
/*
 * Run one Q-learning training step for the given agent.
 *
 * rlAgent   : agent to train.
 * action    : presumably the index of the action that was taken — TODO confirm.
 * new_state : input tensor; presumably the state observed after the action — TODO confirm.
 * state     : input tensor; presumably the state observed before the action — TODO confirm.
 * reward    : presumably the reward obtained for the transition — TODO confirm.
 *
 * NOTE(review): the definition is not visible here; which parts of rlAgent
 * are modified must be checked there.
 */
void train_qlearning(struct RL_agent * rlAgent,
int action ,
tensor_TYPE_FLOAT * new_state /*input*/,
tensor_TYPE_FLOAT * state /*input*/,
long reward);
#endif /* __LEARNING_VEHICLE__C_H____ */