Trying to fix NaN output of RL by using ReLU with an upper bound

This commit is contained in:
2024-07-16 12:13:05 +02:00
parent 0c9813beca
commit aac7434346
13 changed files with 405 additions and 75 deletions
@@ -37,6 +37,7 @@ struct qlearning_params {
float minimum_threshold_exploration_factor;
long int nb_training_before_update_weight_in_target;
size_t number_episodes;
// size_t threshold_number_same_action;
};
@@ -46,6 +47,10 @@ struct status_qlearning {
struct main_list_TYPE_L_INT * progress_best_cumul;
long int nb_training_after_updated_weight_in_target;
size_t nb_episodes;
size_t index_episode;
int action;
// int last_action;
// size_t count_last_action;
};
struct delay_params {