[update] learn_to_drive: unlimited learning-rate update if reward is very good

This commit is contained in:
2026-02-11 08:34:33 +01:00
parent 93a89eaa99
commit 20e001ceb4
@@ -323,7 +323,7 @@ void train_qlearning(struct RL_agent * rlAgent,
#if UPDATE_PARAMS #if UPDATE_PARAMS
if((car_status->cumulative_reward > ACCEPTABLE_REWARD) || (rlAgent->status->nb_episodes % 100 == 0) ){ if((car_status->cumulative_reward > ACCEPTABLE_REWARD) || (rlAgent->status->nb_episodes % 100 == 0) ){
float new_value = ( (net_main->learning_rate < qlParams->minimum_threshold_learning_rate /*0.0001*/) ? net_main->learning_rate :(net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ ); float new_value = ( (net_main->learning_rate < qlParams->minimum_threshold_learning_rate /*0.0001*/) ? net_main->learning_rate :(net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ );
if((car_status->cumulative_reward > VERY_GOOD_REWARD) ) new_value = (net_main->learning_rate ) * qlParams->factor_update_learning_rate /*0.995*/ ; if(car_status->cumulative_reward > VERY_GOOD_REWARD) new_value = (net_main->learning_rate ) * qlParams->factor_update_learning_rate ;
UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, net_main, learning_rate, new_value); UPDATE_ATTRIBUTE_NEURONE_IN_ALL_LAYERS(TYPE_FLOAT, net_main, learning_rate, new_value);
qlParams->learning_rate = new_value; qlParams->learning_rate = new_value;
#if UPDATE_EXPLOR_FAC #if UPDATE_EXPLOR_FAC