diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.c b/deepQlearn_0/src/deepQlearning/learn_to_drive.c index e6e1239..d83c9f1 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.c +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.c @@ -71,6 +71,7 @@ struct delay_params * create_delay_params ( } struct qlearning_params * create_qlearning_params ( + double gamma, double learning_rate, double discount_factor, double exploration_factor, @@ -78,6 +79,7 @@ struct qlearning_params * create_qlearning_params ( ){ struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params)); + qparams->gamma = gamma; qparams->learning_rate = learning_rate ; qparams->discount_factor = discount_factor ; qparams->exploration_factor = exploration_factor ; @@ -148,10 +150,12 @@ void train_qlearning(struct RL_agent * rlAgent, tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim); struct game_status * car_status = rlAgent->car->status; - if( copy_tensor_TYPE_FLOAT(experimental_values, action_value) == 0 /* done */){ - if(car_status->done){ - - } + copy_tensor_TYPE_FLOAT(experimental_values, action_value) ; + // experimental_values === Q-tab learning + if(car_status->done){ + experimental_values->x[action] = -100; + }else { + experimental_values->x[action] = reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ; } } diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.h b/deepQlearn_0/src/deepQlearning/learn_to_drive.h index 2c9dfae..fb25376 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.h +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.h @@ -16,6 +16,7 @@ struct qlearning_params { + double gamma; double learning_rate; double factor_update_learning_rate; double minimum_threshold_learning_rate; @@ -66,6 +67,7 @@ struct delay_params * create_delay_params ( ); struct qlearning_params * create_qlearning_params ( + double gamma, double learning_rate, double discount_factor, double exploration_factor, diff --git a/deepQlearn_0/src/deepQlearning/vehicle.c b/deepQlearn_0/src/deepQlearning/vehicle.c index 36a7f0b..a1286d5 100644 --- a/deepQlearn_0/src/deepQlearning/vehicle.c +++ b/deepQlearn_0/src/deepQlearning/vehicle.c @@ -16,7 +16,7 @@ struct game_status * create_game_status(){ struct game_status * status = malloc(sizeof(struct game_status)); - status->state = 0; + status->q_state = 0; status->reward = 0; status->cumulative_reward = 0; status->done = false; @@ -482,7 +482,7 @@ void step(struct vehicle *v, int action){ move_vehicle(v); read_sensor(v); struct game_status *status = v->status; - status->state = v->sensor->x[LEFT]* 2500 + + status->q_state = v->sensor->x[LEFT]* 2500 + v->sensor->x[CENTER]* 50 + v->sensor->x[RIGHT] ; status->reward = 0; diff --git a/deepQlearn_0/src/deepQlearning/vehicle.h b/deepQlearn_0/src/deepQlearning/vehicle.h index a2868d3..b4b979e 100644 --- a/deepQlearn_0/src/deepQlearning/vehicle.h +++ b/deepQlearn_0/src/deepQlearning/vehicle.h @@ -28,7 +28,7 @@ struct game_status { - long state; + long q_state; long reward; long cumulative_reward; bool done;