wip deepQlearning

This commit is contained in:
2024-06-14 23:07:39 +02:00
parent 474af56377
commit e96304bee7
4 changed files with 13 additions and 7 deletions
@@ -71,6 +71,7 @@ struct delay_params * create_delay_params (
} }
struct qlearning_params * create_qlearning_params ( struct qlearning_params * create_qlearning_params (
double gamma,
double learning_rate, double learning_rate,
double discount_factor, double discount_factor,
double exploration_factor, double exploration_factor,
@@ -78,6 +79,7 @@ struct qlearning_params * create_qlearning_params (
){ ){
struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params)); struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params));
qparams->gamma = gamma;
qparams->learning_rate = learning_rate ; qparams->learning_rate = learning_rate ;
qparams->discount_factor = discount_factor ; qparams->discount_factor = discount_factor ;
qparams->exploration_factor = exploration_factor ; qparams->exploration_factor = exploration_factor ;
@@ -148,10 +150,12 @@ void train_qlearning(struct RL_agent * rlAgent,
tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim); tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim);
struct game_status * car_status = rlAgent->car->status; struct game_status * car_status = rlAgent->car->status;
if( copy_tensor_TYPE_FLOAT(experimental_values, action_value) == 0 /* done */){ copy_tensor_TYPE_FLOAT(experimental_values, action_value) ;
// experimental_values === Q-tab learning
if(car_status->done){ if(car_status->done){
experimental_values->x[action] = -100;
} }else {
experimental_values->x[action] = reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ;
} }
} }
@@ -16,6 +16,7 @@
struct qlearning_params { struct qlearning_params {
double gamma;
double learning_rate; double learning_rate;
double factor_update_learning_rate; double factor_update_learning_rate;
double minimum_threshold_learning_rate; double minimum_threshold_learning_rate;
@@ -66,6 +67,7 @@ struct delay_params * create_delay_params (
); );
struct qlearning_params * create_qlearning_params ( struct qlearning_params * create_qlearning_params (
double gamma,
double learning_rate, double learning_rate,
double discount_factor, double discount_factor,
double exploration_factor, double exploration_factor,
+2 -2
View File
@@ -16,7 +16,7 @@
struct game_status * create_game_status(){ struct game_status * create_game_status(){
struct game_status * status = malloc(sizeof(struct game_status)); struct game_status * status = malloc(sizeof(struct game_status));
status->state = 0; status->q_state = 0;
status->reward = 0; status->reward = 0;
status->cumulative_reward = 0; status->cumulative_reward = 0;
status->done = false; status->done = false;
@@ -482,7 +482,7 @@ void step(struct vehicle *v, int action){
move_vehicle(v); move_vehicle(v);
read_sensor(v); read_sensor(v);
struct game_status *status = v->status; struct game_status *status = v->status;
status->state = v->sensor->x[LEFT]* 2500 + status->q_state = v->sensor->x[LEFT]* 2500 +
v->sensor->x[CENTER]* 50 + v->sensor->x[CENTER]* 50 +
v->sensor->x[RIGHT] ; v->sensor->x[RIGHT] ;
status->reward = 0; status->reward = 0;
+1 -1
View File
@@ -28,7 +28,7 @@
struct game_status { struct game_status {
long state; long q_state;
long reward; long reward;
long cumulative_reward; long cumulative_reward;
bool done; bool done;