WIP: deep Q-learning

This commit is contained in:
2024-06-14 23:07:39 +02:00
parent 474af56377
commit e96304bee7
4 changed files with 13 additions and 7 deletions
@@ -71,6 +71,7 @@ struct delay_params * create_delay_params (
}
struct qlearning_params * create_qlearning_params (
double gamma,
double learning_rate,
double discount_factor,
double exploration_factor,
@@ -78,6 +79,7 @@ struct qlearning_params * create_qlearning_params (
){
struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params));
qparams->gamma = gamma;
qparams->learning_rate = learning_rate ;
qparams->discount_factor = discount_factor ;
qparams->exploration_factor = exploration_factor ;
@@ -148,10 +150,12 @@ void train_qlearning(struct RL_agent * rlAgent,
tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim);
struct game_status * car_status = rlAgent->car->status;
if( copy_tensor_TYPE_FLOAT(experimental_values, action_value) == 0 /* done */){
copy_tensor_TYPE_FLOAT(experimental_values, action_value) ;
// experimental_values holds the Q-learning targets (same update rule as a tabular Q-table)
if(car_status->done){
}
experimental_values->x[action] = -100;
}else {
experimental_values->x[action] = reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ;
}
}
@@ -16,6 +16,7 @@
struct qlearning_params {
double gamma;
double learning_rate;
double factor_update_learning_rate;
double minimum_threshold_learning_rate;
@@ -66,6 +67,7 @@ struct delay_params * create_delay_params (
);
struct qlearning_params * create_qlearning_params (
double gamma,
double learning_rate,
double discount_factor,
double exploration_factor,
+2 -2
View File
@@ -16,7 +16,7 @@
struct game_status * create_game_status(){
struct game_status * status = malloc(sizeof(struct game_status));
status->state = 0;
status->q_state = 0;
status->reward = 0;
status->cumulative_reward = 0;
status->done = false;
@@ -482,7 +482,7 @@ void step(struct vehicle *v, int action){
move_vehicle(v);
read_sensor(v);
struct game_status *status = v->status;
status->state = v->sensor->x[LEFT]* 2500 +
status->q_state = v->sensor->x[LEFT]* 2500 +
v->sensor->x[CENTER]* 50 +
v->sensor->x[RIGHT] ;
status->reward = 0;
+1 -1
View File
@@ -28,7 +28,7 @@
struct game_status {
long state;
long q_state;
long reward;
long cumulative_reward;
bool done;