wip deepQlearning
This commit is contained in:
@@ -71,6 +71,7 @@ struct delay_params * create_delay_params (
|
||||
}
|
||||
|
||||
struct qlearning_params * create_qlearning_params (
|
||||
double gamma,
|
||||
double learning_rate,
|
||||
double discount_factor,
|
||||
double exploration_factor,
|
||||
@@ -78,6 +79,7 @@ struct qlearning_params * create_qlearning_params (
|
||||
){
|
||||
struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params));
|
||||
|
||||
qparams->gamma = gamma;
|
||||
qparams->learning_rate = learning_rate ;
|
||||
qparams->discount_factor = discount_factor ;
|
||||
qparams->exploration_factor = exploration_factor ;
|
||||
@@ -148,10 +150,12 @@ void train_qlearning(struct RL_agent * rlAgent,
|
||||
tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim);
|
||||
|
||||
struct game_status * car_status = rlAgent->car->status;
|
||||
if( copy_tensor_TYPE_FLOAT(experimental_values, action_value) == 0 /* done */){
|
||||
if(car_status->done){
|
||||
|
||||
}
|
||||
copy_tensor_TYPE_FLOAT(experimental_values, action_value) ;
|
||||
// experimental_values holds the Q-learning target values: only the entry for the taken `action` is overwritten below
|
||||
if(car_status->done){
|
||||
experimental_values->x[action] = -100;
|
||||
}else {
|
||||
experimental_values->x[action] = reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
|
||||
struct qlearning_params {
|
||||
double gamma;
|
||||
double learning_rate;
|
||||
double factor_update_learning_rate;
|
||||
double minimum_threshold_learning_rate;
|
||||
@@ -66,6 +67,7 @@ struct delay_params * create_delay_params (
|
||||
);
|
||||
|
||||
struct qlearning_params * create_qlearning_params (
|
||||
double gamma,
|
||||
double learning_rate,
|
||||
double discount_factor,
|
||||
double exploration_factor,
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
struct game_status * create_game_status(){
|
||||
struct game_status * status = malloc(sizeof(struct game_status));
|
||||
status->state = 0;
|
||||
status->q_state = 0;
|
||||
status->reward = 0;
|
||||
status->cumulative_reward = 0;
|
||||
status->done = false;
|
||||
@@ -482,7 +482,7 @@ void step(struct vehicle *v, int action){
|
||||
move_vehicle(v);
|
||||
read_sensor(v);
|
||||
struct game_status *status = v->status;
|
||||
status->state = v->sensor->x[LEFT]* 2500 +
|
||||
status->q_state = v->sensor->x[LEFT]* 2500 +
|
||||
v->sensor->x[CENTER]* 50 +
|
||||
v->sensor->x[RIGHT] ;
|
||||
status->reward = 0;
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
|
||||
|
||||
struct game_status {
|
||||
long state;
|
||||
long q_state;
|
||||
long reward;
|
||||
long cumulative_reward;
|
||||
bool done;
|
||||
|
||||
Reference in New Issue
Block a user