wip deepQlearning
This commit is contained in:
@@ -71,6 +71,7 @@ struct delay_params * create_delay_params (
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct qlearning_params * create_qlearning_params (
|
struct qlearning_params * create_qlearning_params (
|
||||||
|
double gamma,
|
||||||
double learning_rate,
|
double learning_rate,
|
||||||
double discount_factor,
|
double discount_factor,
|
||||||
double exploration_factor,
|
double exploration_factor,
|
||||||
@@ -78,6 +79,7 @@ struct qlearning_params * create_qlearning_params (
|
|||||||
){
|
){
|
||||||
struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params));
|
struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params));
|
||||||
|
|
||||||
|
qparams->gamma = gamma;
|
||||||
qparams->learning_rate = learning_rate ;
|
qparams->learning_rate = learning_rate ;
|
||||||
qparams->discount_factor = discount_factor ;
|
qparams->discount_factor = discount_factor ;
|
||||||
qparams->exploration_factor = exploration_factor ;
|
qparams->exploration_factor = exploration_factor ;
|
||||||
@@ -148,10 +150,12 @@ void train_qlearning(struct RL_agent * rlAgent,
|
|||||||
tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim);
|
tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim);
|
||||||
|
|
||||||
struct game_status * car_status = rlAgent->car->status;
|
struct game_status * car_status = rlAgent->car->status;
|
||||||
if( copy_tensor_TYPE_FLOAT(experimental_values, action_value) == 0 /* done */){
|
copy_tensor_TYPE_FLOAT(experimental_values, action_value) ;
|
||||||
|
// experimental_values === Q-tab learning
|
||||||
if(car_status->done){
|
if(car_status->done){
|
||||||
|
experimental_values->x[action] = -100;
|
||||||
}
|
}else {
|
||||||
|
experimental_values->x[action] = reward + rlAgent->qlearnParams->gamma * MAX_ARRAY_TYPE_FLOAT(next_action_value->x, next_action_value->dim->rank) ;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
|
|
||||||
struct qlearning_params {
|
struct qlearning_params {
|
||||||
|
double gamma;
|
||||||
double learning_rate;
|
double learning_rate;
|
||||||
double factor_update_learning_rate;
|
double factor_update_learning_rate;
|
||||||
double minimum_threshold_learning_rate;
|
double minimum_threshold_learning_rate;
|
||||||
@@ -66,6 +67,7 @@ struct delay_params * create_delay_params (
|
|||||||
);
|
);
|
||||||
|
|
||||||
struct qlearning_params * create_qlearning_params (
|
struct qlearning_params * create_qlearning_params (
|
||||||
|
double gamma,
|
||||||
double learning_rate,
|
double learning_rate,
|
||||||
double discount_factor,
|
double discount_factor,
|
||||||
double exploration_factor,
|
double exploration_factor,
|
||||||
|
|||||||
@@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
struct game_status * create_game_status(){
|
struct game_status * create_game_status(){
|
||||||
struct game_status * status = malloc(sizeof(struct game_status));
|
struct game_status * status = malloc(sizeof(struct game_status));
|
||||||
status->state = 0;
|
status->q_state = 0;
|
||||||
status->reward = 0;
|
status->reward = 0;
|
||||||
status->cumulative_reward = 0;
|
status->cumulative_reward = 0;
|
||||||
status->done = false;
|
status->done = false;
|
||||||
@@ -482,7 +482,7 @@ void step(struct vehicle *v, int action){
|
|||||||
move_vehicle(v);
|
move_vehicle(v);
|
||||||
read_sensor(v);
|
read_sensor(v);
|
||||||
struct game_status *status = v->status;
|
struct game_status *status = v->status;
|
||||||
status->state = v->sensor->x[LEFT]* 2500 +
|
status->q_state = v->sensor->x[LEFT]* 2500 +
|
||||||
v->sensor->x[CENTER]* 50 +
|
v->sensor->x[CENTER]* 50 +
|
||||||
v->sensor->x[RIGHT] ;
|
v->sensor->x[RIGHT] ;
|
||||||
status->reward = 0;
|
status->reward = 0;
|
||||||
|
|||||||
@@ -28,7 +28,7 @@
|
|||||||
|
|
||||||
|
|
||||||
struct game_status {
|
struct game_status {
|
||||||
long state;
|
long q_state;
|
||||||
long reward;
|
long reward;
|
||||||
long cumulative_reward;
|
long cumulative_reward;
|
||||||
bool done;
|
bool done;
|
||||||
|
|||||||
Reference in New Issue
Block a user