Modify COMPARE_N in tool; modify vehicle attributes using tensors

This commit is contained in:
2024-06-13 23:35:25 +02:00
parent 13f91583bb
commit 9927d6642c
12 changed files with 217 additions and 56 deletions
+53 -14
View File
@@ -47,14 +47,16 @@ struct networks_qlearning * create_nework_qlearning(
}
/*
 * Allocates a status object for Q-learning and fills its three list members
 * (list_main_cumul, list_target_cumul, progress_best_cumul) with lists
 * returned by create_var_list_TYPE_L_INT(); the status_ql variant also sets
 * nb_training_after_updated_weight_in_target to 0. Returns the new object.
 *
 * The malloc() result is used without being checked.
 *
 * NOTE(review): this span appears to interleave two versions of the same
 * function -- the `reward_lists` / `rwrd_l` lines and the
 * `status_qlearning` / `status_ql` lines -- so there are two headers, two
 * returns and a single closing brace. Confirm which side is current and
 * drop the other.
 */
struct reward_lists * create_reward_lists (){
struct reward_lists * rwrd_l = malloc(sizeof(struct reward_lists));
struct status_qlearning * create_status_qlearning (){
struct status_qlearning * status_ql = malloc(sizeof(struct status_qlearning));
rwrd_l->list_main_cumul = create_var_list_TYPE_L_INT();
rwrd_l->list_target_cumul = create_var_list_TYPE_L_INT();
rwrd_l->progress_best_cumul = create_var_list_TYPE_L_INT();
status_ql->list_main_cumul = create_var_list_TYPE_L_INT();
status_ql->list_target_cumul = create_var_list_TYPE_L_INT();
status_ql->progress_best_cumul = create_var_list_TYPE_L_INT();
/* Counter starts at zero for a freshly created status object. */
status_ql->nb_training_after_updated_weight_in_target = 0;
return rwrd_l;
return status_ql;
}
struct delay_params * create_delay_params (
@@ -71,13 +73,16 @@ struct delay_params * create_delay_params (
/*
 * Allocates a qlearning_params object and stores the given values in the
 * members of the same name: learning_rate, discount_factor,
 * exploration_factor and nb_training_before_update_weight_in_target.
 * Returns the new object.
 *
 * The malloc() result is used without being checked.
 *
 * NOTE(review): `double exploration_factor` is listed twice below (once
 * without and once with a trailing comma); this looks like the old and new
 * line of a diff shown together -- only the comma-terminated one can be
 * valid alongside the following parameter.
 */
struct qlearning_params * create_qlearning_params (
double learning_rate,
double discount_factor,
double exploration_factor
double exploration_factor,
long int nb_training_before_update_weight_in_target
){
struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params));
qparams->learning_rate = learning_rate ;
qparams->discount_factor = discount_factor ;
qparams->exploration_factor = exploration_factor ;
qparams->nb_training_before_update_weight_in_target = nb_training_before_update_weight_in_target;
return qparams;
}
@@ -85,7 +90,7 @@ struct qlearning_params * create_qlearning_params (
struct RL_agent * create_RL_agent (
struct networks_qlearning * networks,
struct vehicle * car,
struct reward_lists * rewards,
struct status_qlearning * status,
struct delay_params * delay,
struct qlearning_params *qlearnParams
){
@@ -93,7 +98,7 @@ struct RL_agent * create_RL_agent (
rlagent->networks = networks ;
rlagent->car = car ;
rlagent->rewards = rewards ;
rlagent->status = status ;
rlagent->delay = delay ;
rlagent->qlearnParams = qlearnParams ;
@@ -101,18 +106,52 @@ struct RL_agent * create_RL_agent (
}
/*
 * Releases a networks_qlearning object: its main, target and best networks,
 * its layer configuration, and finally the container itself.
 *
 * networks : object to release; NULL is accepted and ignored, mirroring
 *            free(NULL).
 */
void free_networks_qlearning (struct networks_qlearning * networks){
    if (networks == NULL) {
        return;
    }
    free_neurons_TYPE_FLOAT(networks->main_net);
    free_neurons_TYPE_FLOAT(networks->target_net);
    free_neurons_TYPE_FLOAT(networks->best_net);
    free_config_layers(networks->config);
    free(networks);
}
void free_reward_lists(struct reward_lists *rwd_l){
/*
 * Releases a status_qlearning object: its three lists (list_main_cumul,
 * list_target_cumul, progress_best_cumul) and then the object itself.
 *
 * status_ql : object to release; NULL is accepted and ignored, mirroring
 *             free(NULL).
 */
void free_status_qlearning(struct status_qlearning *status_ql){
    if (status_ql == NULL) {
        return;
    }
    free_all_var_list_TYPE_L_INT(status_ql->list_main_cumul);
    free_all_var_list_TYPE_L_INT(status_ql->list_target_cumul);
    free_all_var_list_TYPE_L_INT(status_ql->progress_best_cumul);
    free(status_ql);
}
/*
 * Releases a delay_params object by passing it to free().
 * Nothing reachable through the object is released here.
 */
void free_delay_params (struct delay_params *dly_p){
free(dly_p);
}
/*
 * Releases a qlearning_params object by passing it to free().
 * Nothing reachable through the object is released here.
 */
void free_qlearning_params(struct qlearning_params *q_params){
free(q_params);
}
/*
 * Releases an RL_agent together with the objects its qlearnParams, delay,
 * status, networks and car members point to, then the agent itself.
 *
 * Each member is handed to its dedicated free_* routine so that any future
 * change in how a member is torn down only has to be made in one place.
 *
 * rlAgent : agent to release; NULL is accepted and ignored, mirroring
 *           free(NULL).
 */
void free_RL_agent(struct RL_agent *rlAgent){
    if (rlAgent == NULL) {
        return;
    }
    free_qlearning_params(rlAgent->qlearnParams);
    free_delay_params(rlAgent->delay);
    free_status_qlearning(rlAgent->status);
    free_networks_qlearning(rlAgent->networks);
    free_vehicle(rlAgent->car);
    free(rlAgent);
}
/*
 * One Q-learning training step for the agent (implementation in progress).
 *
 * rlAgent   : agent whose main and target networks are evaluated.
 * action    : not used yet.
 * new_state : input tensor fed to the target network.
 * state     : input tensor fed to the main network.
 * reward    : not used yet.
 *
 * The main network's output for `state` is copied into a freshly created
 * tensor of the same dimensions; the branch that would act on the copy when
 * the car's status reports `done` is still empty.
 *
 * NOTE(review): action_value, next_action_value and experimental_values are
 * never released inside this function -- confirm who owns them and release
 * them once the update logic is complete.
 */
void train_qlearning(struct RL_agent * rlAgent,
int action /* */,
tensor_TYPE_FLOAT * new_state /*input*/,
tensor_TYPE_FLOAT * state /*input*/,
long reward){
    tensor_TYPE_FLOAT * action_value = NULL;
    tensor_TYPE_FLOAT * next_action_value = NULL;
    neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net;
    neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net;

    /* Output of the main network for the current state. */
    calculate_output_by_network_neurons_TYPE_FLOAT(net_main, state, &action_value);

    /*
     * Output of the target network for the NEXT state. The previous code
     * passed `state` here as well, leaving `new_state` unused.
     * NOTE(review): confirm that the next state is what is intended.
     */
    calculate_output_by_network_neurons_TYPE_FLOAT(net_target, new_state, &next_action_value);

    tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim);
    struct game_status * car_status = rlAgent->car->status;

    if( copy_tensor_TYPE_FLOAT(experimental_values, action_value) == 0 /* done */){
        /*
         * The previous code tested `status->done`, but no `status` exists in
         * this scope; the car's game status fetched above is the only
         * candidate. NOTE(review): confirm game_status has a `done` member.
         */
        if(car_status->done){
        }
    }
}