First attempt at deep Q-learning

This commit is contained in:
2024-06-21 00:28:50 +02:00
parent e96304bee7
commit 582d7a6a70
7 changed files with 401 additions and 70 deletions
+53 -21
View File
@@ -1,7 +1,17 @@
#ifndef __LEARNING_VEHICLE__C_H____
#define __LEARNING_VEHICLE__C_H____
//#include <pthread.h>
#include <stdlib.h>
#include <pthread.h>
/*
 * Sleep(ms): millisecond sleep usable on every supported platform.
 *
 * On Windows the native Sleep() from <windows.h> is used. Elsewhere Sleep()
 * is mapped onto usleep(), which takes microseconds, hence the *1000.
 *
 * _WIN32 is the macro Windows toolchains predefine; WINDOWS is still honoured
 * for builds that pass it explicitly (e.g. -DWINDOWS).
 *
 * NOTE(review): POSIX allows usleep() to reject arguments >= 1,000,000 us, so
 * delays of one second or more may need nanosleep() on strict platforms.
 */
#if defined(_WIN32) || defined(WINDOWS)
#include <windows.h>
#else
#include <unistd.h>
#define Sleep(x) usleep((x)*1000)
#endif
#include "neuron_t/neuron_t.h"
@@ -16,15 +26,17 @@
/*
 * Tuning parameters for Q-learning, grouped in one struct.
 *
 * NOTE(review): every floating-point field below is listed twice under the
 * same name, first as double and then as float. That reads like the
 * before/after sides of a double -> float change shown without +/- markers.
 * A struct cannot declare the same member name twice, so only one set can be
 * live (presumably the float set) -- confirm against the real header.
 */
struct qlearning_params {
/* double-typed set (see note above) */
double gamma;
double learning_rate;
double factor_update_learning_rate;
double minimum_threshold_learning_rate;
double discount_factor;
double exploration_factor;
double factor_update_exploration_factor;
double minimum_threshold_exploration_factor;
/* float-typed set (see note above) */
float gamma;
float learning_rate;
/* presumably the factor applied to learning_rate when it is updated -- TODO confirm */
float factor_update_learning_rate;
// float epsilon;
/* presumably the floor below which learning_rate is no longer reduced -- TODO confirm */
float minimum_threshold_learning_rate;
float discount_factor;
float exploration_factor;
/* presumably the factor applied to exploration_factor when it is updated -- TODO confirm */
float factor_update_exploration_factor;
/* presumably the floor for exploration_factor -- TODO confirm */
float minimum_threshold_exploration_factor;
/* presumably the number of training steps between copies of the main
 * network's weights into the target network -- TODO confirm in the trainer */
long int nb_training_before_update_weight_in_target;
/* episode count; how it bounds training is not visible in this header */
size_t number_episodes;
};
@@ -40,6 +52,15 @@ struct delay_params {
size_t delay_between_games;
};
/*
 * Display-related settings carried by the agent.
 *
 * The struct owns a mutex next to the `printed` flag and keeps a pointer to
 * the delay settings plus a fixed-size text buffer.
 */
struct print_params {
    bool printed;                   /* flag; presumably "output already drawn" -- TODO confirm */
    pthread_mutex_t mut_printed;    /* presumably guards `printed` -- TODO confirm */
    float scale_x;                  /* horizontal scale factor */
    float scale_y;                  /* vertical scale factor */
    struct delay_params *delay;     /* delay settings; ownership not visible here */
    char string_space[LOG_LENTH];   /* text buffer of LOG_LENTH chars (macro defined elsewhere) */
};
struct networks_qlearning {
config_layers *config;
neurons_TYPE_FLOAT *main_net;
@@ -51,39 +72,48 @@ struct RL_agent {
struct networks_qlearning * networks;
struct vehicle * car;
struct status_qlearning * status;
struct delay_params * delay;
struct print_params * pprint;
struct qlearning_params *qlearnParams;
};
/*
 * Declaration of create_nework_qlearning(); it returns a pointer to
 * struct networks_qlearning.
 *
 * NOTE(review): the `bool randomize, float minR, float maxR, int randomRange`
 * line appears twice, the first copy without a trailing comma. This looks like
 * the before/after pair of a change that appended nb_prod_thread,
 * nb_calc_thread and learning_rate. As written the parameter list is not
 * valid C -- confirm which line is current (presumably the second).
 *
 * NOTE(review): "nework" looks like a typo for "network"; renaming it would
 * have to be done together with the definition and every caller.
 */
struct networks_qlearning * create_nework_qlearning(
struct config_layers * config,
bool randomize, float minR, float maxR, int randomRange
bool randomize, float minR, float maxR, int randomRange,
size_t nb_prod_thread,
size_t nb_calc_thread,
float learning_rate
);
/**
 * Create a status_qlearning object.
 *
 * Declared with (void): in C (before C23) an empty parameter list leaves the
 * parameters unspecified, so the compiler would accept calls with arbitrary
 * arguments. (void) makes this a real prototype.
 *
 * @return pointer to a struct status_qlearning; presumably released with
 *         free_status_qlearning() -- confirm against the implementation.
 */
struct status_qlearning *create_status_qlearning(void);
/**
 * Constructor-style declaration for struct delay_params.
 *
 * @param delay_between_episodes  delay setting between episodes
 * @param delay_between_games     delay setting between games
 * @return pointer to a struct delay_params
 *
 * NOTE(review): units are not visible here; presumably milliseconds, matching
 * the Sleep() helper in this header -- TODO confirm.
 */
struct delay_params *create_delay_params(size_t delay_between_episodes,
                                         size_t delay_between_games);
/**
 * Constructor-style declaration for struct print_params.
 *
 * @param scale_x  horizontal scale factor
 * @param scale_y  vertical scale factor
 * @param dly_p    delay settings; whether the result keeps or copies this
 *                 pointer is decided by the implementation -- TODO confirm
 * @return pointer to a struct print_params
 */
struct print_params *create_print_params(float scale_x,
                                         float scale_y,
                                         struct delay_params *dly_p);
/*
 * Declaration of create_qlearning_params(); it returns a pointer to
 * struct qlearning_params.
 *
 * NOTE(review): two parameter lists are interleaved here. The first uses
 * double and ends at nb_training_before_update_weight_in_target without a
 * trailing comma. The second uses float and adds number_episodes. This looks
 * like the before/after sides of a change shown without +/- markers; as
 * written it is not valid C. Presumably the float list is current -- confirm
 * against the definition.
 */
struct qlearning_params * create_qlearning_params (
double gamma,
double learning_rate,
double discount_factor,
double exploration_factor,
long int nb_training_before_update_weight_in_target
float gamma,
float learning_rate,
float discount_factor,
float exploration_factor,
long int nb_training_before_update_weight_in_target,
size_t number_episodes
);
/*
 * Declaration of create_RL_agent(): takes the component objects of an agent
 * and returns a pointer to struct RL_agent.
 *
 * NOTE(review): both `delay` and `pprint` are listed. struct print_params
 * already carries a struct delay_params pointer, so `delay` may be the
 * pre-change parameter that `pprint` replaced -- confirm against the
 * definition before relying on a six-argument call.
 *
 * NOTE(review): whether the agent takes ownership of the passed pointers is
 * decided by the implementation and by free_RL_agent() -- confirm there.
 */
struct RL_agent * create_RL_agent (
struct networks_qlearning * networks,
struct vehicle * car,
struct status_qlearning * status,
struct delay_params * delay,
struct print_params * pprint,
struct qlearning_params *qlearnParams
);
/*
 * Release functions, one per object type declared in this header. Each takes
 * the object pointer and returns nothing.
 *
 * NOTE(review): what each one releases is defined in the implementation, e.g.
 * whether free_RL_agent() also releases the objects the agent points to --
 * confirm there before combining calls.
 */
void free_networks_qlearning(struct networks_qlearning *networks);
void free_status_qlearning(struct status_qlearning *status_ql);
void free_print_params(struct print_params *pprint);
void free_delay_params(struct delay_params *dly_p);
void free_qlearning_params(struct qlearning_params *q_params);
void free_RL_agent(struct RL_agent *rlAgent);
@@ -92,9 +122,11 @@ void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * net
/*
 * Weight copy between networks held by `networks`.
 * Presumably copies the main network's weights into a "best" network, as the
 * name says -- the struct members involved are not fully visible here.
 */
void copy_weight_in_networks_from_main_to_best(struct networks_qlearning *networks);
/*
 * Declaration of train_qlearning(): one training call for the given agent and
 * action.
 *
 * NOTE(review): two versions of the parameter list are interleaved here. The
 * first takes action, new_state, state and reward and ends with
 * `long reward);`. The second ends right after `int action);` and keeps the
 * former state/reward parameters only as comments. This looks like the
 * before/after sides of a change shown without +/- markers; as written it is
 * not valid C. Presumably the two-parameter form is current -- confirm
 * against the definition.
 */
void train_qlearning(struct RL_agent * rlAgent,
int action ,
tensor_TYPE_FLOAT * new_state /*input*/,
tensor_TYPE_FLOAT * state /*input*/,
long reward);
int action);
// tensor_TYPE_FLOAT * new_state /*input*/,
// tensor_TYPE_FLOAT * state /*input*/,
// long reward;
/*
 * Entry point that runs learning for the given agent.
 * NOTE(review): loop structure, blocking behaviour and thread use are defined
 * in the implementation -- confirm there.
 */
void learn_to_drive(struct RL_agent *rlAgent);
#endif /* __LEARNING_VEHICLE__C_H____ */