update learn to drive and Makefile

This commit is contained in:
2024-06-12 00:24:17 +02:00
parent fca991eb37
commit 13f91583bb
4 changed files with 115 additions and 5 deletions
@@ -19,6 +19,100 @@ float D_L2(float t, float o){
return (o - t); return (o - t);
} }
/*
 * Synchronise the target network with the main network.
 *
 * Forwards to copy_weight_in_neurons_TYPE_FLOAT with target_net as the
 * first argument and main_net as the second.
 * NOTE(review): presumably the helper takes (destination, source) - confirm
 * against the neuron_t API.
 */
void copy_weight_in_networks_from_main_to_target(struct networks_qlearning *networks)
{
    copy_weight_in_neurons_TYPE_FLOAT(
        networks->target_net,
        networks->main_net
    );
}
/*
 * Snapshot the main network into the "best" network.
 *
 * Forwards to copy_weight_in_neurons_TYPE_FLOAT with best_net as the first
 * argument and main_net as the second.
 * NOTE(review): presumably the helper takes (destination, source) - confirm
 * against the neuron_t API.
 */
void copy_weight_in_networks_from_main_to_best(struct networks_qlearning *networks)
{
    copy_weight_in_neurons_TYPE_FLOAT(
        networks->best_net,
        networks->main_net
    );
}
/*
 * Allocate a networks_qlearning bundle and set up its three networks
 * (main, target, best) from the same layer configuration.
 *
 * config      layer configuration; the pointer is stored as-is, not copied.
 * randomize   forwarded to the set-up of the main network only.
 * minR, maxR, randomRange
 *             forwarded unchanged to every set-up call.
 *
 * The target and best networks are set up with the randomize flag forced to
 * false and then receive a copy of the main network's weights, so all three
 * start out identical.
 *
 * Returns the new bundle, or NULL when the allocation of the bundle itself
 * fails. The caller owns the result.
 */
struct networks_qlearning * create_nework_qlearning(
    struct config_layers * config,
    bool randomize, float minR, float maxR, int randomRange
){
    struct networks_qlearning *qnets = malloc(sizeof(struct networks_qlearning));
    if (!qnets) {
        return NULL;
    }

    qnets->config = config;

    /* Pass the caller's flag: the previous code passed `random`, which is not
     * the parameter (it resolves to the libc function, or to nothing). */
    setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->main_net), config,
        randomize, minR, maxR, randomRange);

    setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->target_net), config,
        false, minR, maxR, randomRange);
    copy_weight_in_networks_from_main_to_target(qnets);

    setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->best_net), config,
        false, minR, maxR, randomRange);
    copy_weight_in_networks_from_main_to_best(qnets);

    return qnets;
}
/*
 * Allocate a reward_lists container and create its three lists
 * (list_main_cumul, list_target_cumul, progress_best_cumul) with
 * create_var_list_TYPE_L_INT().
 *
 * Returns the new container, or NULL when the container itself cannot be
 * allocated. The caller owns the result.
 */
struct reward_lists * create_reward_lists (){
    struct reward_lists * rwrd_l = malloc(sizeof(struct reward_lists));
    if (!rwrd_l) {
        /* Do not create the lists if there is nowhere to store them. */
        return NULL;
    }

    rwrd_l->list_main_cumul = create_var_list_TYPE_L_INT();
    rwrd_l->list_target_cumul = create_var_list_TYPE_L_INT();
    rwrd_l->progress_best_cumul = create_var_list_TYPE_L_INT();

    return rwrd_l;
}
/*
 * Allocate a delay_params record and fill it with the two given delays.
 *
 * delay_between_episodes  stored in the field of the same name.
 * delay_between_games     stored in the field of the same name.
 *
 * Returns the new record, or NULL when allocation fails. The caller owns
 * the result.
 */
struct delay_params * create_delay_params (
    size_t delay_between_episodes,
    size_t delay_between_games
){
    struct delay_params * delay = malloc(sizeof(struct delay_params));
    if (!delay) {
        return NULL;
    }

    delay->delay_between_episodes = delay_between_episodes;
    delay->delay_between_games = delay_between_games;

    return delay;
}
/*
 * Allocate a qlearning_params record and fill it with the given
 * hyper-parameters.
 *
 * learning_rate, discount_factor, exploration_factor
 *     each stored in the field of the same name; no range validation is done.
 *
 * Returns the new record, or NULL when allocation fails. The caller owns
 * the result.
 */
struct qlearning_params * create_qlearning_params (
    double learning_rate,
    double discount_factor,
    double exploration_factor
){
    struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params));
    if (!qparams) {
        return NULL;
    }

    qparams->learning_rate = learning_rate;
    qparams->discount_factor = discount_factor;
    qparams->exploration_factor = exploration_factor;

    return qparams;
}
/*
 * Allocate an RL_agent and attach the given components to it.
 *
 * networks, car, rewards, delay, qlearnParams
 *     each pointer is stored as-is in the field of the same name; nothing is
 *     copied and no argument is checked for NULL.
 *
 * Returns the new agent, or NULL when allocation fails (the arguments are
 * left untouched in that case). The caller owns the result.
 */
struct RL_agent * create_RL_agent (
    struct networks_qlearning * networks,
    struct vehicle * car,
    struct reward_lists * rewards,
    struct delay_params * delay,
    struct qlearning_params *qlearnParams
){
    struct RL_agent * rlagent = malloc(sizeof(struct RL_agent));
    if (!rlagent) {
        return NULL;
    }

    rlagent->networks = networks;
    rlagent->car = car;
    rlagent->rewards = rewards;
    rlagent->delay = delay;
    rlagent->qlearnParams = qlearnParams;

    return rlagent;
}
/*
 * Intended to release a networks_qlearning bundle.
 *
 * Currently an empty stub: nothing is released, so the bundle and its
 * networks stay allocated after this call.
 * TODO(review): release main_net, target_net and best_net with the matching
 * neuron_t teardown routine, then free the bundle itself; confirm whether
 * `config` is owned here or by the caller.
 */
void free_networks_qlearning (struct networks_qlearning * networks){
}
/*
 * Intended to release a reward_lists container.
 *
 * Currently an empty stub: nothing is released, so the container and its
 * lists stay allocated after this call.
 * TODO(review): release list_main_cumul, list_target_cumul and
 * progress_best_cumul with the list_t destructor, then free the container.
 */
void free_reward_lists(struct reward_lists *rwd_l){
}
/*
 * Release a delay_params record obtained from create_delay_params().
 *
 * The record is a single malloc'd allocation, so one free() returns it
 * completely. Passing NULL is allowed and does nothing.
 */
void free_delay_params (struct delay_params *dly_p){
    free(dly_p);
}
/*
 * Release a qlearning_params record obtained from create_qlearning_params().
 *
 * The record is a single malloc'd allocation, so one free() returns it
 * completely. Passing NULL is allowed and does nothing.
 */
void free_qlearning_params(struct qlearning_params *q_params){
    free(q_params);
}
/*
 * Intended to release an RL_agent.
 *
 * Currently an empty stub: nothing is released, so the agent stays allocated
 * after this call.
 * TODO(review): decide whether the agent owns the networks, car, rewards,
 * delay and qlearnParams pointers it holds (create_RL_agent stores them
 * without copying) before freeing them here; at minimum the agent struct
 * itself needs to be freed.
 */
void free_RL_agent(struct RL_agent *rlAgent){
}
@@ -59,7 +59,7 @@ struct delay_params * create_delay_params (
size_t delay_between_games size_t delay_between_games
); );
struct qlearning_params ( struct qlearning_params * create_qlearning_params (
double learning_rate, double learning_rate,
double discount_factor, double discount_factor,
double exploration_factor double exploration_factor
+13 -3
View File
@@ -11,10 +11,13 @@ YPERMDIR=$(PWD)/../../ypermutation_t
DIMDIR=$(PWD)/../../dimension_t DIMDIR=$(PWD)/../../dimension_t
TENSDIR=$(PWD)/../../tensor_t TENSDIR=$(PWD)/../../tensor_t
LISTDIR=$(PWD)/../../list_t
NEURODIR=$(PWD)/../../neuron_t NEURODIR=$(PWD)/../../neuron_t
INCLUDE_DIR=$(PWD)/../src/deepQlearning INCLUDE_DIR=$(PWD)/../src/deepQlearning
CFLAGS=-I$(INCLUDE_DIR) -I$(NEURODIR)/src -I$(YPERMDIR)/src -I$(YTESTDIR)/include_ytest/include -I$(DIMDIR)/src -I$(TENSDIR)/src -I$(YTOOLDIR)/include #"-D DEBUG=1" CFLAGS=-I$(INCLUDE_DIR) -I$(NEURODIR)/src -I$(YPERMDIR)/src -I$(YTESTDIR)/include_ytest/include -I$(DIMDIR)/src -I$(TENSDIR)/src -I$(YTOOLDIR)/include -I$(LISTDIR)/src
#"-D DEBUG=1"
LDFLAGS=-L$(YTESTDIR) -lytest -lOpenCL -lm -lpthread #-lcurses LDFLAGS=-L$(YTESTDIR) -lytest -lOpenCL -lm -lpthread #-lcurses
#SRC_DIR=$(ROOT_DIR)/src #SRC_DIR=$(ROOT_DIR)/src
@@ -38,6 +41,8 @@ TENSRC_O=$(TENSRC:.c=.o)
VEHICLESRC=$(INCLUDE_DIR)/vehicle.c VEHICLESRC=$(INCLUDE_DIR)/vehicle.c
VEHICLESRC_O=$(VEHICLESRC:.c=.o) VEHICLESRC_O=$(VEHICLESRC:.c=.o)
LEARNTODRIVESRC=$(INCLUDE_DIR)/learn_to_drive.c
LEARNTODRIVESRC_O=$(LEARNTODRIVESRC:.c=.o)
TOOLSRC_O=$(YTOOLDIR)/src/tools_t/tools_t.o TOOLSRC_O=$(YTOOLDIR)/src/tools_t/tools_t.o
@@ -45,11 +50,14 @@ PERMSRC_O=$(YPERMDIR)/src/permutation_t/permutation_t.o
DIMSRC_O=$(DIMDIR)/src/dimension_t/dimension_t.o DIMSRC_O=$(DIMDIR)/src/dimension_t/dimension_t.o
LISTSRC_O=$(LISTDIR)/src/list_t/list_t.o
TOPTARGETS := all clean TOPTARGETS := all clean
DEPS=$(DIMDIR) $(YPERMDIR) $(YTESTDIR) $(TENSDIR) $(NEURODIR) $(YTOOLDIR) DEPS=$(DIMDIR) $(YPERMDIR) $(YTESTDIR) $(TENSDIR) $(NEURODIR) $(YTOOLDIR) $(LISTDIR)
OBJ=$(VEHICLESRC_O) $(DIMSRC_O) $(PERMSRC_O) $(TENSRC_O) $(NEUROSRC_O) $(TOOLSRC_O) OBJ=$(VEHICLESRC_O) $(DIMSRC_O) $(PERMSRC_O) $(TENSRC_O) $(NEUROSRC_O) $(TOOLSRC_O) $(LISTSRC_O) $(LEARNTODRIVESRC_O)
LIB_YTEST=$(YTESTDIR)/libytest.so LIB_YTEST=$(YTESTDIR)/libytest.so
@@ -75,6 +83,8 @@ $(EXEC): $(EXECSRC) $(OBJ)
$(VEHICLESRC_O): $(VEHICLESRC) $(TOOLSRC_O) $(DIMSRC_O) $(VEHICLESRC_O): $(VEHICLESRC) $(TOOLSRC_O) $(DIMSRC_O)
$(CC) -o $@ -c $< $(CFLAGS) $(CC) -o $@ -c $< $(CFLAGS)
# Compile learn_to_drive.c into its object file (compile only, -c) using the
# shared CFLAGS. The vehicle and list objects are listed as prerequisites so
# they are built first; only the first prerequisite ($<) is compiled here.
$(LEARNTODRIVESRC_O): $(LEARNTODRIVESRC) $(VEHICLESRC_O) $(LISTSRC_O)
$(CC) -o $@ -c $< $(CFLAGS)
.PHONY: clean mrproper .PHONY: clean mrproper
+6
View File
@@ -20,6 +20,7 @@
#include "neuron_t/neuron_t.h" #include "neuron_t/neuron_t.h"
#include "vehicle.h" #include "vehicle.h"
#include "learn_to_drive.h"
TEST(create_coordenate){ TEST(create_coordenate){
struct coordinate * coord = create_coordinate(3); struct coordinate * coord = create_coordinate(3);
@@ -199,6 +200,11 @@ TEST(first_vehicle){
free_vehicle(vhcl); free_vehicle(vhcl);
}
/*
 * Smoke test: a reward_lists container can be created and handed back to
 * its matching release function.
 */
TEST(reward_list){
    struct reward_lists * l_reward = create_reward_lists ();
    /* Pair every create with its free so the test does not leak. */
    free_reward_lists(l_reward);
}
int main(int argc, char **argv){ int main(int argc, char **argv){