From 13f91583bbd11840cceb24821e46759e917d11b8 Mon Sep 17 00:00:00 2001 From: fanasina Date: Wed, 12 Jun 2024 00:24:17 +0200 Subject: [PATCH] update learn to drive and Makefile --- Review notes: create_nework_qlearning now forwards its `randomize` argument when setting up main_net; the previous revision of this patch passed `random`, an identifier that is neither a parameter nor a local of that function, so `randomize` was never used. Still open: the five free_* functions added to learn_to_drive.c have empty bodies, and TEST(reward_list) in is_good.c never releases the struct returned by create_reward_lists. .../src/deepQlearning/learn_to_drive.c | 94 +++++++++++++++++++ .../src/deepQlearning/learn_to_drive.h | 2 +- deepQlearn_0/test/Makefile | 16 +++- deepQlearn_0/test/is_good.c | 8 +- 4 files changed, 115 insertions(+), 5 deletions(-) diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.c b/deepQlearn_0/src/deepQlearning/learn_to_drive.c index cbd4af1..648eb47 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.c +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.c @@ -19,6 +19,100 @@ float D_L2(float t, float o){ return (o - t); } +void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * networks){ + copy_weight_in_neurons_TYPE_FLOAT(networks->target_net, networks->main_net); +} +void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks){ + copy_weight_in_neurons_TYPE_FLOAT(networks->best_net, networks->main_net); +} +struct networks_qlearning * create_nework_qlearning( + struct config_layers * config, + bool randomize, float minR, float maxR, int randomRange +){ + struct networks_qlearning *qnets = malloc(sizeof(struct networks_qlearning)); + qnets->config = config; + setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->main_net), config, + randomize, minR, maxR, randomRange); + setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->target_net), config, + false, minR, maxR, randomRange); + copy_weight_in_networks_from_main_to_target(qnets); + + setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->best_net), config, + false, minR, maxR, randomRange); + copy_weight_in_networks_from_main_to_best(qnets); + + return qnets; + +} + +struct reward_lists * create_reward_lists (){ + struct reward_lists * rwrd_l = malloc(sizeof(struct reward_lists)); + + rwrd_l->list_main_cumul = create_var_list_TYPE_L_INT(); + rwrd_l->list_target_cumul = 
create_var_list_TYPE_L_INT(); + rwrd_l->progress_best_cumul = create_var_list_TYPE_L_INT(); + + return rwrd_l; +} + +struct delay_params * create_delay_params ( + size_t delay_between_episodes, + size_t delay_between_games +){ + struct delay_params * delay = malloc(sizeof(struct delay_params)); + delay->delay_between_episodes = delay_between_episodes; + delay->delay_between_games = delay_between_games; + + return delay; +} + +struct qlearning_params * create_qlearning_params ( + double learning_rate, + double discount_factor, + double exploration_factor +){ + struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params)); + + qparams->learning_rate = learning_rate ; + qparams->discount_factor = discount_factor ; + qparams->exploration_factor = exploration_factor ; + + return qparams; +} + +struct RL_agent * create_RL_agent ( + struct networks_qlearning * networks, + struct vehicle * car, + struct reward_lists * rewards, + struct delay_params * delay, + struct qlearning_params *qlearnParams +){ + struct RL_agent * rlagent = malloc(sizeof(struct RL_agent)); + + rlagent->networks = networks ; + rlagent->car = car ; + rlagent->rewards = rewards ; + rlagent->delay = delay ; + rlagent->qlearnParams = qlearnParams ; + + return rlagent; +} + +void free_networks_qlearning (struct networks_qlearning * networks){ + +} +void free_reward_lists(struct reward_lists *rwd_l){ + +} +void free_delay_params (struct delay_params *dly_p){ + +} +void free_qlearning_params(struct qlearning_params *q_params){ + +} +void free_RL_agent(struct RL_agent *rlAgent){ + +} diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.h b/deepQlearn_0/src/deepQlearning/learn_to_drive.h index 2538e6b..7cb9530 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.h +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.h @@ -59,7 +59,7 @@ struct delay_params * create_delay_params ( size_t delay_between_games ); -struct qlearning_params ( +struct qlearning_params * create_qlearning_params 
( double learning_rate, double discount_factor, double exploration_factor diff --git a/deepQlearn_0/test/Makefile b/deepQlearn_0/test/Makefile index 30548f5..aefe07a 100644 --- a/deepQlearn_0/test/Makefile +++ b/deepQlearn_0/test/Makefile @@ -11,10 +11,13 @@ YPERMDIR=$(PWD)/../../ypermutation_t DIMDIR=$(PWD)/../../dimension_t TENSDIR=$(PWD)/../../tensor_t +LISTDIR=$(PWD)/../../list_t + NEURODIR=$(PWD)/../../neuron_t INCLUDE_DIR=$(PWD)/../src/deepQlearning -CFLAGS=-I$(INCLUDE_DIR) -I$(NEURODIR)/src -I$(YPERMDIR)/src -I$(YTESTDIR)/include_ytest/include -I$(DIMDIR)/src -I$(TENSDIR)/src -I$(YTOOLDIR)/include #"-D DEBUG=1" +CFLAGS=-I$(INCLUDE_DIR) -I$(NEURODIR)/src -I$(YPERMDIR)/src -I$(YTESTDIR)/include_ytest/include -I$(DIMDIR)/src -I$(TENSDIR)/src -I$(YTOOLDIR)/include -I$(LISTDIR)/src + #"-D DEBUG=1" LDFLAGS=-L$(YTESTDIR) -lytest -lOpenCL -lm -lpthread #-lcurses #SRC_DIR=$(ROOT_DIR)/src @@ -38,6 +41,8 @@ TENSRC_O=$(TENSRC:.c=.o) VEHICLESRC=$(INCLUDE_DIR)/vehicle.c VEHICLESRC_O=$(VEHICLESRC:.c=.o) +LEARNTODRIVESRC=$(INCLUDE_DIR)/learn_to_drive.c +LEARNTODRIVESRC_O=$(LEARNTODRIVESRC:.c=.o) TOOLSRC_O=$(YTOOLDIR)/src/tools_t/tools_t.o @@ -45,11 +50,14 @@ PERMSRC_O=$(YPERMDIR)/src/permutation_t/permutation_t.o DIMSRC_O=$(DIMDIR)/src/dimension_t/dimension_t.o +LISTSRC_O=$(LISTDIR)/src/list_t/list_t.o + + TOPTARGETS := all clean -DEPS=$(DIMDIR) $(YPERMDIR) $(YTESTDIR) $(TENSDIR) $(NEURODIR) $(YTOOLDIR) +DEPS=$(DIMDIR) $(YPERMDIR) $(YTESTDIR) $(TENSDIR) $(NEURODIR) $(YTOOLDIR) $(LISTDIR) -OBJ=$(VEHICLESRC_O) $(DIMSRC_O) $(PERMSRC_O) $(TENSRC_O) $(NEUROSRC_O) $(TOOLSRC_O) +OBJ=$(VEHICLESRC_O) $(DIMSRC_O) $(PERMSRC_O) $(TENSRC_O) $(NEUROSRC_O) $(TOOLSRC_O) $(LISTSRC_O) $(LEARNTODRIVESRC_O) LIB_YTEST=$(YTESTDIR)/libytest.so @@ -75,6 +83,8 @@ $(EXEC): $(EXECSRC) $(OBJ) $(VEHICLESRC_O): $(VEHICLESRC) $(TOOLSRC_O) $(DIMSRC_O) $(CC) -o $@ -c $< $(CFLAGS) +$(LEARNTODRIVESRC_O): $(LEARNTODRIVESRC) $(VEHICLESRC_O) $(LISTSRC_O) + $(CC) -o $@ -c $< $(CFLAGS) .PHONY: clean mrproper diff 
--git a/deepQlearn_0/test/is_good.c b/deepQlearn_0/test/is_good.c index 5144783..c81183f 100644 --- a/deepQlearn_0/test/is_good.c +++ b/deepQlearn_0/test/is_good.c @@ -20,6 +20,7 @@ #include "neuron_t/neuron_t.h" #include "vehicle.h" +#include "learn_to_drive.h" TEST(create_coordenate){ struct coordinate * coord = create_coordinate(3); @@ -199,6 +200,11 @@ TEST(first_vehicle){ free_vehicle(vhcl); + +} + +TEST(reward_list){ + struct reward_lists * l_reward = create_reward_lists (); } int main(int argc, char **argv){ @@ -206,5 +212,5 @@ int main(int argc, char **argv){ run_all_tests_args(argc, argv); - return 0; +return 0; }