update learn to drive and Makefile
This commit is contained in:
@@ -19,6 +19,100 @@ float D_L2(float t, float o){
|
|||||||
return (o - t);
|
return (o - t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * networks){
|
||||||
|
copy_weight_in_neurons_TYPE_FLOAT(networks->target_net, networks->main_net);
|
||||||
|
}
|
||||||
|
void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks){
|
||||||
|
copy_weight_in_neurons_TYPE_FLOAT(networks->best_net, networks->main_net);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct networks_qlearning * create_nework_qlearning(
|
||||||
|
struct config_layers * config,
|
||||||
|
bool randomize, float minR, float maxR, int randomRange
|
||||||
|
){
|
||||||
|
struct networks_qlearning *qnets = malloc(sizeof(struct networks_qlearning));
|
||||||
|
qnets->config = config;
|
||||||
|
|
||||||
|
setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->main_net), config,
|
||||||
|
random, minR, maxR, randomRange);
|
||||||
|
setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->target_net), config,
|
||||||
|
false, minR, maxR, randomRange);
|
||||||
|
copy_weight_in_networks_from_main_to_target(qnets);
|
||||||
|
|
||||||
|
setup_networks_alloutputs_config_TYPE_FLOAT(&(qnets->best_net), config,
|
||||||
|
false, minR, maxR, randomRange);
|
||||||
|
copy_weight_in_networks_from_main_to_best(qnets);
|
||||||
|
|
||||||
|
return qnets;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
struct reward_lists * create_reward_lists (){
|
||||||
|
struct reward_lists * rwrd_l = malloc(sizeof(struct reward_lists));
|
||||||
|
|
||||||
|
rwrd_l->list_main_cumul = create_var_list_TYPE_L_INT();
|
||||||
|
rwrd_l->list_target_cumul = create_var_list_TYPE_L_INT();
|
||||||
|
rwrd_l->progress_best_cumul = create_var_list_TYPE_L_INT();
|
||||||
|
|
||||||
|
return rwrd_l;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct delay_params * create_delay_params (
|
||||||
|
size_t delay_between_episodes,
|
||||||
|
size_t delay_between_games
|
||||||
|
){
|
||||||
|
struct delay_params * delay = malloc(sizeof(struct delay_params));
|
||||||
|
delay->delay_between_episodes = delay_between_episodes;
|
||||||
|
delay->delay_between_games = delay_between_games;
|
||||||
|
|
||||||
|
return delay;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct qlearning_params * create_qlearning_params (
|
||||||
|
double learning_rate,
|
||||||
|
double discount_factor,
|
||||||
|
double exploration_factor
|
||||||
|
){
|
||||||
|
struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params));
|
||||||
|
|
||||||
|
qparams->learning_rate = learning_rate ;
|
||||||
|
qparams->discount_factor = discount_factor ;
|
||||||
|
qparams->exploration_factor = exploration_factor ;
|
||||||
|
|
||||||
|
return qparams;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct RL_agent * create_RL_agent (
|
||||||
|
struct networks_qlearning * networks,
|
||||||
|
struct vehicle * car,
|
||||||
|
struct reward_lists * rewards,
|
||||||
|
struct delay_params * delay,
|
||||||
|
struct qlearning_params *qlearnParams
|
||||||
|
){
|
||||||
|
struct RL_agent * rlagent = malloc(sizeof(struct RL_agent));
|
||||||
|
|
||||||
|
rlagent->networks = networks ;
|
||||||
|
rlagent->car = car ;
|
||||||
|
rlagent->rewards = rewards ;
|
||||||
|
rlagent->delay = delay ;
|
||||||
|
rlagent->qlearnParams = qlearnParams ;
|
||||||
|
|
||||||
|
return rlagent;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Release hook for a networks_qlearning bundle.
 *
 * NOTE(review): the body is empty, so calling this currently releases
 * nothing. The bundle itself is obtained with malloc in
 * create_nework_qlearning, which also sets up main_net, target_net and
 * best_net; the routine that tears those networks down is not visible
 * here -- TODO implement once it is identified.
 */
void free_networks_qlearning (struct networks_qlearning * networks){
|
||||||
|
|
||||||
|
}
|
||||||
|
/*
 * Release hook for a reward_lists record.
 *
 * NOTE(review): the body is empty, so calling this currently releases
 * nothing. create_reward_lists obtains the record with malloc and fills it
 * with three lists from create_var_list_TYPE_L_INT(); the matching list
 * release routine is not visible here -- TODO implement once it is
 * identified.
 */
void free_reward_lists(struct reward_lists *rwd_l){
|
||||||
|
|
||||||
|
}
|
||||||
|
/**
 * Release a delay_params record obtained from create_delay_params.
 *
 * The record is a single malloc'd allocation holding two size_t values, so
 * one free() releases it completely. Passing NULL is allowed and does
 * nothing.
 *
 * @param dly_p record to release; must not be used afterwards.
 */
void free_delay_params (struct delay_params *dly_p){
    free(dly_p);
}
|
||||||
|
/**
 * Release a qlearning_params record obtained from create_qlearning_params.
 *
 * The record is a single malloc'd allocation whose fields are plain values,
 * so one free() releases it completely. Passing NULL is allowed and does
 * nothing.
 *
 * @param q_params record to release; must not be used afterwards.
 */
void free_qlearning_params(struct qlearning_params *q_params){
    free(q_params);
}
|
||||||
|
/**
 * Release an RL_agent obtained from create_RL_agent.
 *
 * Only the agent structure itself is freed. create_RL_agent allocates
 * nothing else: the networks, car, rewards, delay and qlearnParams pointers
 * are supplied by the caller and merely stored, so they are left untouched
 * here and remain the caller's to release.
 * Passing NULL is allowed and does nothing.
 *
 * @param rlAgent agent to release; must not be used afterwards.
 */
void free_RL_agent(struct RL_agent *rlAgent){
    free(rlAgent);
}
|
||||||
|
|
||||||
|
|||||||
@@ -59,7 +59,7 @@ struct delay_params * create_delay_params (
|
|||||||
size_t delay_between_games
|
size_t delay_between_games
|
||||||
);
|
);
|
||||||
|
|
||||||
struct qlearning_params (
|
struct qlearning_params * create_qlearning_params (
|
||||||
double learning_rate,
|
double learning_rate,
|
||||||
double discount_factor,
|
double discount_factor,
|
||||||
double exploration_factor
|
double exploration_factor
|
||||||
|
|||||||
@@ -11,10 +11,13 @@ YPERMDIR=$(PWD)/../../ypermutation_t
|
|||||||
DIMDIR=$(PWD)/../../dimension_t
|
DIMDIR=$(PWD)/../../dimension_t
|
||||||
|
|
||||||
TENSDIR=$(PWD)/../../tensor_t
|
TENSDIR=$(PWD)/../../tensor_t
|
||||||
|
LISTDIR=$(PWD)/../../list_t
|
||||||
|
|
||||||
|
|
||||||
NEURODIR=$(PWD)/../../neuron_t
|
NEURODIR=$(PWD)/../../neuron_t
|
||||||
INCLUDE_DIR=$(PWD)/../src/deepQlearning
|
INCLUDE_DIR=$(PWD)/../src/deepQlearning
|
||||||
CFLAGS=-I$(INCLUDE_DIR) -I$(NEURODIR)/src -I$(YPERMDIR)/src -I$(YTESTDIR)/include_ytest/include -I$(DIMDIR)/src -I$(TENSDIR)/src -I$(YTOOLDIR)/include #"-D DEBUG=1"
|
CFLAGS=-I$(INCLUDE_DIR) -I$(NEURODIR)/src -I$(YPERMDIR)/src -I$(YTESTDIR)/include_ytest/include -I$(DIMDIR)/src -I$(TENSDIR)/src -I$(YTOOLDIR)/include -I$(LISTDIR)/src
|
||||||
|
#"-D DEBUG=1"
|
||||||
LDFLAGS=-L$(YTESTDIR) -lytest -lOpenCL -lm -lpthread #-lcurses
|
LDFLAGS=-L$(YTESTDIR) -lytest -lOpenCL -lm -lpthread #-lcurses
|
||||||
|
|
||||||
#SRC_DIR=$(ROOT_DIR)/src
|
#SRC_DIR=$(ROOT_DIR)/src
|
||||||
@@ -38,6 +41,8 @@ TENSRC_O=$(TENSRC:.c=.o)
|
|||||||
VEHICLESRC=$(INCLUDE_DIR)/vehicle.c
|
VEHICLESRC=$(INCLUDE_DIR)/vehicle.c
|
||||||
VEHICLESRC_O=$(VEHICLESRC:.c=.o)
|
VEHICLESRC_O=$(VEHICLESRC:.c=.o)
|
||||||
|
|
||||||
|
LEARNTODRIVESRC=$(INCLUDE_DIR)/learn_to_drive.c
|
||||||
|
LEARNTODRIVESRC_O=$(LEARNTODRIVESRC:.c=.o)
|
||||||
|
|
||||||
TOOLSRC_O=$(YTOOLDIR)/src/tools_t/tools_t.o
|
TOOLSRC_O=$(YTOOLDIR)/src/tools_t/tools_t.o
|
||||||
|
|
||||||
@@ -45,11 +50,14 @@ PERMSRC_O=$(YPERMDIR)/src/permutation_t/permutation_t.o
|
|||||||
|
|
||||||
DIMSRC_O=$(DIMDIR)/src/dimension_t/dimension_t.o
|
DIMSRC_O=$(DIMDIR)/src/dimension_t/dimension_t.o
|
||||||
|
|
||||||
|
LISTSRC_O=$(LISTDIR)/src/list_t/list_t.o
|
||||||
|
|
||||||
|
|
||||||
TOPTARGETS := all clean
|
TOPTARGETS := all clean
|
||||||
|
|
||||||
DEPS=$(DIMDIR) $(YPERMDIR) $(YTESTDIR) $(TENSDIR) $(NEURODIR) $(YTOOLDIR)
|
DEPS=$(DIMDIR) $(YPERMDIR) $(YTESTDIR) $(TENSDIR) $(NEURODIR) $(YTOOLDIR) $(LISTDIR)
|
||||||
|
|
||||||
OBJ=$(VEHICLESRC_O) $(DIMSRC_O) $(PERMSRC_O) $(TENSRC_O) $(NEUROSRC_O) $(TOOLSRC_O)
|
OBJ=$(VEHICLESRC_O) $(DIMSRC_O) $(PERMSRC_O) $(TENSRC_O) $(NEUROSRC_O) $(TOOLSRC_O) $(LISTSRC_O) $(LEARNTODRIVESRC_O)
|
||||||
|
|
||||||
LIB_YTEST=$(YTESTDIR)/libytest.so
|
LIB_YTEST=$(YTESTDIR)/libytest.so
|
||||||
|
|
||||||
@@ -75,6 +83,8 @@ $(EXEC): $(EXECSRC) $(OBJ)
|
|||||||
$(VEHICLESRC_O): $(VEHICLESRC) $(TOOLSRC_O) $(DIMSRC_O)
|
$(VEHICLESRC_O): $(VEHICLESRC) $(TOOLSRC_O) $(DIMSRC_O)
|
||||||
$(CC) -o $@ -c $< $(CFLAGS)
|
$(CC) -o $@ -c $< $(CFLAGS)
|
||||||
|
|
||||||
|
$(LEARNTODRIVESRC_O): $(LEARNTODRIVESRC) $(VEHICLESRC_O) $(LISTSRC_O)
|
||||||
|
$(CC) -o $@ -c $< $(CFLAGS)
|
||||||
|
|
||||||
.PHONY: clean mrproper
|
.PHONY: clean mrproper
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@
|
|||||||
#include "neuron_t/neuron_t.h"
|
#include "neuron_t/neuron_t.h"
|
||||||
|
|
||||||
#include "vehicle.h"
|
#include "vehicle.h"
|
||||||
|
#include "learn_to_drive.h"
|
||||||
|
|
||||||
TEST(create_coordenate){
|
TEST(create_coordenate){
|
||||||
struct coordinate * coord = create_coordinate(3);
|
struct coordinate * coord = create_coordinate(3);
|
||||||
@@ -199,6 +200,11 @@ TEST(first_vehicle){
|
|||||||
|
|
||||||
free_vehicle(vhcl);
|
free_vehicle(vhcl);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(reward_list){
|
||||||
|
struct reward_lists * l_reward = create_reward_lists ();
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv){
|
int main(int argc, char **argv){
|
||||||
|
|||||||
Reference in New Issue
Block a user