Modify COMPARE_N in tools; represent vehicle attributes (coordinate, sensors) as tensors

This commit is contained in:
2024-06-13 23:35:25 +02:00
parent 13f91583bb
commit 9927d6642c
12 changed files with 217 additions and 56 deletions
+53 -14
View File
@@ -47,14 +47,16 @@ struct networks_qlearning * create_nework_qlearning(
}
struct reward_lists * create_reward_lists (){
struct reward_lists * rwrd_l = malloc(sizeof(struct reward_lists));
struct status_qlearning * create_status_qlearning (){
struct status_qlearning * status_ql = malloc(sizeof(struct status_qlearning));
rwrd_l->list_main_cumul = create_var_list_TYPE_L_INT();
rwrd_l->list_target_cumul = create_var_list_TYPE_L_INT();
rwrd_l->progress_best_cumul = create_var_list_TYPE_L_INT();
status_ql->list_main_cumul = create_var_list_TYPE_L_INT();
status_ql->list_target_cumul = create_var_list_TYPE_L_INT();
status_ql->progress_best_cumul = create_var_list_TYPE_L_INT();
return rwrd_l;
status_ql->nb_training_after_updated_weight_in_target = 0;
return status_ql;
}
struct delay_params * create_delay_params (
@@ -71,7 +73,8 @@ struct delay_params * create_delay_params (
struct qlearning_params * create_qlearning_params (
double learning_rate,
double discount_factor,
double exploration_factor
double exploration_factor,
long int nb_training_before_update_weight_in_target
){
struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params));
@@ -79,13 +82,15 @@ struct qlearning_params * create_qlearning_params (
qparams->discount_factor = discount_factor ;
qparams->exploration_factor = exploration_factor ;
qparams->nb_training_before_update_weight_in_target = nb_training_before_update_weight_in_target;
return qparams;
}
struct RL_agent * create_RL_agent (
struct networks_qlearning * networks,
struct vehicle * car,
struct reward_lists * rewards,
struct status_qlearning * status,
struct delay_params * delay,
struct qlearning_params *qlearnParams
){
@@ -93,7 +98,7 @@ struct RL_agent * create_RL_agent (
rlagent->networks = networks ;
rlagent->car = car ;
rlagent->rewards = rewards ;
rlagent->status = status ;
rlagent->delay = delay ;
rlagent->qlearnParams = qlearnParams ;
@@ -101,18 +106,52 @@ struct RL_agent * create_RL_agent (
}
/*
 * Release a networks_qlearning bundle and everything it references:
 * the main, target and best networks, the layer configuration, and
 * finally the bundle itself.
 *
 * networks: bundle to destroy; NULL is accepted and ignored, in the same
 *           way free(NULL) is a no-op.
 */
void free_networks_qlearning (struct networks_qlearning * networks){
    if(networks == NULL)
        return;

    free_neurons_TYPE_FLOAT(networks->main_net);
    free_neurons_TYPE_FLOAT(networks->target_net);
    free_neurons_TYPE_FLOAT(networks->best_net);
    free_config_layers(networks->config);
    free(networks);
}
void free_reward_lists(struct reward_lists *rwd_l){
/*
 * Release a status_qlearning object: its three cumulative-reward lists
 * (main, target, best-progress) and then the object itself.
 *
 * status_ql: object to destroy; NULL is accepted and ignored, in the same
 *            way free(NULL) is a no-op.
 */
void free_status_qlearning(struct status_qlearning *status_ql){
    if(status_ql == NULL)
        return;

    free_all_var_list_TYPE_L_INT(status_ql->list_main_cumul);
    free_all_var_list_TYPE_L_INT(status_ql->list_target_cumul);
    free_all_var_list_TYPE_L_INT(status_ql->progress_best_cumul);
    free(status_ql);
}
/* Release a delay_params object; it is handed straight to free(). */
void free_delay_params (struct delay_params *dly_p)
{
    free(dly_p);
}
/* Release a qlearning_params object; it is handed straight to free(). */
void free_qlearning_params(struct qlearning_params *q_params)
{
    free(q_params);
}
/*
 * Destroy an RL agent together with every component it points to:
 * Q-learning parameters, delay parameters, status, networks and vehicle.
 *
 * rlAgent: agent to destroy; NULL is accepted and ignored.
 *
 * Members that are NULL (e.g. on a partially assembled agent) are skipped
 * instead of being passed to destructors that dereference their argument.
 * The agent takes its components by pointer, so after this call none of
 * the pointers given to create_RL_agent() may be used again.
 */
void free_RL_agent(struct RL_agent *rlAgent){
    if(rlAgent == NULL)
        return;

    /* free() already tolerates NULL, no guard needed for these two */
    free(rlAgent->qlearnParams);
    free(rlAgent->delay);

    if(rlAgent->status != NULL)
        free_status_qlearning(rlAgent->status);
    if(rlAgent->networks != NULL)
        free_networks_qlearning(rlAgent->networks);
    if(rlAgent->car != NULL)
        free_vehicle(rlAgent->car);

    free(rlAgent);
}
/*
 * train_qlearning - one training step of the Q-learning agent
 * (work in progress: the update itself is not implemented yet).
 *
 * rlAgent:   agent whose main/target networks and vehicle status are used.
 * action:    action that was taken (not used yet).
 * new_state: input tensor fed to the target network.
 * state:     input tensor fed to the main network.
 * reward:    reward obtained (not used yet).
 *
 * Current behaviour: runs a forward pass of the main network on `state`
 * and of the target network on `new_state`, then takes a private copy of
 * the main network's output as the starting point for the training target.
 *
 * NOTE(review): the target network was previously evaluated on `state`,
 * leaving `new_state` unused; the Q-learning target is built from the
 * value of the *next* state, so `new_state` is used here - confirm.
 */
void train_qlearning(struct RL_agent * rlAgent,
        int action /* */,
        tensor_TYPE_FLOAT * new_state /*input*/,
        tensor_TYPE_FLOAT * state /*input*/,
        long reward){
    /* Both tensors are borrowed: the forward pass returns a link to the
     * output tensor owned by the last layer, so they must not be freed. */
    tensor_TYPE_FLOAT * action_value = NULL;
    tensor_TYPE_FLOAT * next_action_value = NULL;
    neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net;
    neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net;

    /* not consumed until the update rule is written */
    (void)action;
    (void)reward;

    calculate_output_by_network_neurons_TYPE_FLOAT(net_main, state, &action_value);
    calculate_output_by_network_neurons_TYPE_FLOAT(net_target, new_state, &next_action_value);

    /* The forward pass leaves the link untouched when a network has no
     * layer after its base, so the pointers can still be NULL here. */
    if(action_value == NULL || next_action_value == NULL)
        return;

    tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim);
    struct game_status * car_status = rlAgent->car->status;

    if( copy_tensor_TYPE_FLOAT(experimental_values, action_value) == 0 /* done */){
        if(car_status->done){
            /* TODO: terminal transition - target for `action` is `reward` alone */
        }
    }

    /* experimental_values is a private copy owned by this function */
    if(experimental_values != NULL)
        free_tensor_TYPE_FLOAT(experimental_values);
}
@@ -17,15 +17,21 @@
/*
 * Hyper-parameters of the Q-learning agent.
 * learning_rate, discount_factor, exploration_factor and
 * nb_training_before_update_weight_in_target are the arguments of
 * create_qlearning_params(). The factor_update_* and minimum_threshold_*
 * fields have no matching constructor argument.
 */
struct qlearning_params {
double learning_rate;
/* NOTE(review): presumably the factor applied to learning_rate when it is updated - confirm */
double factor_update_learning_rate;
/* NOTE(review): presumably the lower bound for learning_rate - confirm */
double minimum_threshold_learning_rate;
double discount_factor;
double exploration_factor;
/* NOTE(review): presumably the factor applied to exploration_factor when it is updated - confirm */
double factor_update_exploration_factor;
/* NOTE(review): presumably the lower bound for exploration_factor - confirm */
double minimum_threshold_exploration_factor;
/* NOTE(review): presumably the number of training calls between two copies of the main weights into the target network - confirm */
long int nb_training_before_update_weight_in_target;
};
struct reward_lists {
/*
 * Running state of a Q-learning session.
 * The three lists are allocated by create_status_qlearning() with
 * create_var_list_TYPE_L_INT() and released by free_status_qlearning().
 */
struct status_qlearning {
/* NOTE(review): presumably cumulative rewards obtained with the main network - confirm */
struct main_list_TYPE_L_INT * list_main_cumul;
/* NOTE(review): presumably cumulative rewards obtained with the target network - confirm */
struct main_list_TYPE_L_INT * list_target_cumul;
/* NOTE(review): presumably history of the best cumulative reward - confirm */
struct main_list_TYPE_L_INT * progress_best_cumul;
/* set to 0 by create_status_qlearning() */
long int nb_training_after_updated_weight_in_target;
};
struct delay_params {
@@ -43,7 +49,7 @@ struct networks_qlearning {
struct RL_agent {
struct networks_qlearning * networks;
struct vehicle * car;
struct reward_lists * rewards;
struct status_qlearning * status;
struct delay_params * delay;
struct qlearning_params *qlearnParams;
@@ -53,7 +59,7 @@ struct networks_qlearning * create_nework_qlearning(
struct config_layers * config,
bool randomize, float minR, float maxR, int randomRange
);
struct reward_lists * create_reward_lists ();
struct status_qlearning * create_status_qlearning ();
struct delay_params * create_delay_params (
size_t delay_between_episodes,
size_t delay_between_games
@@ -62,19 +68,20 @@ struct delay_params * create_delay_params (
struct qlearning_params * create_qlearning_params (
double learning_rate,
double discount_factor,
double exploration_factor
double exploration_factor,
long int nb_training_before_update_weight_in_target
);
struct RL_agent * create_RL_agent (
struct networks_qlearning * networks,
struct vehicle * car,
struct reward_lists * rewards,
struct status_qlearning * status,
struct delay_params * delay,
struct qlearning_params *qlearnParams
);
void free_networks_qlearning (struct networks_qlearning * networks);
void free_reward_lists(struct reward_lists *rwd_l);
void free_status_qlearning(struct status_qlearning *status_ql);
void free_delay_params (struct delay_params *dly_p);
void free_qlearning_params(struct qlearning_params *q_params);
void free_RL_agent(struct RL_agent *rlAgent);
@@ -82,5 +89,10 @@ void free_RL_agent(struct RL_agent *rlAgent);
void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * networks);
void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks);
/*
 * Training entry point of the Q-learning agent.
 * state and new_state are input tensors (see the inline markers).
 * NOTE(review): presumably `state` is the observation before `action`,
 * `new_state` the observation after it, and `reward` the reward obtained
 * for that transition - confirm against the caller.
 */
void train_qlearning(struct RL_agent * rlAgent,
int action ,
tensor_TYPE_FLOAT * new_state /*input*/,
tensor_TYPE_FLOAT * state /*input*/,
long reward);
#endif /* __LEARNING_VEHICLE__C_H____ */
+2 -2
View File
@@ -326,7 +326,7 @@ void print2D_blocks_indexOne_withPoint(struct blocks *blk, float scale_x, float
if(in)
printf("%d",in);
else
printf(" ");
printf("."); //printf(" ");
printf("\033[0;37m"); // white
}
printf("\n");
@@ -478,7 +478,7 @@ void step(struct vehicle *v, int action){
status->reward = 0;
status->done =false;
struct blocks * path = v->path;
printf(" center : %f vs %f direction: %f\n",v->sensor->value[CENTER], LIMIT_DISTANCE, v->direction);
//printf(" center : %f vs %f direction: %f\n",v->sensor->value[CENTER], LIMIT_DISTANCE, v->direction);
if( v->sensor->value[CENTER]<= LIMIT_DISTANCE ){
status->reward = REWARD_STOP;
status->done = true;
+26 -21
View File
@@ -16,6 +16,7 @@
#include "tools_t/tools_t.h"
#include "dimension_t/dimension_t.h"
#include "tensor_t/tensor_t.h"
#define LOG_LENTH 128
@@ -35,10 +36,12 @@ struct game_status {
int cur_log;
};
struct coordinate {
size_t dimension_size;
float *x;
};
//struct coordinate {
// size_t dimension_size;
// float *x;
//};
typedef tensor_TYPE_FLOAT coordinate;
/*
+-----------------------+ <-- upper_bound_block (coordinate (6,5) for example)
@@ -55,32 +58,34 @@ struct coordinate {
*/
struct blocks {
size_t nb_blocks;
struct coordinate **lower_bound_block;
struct coordinate **upper_bound_block;
struct coordinate **bounds_all_blocks;
coordinate **lower_bound_block;
coordinate **upper_bound_block;
coordinate **bounds_all_blocks;
bool all_updated;
size_t dimension_size;
dimension *dim;
bool *marker;
//float step: // size of subdivision of the lowest large
};
struct sensors {
size_t nb_values;
float *value;
};
//struct sensors {
// size_t nb_values;
// float *value;
// tensor_TYPE_FLOAT * sensor;
//};
typedef tensor_TYPE_FLOAT sensors;
struct vehicle {
struct coordinate *coord;
coordinate *coord;
float direction;
float speed;
struct sensors *sensor;
sensors *sensor;
struct blocks *path;
struct game_status *status;
};
struct game_status * greate_game_status();
struct coordinate * create_coordinate(size_t dim_size);
coordinate * create_coordinate(size_t dim_size);
struct blocks * create_blocks(size_t nb_blocks, size_t dim_size);
struct sensors * create_sensors(size_t nb_values);
@@ -89,18 +94,18 @@ struct vehicle * create_vehicle(
);
void free_game_status(struct game_status *status);
void free_coordinate(struct coordinate *coord);
void free_coordinate(coordinate *coord);
void free_blocks(struct blocks *blk);
void free_sensors(struct sensors *snsr);
void free_sensors(sensors *snsr);
void free_vehicle(struct vehicle * vhcl);
void update_bounds_limits_blocks(struct blocks * blk);
int is_in_blocks(struct blocks *blk, struct coordinate *coord);
int is_in_blocks(struct blocks *blk, coordinate *coord);
void copy_coordinate(struct coordinate *coord, float *x);
void copy_coordinate(coordinate *coord, float *x);
void move_vehicle(struct vehicle *v);
void read_sensor(struct vehicle *v);
@@ -111,9 +116,9 @@ void reset(struct vehicle *v);
void print2D_blocks_indexOne_withPoint(struct blocks *blk, float scale_x, float scale_y, struct coordinate *coordPoint);
void print_vehicle_n_path(struct vehicle *v, float scale_x, float scale_y);
float distance2_coordinate(struct coordinate *c0, struct coordinate *c1);
float distance2_coordinate(coordinate *c0, coordinate *c1);
void print2D_blocks(struct blocks *blk, float scale_x, float scale_y, char pad);
void print2D_blocks_withPoint(struct blocks *blk, float scale_x, float scale_y, char pad, struct coordinate *coordPoint);
void print2D_blocks_withPoint(struct blocks *blk, float scale_x, float scale_y, char pad, coordinate *coordPoint);
#endif /* __VEHICLE__C_H__ */
+4 -2
View File
@@ -165,7 +165,7 @@ TEST(first_vehicle){
copy_coordinate(path->upper_bound_block[0], (float[]){2,7});
copy_coordinate(path->lower_bound_block[1], (float[]){2,0});
copy_coordinate(path->upper_bound_block[1], (float[]){4,2});
copy_coordinate(path->lower_bound_block[2], (float[]){4,1});
copy_coordinate(path->lower_bound_block[2], (float[]){4,0.5});
copy_coordinate(path->upper_bound_block[2], (float[]){8,3});
copy_coordinate(path->lower_bound_block[3], (float[]){8,0});
copy_coordinate(path->upper_bound_block[3], (float[]){16,2});
@@ -204,7 +204,9 @@ TEST(first_vehicle){
}
TEST(reward_list){
struct reward_lists * l_reward = create_reward_lists ();
struct status_qlearning * l_reward = create_status_qlearning();
free_status_qlearning(l_reward);
}
int main(int argc, char **argv){
+13
View File
@@ -747,6 +747,19 @@ size_t learning_online2_neurons_##type(neurons_##type *base, data_set_##type *da
return nbreps;\
}\
\
/* Forward pass without printing: copies the input values into the output */\
/* buffer of the base layer, evaluates every following layer in order and */\
/* stores in *output_link a link (no copy) to the output tensor of the    */\
/* last layer. *output_link is left untouched when base has no next layer. */\
void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link){\
  size_t k = 0;\
  while(k < (input->dim)->rank){\
    (base->output)->x[k] = input->x[k];\
    ++k;\
  }\
  for(neurons_##type *layer = base->next_layer; layer != NULL; layer = layer->next_layer){\
    calc_out_neurons_##type(layer);\
    /* only the last layer publishes its output */\
    if(layer->next_layer == NULL)\
      *output_link = layer->output;\
  }\
}\
void print_predict_by_network_neurons_##type(neurons_##type *base, tensor_##type *input){\
for(size_t i=0; i<(input->dim)->rank; ++i) (base->output)->x[i]=input->x[i];\
neurons_##type * tmp=base->next_layer;\
+1 -1
View File
@@ -106,7 +106,7 @@ void print_data_set_msg_##type(data_set_##type *ds, char *msg);\
\
size_t learning_online_neurons_##type(neurons_##type *base, data_set_##type *dataset, bool (*condition)(type, size_t));\
size_t learning_online2_neurons_##type(neurons_##type *base, data_set_##type *dataset, bool (*condition)(type, size_t));\
\
void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link);\
void print_predict_by_network_neurons_##type(neurons_##type *base, tensor_##type *input);\
void print_predict_by_network_with_error_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type *target);\
\
+21 -3
View File
@@ -21,6 +21,7 @@
#define VALGRIND_ 1
/*
 * Half squared difference between t and o: (o - t)^2 / 2.
 * NOTE(review): presumably t is the target and o the network output,
 * since the function is passed to the layer setup together with DL.
 */
float L(float t, float o){
    const float gap = o - t;
    return gap * gap / 2;
}
@@ -356,13 +357,30 @@ TEST(copy_weight_in_neurons){
size_t reps = learning_online2_neurons_TYPE_FLOAT(bn,ds,cond);
setup_all_layers_functions_TYPE_FLOAT(cpyn,
tensorContractnProdThread_TYPE_FLOAT,
tensorProdThread_TYPE_FLOAT,
DL,
L,
f,
df);
setup_all_layers_params_TYPE_FLOAT(cpyn, 5, 1 , 0.1);
copy_weight_in_neurons_TYPE_FLOAT(cpyn, bn);
char msg[256];
tensor_TYPE_FLOAT * linked_tens = NULL;
for(size_t i=0; i<ds->size; ++i){
print_predict_by_network_with_error_neurons_TYPE_FLOAT(bn,ds->input[i],ds->target[i]);
print_predict_by_network_with_error_neurons_TYPE_FLOAT(cpyn,ds->input[i],ds->target[i]);
// print_predict_by_network_with_error_neurons_TYPE_FLOAT(bn,ds->input[i],ds->target[i]);
// print_predict_by_network_with_error_neurons_TYPE_FLOAT(cpyn,ds->input[i],ds->target[i]);
calculate_output_by_network_neurons_TYPE_FLOAT(bn,ds->input[i],&linked_tens);
sprintf(msg," output base %ld ",i);
print_tensor_msg_TYPE_FLOAT(linked_tens,msg);
calculate_output_by_network_neurons_TYPE_FLOAT(cpyn,ds->input[i],&linked_tens);
sprintf(msg," output copy %ld ",i);
print_tensor_msg_TYPE_FLOAT(linked_tens,msg);
}
+10 -5
View File
@@ -130,11 +130,16 @@ tensor_##type* CLONE_TENSOR_##type(tensor_##type *tens){\
return NULL;\
}\
\
void copy_tensor_##type(tensor_##type * dst, tensor_##type * src){\
if(dst!=NULL && src!=NULL && dst->dim->rank == src->dim->rank){ \
for(size_t i=0; i<(dst->dim)->rank;++i)\
dst->x[i]=src->x[i];\
}\
/* Copies the values of src into dst when both have the same rank.        */\
/* Returns 0 after a copy, the rank difference (dst - src, as int) when   */\
/* the ranks differ (nothing is copied), and -1 when dst or src is NULL.  */\
/* NOTE(review): a rank difference of -1 cannot be told apart from the    */\
/* NULL-argument case.                                                    */\
int copy_tensor_##type(tensor_##type * dst, tensor_##type * src){\
  if(dst==NULL || src==NULL)\
    return -1;\
  int rank_gap = dst->dim->rank - src->dim->rank;\
  if(rank_gap != 0)\
    return rank_gap;\
  for(size_t k=0; k<(src->dim)->rank; ++k)\
    dst->x[k]=src->x[k];\
  return rank_gap;\
}\
\
void free_tensor_##type(tensor_##type * tens){\
+52
View File
@@ -1705,6 +1705,58 @@ TEST(copy_tensor){
}
TEST(tensorContractnProd_TYPE_DOUBLE_2_2 ){
  /*
   * Contracted product (contraction argument 2) of two 3-index tensors,
   * the first filled with 2 and the second with 3. The operands and the
   * result are only printed: no expected values are checked yet.
   */
#if VALGRIND_
  const int extents_lhs[3] = {1, 2, 3};
  const int extents_rhs[3] = {2, 3, 1};
#else
  const int extents_lhs[3] = {1, 22, 52};
  const int extents_rhs[3] = {52, 22, 1};
#endif

  dimension *dim_lhs = create_dim(3);
  dimension *dim_rhs = create_dim(3);
  for(size_t k = 0; k < 3; ++k){
    dim_lhs->perm[k] = extents_lhs[k];
    dim_rhs->perm[k] = extents_rhs[k];
  }
  updateRankDim(dim_lhs);
  updateRankDim(dim_rhs);

  tensor_TYPE_DOUBLE *lhs = CREATE_TENSOR_TYPE_DOUBLE(dim_lhs);
  tensor_TYPE_DOUBLE *rhs = CREATE_TENSOR_TYPE_DOUBLE(dim_rhs);

  for(size_t k = 0; k < lhs->dim->rank; ++k)
    lhs->x[k] = 2;
  for(size_t k = 0; k < rhs->dim->rank; ++k)
    rhs->x[k] = 3;

  print_tensor_double(lhs, "M0");
  print_tensor_double(rhs, "M1");

  tensor_TYPE_DOUBLE *product = NULL;
  tensorContractnProd_TYPE_DOUBLE(&product, lhs, rhs, 2);
  print_tensor_double(product, "M");

  /* TODO: compare product->x[] against reference values with EXPECT_EQ_TYPE_DOUBLE */

  free_tensor_TYPE_DOUBLE(product);
  free_tensor_TYPE_DOUBLE(lhs);
  free_tensor_TYPE_DOUBLE(rhs);
}
Binary file not shown.
+17 -2
View File
@@ -115,14 +115,29 @@ long int PRECISION_TYPE_L_DOUBLE = 100000000000000;
#define GENERATE_FUNCTION_NUMERIC(type)\
int COMPARE_N_##type(const void *a, const void *b){ \
type diff = (*(type*)a - *(type*)b) * PRECISION_##type; \
type diff = 0;\
if((*(type*)a > *(type*)b)){ \
diff =(*(type*)a - *(type*)b) * PRECISION_##type; \
/*char *str_diff = type##_TO_STR(diff), *str_a = type##_TO_STR(*(type*)a), *str_b = type##_TO_STR(*(type*)b);\
PRINT_DEBUG_(" diff = %s a=%s b=%s PRECISION : %ld\n",str_diff, str_a, str_b, PRECISION_##type);\
free(str_diff); free(str_a); free(str_b);\
*/ \
if(diff >= 1) return 1;\
return 0;\
}else{\
diff =(*(type*)b - *(type*)a) * PRECISION_##type; \
/*char *str_diff = type##_TO_STR(diff), *str_a = type##_TO_STR(*(type*)a), *str_b = type##_TO_STR(*(type*)b);\
PRINT_DEBUG_(" diff = %s a=%s b=%s PRECISION : %ld\n",str_diff, str_a, str_b, PRECISION_##type);\
free(str_diff); free(str_a); free(str_b);\
*/\
if (diff <= -1) return -1; \
if(diff >= 1) return -1;\
return 0;\
}\
\
/*if (diff <= -1) return -1; \
if (diff >= 1) return 1; \
return 0; \
*/\
} \
\
void COPY_ARRAY_##type(type *dst, const type *src, size_t size){ \