diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.c b/deepQlearn_0/src/deepQlearning/learn_to_drive.c index 648eb47..958f9da 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.c +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.c @@ -47,14 +47,16 @@ struct networks_qlearning * create_nework_qlearning( } -struct reward_lists * create_reward_lists (){ - struct reward_lists * rwrd_l = malloc(sizeof(struct reward_lists)); +struct status_qlearning * create_status_qlearning (){ + struct status_qlearning * status_ql = malloc(sizeof(struct status_qlearning)); - rwrd_l->list_main_cumul = create_var_list_TYPE_L_INT(); - rwrd_l->list_target_cumul = create_var_list_TYPE_L_INT(); - rwrd_l->progress_best_cumul = create_var_list_TYPE_L_INT(); + status_ql->list_main_cumul = create_var_list_TYPE_L_INT(); + status_ql->list_target_cumul = create_var_list_TYPE_L_INT(); + status_ql->progress_best_cumul = create_var_list_TYPE_L_INT(); + + status_ql->nb_training_after_updated_weight_in_target = 0; - return rwrd_l; + return status_ql; } struct delay_params * create_delay_params ( @@ -71,13 +73,16 @@ struct delay_params * create_delay_params ( struct qlearning_params * create_qlearning_params ( double learning_rate, double discount_factor, - double exploration_factor + double exploration_factor, + long int nb_training_before_update_weight_in_target ){ struct qlearning_params * qparams = malloc(sizeof(struct qlearning_params)); qparams->learning_rate = learning_rate ; qparams->discount_factor = discount_factor ; qparams->exploration_factor = exploration_factor ; + + qparams->nb_training_before_update_weight_in_target = nb_training_before_update_weight_in_target; return qparams; } @@ -85,7 +90,7 @@ struct qlearning_params * create_qlearning_params ( struct RL_agent * create_RL_agent ( struct networks_qlearning * networks, struct vehicle * car, - struct reward_lists * rewards, + struct status_qlearning * status, struct delay_params * delay, struct qlearning_params *qlearnParams ){ @@ -93,7 +98,7 @@ struct RL_agent * create_RL_agent ( rlagent->networks = networks ; rlagent->car = car ; - rlagent->rewards = rewards ; + rlagent->status = status ; rlagent->delay = delay ; rlagent->qlearnParams = qlearnParams ; @@ -101,18 +106,52 @@ struct RL_agent * create_RL_agent ( } void free_networks_qlearning (struct networks_qlearning * networks){ - + free_neurons_TYPE_FLOAT(networks->main_net); + free_neurons_TYPE_FLOAT(networks->target_net); + free_neurons_TYPE_FLOAT(networks->best_net); + free_config_layers(networks->config); + free(networks); } -void free_reward_lists(struct reward_lists *rwd_l){ - +void free_status_qlearning(struct status_qlearning *status_ql){ + free_all_var_list_TYPE_L_INT(status_ql->list_main_cumul); + free_all_var_list_TYPE_L_INT(status_ql->list_target_cumul); + free_all_var_list_TYPE_L_INT(status_ql->progress_best_cumul); + free(status_ql); } void free_delay_params (struct delay_params *dly_p){ - + free(dly_p); } void free_qlearning_params(struct qlearning_params *q_params){ - + free(q_params); } void free_RL_agent(struct RL_agent *rlAgent){ + free(rlAgent->qlearnParams); + free(rlAgent->delay); + free_status_qlearning(rlAgent->status); + free_networks_qlearning(rlAgent->networks); + free_vehicle(rlAgent->car); + free(rlAgent); } +void train_qlearning(struct RL_agent * rlAgent, + int action /* */, + tensor_TYPE_FLOAT * new_state /*input*/, + tensor_TYPE_FLOAT * state /*input*/, + long reward){ + tensor_TYPE_FLOAT * action_value = NULL; + tensor_TYPE_FLOAT * next_action_value = NULL; + neurons_TYPE_FLOAT * net_main = rlAgent->networks->main_net; + neurons_TYPE_FLOAT * net_target = rlAgent->networks->target_net; + calculate_output_by_network_neurons_TYPE_FLOAT(net_main, state, &action_value); + calculate_output_by_network_neurons_TYPE_FLOAT(net_target, state, &next_action_value); + tensor_TYPE_FLOAT * experimental_values = CREATE_TENSOR_FROM_CPY_DIM_TYPE_FLOAT(action_value->dim); + + struct game_status * car_status = rlAgent->car->status; + if( copy_tensor_TYPE_FLOAT(experimental_values, action_value) == 0 /* done */){ + if(status->done){ + + } + } + +} diff --git a/deepQlearn_0/src/deepQlearning/learn_to_drive.h b/deepQlearn_0/src/deepQlearning/learn_to_drive.h index 7cb9530..2c9dfae 100644 --- a/deepQlearn_0/src/deepQlearning/learn_to_drive.h +++ b/deepQlearn_0/src/deepQlearning/learn_to_drive.h @@ -17,15 +17,21 @@ struct qlearning_params { double learning_rate; + double factor_update_learning_rate; + double minimum_threshold_learning_rate; double discount_factor; double exploration_factor; + double factor_update_exploration_factor; + double minimum_threshold_exploration_factor; + long int nb_training_before_update_weight_in_target; }; -struct reward_lists { +struct status_qlearning { struct main_list_TYPE_L_INT * list_main_cumul; struct main_list_TYPE_L_INT * list_target_cumul; struct main_list_TYPE_L_INT * progress_best_cumul; + long int nb_training_after_updated_weight_in_target; }; struct delay_params { @@ -43,7 +49,7 @@ struct networks_qlearning { struct RL_agent { struct networks_qlearning * networks; struct vehicle * car; - struct reward_lists * rewards; + struct status_qlearning * status; struct delay_params * delay; struct qlearning_params *qlearnParams; @@ -53,7 +59,7 @@ struct networks_qlearning * create_nework_qlearning( struct config_layers * config, bool randomize, float minR, float maxR, int randomRange ); -struct reward_lists * create_reward_lists (); +struct status_qlearning * create_status_qlearning (); struct delay_params * create_delay_params ( size_t delay_between_episodes, size_t delay_between_games @@ -62,19 +68,20 @@ struct delay_params * create_delay_params ( struct qlearning_params * create_qlearning_params ( double learning_rate, double discount_factor, - double exploration_factor + double exploration_factor, + long int nb_training_before_update_weight_in_target ); struct RL_agent * create_RL_agent ( struct networks_qlearning * networks, struct vehicle * car, - struct reward_lists * rewards, + struct status_qlearning * status, struct delay_params * delay, struct qlearning_params *qlearnParams ); void free_networks_qlearning (struct networks_qlearning * networks); -void free_reward_lists(struct reward_lists *rwd_l); +void free_status_qlearning(struct status_qlearning *status_ql); void free_delay_params (struct delay_params *dly_p); void free_qlearning_params(struct qlearning_params *q_params); void free_RL_agent(struct RL_agent *rlAgent); @@ -82,5 +89,10 @@ void free_RL_agent(struct RL_agent *rlAgent); void copy_weight_in_networks_from_main_to_target(struct networks_qlearning * networks); void copy_weight_in_networks_from_main_to_best(struct networks_qlearning * networks); +void train_qlearning(struct RL_agent * rlAgent, + int action , + tensor_TYPE_FLOAT * new_state /*input*/, + tensor_TYPE_FLOAT * state /*input*/, + long reward); #endif /* __LEARNING_VEHICLE__C_H____ */ diff --git a/deepQlearn_0/src/deepQlearning/vehicle.c b/deepQlearn_0/src/deepQlearning/vehicle.c index 0b44486..fa97eaa 100644 --- a/deepQlearn_0/src/deepQlearning/vehicle.c +++ b/deepQlearn_0/src/deepQlearning/vehicle.c @@ -326,7 +326,7 @@ void print2D_blocks_indexOne_withPoint(struct blocks *blk, float scale_x, float if(in) printf("%d",in); else - printf(" "); + printf("."); //printf(" "); printf("\033[0;37m"); // white } printf("\n"); @@ -478,7 +478,7 @@ void step(struct vehicle *v, int action){ status->reward = 0; status->done =false; struct blocks * path = v->path; - printf(" center : %f vs %f direction: %f\n",v->sensor->value[CENTER], LIMIT_DISTANCE, v->direction); + //printf(" center : %f vs %f direction: %f\n",v->sensor->value[CENTER], LIMIT_DISTANCE, v->direction); if( v->sensor->value[CENTER]<= LIMIT_DISTANCE ){ status->reward = REWARD_STOP; status->done = true; diff --git a/deepQlearn_0/src/deepQlearning/vehicle.h b/deepQlearn_0/src/deepQlearning/vehicle.h index 10a3377..d6d1327 100644 --- a/deepQlearn_0/src/deepQlearning/vehicle.h +++ b/deepQlearn_0/src/deepQlearning/vehicle.h @@ -16,6 +16,7 @@ #include "tools_t/tools_t.h" #include "dimension_t/dimension_t.h" +#include "tensor_t/tensor_t.h" #define LOG_LENTH 128 @@ -35,10 +36,12 @@ struct game_status { int cur_log; }; -struct coordinate { - size_t dimension_size; - float *x; -}; +//struct coordinate { +// size_t dimension_size; +// float *x; +//}; + +typedef tensor_TYPE_FLOAT coordinate; /* +-----------------------+ <-- upper_bound_block (coordinate (6,5) for example) @@ -55,32 +58,34 @@ struct coordinate { */ struct blocks { size_t nb_blocks; - struct coordinate **lower_bound_block; - struct coordinate **upper_bound_block; - struct coordinate **bounds_all_blocks; + coordinate **lower_bound_block; + coordinate **upper_bound_block; + coordinate **bounds_all_blocks; bool all_updated; - size_t dimension_size; + dimension *dim; bool *marker; //float step: // size of subdivision of the lowest large }; -struct sensors { - size_t nb_values; - float *value; -}; +//struct sensors { +// size_t nb_values; +// float *value; +// tensor_TYPE_FLOAT * sensor; +//}; +typedef tensor_TYPE_FLOAT sensors; struct vehicle { - struct coordinate *coord; + coordinate *coord; float direction; float speed; - struct sensors *sensor; + sensors *sensor; struct blocks *path; struct game_status *status; }; struct game_status * greate_game_status(); -struct coordinate * create_coordinate(size_t dim_size); +coordinate * create_coordinate(size_t dim_size); struct blocks * create_blocks(size_t nb_blocks, size_t dim_size); struct sensors * create_sensors(size_t nb_values); @@ -89,18 +94,18 @@ struct vehicle * create_vehicle( ); void free_game_status(struct game_status *status); -void free_coordinate(struct coordinate *coord); +void free_coordinate(coordinate *coord); void free_blocks(struct blocks *blk); -void free_sensors(struct sensors *snsr); +void free_sensors(sensors *snsr); void free_vehicle(struct vehicle * vhcl); void update_bounds_limits_blocks(struct blocks * blk); -int is_in_blocks(struct blocks *blk, struct coordinate *coord); +int is_in_blocks(struct blocks *blk, coordinate *coord); -void copy_coordinate(struct coordinate *coord, float *x); +void copy_coordinate(coordinate *coord, float *x); void move_vehicle(struct vehicle *v); void read_sensor(struct vehicle *v); @@ -111,9 +116,9 @@ void reset(struct vehicle *v); void print2D_blocks_indexOne_withPoint(struct blocks *blk, float scale_x, float scale_y, struct coordinate *coordPoint); void print_vehicle_n_path(struct vehicle *v, float scale_x, float scale_y); -float distance2_coordinate(struct coordinate *c0, struct coordinate *c1); +float distance2_coordinate(coordinate *c0, coordinate *c1); void print2D_blocks(struct blocks *blk, float scale_x, float scale_y, char pad); -void print2D_blocks_withPoint(struct blocks *blk, float scale_x, float scale_y, char pad, struct coordinate *coordPoint); +void print2D_blocks_withPoint(struct blocks *blk, float scale_x, float scale_y, char pad, coordinate *coordPoint); #endif /* __VEHICLE__C_H__ */ diff --git a/deepQlearn_0/test/is_good.c b/deepQlearn_0/test/is_good.c index c81183f..a9f4a61 100644 --- a/deepQlearn_0/test/is_good.c +++ b/deepQlearn_0/test/is_good.c @@ -165,7 +165,7 @@ TEST(first_vehicle){ copy_coordinate(path->upper_bound_block[0], (float[]){2,7}); copy_coordinate(path->lower_bound_block[1], (float[]){2,0}); copy_coordinate(path->upper_bound_block[1], (float[]){4,2}); - copy_coordinate(path->lower_bound_block[2], (float[]){4,1}); + copy_coordinate(path->lower_bound_block[2], (float[]){4,0.5}); copy_coordinate(path->upper_bound_block[2], (float[]){8,3}); copy_coordinate(path->lower_bound_block[3], (float[]){8,0}); copy_coordinate(path->upper_bound_block[3], (float[]){16,2}); @@ -204,7 +204,9 @@ TEST(first_vehicle){ } TEST(reward_list){ - struct reward_lists * l_reward = create_reward_lists (); + struct status_qlearning * l_reward = create_status_qlearning(); + + free_status_qlearning(l_reward); } int main(int argc, char **argv){ diff --git a/neuron_t/src/neuron_t/neuron_t.c b/neuron_t/src/neuron_t/neuron_t.c index 4c1e1df..96e6fe5 100644 --- a/neuron_t/src/neuron_t/neuron_t.c +++ b/neuron_t/src/neuron_t/neuron_t.c @@ -747,6 +747,19 @@ size_t learning_online2_neurons_##type(neurons_##type *base, data_set_##type *da return nbreps;\ }\ \ +void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link){\ + for(size_t i=0; i<(input->dim)->rank; ++i) (base->output)->x[i]=input->x[i];\ + neurons_##type * tmp=base->next_layer;\ + while(tmp){\ + calc_out_neurons_##type(tmp);\ + if(tmp->next_layer==NULL){\ + /*print_tensor_msg_##type(tmp->output,"retult");*/\ + *output_link = tmp->output;\ + }\ + tmp = tmp->next_layer;\ + }\ +\ +}\ void print_predict_by_network_neurons_##type(neurons_##type *base, tensor_##type *input){\ for(size_t i=0; i<(input->dim)->rank; ++i) (base->output)->x[i]=input->x[i];\ neurons_##type * tmp=base->next_layer;\ diff --git a/neuron_t/src/neuron_t/neuron_t.h b/neuron_t/src/neuron_t/neuron_t.h index 8a7f268..5b7b3ac 100644 --- a/neuron_t/src/neuron_t/neuron_t.h +++ b/neuron_t/src/neuron_t/neuron_t.h @@ -106,7 +106,7 @@ void print_data_set_msg_##type(data_set_##type *ds, char *msg);\ \ size_t learning_online_neurons_##type(neurons_##type *base, data_set_##type *dataset, bool (*condition)(type, size_t));\ size_t learning_online2_neurons_##type(neurons_##type *base, data_set_##type *dataset, bool (*condition)(type, size_t));\ -\ +void calculate_output_by_network_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type **output_link);\ void print_predict_by_network_neurons_##type(neurons_##type *base, tensor_##type *input);\ void print_predict_by_network_with_error_neurons_##type(neurons_##type *base, tensor_##type *input, tensor_##type *target);\ \ diff --git a/neuron_t/test/is_good.c b/neuron_t/test/is_good.c index a9a43eb..4bfd506 100644 --- a/neuron_t/test/is_good.c +++ b/neuron_t/test/is_good.c @@ -21,6 +21,7 @@ #define VALGRIND_ 1 + float L(float t, float o){ return (o - t) * (o - t)/2; } @@ -356,13 +357,30 @@ TEST(copy_weight_in_neurons){ size_t reps = learning_online2_neurons_TYPE_FLOAT(bn,ds,cond); + setup_all_layers_functions_TYPE_FLOAT(cpyn, + tensorContractnProdThread_TYPE_FLOAT, + tensorProdThread_TYPE_FLOAT, + DL, + L, + f, + df); + + setup_all_layers_params_TYPE_FLOAT(cpyn, 5, 1 , 0.1); + + copy_weight_in_neurons_TYPE_FLOAT(cpyn, bn); char msg[256]; + tensor_TYPE_FLOAT * linked_tens = NULL; for(size_t i=0; isize; ++i){ - print_predict_by_network_with_error_neurons_TYPE_FLOAT(bn,ds->input[i],ds->target[i]); - print_predict_by_network_with_error_neurons_TYPE_FLOAT(cpyn,ds->input[i],ds->target[i]); - +// print_predict_by_network_with_error_neurons_TYPE_FLOAT(bn,ds->input[i],ds->target[i]); + // print_predict_by_network_with_error_neurons_TYPE_FLOAT(cpyn,ds->input[i],ds->target[i]); + calculate_output_by_network_neurons_TYPE_FLOAT(bn,ds->input[i],&linked_tens); + sprintf(msg," output base %ld ",i); + print_tensor_msg_TYPE_FLOAT(linked_tens,msg); + calculate_output_by_network_neurons_TYPE_FLOAT(cpyn,ds->input[i],&linked_tens); + sprintf(msg," output copy %ld ",i); + print_tensor_msg_TYPE_FLOAT(linked_tens,msg); } diff --git a/tensor_t/src/tensor_t/tensor_t.c b/tensor_t/src/tensor_t/tensor_t.c index 71068e1..604e61c 100644 --- a/tensor_t/src/tensor_t/tensor_t.c +++ b/tensor_t/src/tensor_t/tensor_t.c @@ -130,11 +130,16 @@ tensor_##type* CLONE_TENSOR_##type(tensor_##type *tens){\ return NULL;\ }\ \ -void copy_tensor_##type(tensor_##type * dst, tensor_##type * src){\ - if(dst!=NULL && src!=NULL && dst->dim->rank == src->dim->rank){ \ - for(size_t i=0; i<(dst->dim)->rank;++i)\ - dst->x[i]=src->x[i];\ - }\ +int copy_tensor_##type(tensor_##type * dst, tensor_##type * src){\ + if(dst!=NULL && src!=NULL){ \ + int diff = dst->dim->rank - src->dim->rank;\ + if(diff == 0) \ + for(size_t i=0; i<(src->dim)->rank;++i)\ + dst->x[i]=src->x[i];\ + return diff;\ + \ + }\ + return -1;\ }\ \ void free_tensor_##type(tensor_##type * tens){\ diff --git a/tensor_t/test/is_good.c b/tensor_t/test/is_good.c index 4164665..792d5ae 100644 --- a/tensor_t/test/is_good.c +++ b/tensor_t/test/is_good.c @@ -1705,6 +1705,58 @@ TEST(copy_tensor){ } +TEST(tensorContractnProd_TYPE_DOUBLE_2_2 ){ + dimension *d0=create_dim(3); + dimension *d1=create_dim(3); +#if VALGRIND_ + d0->perm[0]=1; + d0->perm[1]=2; //3; + d0->perm[2]=3; //3; + + d1->perm[0]=2; + d1->perm[1]=3; //3; + d1->perm[2]=1; //3; + +#else + + d0->perm[0]=1; + d0->perm[1]=22; //3; + d0->perm[2]=52; //3; + d1->perm[0]=52; + d1->perm[1]=22; //3; + d1->perm[2]=1; //3; + +#endif + + updateRankDim(d0); + updateRankDim(d1); + + + tensor_TYPE_DOUBLE *M0 = CREATE_TENSOR_TYPE_DOUBLE(d0); + tensor_TYPE_DOUBLE *M1 = CREATE_TENSOR_TYPE_DOUBLE(d1); + + for(size_t i=0; idim->rank;++i) M0->x[i]=2 ; + for(size_t i=0; idim->rank;++i) M1->x[i]=3; + + print_tensor_double(M0,"M0"); + print_tensor_double(M1,"M1"); + + tensor_TYPE_DOUBLE *M=NULL; + + tensorContractnProd_TYPE_DOUBLE(&M, M0,M1,2); + + print_tensor_double(M,"M"); + + // for(size_t i=0;idim->rank;++i) + // EXPECT_EQ_TYPE_DOUBLE(M->x[i],MnO->x[i]); + + + free_tensor_TYPE_DOUBLE(M); + free_tensor_TYPE_DOUBLE(M0); + free_tensor_TYPE_DOUBLE(M1); + +} + diff --git a/ytest_t/libytest.so b/ytest_t/libytest.so index 1b54d93..4995260 100644 Binary files a/ytest_t/libytest.so and b/ytest_t/libytest.so differ diff --git a/ytools_t/src/tools_t/tools_t.c b/ytools_t/src/tools_t/tools_t.c index 6c8a59a..879dc82 100644 --- a/ytools_t/src/tools_t/tools_t.c +++ b/ytools_t/src/tools_t/tools_t.c @@ -115,14 +115,29 @@ long int PRECISION_TYPE_L_DOUBLE = 100000000000000; #define GENERATE_FUNCTION_NUMERIC(type)\ int COMPARE_N_##type(const void *a, const void *b){ \ - type diff = (*(type*)a - *(type*)b) * PRECISION_##type; \ + type diff = 0;\ + if((*(type*)a > *(type*)b)){ \ + diff =(*(type*)a - *(type*)b) * PRECISION_##type; \ + /*char *str_diff = type##_TO_STR(diff), *str_a = type##_TO_STR(*(type*)a), *str_b = type##_TO_STR(*(type*)b);\ + PRINT_DEBUG_(" diff = %s a=%s b=%s PRECISION : %ld\n",str_diff, str_a, str_b, PRECISION_##type);\ + free(str_diff); free(str_a); free(str_b);\ + */ \ + if(diff >= 1) return 1;\ + return 0;\ + }else{\ + diff =(*(type*)b - *(type*)a) * PRECISION_##type; \ /*char *str_diff = type##_TO_STR(diff), *str_a = type##_TO_STR(*(type*)a), *str_b = type##_TO_STR(*(type*)b);\ PRINT_DEBUG_(" diff = %s a=%s b=%s PRECISION : %ld\n",str_diff, str_a, str_b, PRECISION_##type);\ free(str_diff); free(str_a); free(str_b);\ */\ - if (diff <= -1) return -1; \ + if(diff >= 1) return -1;\ + return 0;\ + }\ + \ + /*if (diff <= -1) return -1; \ if (diff >= 1) return 1; \ return 0; \ + */\ } \ \ void COPY_ARRAY_##type(type *dst, const type *src, size_t size){ \