elsvastika's picture
End of training
663fc76 verified
raw
history blame
12.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 466.21875,
"epoch": 0.3333333333333333,
"grad_norm": 2.818002700805664,
"kl": 0.024589125911006704,
"learning_rate": 4.985344892885899e-07,
"loss": 0.0,
"reward": 1.0728617757558823,
"reward_std": 1.0374318063259125,
"rewards/concensus_correctness_reward_func": 0.05012499913573265,
"rewards/consensus_reward_func": 0.125,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.495455514639616,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.339781254529953,
"step": 2
},
{
"completion_length": 364.875,
"epoch": 0.6666666666666666,
"grad_norm": 1.6754084825515747,
"kl": 0.02502421976532787,
"learning_rate": 4.869132927957006e-07,
"loss": 0.0,
"reward": 1.3312416300177574,
"reward_std": 2.140560135245323,
"rewards/concensus_correctness_reward_func": 0.625,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.3125,
"rewards/question_recreation_reward_func": 0.375085380859673,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.01865624636411667,
"step": 4
},
{
"completion_length": 428.9375,
"epoch": 1.0,
"grad_norm": 2.6106526851654053,
"kl": 0.019527853466570377,
"learning_rate": 4.642142940418973e-07,
"loss": 0.0,
"reward": 0.8669472727924585,
"reward_std": 1.043245192617178,
"rewards/concensus_correctness_reward_func": 0.023874999955296516,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.5792285241186619,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.015625,
"rewards/xmlcount_reward_func": -0.0017812494188547134,
"step": 6
},
{
"completion_length": 382.65625,
"epoch": 1.3333333333333333,
"grad_norm": 3.19954776763916,
"kl": 0.02953332115430385,
"learning_rate": 4.314988729807827e-07,
"loss": 0.0,
"reward": 0.7912007421255112,
"reward_std": 1.1522618383169174,
"rewards/concensus_correctness_reward_func": 0.02250000089406967,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.44829449243843555,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.1954062469303608,
"step": 8
},
{
"completion_length": 454.5,
"epoch": 1.6666666666666665,
"grad_norm": 1.2625623941421509,
"kl": 0.02400887871044688,
"learning_rate": 3.902967663405956e-07,
"loss": 0.0,
"reward": 1.6547736041247845,
"reward_std": 2.4716445207595825,
"rewards/concensus_correctness_reward_func": 0.6788749992847443,
"rewards/consensus_reward_func": 0.125,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.1875,
"rewards/question_recreation_reward_func": 0.47183603793382645,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.1915624924004078,
"step": 10
},
{
"completion_length": 374.78125,
"epoch": 2.0,
"grad_norm": 2.689986228942871,
"kl": 0.028676623362116516,
"learning_rate": 3.4253453883497864e-07,
"loss": 0.0,
"reward": 2.5205069594085217,
"reward_std": 3.358973853290081,
"rewards/concensus_correctness_reward_func": 1.3355000000447035,
"rewards/consensus_reward_func": 0.125,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.3125,
"rewards/question_recreation_reward_func": 0.49960072338581085,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.24790625181049109,
"step": 12
},
{
"completion_length": 351.0625,
"epoch": 2.3333333333333335,
"grad_norm": 2.329031229019165,
"kl": 0.022683687566313893,
"learning_rate": 2.9044549913819124e-07,
"loss": 0.0,
"reward": 0.8053823411464691,
"reward_std": 0.9377600699663162,
"rewards/concensus_correctness_reward_func": 0.0494375005364418,
"rewards/consensus_reward_func": 0.125,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.3073198366910219,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.3236249890178442,
"step": 14
},
{
"completion_length": 402.5625,
"epoch": 2.6666666666666665,
"grad_norm": 2.642098903656006,
"kl": 0.03899826877750456,
"learning_rate": 2.3646527285364563e-07,
"loss": 0.0,
"reward": 1.048937451094389,
"reward_std": 1.4023225009441376,
"rewards/concensus_correctness_reward_func": 0.1349375005811453,
"rewards/consensus_reward_func": 0.25,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.3677499322220683,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.17124998942017555,
"step": 16
},
{
"completion_length": 454.9375,
"epoch": 3.0,
"grad_norm": 1.4734266996383667,
"kl": 0.015171069288044237,
"learning_rate": 1.8311791536769483e-07,
"loss": 0.0,
"reward": 1.820919245481491,
"reward_std": 1.901282200589776,
"rewards/concensus_correctness_reward_func": 1.2738749999552965,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.40041930228471756,
"rewards/soft_format_reward_func": 0.015625,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.13099999353289604,
"step": 18
},
{
"completion_length": 447.5625,
"epoch": 3.3333333333333335,
"grad_norm": 1.9561309814453125,
"kl": 0.027638212544843554,
"learning_rate": 1.328978898250525e-07,
"loss": 0.0,
"reward": 1.6138426065444946,
"reward_std": 2.08666705340147,
"rewards/concensus_correctness_reward_func": 0.625,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.1875,
"rewards/question_recreation_reward_func": 0.3899675915017724,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.41137499921023846,
"step": 20
},
{
"completion_length": 313.5,
"epoch": 3.6666666666666665,
"grad_norm": 2.4282801151275635,
"kl": 0.02601044374750927,
"learning_rate": 8.81534288045431e-08,
"loss": 0.0,
"reward": 0.7636516839265823,
"reward_std": 1.200943410396576,
"rewards/concensus_correctness_reward_func": 0.05081249959766865,
"rewards/consensus_reward_func": 0.125,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.3807766861282289,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.2070624940097332,
"step": 22
},
{
"completion_length": 348.625,
"epoch": 4.0,
"grad_norm": 3.557086706161499,
"kl": 0.025902074296027422,
"learning_rate": 5.097673357358906e-08,
"loss": 0.0,
"reward": 0.7689573541283607,
"reward_std": 1.0340721681714058,
"rewards/concensus_correctness_reward_func": 0.024937499314546585,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.43936359975486994,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.1171562522649765,
"step": 24
},
{
"completion_length": 443.53125,
"epoch": 4.333333333333333,
"grad_norm": 1.6169641017913818,
"kl": 0.01715872809290886,
"learning_rate": 2.3106145082260774e-08,
"loss": 0.0,
"reward": 0.7657004399225116,
"reward_std": 0.7439820282161236,
"rewards/concensus_correctness_reward_func": 0.0078125,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.5021067075431347,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.06828123982995749,
"step": 26
},
{
"completion_length": 301.0,
"epoch": 4.666666666666667,
"grad_norm": 2.6919360160827637,
"kl": 0.022670619655400515,
"learning_rate": 5.844861072478335e-09,
"loss": 0.0,
"reward": 0.8254948575049639,
"reward_std": 0.9493589596822858,
"rewards/concensus_correctness_reward_func": 0.02250000089406967,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.43468233942985535,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.18081250321120024,
"step": 28
},
{
"completion_length": 339.40625,
"epoch": 5.0,
"grad_norm": 3.3629519939422607,
"kl": 0.04183358867885545,
"learning_rate": 0.0,
"loss": 0.0,
"reward": 1.6037534587085247,
"reward_std": 1.9255643365904689,
"rewards/concensus_correctness_reward_func": 0.6506249997764826,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.432628508657217,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.015625,
"rewards/xmlcount_reward_func": 0.3173750042915344,
"step": 30
},
{
"epoch": 5.0,
"step": 30,
"total_flos": 0.0,
"train_loss": 2.5961448712526666e-05,
"train_runtime": 902.7895,
"train_samples_per_second": 0.532,
"train_steps_per_second": 0.033
}
],
"logging_steps": 2,
"max_steps": 30,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}