elsvastika's picture
End of training
aeef55a verified
raw
history blame
19.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.555555555555555,
"eval_steps": 500,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 254.0,
"epoch": 0.2222222222222222,
"grad_norm": 2.455533266067505,
"kl": 0.0016071582067525014,
"learning_rate": 5e-07,
"loss": 0.0,
"reward": 0.8821750227361917,
"reward_std": 0.7979278466664255,
"rewards/concensus_correctness_reward_func": 0.049937501549720764,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.3125,
"rewards/question_recreation_reward_func": 0.26773752458393574,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.1895000054500997,
"step": 2
},
{
"completion_length": 369.3125,
"epoch": 0.4444444444444444,
"grad_norm": 2.520486354827881,
"kl": 0.0038820735207991675,
"learning_rate": 4.978612153434526e-07,
"loss": 0.0,
"reward": 0.3699257434345782,
"reward_std": 0.5709783472120762,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.19958198571112007,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.17034375481307507,
"step": 4
},
{
"completion_length": 467.875,
"epoch": 0.6666666666666666,
"grad_norm": 1.3698705434799194,
"kl": 0.0013275225574034266,
"learning_rate": 4.91481456572267e-07,
"loss": 0.0,
"reward": 0.6990349255502224,
"reward_std": 0.9675047248601913,
"rewards/concensus_correctness_reward_func": 0.046875,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.33225369080901146,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.19490624405443668,
"step": 6
},
{
"completion_length": 323.34375,
"epoch": 0.8888888888888888,
"grad_norm": 2.7056479454040527,
"kl": 0.00173920994711807,
"learning_rate": 4.809698831278217e-07,
"loss": 0.0,
"reward": 0.5385207324288785,
"reward_std": 0.8355418732389808,
"rewards/concensus_correctness_reward_func": 0.02787500061094761,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.2965519982390106,
"rewards/soft_format_reward_func": 0.015625,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.07346874848008156,
"step": 8
},
{
"completion_length": 276.09375,
"epoch": 1.1111111111111112,
"grad_norm": 3.8186511993408203,
"kl": 0.0028932989152963273,
"learning_rate": 4.6650635094610966e-07,
"loss": 0.0,
"reward": 1.1083624437451363,
"reward_std": 1.7071605939418077,
"rewards/concensus_correctness_reward_func": 0.625,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.1875,
"rewards/question_recreation_reward_func": 0.25786245451308787,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.03799999970942736,
"step": 10
},
{
"completion_length": 255.59375,
"epoch": 1.3333333333333333,
"grad_norm": 34.07455062866211,
"kl": 0.0037407633935799822,
"learning_rate": 4.483383350728088e-07,
"loss": 0.0,
"reward": 1.2880361340939999,
"reward_std": 1.625240983441472,
"rewards/concensus_correctness_reward_func": 0.625,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.32416112907230854,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.15137499663978815,
"step": 12
},
{
"completion_length": 285.21875,
"epoch": 1.5555555555555556,
"grad_norm": 3.052701234817505,
"kl": 0.002863182016881183,
"learning_rate": 4.2677669529663686e-07,
"loss": 0.0,
"reward": 0.607268082909286,
"reward_std": 0.6088872440159321,
"rewards/concensus_correctness_reward_func": 0.022187499329447746,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.38814307004213333,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.19693750143051147,
"step": 14
},
{
"completion_length": 347.65625,
"epoch": 1.7777777777777777,
"grad_norm": 5.15467643737793,
"kl": 0.0014800850040046498,
"learning_rate": 4.0219035725218013e-07,
"loss": 0.0,
"reward": 0.1809218251146376,
"reward_std": 0.7999278882052749,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.2310780775733292,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.112656245008111,
"step": 16
},
{
"completion_length": 441.25,
"epoch": 2.0,
"grad_norm": 4.815533638000488,
"kl": 0.0013374313130043447,
"learning_rate": 3.75e-07,
"loss": 0.0,
"reward": 0.5386425573378801,
"reward_std": 0.6832009451463819,
"rewards/concensus_correctness_reward_func": 0.046875,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.23683004680788144,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.19243750348687172,
"step": 18
},
{
"completion_length": 437.25,
"epoch": 2.2222222222222223,
"grad_norm": 2.5218758583068848,
"kl": 0.0015388188403449021,
"learning_rate": 3.4567085809127245e-07,
"loss": 0.0,
"reward": 0.7281985133886337,
"reward_std": 0.6127606704831123,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.45726102218031883,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.2709374986588955,
"step": 20
},
{
"completion_length": 387.75,
"epoch": 2.4444444444444446,
"grad_norm": 1.3259650468826294,
"kl": 0.0036219921821611933,
"learning_rate": 3.147047612756302e-07,
"loss": 0.0,
"reward": 0.2731318287551403,
"reward_std": 0.2530765999108553,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.20441308384761214,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.06871875282377005,
"step": 22
},
{
"completion_length": 447.09375,
"epoch": 2.6666666666666665,
"grad_norm": 6.329639434814453,
"kl": 0.0021589112002402544,
"learning_rate": 2.826315480550129e-07,
"loss": 0.0,
"reward": 1.2416966576129198,
"reward_std": 2.0539041608572006,
"rewards/concensus_correctness_reward_func": 0.625,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.1875,
"rewards/question_recreation_reward_func": 0.3013216257095337,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.1278750030323863,
"step": 24
},
{
"completion_length": 301.875,
"epoch": 2.888888888888889,
"grad_norm": 4.056438446044922,
"kl": 0.0024828215537127107,
"learning_rate": 2.5e-07,
"loss": 0.0,
"reward": 0.2829342377372086,
"reward_std": 0.33365900977514684,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.1184342410415411,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.16449999809265137,
"step": 26
},
{
"completion_length": 341.15625,
"epoch": 3.111111111111111,
"grad_norm": 2.550891876220703,
"kl": 0.002100169222103432,
"learning_rate": 2.1736845194498716e-07,
"loss": 0.0,
"reward": 0.3925126292742789,
"reward_std": 0.7323208572342992,
"rewards/concensus_correctness_reward_func": 0.014562499709427357,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.29879387514665723,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.016656249528750777,
"step": 28
},
{
"completion_length": 388.03125,
"epoch": 3.3333333333333335,
"grad_norm": 1.4323590993881226,
"kl": 0.004696541313023772,
"learning_rate": 1.8529523872436977e-07,
"loss": 0.0,
"reward": 0.05710854474455118,
"reward_std": 0.5547531270422041,
"rewards/concensus_correctness_reward_func": 0.0078125,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.19385854969732463,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.20706249913200736,
"step": 30
},
{
"completion_length": 352.90625,
"epoch": 3.5555555555555554,
"grad_norm": 2.221634864807129,
"kl": 0.002907156704168301,
"learning_rate": 1.5432914190872756e-07,
"loss": 0.0,
"reward": 0.3871938968077302,
"reward_std": 0.3934658619109541,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.2629126524552703,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.1242812555283308,
"step": 32
},
{
"completion_length": 384.375,
"epoch": 3.7777777777777777,
"grad_norm": 2.7794787883758545,
"kl": 0.001867200349806808,
"learning_rate": 1.2500000000000005e-07,
"loss": 0.0,
"reward": 0.9573859982192516,
"reward_std": 1.0359943909570575,
"rewards/concensus_correctness_reward_func": 0.11443750280886889,
"rewards/consensus_reward_func": 0.1875,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.3830109708942473,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.20993749890476465,
"step": 34
},
{
"completion_length": 361.375,
"epoch": 4.0,
"grad_norm": 3.378568172454834,
"kl": 0.0019658175588119775,
"learning_rate": 9.780964274781983e-08,
"loss": 0.0,
"reward": 0.550561910495162,
"reward_std": 0.5597149441018701,
"rewards/concensus_correctness_reward_func": 0.03125,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.27071817917749286,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.12359375134110451,
"step": 36
},
{
"completion_length": 282.96875,
"epoch": 4.222222222222222,
"grad_norm": 4.209115505218506,
"kl": 0.0013284565284266137,
"learning_rate": 7.322330470336313e-08,
"loss": 0.0,
"reward": 1.8079969780519605,
"reward_std": 2.005708161741495,
"rewards/concensus_correctness_reward_func": 1.268062500283122,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.20130947075085714,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.08862500544637442,
"step": 38
},
{
"completion_length": 431.4375,
"epoch": 4.444444444444445,
"grad_norm": 29.043092727661133,
"kl": 0.003590297739719972,
"learning_rate": 5.166166492719124e-08,
"loss": 0.0,
"reward": 0.5267084827646613,
"reward_std": 0.684504278935492,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.35548973828554153,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.10871875286102295,
"step": 40
},
{
"completion_length": 384.59375,
"epoch": 4.666666666666667,
"grad_norm": 29.70347785949707,
"kl": 0.001960429195605684,
"learning_rate": 3.349364905389032e-08,
"loss": 0.0,
"reward": 1.234227987471968,
"reward_std": 1.9329941319301724,
"rewards/concensus_correctness_reward_func": 0.6629374995827675,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.257196772727184,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.12659375229850411,
"step": 42
},
{
"completion_length": 364.0625,
"epoch": 4.888888888888889,
"grad_norm": 55.00151062011719,
"kl": 0.002629825277836062,
"learning_rate": 1.9030116872178314e-08,
"loss": 0.0,
"reward": 0.5272765271365643,
"reward_std": 0.8430194035172462,
"rewards/concensus_correctness_reward_func": 0.01837499998509884,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.31580778677016497,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.13059375621378422,
"step": 44
},
{
"completion_length": 304.40625,
"epoch": 5.111111111111111,
"grad_norm": 42.15534973144531,
"kl": 0.005508741844096221,
"learning_rate": 8.518543427732949e-09,
"loss": 0.0,
"reward": 0.4785704296082258,
"reward_std": 0.8159382930025458,
"rewards/concensus_correctness_reward_func": 0.01837499998509884,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.33713291585445404,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.06056249886751175,
"step": 46
},
{
"completion_length": 353.375,
"epoch": 5.333333333333333,
"grad_norm": 6.933172225952148,
"kl": 0.002127421241311822,
"learning_rate": 2.1387846565474044e-09,
"loss": 0.0,
"reward": 0.6708917245268822,
"reward_std": 0.9302016571164131,
"rewards/concensus_correctness_reward_func": 0.006812499836087227,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.3125,
"rewards/question_recreation_reward_func": 0.2251729667186737,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.1264062523841858,
"step": 48
},
{
"completion_length": 346.9375,
"epoch": 5.555555555555555,
"grad_norm": 7.681344985961914,
"kl": 0.0038058556092437357,
"learning_rate": 0.0,
"loss": 0.0,
"reward": 0.41686041094362736,
"reward_std": 0.7147915656678379,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.2795791446696967,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.13728125765919685,
"step": 50
},
{
"epoch": 5.555555555555555,
"step": 50,
"total_flos": 0.0,
"train_loss": 2.602725280667073e-06,
"train_runtime": 1884.3714,
"train_samples_per_second": 0.425,
"train_steps_per_second": 0.027
}
],
"logging_steps": 2,
"max_steps": 50,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}