{ "best_global_step": 3622, "best_metric": 0.29197875, "best_model_checkpoint": "/mnt/dhwfile/tancheng/baixi/sft/v2-20260305-234742_copy/tr_sft/v4-20260321-200656/checkpoint-3622", "epoch": 2.0, "eval_steps": 1000, "global_step": 3622, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005524098881369977, "grad_norm": 3.971195936203003, "learning_rate": 2.7472527472527476e-08, "loss": 1.5057597160339355, "step": 1, "token_acc": 0.707983193277311 }, { "epoch": 0.027620494406849883, "grad_norm": 2.0644404888153076, "learning_rate": 1.3736263736263738e-06, "loss": 1.4408824687101403, "step": 50, "token_acc": 0.7159635393620652 }, { "epoch": 0.055240988813699765, "grad_norm": 6.71088981628418, "learning_rate": 2.7472527472527476e-06, "loss": 0.9287164306640625, "step": 100, "token_acc": 0.7522492378615511 }, { "epoch": 0.08286148322054965, "grad_norm": 1.4702277183532715, "learning_rate": 4.120879120879121e-06, "loss": 0.7912979125976562, "step": 150, "token_acc": 0.7896991457224217 }, { "epoch": 0.11048197762739953, "grad_norm": 1.6800647974014282, "learning_rate": 4.999662224532599e-06, "loss": 0.7119190216064453, "step": 200, "token_acc": 0.8086790326137561 }, { "epoch": 0.1381024720342494, "grad_norm": 2.6188578605651855, "learning_rate": 4.995180842497152e-06, "loss": 0.6646479797363282, "step": 250, "token_acc": 0.8224076129246234 }, { "epoch": 0.1657229664410993, "grad_norm": 1.4899194240570068, "learning_rate": 4.9854977109706205e-06, "loss": 0.656124267578125, "step": 300, "token_acc": 0.8239213747063914 }, { "epoch": 0.19334346084794918, "grad_norm": 1.3691211938858032, "learning_rate": 4.970633016555765e-06, "loss": 0.5962641143798828, "step": 350, "token_acc": 0.8409644719528242 }, { "epoch": 0.22096395525479906, "grad_norm": 0.8856273889541626, "learning_rate": 4.950617747955006e-06, "loss": 0.5752347564697265, "step": 400, "token_acc": 0.8464685130976507 }, { "epoch": 0.24858444966164894, "grad_norm": 1.2155076265335083, "learning_rate": 4.925493631367695e-06, "loss": 0.5667870712280273, "step": 450, "token_acc": 0.8497243397340098 }, { "epoch": 0.2762049440684988, "grad_norm": 1.5872883796691895, "learning_rate": 4.895313043502745e-06, "loss": 0.5463564300537109, "step": 500, "token_acc": 0.8544707295032248 }, { "epoch": 0.3038254384753487, "grad_norm": 1.2916710376739502, "learning_rate": 4.8601389023879395e-06, "loss": 0.5288540267944336, "step": 550, "token_acc": 0.8581043185642176 }, { "epoch": 0.3314459328821986, "grad_norm": 0.7346131205558777, "learning_rate": 4.820044536203553e-06, "loss": 0.5156510925292969, "step": 600, "token_acc": 0.8623250807319699 }, { "epoch": 0.3590664272890485, "grad_norm": 1.0493592023849487, "learning_rate": 4.775113530413753e-06, "loss": 0.4914900970458984, "step": 650, "token_acc": 0.8671121098866763 }, { "epoch": 0.38668692169589836, "grad_norm": 1.3090295791625977, "learning_rate": 4.725439553514431e-06, "loss": 0.4881901168823242, "step": 700, "token_acc": 0.869419182733115 }, { "epoch": 0.41430741610274824, "grad_norm": 0.8976083397865295, "learning_rate": 4.671126161760773e-06, "loss": 0.4547769165039062, "step": 750, "token_acc": 0.8772626288378191 }, { "epoch": 0.4419279105095981, "grad_norm": 0.9827790260314941, "learning_rate": 4.612286583281619e-06, "loss": 0.4618700408935547, "step": 800, "token_acc": 0.8752913181071069 }, { "epoch": 0.469548404916448, "grad_norm": 10.650091171264648, "learning_rate": 4.549043482030697e-06, "loss": 0.47108261108398436, "step": 850, "token_acc": 0.8742771870919605 }, { "epoch": 0.4971688993232979, "grad_norm": 1.2158194780349731, "learning_rate": 4.481528702066821e-06, "loss": 0.4383080291748047, "step": 900, "token_acc": 0.8814368000791081 }, { "epoch": 0.5247893937301478, "grad_norm": 0.8892939686775208, "learning_rate": 4.4098829926961485e-06, "loss": 0.43826602935791015, "step": 950, "token_acc": 0.8805370307574445 }, { "epoch": 0.5524098881369977, "grad_norm": 2.2692770957946777, "learning_rate": 4.334255715049505e-06, "loss": 0.44030540466308593, "step": 1000, "token_acc": 0.8802479822797057 }, { "epoch": 0.5524098881369977, "eval_loss": 0.4443511366844177, "eval_runtime": 48.8158, "eval_samples_per_second": 82.432, "eval_steps_per_second": 10.304, "eval_token_acc": 0.8788591129916941, "step": 1000 }, { "epoch": 0.5800303825438475, "grad_norm": 1.7738205194473267, "learning_rate": 4.254804530706494e-06, "loss": 0.43722301483154297, "step": 1050, "token_acc": 0.8826621910133159 }, { "epoch": 0.6076508769506974, "grad_norm": 1.1211692094802856, "learning_rate": 4.171695073015476e-06, "loss": 0.42919532775878905, "step": 1100, "token_acc": 0.8847912635120977 }, { "epoch": 0.6352713713575473, "grad_norm": 0.6628771424293518, "learning_rate": 4.085100601794695e-06, "loss": 0.4198457717895508, "step": 1150, "token_acc": 0.886101994445847 }, { "epoch": 0.6628918657643972, "grad_norm": 0.7384634613990784, "learning_rate": 3.9952016421343285e-06, "loss": 0.4135971832275391, "step": 1200, "token_acc": 0.8885249938438808 }, { "epoch": 0.6905123601712471, "grad_norm": 0.8930935263633728, "learning_rate": 3.902185608052511e-06, "loss": 0.4104171371459961, "step": 1250, "token_acc": 0.8885200623345032 }, { "epoch": 0.718132854578097, "grad_norm": 1.6306012868881226, "learning_rate": 3.806246411789872e-06, "loss": 0.39111907958984377, "step": 1300, "token_acc": 0.8930283797148258 }, { "epoch": 0.7457533489849468, "grad_norm": 1.026354432106018, "learning_rate": 3.70758405955712e-06, "loss": 0.4011444091796875, "step": 1350, "token_acc": 0.8923086269744835 }, { "epoch": 0.7733738433917967, "grad_norm": 0.9568616151809692, "learning_rate": 3.6064042345784e-06, "loss": 0.38247406005859375, "step": 1400, "token_acc": 0.8959567281805276 }, { "epoch": 0.8009943377986466, "grad_norm": 0.9613300561904907, "learning_rate": 3.502917868299695e-06, "loss": 0.3937504196166992, "step": 1450, "token_acc": 0.8941713404549015 }, { "epoch": 0.8286148322054965, "grad_norm": 1.0931392908096313, "learning_rate": 3.3973407006561434e-06, "loss": 0.3801025390625, "step": 1500, "token_acc": 0.8964361413713215 }, { "epoch": 0.8562353266123464, "grad_norm": 0.718775749206543, "learning_rate": 3.2898928303150286e-06, "loss": 0.3670074081420898, "step": 1550, "token_acc": 0.8998434863417378 }, { "epoch": 0.8838558210191962, "grad_norm": 2.243138551712036, "learning_rate": 3.1807982558320238e-06, "loss": 0.37574588775634765, "step": 1600, "token_acc": 0.8973345495084626 }, { "epoch": 0.9114763154260461, "grad_norm": 1.1610990762710571, "learning_rate": 3.070284408677278e-06, "loss": 0.358037109375, "step": 1650, "token_acc": 0.9034944420561322 }, { "epoch": 0.939096809832896, "grad_norm": 2.175825357437134, "learning_rate": 2.9585816791048395e-06, "loss": 0.3607246780395508, "step": 1700, "token_acc": 0.9015336266499058 }, { "epoch": 0.9667173042397459, "grad_norm": 0.5443406105041504, "learning_rate": 2.845922935853841e-06, "loss": 0.36091846466064453, "step": 1750, "token_acc": 0.90267166344294 }, { "epoch": 0.9943377986465958, "grad_norm": 1.0993009805679321, "learning_rate": 2.732543040682756e-06, "loss": 0.34049827575683594, "step": 1800, "token_acc": 0.9051366664591246 }, { "epoch": 1.021543985637343, "grad_norm": 1.2388564348220825, "learning_rate": 2.6186783587487526e-06, "loss": 0.3400102996826172, "step": 1850, "token_acc": 0.9062456636096429 }, { "epoch": 1.0491644800441928, "grad_norm": 1.7439219951629639, "learning_rate": 2.504566265852903e-06, "loss": 0.3237404632568359, "step": 1900, "token_acc": 0.9122041868932039 }, { "epoch": 1.0767849744510427, "grad_norm": 1.3239331245422363, "learning_rate": 2.390444653578469e-06, "loss": 0.32169307708740236, "step": 1950, "token_acc": 0.9117859991410516 }, { "epoch": 1.1044054688578926, "grad_norm": 1.5918147563934326, "learning_rate": 2.2765514333539364e-06, "loss": 0.32176483154296875, "step": 2000, "token_acc": 0.9125848778483918 }, { "epoch": 1.1044054688578926, "eval_loss": 0.3394816517829895, "eval_runtime": 49.0802, "eval_samples_per_second": 81.988, "eval_steps_per_second": 10.249, "eval_token_acc": 0.9040804732800501, "step": 2000 }, { "epoch": 1.1320259632647425, "grad_norm": 0.8860780000686646, "learning_rate": 2.163124040474676e-06, "loss": 0.31078639984130857, "step": 2050, "token_acc": 0.9152370718934875 }, { "epoch": 1.1596464576715924, "grad_norm": 0.7584362626075745, "learning_rate": 2.050398939117207e-06, "loss": 0.3007337951660156, "step": 2100, "token_acc": 0.9158177598631978 }, { "epoch": 1.1872669520784422, "grad_norm": 1.2300533056259155, "learning_rate": 1.9386111293779673e-06, "loss": 0.31986356735229493, "step": 2150, "token_acc": 0.9111235055896404 }, { "epoch": 1.2148874464852921, "grad_norm": 2.0351951122283936, "learning_rate": 1.8279936573642838e-06, "loss": 0.3059127426147461, "step": 2200, "token_acc": 0.915220434303269 }, { "epoch": 1.242507940892142, "grad_norm": 0.8483895063400269, "learning_rate": 1.718777129358856e-06, "loss": 0.3012154769897461, "step": 2250, "token_acc": 0.9162508296256238 }, { "epoch": 1.2701284352989919, "grad_norm": 0.948944628238678, "learning_rate": 1.6111892310705895e-06, "loss": 0.2992052459716797, "step": 2300, "token_acc": 0.9183500423961295 }, { "epoch": 1.2977489297058418, "grad_norm": 3.373568296432495, "learning_rate": 1.505454252974001e-06, "loss": 0.29202014923095704, "step": 2350, "token_acc": 0.9190546823388634 }, { "epoch": 1.3253694241126917, "grad_norm": 2.183572292327881, "learning_rate": 1.4017926227267333e-06, "loss": 0.29514074325561523, "step": 2400, "token_acc": 0.9176666418225634 }, { "epoch": 1.3529899185195415, "grad_norm": 0.7352165579795837, "learning_rate": 1.3004204456399495e-06, "loss": 0.2857296752929688, "step": 2450, "token_acc": 0.9207933111033694 }, { "epoch": 1.3806104129263914, "grad_norm": 0.761044442653656, "learning_rate": 1.2015490541596178e-06, "loss": 0.29884326934814454, "step": 2500, "token_acc": 0.9166312420294567 }, { "epoch": 1.4082309073332413, "grad_norm": 0.7975747585296631, "learning_rate": 1.1053845672978567e-06, "loss": 0.2805310821533203, "step": 2550, "token_acc": 0.9228804523024277 }, { "epoch": 1.4358514017400912, "grad_norm": 1.3609472513198853, "learning_rate": 1.0121274609328385e-06, "loss": 0.28165496826171876, "step": 2600, "token_acc": 0.9215071028472673 }, { "epoch": 1.463471896146941, "grad_norm": 1.0301936864852905, "learning_rate": 9.219721498730233e-07, "loss": 0.2917514419555664, "step": 2650, "token_acc": 0.9183475538640967 }, { "epoch": 1.491092390553791, "grad_norm": 0.9399695992469788, "learning_rate": 8.351065825570343e-07, "loss": 0.28282550811767576, "step": 2700, "token_acc": 0.9228632558837881 }, { "epoch": 1.5187128849606408, "grad_norm": 0.7413146495819092, "learning_rate": 7.517118492340749e-07, "loss": 0.28180957794189454, "step": 2750, "token_acc": 0.9222344851006028 }, { "epoch": 1.5463333793674907, "grad_norm": 1.5082839727401733, "learning_rate": 6.719618044417556e-07, "loss": 0.28240968704223635, "step": 2800, "token_acc": 0.9224628557038529 }, { "epoch": 1.5739538737743406, "grad_norm": 1.313835620880127, "learning_rate": 5.96022704568327e-07, "loss": 0.27993974685668943, "step": 2850, "token_acc": 0.9239168544953669 }, { "epoch": 1.6015743681811905, "grad_norm": 0.9157925248146057, "learning_rate": 5.240528612549326e-07, "loss": 0.2741780471801758, "step": 2900, "token_acc": 0.9235956170360454 }, { "epoch": 1.6291948625880404, "grad_norm": 0.7705038785934448, "learning_rate": 4.5620231136040414e-07, "loss": 0.27471385955810546, "step": 2950, "token_acc": 0.9232198218591605 }, { "epoch": 1.6568153569948902, "grad_norm": 1.2690376043319702, "learning_rate": 3.926125041766635e-07, "loss": 0.2690491485595703, "step": 3000, "token_acc": 0.9261167747914736 }, { "epoch": 1.6568153569948902, "eval_loss": 0.2964812219142914, "eval_runtime": 50.3444, "eval_samples_per_second": 79.929, "eval_steps_per_second": 9.991, "eval_token_acc": 0.9156774016611816, "step": 3000 }, { "epoch": 1.6844358514017401, "grad_norm": 1.0337142944335938, "learning_rate": 3.3341600654676956e-07, "loss": 0.27428403854370115, "step": 3050, "token_acc": 0.9227037675449397 }, { "epoch": 1.71205634580859, "grad_norm": 0.7290350198745728, "learning_rate": 2.787362265003851e-07, "loss": 0.27072456359863284, "step": 3100, "token_acc": 0.9239343293141035 }, { "epoch": 1.73967684021544, "grad_norm": 1.227449893951416, "learning_rate": 2.286871559827758e-07, "loss": 0.27282316207885743, "step": 3150, "token_acc": 0.9239819571482271 }, { "epoch": 1.7672973346222898, "grad_norm": 2.0776495933532715, "learning_rate": 1.833731332137062e-07, "loss": 0.2769058418273926, "step": 3200, "token_acc": 0.924404892163299 }, { "epoch": 1.7949178290291397, "grad_norm": 0.9708506464958191, "learning_rate": 1.4288862517162605e-07, "loss": 0.2685773468017578, "step": 3250, "token_acc": 0.9263406940063091 }, { "epoch": 1.8225383234359895, "grad_norm": 1.5988808870315552, "learning_rate": 1.0731803065662183e-07, "loss": 0.2626660346984863, "step": 3300, "token_acc": 0.9263471197370868 }, { "epoch": 1.8501588178428394, "grad_norm": 0.9155512452125549, "learning_rate": 7.673550434268123e-08, "loss": 0.2755833053588867, "step": 3350, "token_acc": 0.9242779850285348 }, { "epoch": 1.8777793122496893, "grad_norm": 0.9388015270233154, "learning_rate": 5.120480218607837e-08, "loss": 0.26742733001708985, "step": 3400, "token_acc": 0.9263185045076028 }, { "epoch": 1.9053998066565392, "grad_norm": 1.5259448289871216, "learning_rate": 3.077914851215585e-08, "loss": 0.25524681091308593, "step": 3450, "token_acc": 0.9273482096523092 }, { "epoch": 1.933020301063389, "grad_norm": 0.8470087647438049, "learning_rate": 1.550112505759316e-08, "loss": 0.27332218170166017, "step": 3500, "token_acc": 0.9242176653438341 }, { "epoch": 1.960640795470239, "grad_norm": 0.8686062097549438, "learning_rate": 5.402582199476036e-09, "loss": 0.27115694046020505, "step": 3550, "token_acc": 0.9250238894469899 }, { "epoch": 1.9882612898770888, "grad_norm": 1.0696200132369995, "learning_rate": 5.045725562269343e-10, "loss": 0.2721245002746582, "step": 3600, "token_acc": 0.9237477946420751 }, { "epoch": 2.0, "eval_loss": 0.29197874665260315, "eval_runtime": 49.4714, "eval_samples_per_second": 81.34, "eval_steps_per_second": 10.167, "eval_token_acc": 0.9167646136969128, "step": 3622 } ], "logging_steps": 50, "max_steps": 3622, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.281559630949384e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }