{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.1651287032540067, "eval_steps": 500, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01942690626517727, "grad_norm": 0.26180756092071533, "learning_rate": 8.653846153846155e-06, "loss": 0.40127906799316404, "step": 10 }, { "epoch": 0.03885381253035454, "grad_norm": 0.1655670404434204, "learning_rate": 1.826923076923077e-05, "loss": 0.3694923400878906, "step": 20 }, { "epoch": 0.05828071879553181, "grad_norm": 0.21055534482002258, "learning_rate": 2.7884615384615386e-05, "loss": 0.3756622076034546, "step": 30 }, { "epoch": 0.07770762506070908, "grad_norm": 0.2429058700799942, "learning_rate": 3.7500000000000003e-05, "loss": 0.3154024600982666, "step": 40 }, { "epoch": 0.09713453132588636, "grad_norm": 0.18555547297000885, "learning_rate": 4.711538461538462e-05, "loss": 0.31356382369995117, "step": 50 }, { "epoch": 0.11656143759106362, "grad_norm": 0.24774602055549622, "learning_rate": 4.99936801054781e-05, "loss": 0.23598077297210693, "step": 60 }, { "epoch": 0.1359883438562409, "grad_norm": 0.1604267954826355, "learning_rate": 4.996273321192579e-05, "loss": 0.26249558925628663, "step": 70 }, { "epoch": 0.15541525012141816, "grad_norm": 0.18501052260398865, "learning_rate": 4.990603041267143e-05, "loss": 0.2521421194076538, "step": 80 }, { "epoch": 0.17484215638659542, "grad_norm": 0.13454674184322357, "learning_rate": 4.982363021220401e-05, "loss": 0.31869068145751955, "step": 90 }, { "epoch": 0.1942690626517727, "grad_norm": 0.11859218776226044, "learning_rate": 4.971561762892976e-05, "loss": 0.23399407863616944, "step": 100 }, { "epoch": 0.21369596891694997, "grad_norm": 0.16577711701393127, "learning_rate": 4.958210410745236e-05, "loss": 0.2260490894317627, "step": 110 }, { "epoch": 0.23312287518212724, "grad_norm": 0.11132260411977768, "learning_rate": 4.942322740358726e-05, "loss": 0.23589625358581542, "step": 120 }, { "epoch": 0.2525497814473045, "grad_norm": 0.20756852626800537, "learning_rate": 4.9239151442228726e-05, "loss": 0.3080857038497925, "step": 130 }, { "epoch": 0.2719766877124818, "grad_norm": 0.09589740633964539, "learning_rate": 4.9030066148216445e-05, "loss": 0.23507814407348632, "step": 140 }, { "epoch": 0.2914035939776591, "grad_norm": 0.13812342286109924, "learning_rate": 4.879618725037587e-05, "loss": 0.2745722055435181, "step": 150 }, { "epoch": 0.3108305002428363, "grad_norm": 0.17384353280067444, "learning_rate": 4.8537756058934826e-05, "loss": 0.26914730072021487, "step": 160 }, { "epoch": 0.3302574065080136, "grad_norm": 0.1553693562746048, "learning_rate": 4.8255039216545814e-05, "loss": 0.2960189342498779, "step": 170 }, { "epoch": 0.34968431277319084, "grad_norm": 0.15468068420886993, "learning_rate": 4.794832842317098e-05, "loss": 0.26504876613616946, "step": 180 }, { "epoch": 0.36911121903836813, "grad_norm": 0.1495364010334015, "learning_rate": 4.7617940135113606e-05, "loss": 0.20354897975921632, "step": 190 }, { "epoch": 0.3885381253035454, "grad_norm": 0.0979977622628212, "learning_rate": 4.726421523850662e-05, "loss": 0.23614444732666015, "step": 200 }, { "epoch": 0.40796503156872266, "grad_norm": 0.14897684752941132, "learning_rate": 4.6887518697595096e-05, "loss": 0.24106810092926026, "step": 210 }, { "epoch": 0.42739193783389995, "grad_norm": 0.11905848234891891, "learning_rate": 4.648823917817551e-05, "loss": 0.2708190202713013, "step": 220 }, { "epoch": 0.44681884409907724, "grad_norm": 0.1429329812526703, "learning_rate": 4.606678864658038e-05, "loss": 0.25339953899383544, "step": 230 }, { "epoch": 0.4662457503642545, "grad_norm": 0.20242151618003845, "learning_rate": 4.5623601944622016e-05, "loss": 0.2554117441177368, "step": 240 }, { "epoch": 0.48567265662943176, "grad_norm": 0.1589473932981491, "learning_rate": 4.5159136340933896e-05, "loss": 0.1923211097717285, "step": 250 }, { "epoch": 0.505099562894609, "grad_norm": 0.15873779356479645, "learning_rate": 4.467387105917269e-05, "loss": 0.20048885345458983, "step": 260 }, { "epoch": 0.5245264691597863, "grad_norm": 0.15066051483154297, "learning_rate": 4.416830678356764e-05, "loss": 0.2062215566635132, "step": 270 }, { "epoch": 0.5439533754249636, "grad_norm": 0.20589925348758698, "learning_rate": 4.36429651423274e-05, "loss": 0.29201040267944334, "step": 280 }, { "epoch": 0.5633802816901409, "grad_norm": 0.2139367163181305, "learning_rate": 4.3098388169437545e-05, "loss": 0.23367040157318114, "step": 290 }, { "epoch": 0.5828071879553182, "grad_norm": 0.1904798150062561, "learning_rate": 4.2535137745403766e-05, "loss": 0.2543118953704834, "step": 300 }, { "epoch": 0.6022340942204953, "grad_norm": 0.18315282464027405, "learning_rate": 4.1953795017518116e-05, "loss": 0.2573453664779663, "step": 310 }, { "epoch": 0.6216610004856726, "grad_norm": 0.12616890668869019, "learning_rate": 4.135495980024615e-05, "loss": 0.2318727493286133, "step": 320 }, { "epoch": 0.6410879067508499, "grad_norm": 0.17712046205997467, "learning_rate": 4.073924995635376e-05, "loss": 0.18010640144348145, "step": 330 }, { "epoch": 0.6605148130160272, "grad_norm": 0.12199205160140991, "learning_rate": 4.010730075941236e-05, "loss": 0.19334938526153564, "step": 340 }, { "epoch": 0.6799417192812045, "grad_norm": 0.27177831530570984, "learning_rate": 3.9459764238339865e-05, "loss": 0.21652016639709473, "step": 350 }, { "epoch": 0.6993686255463817, "grad_norm": 0.15285901725292206, "learning_rate": 3.8797308504654116e-05, "loss": 0.26627991199493406, "step": 360 }, { "epoch": 0.718795531811559, "grad_norm": 0.12266906350851059, "learning_rate": 3.812061706313256e-05, "loss": 0.27732465267181394, "step": 370 }, { "epoch": 0.7382224380767363, "grad_norm": 0.17022156715393066, "learning_rate": 3.743038810658963e-05, "loss": 0.24920461177825928, "step": 380 }, { "epoch": 0.7576493443419136, "grad_norm": 0.13355037569999695, "learning_rate": 3.6727333795499375e-05, "loss": 0.19189114570617677, "step": 390 }, { "epoch": 0.7770762506070908, "grad_norm": 0.15375971794128418, "learning_rate": 3.6012179523206576e-05, "loss": 0.20400729179382324, "step": 400 }, { "epoch": 0.7965031568722681, "grad_norm": 0.2069733738899231, "learning_rate": 3.5285663167484616e-05, "loss": 0.3037395715713501, "step": 410 }, { "epoch": 0.8159300631374453, "grad_norm": 0.1796797662973404, "learning_rate": 3.4548534329212144e-05, "loss": 0.34220497608184813, "step": 420 }, { "epoch": 0.8353569694026226, "grad_norm": 0.18201130628585815, "learning_rate": 3.380155355895416e-05, "loss": 0.2813987731933594, "step": 430 }, { "epoch": 0.8547838756677999, "grad_norm": 0.14474698901176453, "learning_rate": 3.304549157224558e-05, "loss": 0.2509459495544434, "step": 440 }, { "epoch": 0.8742107819329772, "grad_norm": 0.17002937197685242, "learning_rate": 3.228112845438672e-05, "loss": 0.25137474536895754, "step": 450 }, { "epoch": 0.8936376881981545, "grad_norm": 0.20442189276218414, "learning_rate": 3.150925285557141e-05, "loss": 0.23509564399719238, "step": 460 }, { "epoch": 0.9130645944633318, "grad_norm": 0.14680618047714233, "learning_rate": 3.0730661177177954e-05, "loss": 0.19746966361999513, "step": 470 }, { "epoch": 0.932491500728509, "grad_norm": 0.1439915895462036, "learning_rate": 2.994615675006277e-05, "loss": 0.254872727394104, "step": 480 }, { "epoch": 0.9519184069936862, "grad_norm": 0.16022782027721405, "learning_rate": 2.9156549005704203e-05, "loss": 0.1682429313659668, "step": 490 }, { "epoch": 0.9713453132588635, "grad_norm": 0.17126372456550598, "learning_rate": 2.8362652641052025e-05, "loss": 0.2580049991607666, "step": 500 }, { "epoch": 0.9907722195240408, "grad_norm": 0.19092383980751038, "learning_rate": 2.756528677794402e-05, "loss": 0.1847946286201477, "step": 510 }, { "epoch": 1.0097134531325886, "grad_norm": 0.2421938180923462, "learning_rate": 2.6765274117957227e-05, "loss": 0.20903294086456298, "step": 520 }, { "epoch": 1.029140359397766, "grad_norm": 0.19591616094112396, "learning_rate": 2.5963440093565566e-05, "loss": 0.15802922248840331, "step": 530 }, { "epoch": 1.0485672656629432, "grad_norm": 0.21306180953979492, "learning_rate": 2.5160612016479884e-05, "loss": 0.18800711631774902, "step": 540 }, { "epoch": 1.0679941719281205, "grad_norm": 0.21616265177726746, "learning_rate": 2.4357618224049008e-05, "loss": 0.2516720056533813, "step": 550 }, { "epoch": 1.0874210781932978, "grad_norm": 0.1722310185432434, "learning_rate": 2.355528722460261e-05, "loss": 0.13848392963409423, "step": 560 }, { "epoch": 1.1068479844584749, "grad_norm": 0.16589082777500153, "learning_rate": 2.27544468426176e-05, "loss": 0.16518881320953369, "step": 570 }, { "epoch": 1.1262748907236522, "grad_norm": 0.19981171190738678, "learning_rate": 2.1955923364590167e-05, "loss": 0.18203474283218385, "step": 580 }, { "epoch": 1.1457017969888295, "grad_norm": 0.1736886352300644, "learning_rate": 2.1160540686494594e-05, "loss": 0.2113028049468994, "step": 590 }, { "epoch": 1.1651287032540067, "grad_norm": 0.2363515943288803, "learning_rate": 2.0369119463708675e-05, "loss": 0.14112727642059325, "step": 600 } ], "logging_steps": 10, "max_steps": 1030, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.720649024869451e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }