{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 1.9574642181396484, "learning_rate": 4.901e-05, "loss": 4.8716, "step": 100 }, { "epoch": 0.04, "grad_norm": 1.562890648841858, "learning_rate": 4.801e-05, "loss": 2.5735, "step": 200 }, { "epoch": 0.06, "grad_norm": 2.5503687858581543, "learning_rate": 4.7010000000000006e-05, "loss": 2.2182, "step": 300 }, { "epoch": 0.08, "grad_norm": 1.3021109104156494, "learning_rate": 4.601e-05, "loss": 1.9191, "step": 400 }, { "epoch": 0.1, "grad_norm": 1.3246175050735474, "learning_rate": 4.5010000000000004e-05, "loss": 1.8126, "step": 500 }, { "epoch": 0.12, "grad_norm": 1.04691481590271, "learning_rate": 4.401e-05, "loss": 1.7728, "step": 600 }, { "epoch": 0.14, "grad_norm": 0.9935975670814514, "learning_rate": 4.301e-05, "loss": 1.6751, "step": 700 }, { "epoch": 0.16, "grad_norm": 1.1450852155685425, "learning_rate": 4.201e-05, "loss": 1.5686, "step": 800 }, { "epoch": 0.18, "grad_norm": 1.1858118772506714, "learning_rate": 4.101e-05, "loss": 1.6261, "step": 900 }, { "epoch": 0.2, "grad_norm": 1.2318594455718994, "learning_rate": 4.0010000000000005e-05, "loss": 1.6018, "step": 1000 }, { "epoch": 0.22, "grad_norm": 1.0691641569137573, "learning_rate": 3.901e-05, "loss": 1.5865, "step": 1100 }, { "epoch": 0.24, "grad_norm": 0.8661352396011353, "learning_rate": 3.8010000000000004e-05, "loss": 1.4648, "step": 1200 }, { "epoch": 0.26, "grad_norm": 0.9778139591217041, "learning_rate": 3.701e-05, "loss": 1.4765, "step": 1300 }, { "epoch": 0.28, "grad_norm": 1.3476413488388062, "learning_rate": 3.601e-05, "loss": 1.5541, "step": 1400 }, { "epoch": 0.3, "grad_norm": 0.7631675004959106, "learning_rate": 3.5010000000000005e-05, "loss": 1.5048, "step": 1500 }, { "epoch": 0.32, "grad_norm": 1.1490070819854736, "learning_rate": 3.401e-05, "loss": 1.5052, "step": 1600 }, { "epoch": 0.34, "grad_norm": 1.077760934829712, "learning_rate": 3.3010000000000004e-05, "loss": 1.5638, "step": 1700 }, { "epoch": 0.36, "grad_norm": 0.8567506670951843, "learning_rate": 3.201e-05, "loss": 1.5387, "step": 1800 }, { "epoch": 0.38, "grad_norm": 0.842096209526062, "learning_rate": 3.101e-05, "loss": 1.5037, "step": 1900 }, { "epoch": 0.4, "grad_norm": 0.8851768374443054, "learning_rate": 3.001e-05, "loss": 1.5145, "step": 2000 }, { "epoch": 0.42, "grad_norm": 0.7667313814163208, "learning_rate": 2.9010000000000005e-05, "loss": 1.526, "step": 2100 }, { "epoch": 0.44, "grad_norm": 1.079816460609436, "learning_rate": 2.8010000000000005e-05, "loss": 1.5332, "step": 2200 }, { "epoch": 0.46, "grad_norm": 0.9301439523696899, "learning_rate": 2.701e-05, "loss": 1.4011, "step": 2300 }, { "epoch": 0.48, "grad_norm": 0.9722267985343933, "learning_rate": 2.601e-05, "loss": 1.4697, "step": 2400 }, { "epoch": 0.5, "grad_norm": 0.8725349307060242, "learning_rate": 2.501e-05, "loss": 1.4873, "step": 2500 }, { "epoch": 0.52, "grad_norm": 1.0267070531845093, "learning_rate": 2.4010000000000002e-05, "loss": 1.4103, "step": 2600 }, { "epoch": 0.54, "grad_norm": 0.70062655210495, "learning_rate": 2.301e-05, "loss": 1.3659, "step": 2700 }, { "epoch": 0.56, "grad_norm": 0.8035644888877869, "learning_rate": 2.201e-05, "loss": 1.4009, "step": 2800 }, { "epoch": 0.58, "grad_norm": 0.8089845776557922, "learning_rate": 2.101e-05, "loss": 1.4293, "step": 2900 }, { "epoch": 0.6, "grad_norm": 0.8427271246910095, "learning_rate": 2.001e-05, "loss": 1.4011, "step": 3000 }, { "epoch": 0.62, "grad_norm": 0.994111180305481, "learning_rate": 1.901e-05, "loss": 1.3785, "step": 3100 }, { "epoch": 0.64, "grad_norm": 1.0096153020858765, "learning_rate": 1.8010000000000002e-05, "loss": 1.4122, "step": 3200 }, { "epoch": 0.66, "grad_norm": 0.7762922644615173, "learning_rate": 1.701e-05, "loss": 1.3849, "step": 3300 }, { "epoch": 0.68, "grad_norm": 1.1585968732833862, "learning_rate": 1.601e-05, "loss": 1.3961, "step": 3400 }, { "epoch": 0.7, "grad_norm": 0.9618488550186157, "learning_rate": 1.5010000000000002e-05, "loss": 1.3458, "step": 3500 }, { "epoch": 0.72, "grad_norm": 1.007106065750122, "learning_rate": 1.4010000000000001e-05, "loss": 1.3952, "step": 3600 }, { "epoch": 0.74, "grad_norm": 0.9712181091308594, "learning_rate": 1.301e-05, "loss": 1.3275, "step": 3700 }, { "epoch": 0.76, "grad_norm": 0.9360325336456299, "learning_rate": 1.201e-05, "loss": 1.3479, "step": 3800 }, { "epoch": 0.78, "grad_norm": 0.889412522315979, "learning_rate": 1.1010000000000001e-05, "loss": 1.4002, "step": 3900 }, { "epoch": 0.8, "grad_norm": 1.4792468547821045, "learning_rate": 1.001e-05, "loss": 1.2896, "step": 4000 }, { "epoch": 0.82, "grad_norm": 0.8564038276672363, "learning_rate": 9.01e-06, "loss": 1.3501, "step": 4100 }, { "epoch": 0.84, "grad_norm": 0.8316518664360046, "learning_rate": 8.010000000000001e-06, "loss": 1.3673, "step": 4200 }, { "epoch": 0.86, "grad_norm": 0.8671555519104004, "learning_rate": 7.01e-06, "loss": 1.328, "step": 4300 }, { "epoch": 0.88, "grad_norm": 0.9001341462135315, "learning_rate": 6.01e-06, "loss": 1.3363, "step": 4400 }, { "epoch": 0.9, "grad_norm": 0.8028829097747803, "learning_rate": 5.01e-06, "loss": 1.3272, "step": 4500 }, { "epoch": 0.92, "grad_norm": 0.923729419708252, "learning_rate": 4.01e-06, "loss": 1.3645, "step": 4600 }, { "epoch": 0.94, "grad_norm": 1.2373661994934082, "learning_rate": 3.01e-06, "loss": 1.3762, "step": 4700 }, { "epoch": 0.96, "grad_norm": 1.1914544105529785, "learning_rate": 2.0100000000000002e-06, "loss": 1.2622, "step": 4800 }, { "epoch": 0.98, "grad_norm": 0.8515170216560364, "learning_rate": 1.01e-06, "loss": 1.3327, "step": 4900 }, { "epoch": 1.0, "grad_norm": 0.9752984046936035, "learning_rate": 1e-08, "loss": 1.2896, "step": 5000 } ], "logging_steps": 100, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.464359559168e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }