{ "best_metric": 1.1637498140335083, "best_model_checkpoint": "./outputs/instruct-lora-8b-alpaca-land/checkpoint-740", "epoch": 1.0652463382157125, "eval_steps": 20, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0013315579227696406, "eval_loss": 1.4893051385879517, "eval_runtime": 60.8124, "eval_samples_per_second": 21.953, "eval_steps_per_second": 5.492, "step": 1 }, { "epoch": 0.02663115845539281, "grad_norm": 0.7804192900657654, "learning_rate": 2.666666666666667e-06, "loss": 1.4281, "step": 20 }, { "epoch": 0.02663115845539281, "eval_loss": 1.4869904518127441, "eval_runtime": 60.744, "eval_samples_per_second": 21.977, "eval_steps_per_second": 5.498, "step": 20 }, { "epoch": 0.05326231691078562, "grad_norm": 0.7365455031394958, "learning_rate": 5.333333333333334e-06, "loss": 1.3841, "step": 40 }, { "epoch": 0.05326231691078562, "eval_loss": 1.4637316465377808, "eval_runtime": 62.4639, "eval_samples_per_second": 21.372, "eval_steps_per_second": 5.347, "step": 40 }, { "epoch": 0.07989347536617843, "grad_norm": 0.8536390662193298, "learning_rate": 8e-06, "loss": 1.3682, "step": 60 }, { "epoch": 0.07989347536617843, "eval_loss": 1.3804264068603516, "eval_runtime": 60.9465, "eval_samples_per_second": 21.904, "eval_steps_per_second": 5.48, "step": 60 }, { "epoch": 0.10652463382157124, "grad_norm": 0.9039925336837769, "learning_rate": 1.0666666666666667e-05, "loss": 1.2725, "step": 80 }, { "epoch": 0.10652463382157124, "eval_loss": 1.3125137090682983, "eval_runtime": 60.8859, "eval_samples_per_second": 21.926, "eval_steps_per_second": 5.486, "step": 80 }, { "epoch": 0.13315579227696406, "grad_norm": 0.9523382186889648, "learning_rate": 1.3333333333333333e-05, "loss": 1.2128, "step": 100 }, { "epoch": 0.13315579227696406, "eval_loss": 1.2815206050872803, "eval_runtime": 61.0179, "eval_samples_per_second": 21.879, "eval_steps_per_second": 5.474, "step": 100 }, { "epoch": 0.15978695073235685, "grad_norm": 1.0546497106552124, "learning_rate": 1.6e-05, "loss": 1.1918, "step": 120 }, { "epoch": 0.15978695073235685, "eval_loss": 1.2641539573669434, "eval_runtime": 60.7722, "eval_samples_per_second": 21.967, "eval_steps_per_second": 5.496, "step": 120 }, { "epoch": 0.18641810918774968, "grad_norm": 1.2017662525177002, "learning_rate": 1.866666666666667e-05, "loss": 1.2031, "step": 140 }, { "epoch": 0.18641810918774968, "eval_loss": 1.2466365098953247, "eval_runtime": 62.8298, "eval_samples_per_second": 21.248, "eval_steps_per_second": 5.316, "step": 140 }, { "epoch": 0.21304926764314247, "grad_norm": 1.4084031581878662, "learning_rate": 2.1333333333333335e-05, "loss": 1.1163, "step": 160 }, { "epoch": 0.21304926764314247, "eval_loss": 1.2401237487792969, "eval_runtime": 60.7071, "eval_samples_per_second": 21.991, "eval_steps_per_second": 5.502, "step": 160 }, { "epoch": 0.2396804260985353, "grad_norm": 1.4013845920562744, "learning_rate": 2.4e-05, "loss": 1.121, "step": 180 }, { "epoch": 0.2396804260985353, "eval_loss": 1.229453206062317, "eval_runtime": 60.84, "eval_samples_per_second": 21.943, "eval_steps_per_second": 5.49, "step": 180 }, { "epoch": 0.2663115845539281, "grad_norm": 1.407716155052185, "learning_rate": 2.6666666666666667e-05, "loss": 1.1178, "step": 200 }, { "epoch": 0.2663115845539281, "eval_loss": 1.2264398336410522, "eval_runtime": 62.4948, "eval_samples_per_second": 21.362, "eval_steps_per_second": 5.344, "step": 200 }, { "epoch": 0.2929427430093209, "grad_norm": 1.4397716522216797, "learning_rate": 2.9333333333333333e-05, "loss": 1.0881, "step": 220 }, { "epoch": 0.2929427430093209, "eval_loss": 1.221412181854248, "eval_runtime": 60.9892, "eval_samples_per_second": 21.889, "eval_steps_per_second": 5.476, "step": 220 }, { "epoch": 0.3195739014647137, "grad_norm": 1.4784867763519287, "learning_rate": 2.9995950624188135e-05, "loss": 1.0793, "step": 240 }, { "epoch": 0.3195739014647137, "eval_loss": 1.217350959777832, "eval_runtime": 60.8323, "eval_samples_per_second": 21.946, "eval_steps_per_second": 5.491, "step": 240 }, { "epoch": 0.34620505992010653, "grad_norm": 1.4700146913528442, "learning_rate": 2.9977957806883764e-05, "loss": 1.0485, "step": 260 }, { "epoch": 0.34620505992010653, "eval_loss": 1.2096312046051025, "eval_runtime": 61.1502, "eval_samples_per_second": 21.832, "eval_steps_per_second": 5.462, "step": 260 }, { "epoch": 0.37283621837549935, "grad_norm": 1.4116566181182861, "learning_rate": 2.99455888692835e-05, "loss": 1.0487, "step": 280 }, { "epoch": 0.37283621837549935, "eval_loss": 1.2062605619430542, "eval_runtime": 60.8401, "eval_samples_per_second": 21.943, "eval_steps_per_second": 5.49, "step": 280 }, { "epoch": 0.3994673768308921, "grad_norm": 1.5455678701400757, "learning_rate": 2.989887487969095e-05, "loss": 1.0296, "step": 300 }, { "epoch": 0.3994673768308921, "eval_loss": 1.2005311250686646, "eval_runtime": 62.6482, "eval_samples_per_second": 21.309, "eval_steps_per_second": 5.331, "step": 300 }, { "epoch": 0.42609853528628494, "grad_norm": 1.6689190864562988, "learning_rate": 2.983786067505537e-05, "loss": 1.0229, "step": 320 }, { "epoch": 0.42609853528628494, "eval_loss": 1.2023998498916626, "eval_runtime": 60.9538, "eval_samples_per_second": 21.902, "eval_steps_per_second": 5.48, "step": 320 }, { "epoch": 0.45272969374167776, "grad_norm": 1.5614910125732422, "learning_rate": 2.9762604817936267e-05, "loss": 1.0135, "step": 340 }, { "epoch": 0.45272969374167776, "eval_loss": 1.1988072395324707, "eval_runtime": 60.7561, "eval_samples_per_second": 21.973, "eval_steps_per_second": 5.497, "step": 340 }, { "epoch": 0.4793608521970706, "grad_norm": 1.6140415668487549, "learning_rate": 2.9673179540294035e-05, "loss": 1.0146, "step": 360 }, { "epoch": 0.4793608521970706, "eval_loss": 1.1974718570709229, "eval_runtime": 61.0494, "eval_samples_per_second": 21.868, "eval_steps_per_second": 5.471, "step": 360 }, { "epoch": 0.5059920106524634, "grad_norm": 1.520564079284668, "learning_rate": 2.9569670674160343e-05, "loss": 1.0201, "step": 380 }, { "epoch": 0.5059920106524634, "eval_loss": 1.1944420337677002, "eval_runtime": 60.9135, "eval_samples_per_second": 21.916, "eval_steps_per_second": 5.483, "step": 380 }, { "epoch": 0.5326231691078562, "grad_norm": 1.5548486709594727, "learning_rate": 2.945217756925498e-05, "loss": 0.9832, "step": 400 }, { "epoch": 0.5326231691078562, "eval_loss": 1.1923348903656006, "eval_runtime": 63.1778, "eval_samples_per_second": 21.131, "eval_steps_per_second": 5.287, "step": 400 }, { "epoch": 0.559254327563249, "grad_norm": 1.8514701128005981, "learning_rate": 2.9320812997628184e-05, "loss": 0.9902, "step": 420 }, { "epoch": 0.559254327563249, "eval_loss": 1.1885100603103638, "eval_runtime": 60.9634, "eval_samples_per_second": 21.898, "eval_steps_per_second": 5.479, "step": 420 }, { "epoch": 0.5858854860186418, "grad_norm": 1.5884013175964355, "learning_rate": 2.9175703045419906e-05, "loss": 0.9909, "step": 440 }, { "epoch": 0.5858854860186418, "eval_loss": 1.1867530345916748, "eval_runtime": 61.0787, "eval_samples_per_second": 21.857, "eval_steps_per_second": 5.468, "step": 440 }, { "epoch": 0.6125166444740346, "grad_norm": 1.5883617401123047, "learning_rate": 2.9016986991840035e-05, "loss": 0.9907, "step": 460 }, { "epoch": 0.6125166444740346, "eval_loss": 1.1833444833755493, "eval_runtime": 60.808, "eval_samples_per_second": 21.954, "eval_steps_per_second": 5.493, "step": 460 }, { "epoch": 0.6391478029294274, "grad_norm": 1.6579700708389282, "learning_rate": 2.8844817175485628e-05, "loss": 1.0019, "step": 480 }, { "epoch": 0.6391478029294274, "eval_loss": 1.184342384338379, "eval_runtime": 60.6741, "eval_samples_per_second": 22.003, "eval_steps_per_second": 5.505, "step": 480 }, { "epoch": 0.6657789613848203, "grad_norm": 1.448255181312561, "learning_rate": 2.865935884812353e-05, "loss": 0.9779, "step": 500 }, { "epoch": 0.6657789613848203, "eval_loss": 1.179273247718811, "eval_runtime": 62.907, "eval_samples_per_second": 21.222, "eval_steps_per_second": 5.309, "step": 500 }, { "epoch": 0.6924101198402131, "grad_norm": 1.6556082963943481, "learning_rate": 2.8460790016078664e-05, "loss": 0.9728, "step": 520 }, { "epoch": 0.6924101198402131, "eval_loss": 1.1787182092666626, "eval_runtime": 60.7995, "eval_samples_per_second": 21.957, "eval_steps_per_second": 5.493, "step": 520 }, { "epoch": 0.7190412782956058, "grad_norm": 1.5750830173492432, "learning_rate": 2.824930126938027e-05, "loss": 0.9588, "step": 540 }, { "epoch": 0.7190412782956058, "eval_loss": 1.1773356199264526, "eval_runtime": 60.7139, "eval_samples_per_second": 21.988, "eval_steps_per_second": 5.501, "step": 540 }, { "epoch": 0.7456724367509987, "grad_norm": 1.9439761638641357, "learning_rate": 2.8025095598830108e-05, "loss": 0.9878, "step": 560 }, { "epoch": 0.7456724367509987, "eval_loss": 1.1760228872299194, "eval_runtime": 62.6773, "eval_samples_per_second": 21.3, "eval_steps_per_second": 5.329, "step": 560 }, { "epoch": 0.7723035952063915, "grad_norm": 1.5282509326934814, "learning_rate": 2.7788388201168096e-05, "loss": 0.9656, "step": 580 }, { "epoch": 0.7723035952063915, "eval_loss": 1.1744232177734375, "eval_runtime": 60.9336, "eval_samples_per_second": 21.909, "eval_steps_per_second": 5.481, "step": 580 }, { "epoch": 0.7989347536617842, "grad_norm": 1.6014256477355957, "learning_rate": 2.7539406272522557e-05, "loss": 1.0039, "step": 600 }, { "epoch": 0.7989347536617842, "eval_loss": 1.1710823774337769, "eval_runtime": 60.8244, "eval_samples_per_second": 21.948, "eval_steps_per_second": 5.491, "step": 600 }, { "epoch": 0.8255659121171771, "grad_norm": 2.037503242492676, "learning_rate": 2.7278388790343133e-05, "loss": 0.9679, "step": 620 }, { "epoch": 0.8255659121171771, "eval_loss": 1.1712967157363892, "eval_runtime": 60.7357, "eval_samples_per_second": 21.98, "eval_steps_per_second": 5.499, "step": 620 }, { "epoch": 0.8521970705725699, "grad_norm": 1.5632041692733765, "learning_rate": 2.7005586284025857e-05, "loss": 0.9542, "step": 640 }, { "epoch": 0.8521970705725699, "eval_loss": 1.1708076000213623, "eval_runtime": 60.7352, "eval_samples_per_second": 21.981, "eval_steps_per_second": 5.499, "step": 640 }, { "epoch": 0.8788282290279628, "grad_norm": 1.8948464393615723, "learning_rate": 2.6721260594450408e-05, "loss": 0.9732, "step": 660 }, { "epoch": 0.8788282290279628, "eval_loss": 1.1671587228775024, "eval_runtime": 62.831, "eval_samples_per_second": 21.247, "eval_steps_per_second": 5.316, "step": 660 }, { "epoch": 0.9054593874833555, "grad_norm": 1.7016338109970093, "learning_rate": 2.6425684622660387e-05, "loss": 0.9909, "step": 680 }, { "epoch": 0.9054593874833555, "eval_loss": 1.167040228843689, "eval_runtime": 60.8237, "eval_samples_per_second": 21.949, "eval_steps_per_second": 5.491, "step": 680 }, { "epoch": 0.9320905459387483, "grad_norm": 1.7869995832443237, "learning_rate": 2.6119142067927872e-05, "loss": 0.9597, "step": 700 }, { "epoch": 0.9320905459387483, "eval_loss": 1.1676889657974243, "eval_runtime": 61.3589, "eval_samples_per_second": 21.757, "eval_steps_per_second": 5.443, "step": 700 }, { "epoch": 0.9587217043941412, "grad_norm": 1.9566657543182373, "learning_rate": 2.5801927155453614e-05, "loss": 0.9169, "step": 720 }, { "epoch": 0.9587217043941412, "eval_loss": 1.1662434339523315, "eval_runtime": 61.4456, "eval_samples_per_second": 21.727, "eval_steps_per_second": 5.436, "step": 720 }, { "epoch": 0.9853528628495339, "grad_norm": 1.6298619508743286, "learning_rate": 2.5474344353964275e-05, "loss": 0.9866, "step": 740 }, { "epoch": 0.9853528628495339, "eval_loss": 1.1637498140335083, "eval_runtime": 60.9086, "eval_samples_per_second": 21.918, "eval_steps_per_second": 5.484, "step": 740 }, { "epoch": 1.0119840213049267, "grad_norm": 1.6844900846481323, "learning_rate": 2.513670808347771e-05, "loss": 0.9066, "step": 760 }, { "epoch": 1.0119840213049267, "eval_loss": 1.1655957698822021, "eval_runtime": 62.7597, "eval_samples_per_second": 21.272, "eval_steps_per_second": 5.322, "step": 760 }, { "epoch": 1.0386151797603196, "grad_norm": 1.7751928567886353, "learning_rate": 2.4789342413516838e-05, "loss": 0.8896, "step": 780 }, { "epoch": 1.0386151797603196, "eval_loss": 1.1654549837112427, "eval_runtime": 60.8963, "eval_samples_per_second": 21.923, "eval_steps_per_second": 5.485, "step": 780 }, { "epoch": 1.0652463382157125, "grad_norm": 1.786340355873108, "learning_rate": 2.4432580752061735e-05, "loss": 0.8876, "step": 800 }, { "epoch": 1.0652463382157125, "eval_loss": 1.164109706878662, "eval_runtime": 60.9233, "eval_samples_per_second": 21.913, "eval_steps_per_second": 5.482, "step": 800 } ], "logging_steps": 20, "max_steps": 2253, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3142536251283866e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }