{ "best_global_step": 750, "best_metric": 4.705667495727539, "best_model_checkpoint": "/Users/alexgrigoras/Library/Mobile Documents/com~apple~CloudDocs/[5] Software/github/dif-pi/artifacts/models/sdg_chronos_t5_small_dunnhumby/checkpoint-750", "epoch": 0.3105911585050212, "eval_steps": 25, "global_step": 750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01035303861683404, "grad_norm": 0.2731291949748993, "learning_rate": 8.000000000000001e-06, "loss": 45.1662451171875, "step": 25 }, { "epoch": 0.01035303861683404, "eval_loss": 5.382182598114014, "eval_runtime": 60.5419, "eval_samples_per_second": 70.91, "eval_steps_per_second": 35.463, "step": 25 }, { "epoch": 0.02070607723366808, "grad_norm": 0.2949911952018738, "learning_rate": 1.6333333333333335e-05, "loss": 44.3224951171875, "step": 50 }, { "epoch": 0.02070607723366808, "eval_loss": 5.343369960784912, "eval_runtime": 47.1772, "eval_samples_per_second": 90.997, "eval_steps_per_second": 45.509, "step": 50 }, { "epoch": 0.031059115850502122, "grad_norm": 0.2861124277114868, "learning_rate": 2.466666666666667e-05, "loss": 43.54326171875, "step": 75 }, { "epoch": 0.031059115850502122, "eval_loss": 5.254676818847656, "eval_runtime": 48.6137, "eval_samples_per_second": 88.308, "eval_steps_per_second": 44.164, "step": 75 }, { "epoch": 0.04141215446733616, "grad_norm": 0.3088361918926239, "learning_rate": 2.498250672211728e-05, "loss": 43.87701171875, "step": 100 }, { "epoch": 0.04141215446733616, "eval_loss": 5.151735305786133, "eval_runtime": 46.8392, "eval_samples_per_second": 91.654, "eval_steps_per_second": 45.838, "step": 100 }, { "epoch": 0.05176519308417021, "grad_norm": 0.36366939544677734, "learning_rate": 2.4927134858925575e-05, "loss": 41.4726220703125, "step": 125 }, { "epoch": 0.05176519308417021, "eval_loss": 5.072572231292725, "eval_runtime": 43.9205, "eval_samples_per_second": 97.745, "eval_steps_per_second": 48.884, "step": 125 }, { "epoch": 0.062118231701004244, "grad_norm": 0.44364720582962036, "learning_rate": 2.4834022195605383e-05, "loss": 41.1882958984375, "step": 150 }, { "epoch": 0.062118231701004244, "eval_loss": 4.995686054229736, "eval_runtime": 53.0192, "eval_samples_per_second": 80.971, "eval_steps_per_second": 40.495, "step": 150 }, { "epoch": 0.07247127031783829, "grad_norm": 0.4700476825237274, "learning_rate": 2.470345151225491e-05, "loss": 41.691572265625, "step": 175 }, { "epoch": 0.07247127031783829, "eval_loss": 4.918369293212891, "eval_runtime": 47.97, "eval_samples_per_second": 89.493, "eval_steps_per_second": 44.757, "step": 175 }, { "epoch": 0.08282430893467232, "grad_norm": 0.42324015498161316, "learning_rate": 2.4535819347748074e-05, "loss": 39.7604541015625, "step": 200 }, { "epoch": 0.08282430893467232, "eval_loss": 4.853856086730957, "eval_runtime": 45.9268, "eval_samples_per_second": 93.475, "eval_steps_per_second": 46.748, "step": 200 }, { "epoch": 0.09317734755150636, "grad_norm": 0.4664323925971985, "learning_rate": 2.433163479545898e-05, "loss": 40.16502197265625, "step": 225 }, { "epoch": 0.09317734755150636, "eval_loss": 4.832671642303467, "eval_runtime": 49.0803, "eval_samples_per_second": 87.469, "eval_steps_per_second": 43.745, "step": 225 }, { "epoch": 0.10353038616834041, "grad_norm": 0.475277304649353, "learning_rate": 2.4091517957162068e-05, "loss": 40.338056640625, "step": 250 }, { "epoch": 0.10353038616834041, "eval_loss": 4.815681457519531, "eval_runtime": 45.7189, "eval_samples_per_second": 93.9, "eval_steps_per_second": 46.961, "step": 250 }, { "epoch": 0.11388342478517445, "grad_norm": 0.46192488074302673, "learning_rate": 2.3816198059803415e-05, "loss": 39.81144287109375, "step": 275 }, { "epoch": 0.11388342478517445, "eval_loss": 4.8025665283203125, "eval_runtime": 44.0374, "eval_samples_per_second": 97.485, "eval_steps_per_second": 48.754, "step": 275 }, { "epoch": 0.12423646340200849, "grad_norm": 0.44529587030410767, "learning_rate": 2.350651124086246e-05, "loss": 40.06572509765625, "step": 300 }, { "epoch": 0.12423646340200849, "eval_loss": 4.791704177856445, "eval_runtime": 46.8164, "eval_samples_per_second": 91.699, "eval_steps_per_second": 45.86, "step": 300 }, { "epoch": 0.13458950201884254, "grad_norm": 0.47136980295181274, "learning_rate": 2.316339800902997e-05, "loss": 39.34464599609375, "step": 325 }, { "epoch": 0.13458950201884254, "eval_loss": 4.781772136688232, "eval_runtime": 46.7709, "eval_samples_per_second": 91.788, "eval_steps_per_second": 45.905, "step": 325 }, { "epoch": 0.14494254063567658, "grad_norm": 0.5043098330497742, "learning_rate": 2.2787900387914035e-05, "loss": 40.12859619140625, "step": 350 }, { "epoch": 0.14494254063567658, "eval_loss": 4.77421236038208, "eval_runtime": 46.6372, "eval_samples_per_second": 92.051, "eval_steps_per_second": 46.036, "step": 350 }, { "epoch": 0.1552955792525106, "grad_norm": 0.43915286660194397, "learning_rate": 2.238115875144865e-05, "loss": 40.11051513671875, "step": 375 }, { "epoch": 0.1552955792525106, "eval_loss": 4.768870830535889, "eval_runtime": 46.1314, "eval_samples_per_second": 93.06, "eval_steps_per_second": 46.541, "step": 375 }, { "epoch": 0.16564861786934465, "grad_norm": 0.4874376058578491, "learning_rate": 2.1944408360615527e-05, "loss": 40.54395751953125, "step": 400 }, { "epoch": 0.16564861786934465, "eval_loss": 4.759785175323486, "eval_runtime": 45.0656, "eval_samples_per_second": 95.261, "eval_steps_per_second": 47.642, "step": 400 }, { "epoch": 0.1760016564861787, "grad_norm": 0.47157636284828186, "learning_rate": 2.147897561199711e-05, "loss": 38.57564208984375, "step": 425 }, { "epoch": 0.1760016564861787, "eval_loss": 4.7538371086120605, "eval_runtime": 45.1789, "eval_samples_per_second": 95.022, "eval_steps_per_second": 47.522, "step": 425 }, { "epoch": 0.18635469510301272, "grad_norm": 0.5695982575416565, "learning_rate": 2.0986274009553747e-05, "loss": 40.2056494140625, "step": 450 }, { "epoch": 0.18635469510301272, "eval_loss": 4.746274471282959, "eval_runtime": 48.2349, "eval_samples_per_second": 89.002, "eval_steps_per_second": 44.511, "step": 450 }, { "epoch": 0.19670773371984676, "grad_norm": 0.4859912097454071, "learning_rate": 2.0467799871858624e-05, "loss": 39.90147705078125, "step": 475 }, { "epoch": 0.19670773371984676, "eval_loss": 4.741403579711914, "eval_runtime": 47.2353, "eval_samples_per_second": 90.885, "eval_steps_per_second": 45.453, "step": 475 }, { "epoch": 0.20706077233668083, "grad_norm": 0.5383442640304565, "learning_rate": 1.9925127787827415e-05, "loss": 39.66552001953125, "step": 500 }, { "epoch": 0.20706077233668083, "eval_loss": 4.736755847930908, "eval_runtime": 43.4301, "eval_samples_per_second": 98.849, "eval_steps_per_second": 49.436, "step": 500 }, { "epoch": 0.21741381095351486, "grad_norm": 0.47965624928474426, "learning_rate": 1.9359905834743513e-05, "loss": 39.6004296875, "step": 525 }, { "epoch": 0.21741381095351486, "eval_loss": 4.732944011688232, "eval_runtime": 43.41, "eval_samples_per_second": 98.894, "eval_steps_per_second": 49.459, "step": 525 }, { "epoch": 0.2277668495703489, "grad_norm": 0.5654281973838806, "learning_rate": 1.8773850573101503e-05, "loss": 39.6916064453125, "step": 550 }, { "epoch": 0.2277668495703489, "eval_loss": 4.729019641876221, "eval_runtime": 43.337, "eval_samples_per_second": 99.061, "eval_steps_per_second": 49.542, "step": 550 }, { "epoch": 0.23811988818718294, "grad_norm": 0.562452495098114, "learning_rate": 1.8168741833469327e-05, "loss": 39.9837548828125, "step": 575 }, { "epoch": 0.23811988818718294, "eval_loss": 4.7265305519104, "eval_runtime": 43.4156, "eval_samples_per_second": 98.882, "eval_steps_per_second": 49.452, "step": 575 }, { "epoch": 0.24847292680401697, "grad_norm": 0.6212955117225647, "learning_rate": 1.7546417311201357e-05, "loss": 39.41627685546875, "step": 600 }, { "epoch": 0.24847292680401697, "eval_loss": 4.7227606773376465, "eval_runtime": 43.4821, "eval_samples_per_second": 98.73, "eval_steps_per_second": 49.377, "step": 600 }, { "epoch": 0.258825965420851, "grad_norm": 0.54314124584198, "learning_rate": 1.690876698541802e-05, "loss": 39.156318359375, "step": 625 }, { "epoch": 0.258825965420851, "eval_loss": 4.717469215393066, "eval_runtime": 43.0545, "eval_samples_per_second": 99.711, "eval_steps_per_second": 49.867, "step": 625 }, { "epoch": 0.2691790040376851, "grad_norm": 0.6269752383232117, "learning_rate": 1.625772737920128e-05, "loss": 39.20115966796875, "step": 650 }, { "epoch": 0.2691790040376851, "eval_loss": 4.716719150543213, "eval_runtime": 120.2804, "eval_samples_per_second": 35.692, "eval_steps_per_second": 17.85, "step": 650 }, { "epoch": 0.2795320426545191, "grad_norm": 0.5073297023773193, "learning_rate": 1.5595275678437756e-05, "loss": 39.50381591796875, "step": 675 }, { "epoch": 0.2795320426545191, "eval_loss": 4.712583065032959, "eval_runtime": 43.4686, "eval_samples_per_second": 98.761, "eval_steps_per_second": 49.392, "step": 675 }, { "epoch": 0.28988508127135315, "grad_norm": 0.5422746539115906, "learning_rate": 1.4923423727170106e-05, "loss": 38.739453125, "step": 700 }, { "epoch": 0.28988508127135315, "eval_loss": 4.711677074432373, "eval_runtime": 45.4531, "eval_samples_per_second": 94.449, "eval_steps_per_second": 47.235, "step": 700 }, { "epoch": 0.30023811988818716, "grad_norm": 0.5396411418914795, "learning_rate": 1.4244211917692812e-05, "loss": 38.6535791015625, "step": 725 }, { "epoch": 0.30023811988818716, "eval_loss": 4.707785606384277, "eval_runtime": 45.9015, "eval_samples_per_second": 93.526, "eval_steps_per_second": 46.774, "step": 725 }, { "epoch": 0.3105911585050212, "grad_norm": 0.6173298358917236, "learning_rate": 1.355970299394786e-05, "loss": 38.515927734375, "step": 750 }, { "epoch": 0.3105911585050212, "eval_loss": 4.705667495727539, "eval_runtime": 46.4706, "eval_samples_per_second": 92.381, "eval_steps_per_second": 46.201, "step": 750 } ], "logging_steps": 25, "max_steps": 1500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 750, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 574777589760000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }