{ "best_metric": 71.03448275862068, "best_model_checkpoint": "./whisper-large-v3-cv17-dv/checkpoint-3900", "epoch": 11.863192182410424, "eval_steps": 300, "global_step": 3900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3257328990228013, "grad_norm": 38.63404846191406, "learning_rate": 1.9200000000000003e-06, "loss": 4.5676, "step": 100 }, { "epoch": 0.6514657980456026, "grad_norm": 37.98774719238281, "learning_rate": 3.920000000000001e-06, "loss": 2.8219, "step": 200 }, { "epoch": 0.9771986970684039, "grad_norm": 7.7040252685546875, "learning_rate": 5.92e-06, "loss": 1.4644, "step": 300 }, { "epoch": 0.9771986970684039, "eval_loss": 1.0653657913208008, "eval_runtime": 142.8846, "eval_samples_per_second": 0.7, "eval_steps_per_second": 0.021, "eval_wer": 203.96551724137933, "step": 300 }, { "epoch": 1.3029315960912053, "grad_norm": 3.3996422290802, "learning_rate": 7.9e-06, "loss": 0.6065, "step": 400 }, { "epoch": 1.6286644951140063, "grad_norm": 2.5693020820617676, "learning_rate": 9.9e-06, "loss": 0.303, "step": 500 }, { "epoch": 1.9543973941368078, "grad_norm": 2.019421339035034, "learning_rate": 9.72857142857143e-06, "loss": 0.2384, "step": 600 }, { "epoch": 1.9543973941368078, "eval_loss": 0.3341977596282959, "eval_runtime": 138.0417, "eval_samples_per_second": 0.724, "eval_steps_per_second": 0.022, "eval_wer": 84.82758620689656, "step": 600 }, { "epoch": 2.2801302931596092, "grad_norm": 1.6545788049697876, "learning_rate": 9.442857142857144e-06, "loss": 0.1718, "step": 700 }, { "epoch": 2.6058631921824107, "grad_norm": 1.4984415769577026, "learning_rate": 9.157142857142857e-06, "loss": 0.1555, "step": 800 }, { "epoch": 2.9315960912052117, "grad_norm": 1.6755363941192627, "learning_rate": 8.871428571428571e-06, "loss": 0.1481, "step": 900 }, { "epoch": 2.9315960912052117, "eval_loss": 0.2715227007865906, "eval_runtime": 137.1492, "eval_samples_per_second": 0.729, "eval_steps_per_second": 0.022, "eval_wer": 78.79310344827586, "step": 900 }, { "epoch": 2.3257328990228014, "grad_norm": 1.167482852935791, "learning_rate": 8.585714285714286e-06, "loss": 0.0978, "step": 1000 }, { "epoch": 2.6514657980456025, "grad_norm": 1.9179445505142212, "learning_rate": 8.3e-06, "loss": 0.0876, "step": 1100 }, { "epoch": 2.977198697068404, "grad_norm": 1.3239954710006714, "learning_rate": 8.014285714285715e-06, "loss": 0.0975, "step": 1200 }, { "epoch": 2.977198697068404, "eval_loss": 0.2634916603565216, "eval_runtime": 138.2093, "eval_samples_per_second": 0.724, "eval_steps_per_second": 0.022, "eval_wer": 76.0344827586207, "step": 1200 }, { "epoch": 3.3029315960912053, "grad_norm": 1.5532219409942627, "learning_rate": 7.72857142857143e-06, "loss": 0.0637, "step": 1300 }, { "epoch": 3.6286644951140063, "grad_norm": 1.0479369163513184, "learning_rate": 7.442857142857144e-06, "loss": 0.0626, "step": 1400 }, { "epoch": 3.954397394136808, "grad_norm": 0.9303975105285645, "learning_rate": 7.1571428571428584e-06, "loss": 0.0616, "step": 1500 }, { "epoch": 3.954397394136808, "eval_loss": 0.2841312289237976, "eval_runtime": 138.8794, "eval_samples_per_second": 0.72, "eval_steps_per_second": 0.022, "eval_wer": 73.10344827586206, "step": 1500 }, { "epoch": 4.3257328990228014, "grad_norm": 1.0117665529251099, "learning_rate": 6.871428571428572e-06, "loss": 0.0357, "step": 1600 }, { "epoch": 4.651465798045603, "grad_norm": 1.661333441734314, "learning_rate": 6.585714285714286e-06, "loss": 0.0372, "step": 1700 }, { "epoch": 4.977198697068404, "grad_norm": 2.5246667861938477, "learning_rate": 6.300000000000001e-06, "loss": 0.0399, "step": 1800 }, { "epoch": 4.977198697068404, "eval_loss": 0.32145407795906067, "eval_runtime": 141.4007, "eval_samples_per_second": 0.707, "eval_steps_per_second": 0.021, "eval_wer": 72.24137931034483, "step": 1800 }, { "epoch": 5.3257328990228014, "grad_norm": 0.9164466261863708, "learning_rate": 6.014285714285715e-06, "loss": 0.0203, "step": 1900 }, { "epoch": 5.651465798045603, "grad_norm": 1.748327374458313, "learning_rate": 5.7285714285714285e-06, "loss": 0.0222, "step": 2000 }, { "epoch": 5.977198697068404, "grad_norm": 1.8888225555419922, "learning_rate": 5.442857142857143e-06, "loss": 0.0218, "step": 2100 }, { "epoch": 5.977198697068404, "eval_loss": 0.38806796073913574, "eval_runtime": 138.8021, "eval_samples_per_second": 0.72, "eval_steps_per_second": 0.022, "eval_wer": 73.79310344827587, "step": 2100 }, { "epoch": 6.3257328990228014, "grad_norm": 1.3422770500183105, "learning_rate": 5.157142857142857e-06, "loss": 0.0529, "step": 2200 }, { "epoch": 6.651465798045603, "grad_norm": 1.5599135160446167, "learning_rate": 4.871428571428572e-06, "loss": 0.0488, "step": 2300 }, { "epoch": 6.977198697068404, "grad_norm": 1.3102701902389526, "learning_rate": 4.585714285714286e-06, "loss": 0.046, "step": 2400 }, { "epoch": 6.977198697068404, "eval_loss": 0.27721107006073, "eval_runtime": 147.7865, "eval_samples_per_second": 0.677, "eval_steps_per_second": 0.02, "eval_wer": 74.13793103448276, "step": 2400 }, { "epoch": 7.302931596091205, "grad_norm": 1.040212869644165, "learning_rate": 4.3e-06, "loss": 0.0198, "step": 2500 }, { "epoch": 7.628664495114006, "grad_norm": 0.8678199648857117, "learning_rate": 4.014285714285715e-06, "loss": 0.0174, "step": 2600 }, { "epoch": 7.954397394136808, "grad_norm": 1.5105644464492798, "learning_rate": 3.7285714285714286e-06, "loss": 0.018, "step": 2700 }, { "epoch": 7.954397394136808, "eval_loss": 0.3344331681728363, "eval_runtime": 150.3178, "eval_samples_per_second": 0.665, "eval_steps_per_second": 0.02, "eval_wer": 71.37931034482759, "step": 2700 }, { "epoch": 8.280130293159608, "grad_norm": 0.5797841548919678, "learning_rate": 3.4428571428571434e-06, "loss": 0.0079, "step": 2800 }, { "epoch": 8.60586319218241, "grad_norm": 0.5921549797058105, "learning_rate": 3.1571428571428573e-06, "loss": 0.0068, "step": 2900 }, { "epoch": 8.931596091205211, "grad_norm": 0.7851368188858032, "learning_rate": 2.8714285714285717e-06, "loss": 0.0067, "step": 3000 }, { "epoch": 8.931596091205211, "eval_loss": 0.39471304416656494, "eval_runtime": 145.8938, "eval_samples_per_second": 0.685, "eval_steps_per_second": 0.021, "eval_wer": 71.72413793103448, "step": 3000 }, { "epoch": 9.257328990228013, "grad_norm": 0.23316721618175507, "learning_rate": 2.5857142857142856e-06, "loss": 0.0031, "step": 3100 }, { "epoch": 9.583061889250814, "grad_norm": 0.7372754216194153, "learning_rate": 2.3000000000000004e-06, "loss": 0.0027, "step": 3200 }, { "epoch": 9.908794788273616, "grad_norm": 0.2684876322746277, "learning_rate": 2.0142857142857144e-06, "loss": 0.0023, "step": 3300 }, { "epoch": 9.908794788273616, "eval_loss": 0.42460787296295166, "eval_runtime": 146.943, "eval_samples_per_second": 0.681, "eval_steps_per_second": 0.02, "eval_wer": 72.58620689655172, "step": 3300 }, { "epoch": 10.234527687296417, "grad_norm": 0.5388866662979126, "learning_rate": 1.7285714285714287e-06, "loss": 0.0012, "step": 3400 }, { "epoch": 10.560260586319218, "grad_norm": 0.6833564639091492, "learning_rate": 1.442857142857143e-06, "loss": 0.0007, "step": 3500 }, { "epoch": 10.88599348534202, "grad_norm": 0.26250678300857544, "learning_rate": 1.1571428571428572e-06, "loss": 0.0008, "step": 3600 }, { "epoch": 10.88599348534202, "eval_loss": 0.45030444860458374, "eval_runtime": 147.162, "eval_samples_per_second": 0.68, "eval_steps_per_second": 0.02, "eval_wer": 71.72413793103448, "step": 3600 }, { "epoch": 11.211726384364821, "grad_norm": 0.03168971464037895, "learning_rate": 8.714285714285716e-07, "loss": 0.0005, "step": 3700 }, { "epoch": 11.537459283387623, "grad_norm": 0.020701788365840912, "learning_rate": 5.857142857142857e-07, "loss": 0.0003, "step": 3800 }, { "epoch": 11.863192182410424, "grad_norm": 0.05785975977778435, "learning_rate": 3.0000000000000004e-07, "loss": 0.0003, "step": 3900 }, { "epoch": 11.863192182410424, "eval_loss": 0.4610276520252228, "eval_runtime": 143.56, "eval_samples_per_second": 0.697, "eval_steps_per_second": 0.021, "eval_wer": 71.03448275862068, "step": 3900 } ], "logging_steps": 100, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 14, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.117320944648192e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }