| { |
| "best_metric": 0.0005143894231878221, |
| "best_model_checkpoint": "PE-big3/checkpoint-19158", |
| "epoch": 2.9998434156271205, |
| "eval_steps": 500, |
| "global_step": 19158, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003914609321989665, |
| "grad_norm": 4.866889476776123, |
| "learning_rate": 6.524008350730689e-07, |
| "loss": 1.4, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.00782921864397933, |
| "grad_norm": 3.255279302597046, |
| "learning_rate": 1.3048016701461379e-06, |
| "loss": 1.3054, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.011743827965968996, |
| "grad_norm": 2.329948902130127, |
| "learning_rate": 1.957202505219207e-06, |
| "loss": 1.1451, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.01565843728795866, |
| "grad_norm": 1.9639887809753418, |
| "learning_rate": 2.6096033402922757e-06, |
| "loss": 0.9271, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.019573046609948328, |
| "grad_norm": 1.9199451208114624, |
| "learning_rate": 3.262004175365345e-06, |
| "loss": 0.6443, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.02348765593193799, |
| "grad_norm": 1.4710872173309326, |
| "learning_rate": 3.914405010438414e-06, |
| "loss": 0.3856, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.02740226525392766, |
| "grad_norm": 1.655380129814148, |
| "learning_rate": 4.5668058455114825e-06, |
| "loss": 0.2192, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.03131687457591732, |
| "grad_norm": 1.475960373878479, |
| "learning_rate": 5.2192066805845514e-06, |
| "loss": 0.12, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.035231483897906986, |
| "grad_norm": 0.9583467245101929, |
| "learning_rate": 5.87160751565762e-06, |
| "loss": 0.0688, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.039146093219896656, |
| "grad_norm": 1.5871645212173462, |
| "learning_rate": 6.52400835073069e-06, |
| "loss": 0.0487, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.04306070254188632, |
| "grad_norm": 1.3285017013549805, |
| "learning_rate": 7.176409185803757e-06, |
| "loss": 0.0331, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.04697531186387598, |
| "grad_norm": 1.2705070972442627, |
| "learning_rate": 7.828810020876827e-06, |
| "loss": 0.0195, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.050889921185865654, |
| "grad_norm": 0.44879260659217834, |
| "learning_rate": 8.481210855949897e-06, |
| "loss": 0.0145, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.05480453050785532, |
| "grad_norm": 0.6279118657112122, |
| "learning_rate": 9.133611691022965e-06, |
| "loss": 0.0142, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.05871913982984498, |
| "grad_norm": 0.15489937365055084, |
| "learning_rate": 9.786012526096033e-06, |
| "loss": 0.0159, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.06263374915183464, |
| "grad_norm": 0.1354319453239441, |
| "learning_rate": 1.0438413361169103e-05, |
| "loss": 0.0144, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.06654835847382432, |
| "grad_norm": 0.36055588722229004, |
| "learning_rate": 1.1090814196242173e-05, |
| "loss": 0.0078, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.07046296779581397, |
| "grad_norm": 0.11695325374603271, |
| "learning_rate": 1.174321503131524e-05, |
| "loss": 0.0064, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.07437757711780364, |
| "grad_norm": 1.524307370185852, |
| "learning_rate": 1.2395615866388309e-05, |
| "loss": 0.0069, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.07829218643979331, |
| "grad_norm": 0.43202999234199524, |
| "learning_rate": 1.304801670146138e-05, |
| "loss": 0.0098, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.08220679576178297, |
| "grad_norm": 0.20765632390975952, |
| "learning_rate": 1.3700417536534447e-05, |
| "loss": 0.0068, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.08612140508377264, |
| "grad_norm": 0.020130537450313568, |
| "learning_rate": 1.4352818371607515e-05, |
| "loss": 0.0045, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.09003601440576231, |
| "grad_norm": 1.2454266548156738, |
| "learning_rate": 1.5005219206680585e-05, |
| "loss": 0.0057, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.09395062372775197, |
| "grad_norm": 1.0225284099578857, |
| "learning_rate": 1.5657620041753654e-05, |
| "loss": 0.0049, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.09786523304974164, |
| "grad_norm": 0.755135178565979, |
| "learning_rate": 1.6310020876826724e-05, |
| "loss": 0.0031, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.10177984237173131, |
| "grad_norm": 0.2760821282863617, |
| "learning_rate": 1.6962421711899794e-05, |
| "loss": 0.0071, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.10569445169372096, |
| "grad_norm": 0.4344524145126343, |
| "learning_rate": 1.761482254697286e-05, |
| "loss": 0.0034, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.10960906101571063, |
| "grad_norm": 0.020152989774942398, |
| "learning_rate": 1.826722338204593e-05, |
| "loss": 0.0025, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.11352367033770029, |
| "grad_norm": 0.08340949565172195, |
| "learning_rate": 1.8919624217118996e-05, |
| "loss": 0.0078, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.11743827965968996, |
| "grad_norm": 0.03789607062935829, |
| "learning_rate": 1.9572025052192066e-05, |
| "loss": 0.0011, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.12135288898167963, |
| "grad_norm": 0.8983942866325378, |
| "learning_rate": 2.0224425887265136e-05, |
| "loss": 0.0083, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.1252674983036693, |
| "grad_norm": 2.239677667617798, |
| "learning_rate": 2.0876826722338206e-05, |
| "loss": 0.0121, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.12918210762565896, |
| "grad_norm": 0.017707696184515953, |
| "learning_rate": 2.1529227557411276e-05, |
| "loss": 0.0029, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.13309671694764863, |
| "grad_norm": 0.45937007665634155, |
| "learning_rate": 2.2181628392484345e-05, |
| "loss": 0.0049, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.1370113262696383, |
| "grad_norm": 0.9611666202545166, |
| "learning_rate": 2.2834029227557412e-05, |
| "loss": 0.0128, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.14092593559162794, |
| "grad_norm": 0.22098630666732788, |
| "learning_rate": 2.348643006263048e-05, |
| "loss": 0.0139, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.1448405449136176, |
| "grad_norm": 0.016266101971268654, |
| "learning_rate": 2.413883089770355e-05, |
| "loss": 0.0033, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.14875515423560728, |
| "grad_norm": 0.03947868198156357, |
| "learning_rate": 2.4791231732776618e-05, |
| "loss": 0.0042, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.15266976355759695, |
| "grad_norm": 0.20284026861190796, |
| "learning_rate": 2.544363256784969e-05, |
| "loss": 0.0037, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.15658437287958663, |
| "grad_norm": 0.9516937732696533, |
| "learning_rate": 2.609603340292276e-05, |
| "loss": 0.013, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1604989822015763, |
| "grad_norm": 0.35638949275016785, |
| "learning_rate": 2.6748434237995827e-05, |
| "loss": 0.0043, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.16441359152356594, |
| "grad_norm": 0.2974227964878082, |
| "learning_rate": 2.7400835073068893e-05, |
| "loss": 0.0058, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1683282008455556, |
| "grad_norm": 0.10002760589122772, |
| "learning_rate": 2.8053235908141963e-05, |
| "loss": 0.0047, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.17224281016754528, |
| "grad_norm": 0.03456703945994377, |
| "learning_rate": 2.870563674321503e-05, |
| "loss": 0.0034, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.17615741948953495, |
| "grad_norm": 0.02147500589489937, |
| "learning_rate": 2.93580375782881e-05, |
| "loss": 0.0052, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.18007202881152462, |
| "grad_norm": 0.048098206520080566, |
| "learning_rate": 3.001043841336117e-05, |
| "loss": 0.0042, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.18398663813351426, |
| "grad_norm": 0.251152902841568, |
| "learning_rate": 3.0662839248434235e-05, |
| "loss": 0.0068, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.18790124745550393, |
| "grad_norm": 0.040291983634233475, |
| "learning_rate": 3.131524008350731e-05, |
| "loss": 0.0097, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.1918158567774936, |
| "grad_norm": 0.019989246502518654, |
| "learning_rate": 3.1967640918580375e-05, |
| "loss": 0.0053, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.19573046609948327, |
| "grad_norm": 0.016093524172902107, |
| "learning_rate": 3.262004175365345e-05, |
| "loss": 0.0022, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.19964507542147295, |
| "grad_norm": 0.008093849755823612, |
| "learning_rate": 3.3272442588726515e-05, |
| "loss": 0.0019, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.20355968474346262, |
| "grad_norm": 0.018408527597784996, |
| "learning_rate": 3.392484342379959e-05, |
| "loss": 0.0022, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.20747429406545226, |
| "grad_norm": 0.009230668656527996, |
| "learning_rate": 3.4577244258872654e-05, |
| "loss": 0.0007, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.21138890338744193, |
| "grad_norm": 0.061230212450027466, |
| "learning_rate": 3.522964509394572e-05, |
| "loss": 0.0031, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.2153035127094316, |
| "grad_norm": 0.20762716233730316, |
| "learning_rate": 3.5882045929018794e-05, |
| "loss": 0.0024, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.21921812203142127, |
| "grad_norm": 0.048180121928453445, |
| "learning_rate": 3.653444676409186e-05, |
| "loss": 0.0165, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.22313273135341094, |
| "grad_norm": 0.026987021788954735, |
| "learning_rate": 3.718684759916493e-05, |
| "loss": 0.0041, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.22704734067540058, |
| "grad_norm": 0.6532347202301025, |
| "learning_rate": 3.783924843423799e-05, |
| "loss": 0.0061, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.23096194999739025, |
| "grad_norm": 0.0236322320997715, |
| "learning_rate": 3.8491649269311066e-05, |
| "loss": 0.0018, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.23487655931937992, |
| "grad_norm": 0.3827228844165802, |
| "learning_rate": 3.914405010438413e-05, |
| "loss": 0.0029, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.2387911686413696, |
| "grad_norm": 0.00414466205984354, |
| "learning_rate": 3.9796450939457206e-05, |
| "loss": 0.0024, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.24270577796335926, |
| "grad_norm": 0.03536088764667511, |
| "learning_rate": 4.044885177453027e-05, |
| "loss": 0.0041, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.24662038728534894, |
| "grad_norm": 0.02724548988044262, |
| "learning_rate": 4.110125260960334e-05, |
| "loss": 0.0018, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.2505349966073386, |
| "grad_norm": 1.551004409790039, |
| "learning_rate": 4.175365344467641e-05, |
| "loss": 0.0089, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.25444960592932825, |
| "grad_norm": 0.28799840807914734, |
| "learning_rate": 4.240605427974948e-05, |
| "loss": 0.0075, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.2583642152513179, |
| "grad_norm": 0.009647930040955544, |
| "learning_rate": 4.305845511482255e-05, |
| "loss": 0.0049, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.2622788245733076, |
| "grad_norm": 0.006901186890900135, |
| "learning_rate": 4.371085594989562e-05, |
| "loss": 0.0027, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.26619343389529726, |
| "grad_norm": 0.05002870783209801, |
| "learning_rate": 4.436325678496869e-05, |
| "loss": 0.0064, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.27010804321728693, |
| "grad_norm": 0.1099412590265274, |
| "learning_rate": 4.501565762004176e-05, |
| "loss": 0.0035, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.2740226525392766, |
| "grad_norm": 0.43022432923316956, |
| "learning_rate": 4.5668058455114823e-05, |
| "loss": 0.005, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.27793726186126627, |
| "grad_norm": 0.0661238431930542, |
| "learning_rate": 4.6320459290187897e-05, |
| "loss": 0.0111, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.2818518711832559, |
| "grad_norm": 0.04808713495731354, |
| "learning_rate": 4.697286012526096e-05, |
| "loss": 0.0082, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.28576648050524556, |
| "grad_norm": 0.010018469765782356, |
| "learning_rate": 4.7625260960334036e-05, |
| "loss": 0.0063, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.2896810898272352, |
| "grad_norm": 0.02794954925775528, |
| "learning_rate": 4.82776617954071e-05, |
| "loss": 0.0023, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.2935956991492249, |
| "grad_norm": 0.08497870713472366, |
| "learning_rate": 4.893006263048017e-05, |
| "loss": 0.0053, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.29751030847121457, |
| "grad_norm": 0.012794610112905502, |
| "learning_rate": 4.9582463465553235e-05, |
| "loss": 0.0023, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.30142491779320424, |
| "grad_norm": 0.5463805794715881, |
| "learning_rate": 4.997390093956617e-05, |
| "loss": 0.0123, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.3053395271151939, |
| "grad_norm": 0.643292248249054, |
| "learning_rate": 4.990140354947222e-05, |
| "loss": 0.006, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.3092541364371836, |
| "grad_norm": 0.016200900077819824, |
| "learning_rate": 4.9828906159378265e-05, |
| "loss": 0.0038, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.31316874575917325, |
| "grad_norm": 0.01805788092315197, |
| "learning_rate": 4.975640876928431e-05, |
| "loss": 0.0005, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3170833550811629, |
| "grad_norm": 0.09769612550735474, |
| "learning_rate": 4.968391137919035e-05, |
| "loss": 0.0017, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.3209979644031526, |
| "grad_norm": 0.7254369258880615, |
| "learning_rate": 4.96114139890964e-05, |
| "loss": 0.0029, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.3249125737251422, |
| "grad_norm": 0.21768023073673248, |
| "learning_rate": 4.953891659900244e-05, |
| "loss": 0.0051, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.3288271830471319, |
| "grad_norm": 0.0030887445900589228, |
| "learning_rate": 4.946641920890848e-05, |
| "loss": 0.0007, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.33274179236912155, |
| "grad_norm": 0.03302296623587608, |
| "learning_rate": 4.9393921818814525e-05, |
| "loss": 0.008, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.3366564016911112, |
| "grad_norm": 0.07119308412075043, |
| "learning_rate": 4.932142442872057e-05, |
| "loss": 0.0005, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.3405710110131009, |
| "grad_norm": 0.0021239419002085924, |
| "learning_rate": 4.9248927038626616e-05, |
| "loss": 0.0006, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.34448562033509056, |
| "grad_norm": 0.0006605405360460281, |
| "learning_rate": 4.9179329544136416e-05, |
| "loss": 0.001, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.34840022965708023, |
| "grad_norm": 0.35737213492393494, |
| "learning_rate": 4.910683215404246e-05, |
| "loss": 0.0031, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.3523148389790699, |
| "grad_norm": 0.003352939384058118, |
| "learning_rate": 4.90343347639485e-05, |
| "loss": 0.0026, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.35622944830105957, |
| "grad_norm": 0.011482371017336845, |
| "learning_rate": 4.896183737385454e-05, |
| "loss": 0.0056, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.36014405762304924, |
| "grad_norm": 0.31182751059532166, |
| "learning_rate": 4.8889339983760585e-05, |
| "loss": 0.003, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3640586669450389, |
| "grad_norm": 0.019928403198719025, |
| "learning_rate": 4.881684259366663e-05, |
| "loss": 0.004, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.3679732762670285, |
| "grad_norm": 0.17220672965049744, |
| "learning_rate": 4.874434520357267e-05, |
| "loss": 0.0022, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.3718878855890182, |
| "grad_norm": 0.002172990469262004, |
| "learning_rate": 4.867184781347872e-05, |
| "loss": 0.0018, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.37580249491100787, |
| "grad_norm": 0.6102157831192017, |
| "learning_rate": 4.859935042338476e-05, |
| "loss": 0.0024, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.37971710423299754, |
| "grad_norm": 0.011678989976644516, |
| "learning_rate": 4.85268530332908e-05, |
| "loss": 0.0023, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.3836317135549872, |
| "grad_norm": 0.7285154461860657, |
| "learning_rate": 4.8454355643196845e-05, |
| "loss": 0.0051, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.3875463228769769, |
| "grad_norm": 0.004773287568241358, |
| "learning_rate": 4.838185825310289e-05, |
| "loss": 0.0018, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.39146093219896655, |
| "grad_norm": 0.00791076384484768, |
| "learning_rate": 4.8309360863008937e-05, |
| "loss": 0.004, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.3953755415209562, |
| "grad_norm": 0.8710932731628418, |
| "learning_rate": 4.823686347291498e-05, |
| "loss": 0.0042, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.3992901508429459, |
| "grad_norm": 0.04120909795165062, |
| "learning_rate": 4.816436608282102e-05, |
| "loss": 0.0024, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.40320476016493556, |
| "grad_norm": 1.0033127069473267, |
| "learning_rate": 4.809186869272706e-05, |
| "loss": 0.004, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.40711936948692523, |
| "grad_norm": 0.1285122036933899, |
| "learning_rate": 4.801937130263311e-05, |
| "loss": 0.009, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.41103397880891485, |
| "grad_norm": 0.8447295427322388, |
| "learning_rate": 4.7946873912539154e-05, |
| "loss": 0.0015, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.4149485881309045, |
| "grad_norm": 0.10731597989797592, |
| "learning_rate": 4.78743765224452e-05, |
| "loss": 0.0066, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.4188631974528942, |
| "grad_norm": 0.011971144936978817, |
| "learning_rate": 4.780187913235123e-05, |
| "loss": 0.0042, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.42277780677488386, |
| "grad_norm": 0.0017153106164187193, |
| "learning_rate": 4.772938174225728e-05, |
| "loss": 0.0013, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.42669241609687353, |
| "grad_norm": 0.0010528437560424209, |
| "learning_rate": 4.7656884352163323e-05, |
| "loss": 0.0006, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.4306070254188632, |
| "grad_norm": 0.0007753331447020173, |
| "learning_rate": 4.7584386962069366e-05, |
| "loss": 0.0002, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.43452163474085287, |
| "grad_norm": 0.0036313000600785017, |
| "learning_rate": 4.751188957197541e-05, |
| "loss": 0.004, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.43843624406284254, |
| "grad_norm": 0.10537869483232498, |
| "learning_rate": 4.743939218188146e-05, |
| "loss": 0.004, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.4423508533848322, |
| "grad_norm": 0.0017782174982130527, |
| "learning_rate": 4.73668947917875e-05, |
| "loss": 0.0011, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.4462654627068219, |
| "grad_norm": 0.02180619165301323, |
| "learning_rate": 4.729439740169354e-05, |
| "loss": 0.0003, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.45018007202881155, |
| "grad_norm": 0.0014395464677363634, |
| "learning_rate": 4.7221900011599584e-05, |
| "loss": 0.0011, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.45409468135080117, |
| "grad_norm": 0.041430070996284485, |
| "learning_rate": 4.7149402621505626e-05, |
| "loss": 0.0007, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.45800929067279084, |
| "grad_norm": 0.054793838411569595, |
| "learning_rate": 4.7076905231411675e-05, |
| "loss": 0.0025, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.4619238999947805, |
| "grad_norm": 0.08612020313739777, |
| "learning_rate": 4.700440784131772e-05, |
| "loss": 0.0068, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.4658385093167702, |
| "grad_norm": 1.2504163980484009, |
| "learning_rate": 4.693191045122376e-05, |
| "loss": 0.0075, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.46975311863875985, |
| "grad_norm": 0.8100822567939758, |
| "learning_rate": 4.68594130611298e-05, |
| "loss": 0.0024, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.4736677279607495, |
| "grad_norm": 0.7344357967376709, |
| "learning_rate": 4.6786915671035844e-05, |
| "loss": 0.0042, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.4775823372827392, |
| "grad_norm": 0.006882940419018269, |
| "learning_rate": 4.671441828094189e-05, |
| "loss": 0.0108, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.48149694660472886, |
| "grad_norm": 0.07418603450059891, |
| "learning_rate": 4.6641920890847935e-05, |
| "loss": 0.0015, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.48541155592671853, |
| "grad_norm": 0.023311011493206024, |
| "learning_rate": 4.656942350075397e-05, |
| "loss": 0.0011, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.4893261652487082, |
| "grad_norm": 0.22213295102119446, |
| "learning_rate": 4.649692611066002e-05, |
| "loss": 0.0002, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.49324077457069787, |
| "grad_norm": 0.028663238510489464, |
| "learning_rate": 4.642442872056606e-05, |
| "loss": 0.0034, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.4971553838926875, |
| "grad_norm": 0.010352909564971924, |
| "learning_rate": 4.6351931330472104e-05, |
| "loss": 0.0003, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.5010699932146772, |
| "grad_norm": 0.01622854731976986, |
| "learning_rate": 4.6279433940378146e-05, |
| "loss": 0.0035, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.5049846025366669, |
| "grad_norm": 0.0045238700695335865, |
| "learning_rate": 4.620693655028419e-05, |
| "loss": 0.0016, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.5088992118586565, |
| "grad_norm": 0.000869418028742075, |
| "learning_rate": 4.613443916019024e-05, |
| "loss": 0.0003, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.5128138211806462, |
| "grad_norm": 0.0070857820101082325, |
| "learning_rate": 4.606194177009628e-05, |
| "loss": 0.0013, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.5167284305026358, |
| "grad_norm": 0.019664961844682693, |
| "learning_rate": 4.598944438000232e-05, |
| "loss": 0.0014, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.5206430398246255, |
| "grad_norm": 0.002933235839009285, |
| "learning_rate": 4.5916946989908364e-05, |
| "loss": 0.0024, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.5245576491466152, |
| "grad_norm": 0.009601329453289509, |
| "learning_rate": 4.5844449599814406e-05, |
| "loss": 0.001, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.5284722584686048, |
| "grad_norm": 0.03231184929609299, |
| "learning_rate": 4.5771952209720455e-05, |
| "loss": 0.001, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.5323868677905945, |
| "grad_norm": 0.038716524839401245, |
| "learning_rate": 4.56994548196265e-05, |
| "loss": 0.0104, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.5363014771125841, |
| "grad_norm": 0.005376841872930527, |
| "learning_rate": 4.562695742953254e-05, |
| "loss": 0.0021, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.5402160864345739, |
| "grad_norm": 0.8506935834884644, |
| "learning_rate": 4.555446003943858e-05, |
| "loss": 0.0037, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.5441306957565635, |
| "grad_norm": 0.00393926864489913, |
| "learning_rate": 4.548196264934463e-05, |
| "loss": 0.0007, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.5480453050785532, |
| "grad_norm": 0.49948814511299133, |
| "learning_rate": 4.5409465259250666e-05, |
| "loss": 0.0017, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.5519599144005428, |
| "grad_norm": 0.008987600915133953, |
| "learning_rate": 4.533696786915671e-05, |
| "loss": 0.0017, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.5558745237225325, |
| "grad_norm": 0.06366792321205139, |
| "learning_rate": 4.526447047906275e-05, |
| "loss": 0.0016, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.5597891330445222, |
| "grad_norm": 0.9016256332397461, |
| "learning_rate": 4.51919730889688e-05, |
| "loss": 0.0023, |
| "step": 3575 |
| }, |
| { |
| "epoch": 0.5637037423665118, |
| "grad_norm": 0.010248661041259766, |
| "learning_rate": 4.511947569887484e-05, |
| "loss": 0.0037, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.5676183516885015, |
| "grad_norm": 0.007675408851355314, |
| "learning_rate": 4.5046978308780884e-05, |
| "loss": 0.0053, |
| "step": 3625 |
| }, |
| { |
| "epoch": 0.5715329610104911, |
| "grad_norm": 0.0017978112446144223, |
| "learning_rate": 4.497448091868693e-05, |
| "loss": 0.0006, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.5754475703324808, |
| "grad_norm": 1.0881074666976929, |
| "learning_rate": 4.4901983528592976e-05, |
| "loss": 0.0026, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.5793621796544705, |
| "grad_norm": 0.0023445766419172287, |
| "learning_rate": 4.4832386034102776e-05, |
| "loss": 0.0014, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.5832767889764602, |
| "grad_norm": 0.0032128174789249897, |
| "learning_rate": 4.475988864400882e-05, |
| "loss": 0.0013, |
| "step": 3725 |
| }, |
| { |
| "epoch": 0.5871913982984498, |
| "grad_norm": 0.07783033698797226, |
| "learning_rate": 4.468739125391486e-05, |
| "loss": 0.0061, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.5911060076204395, |
| "grad_norm": 0.018863795325160027, |
| "learning_rate": 4.46148938638209e-05, |
| "loss": 0.0022, |
| "step": 3775 |
| }, |
| { |
| "epoch": 0.5950206169424291, |
| "grad_norm": 0.004098298028111458, |
| "learning_rate": 4.454239647372695e-05, |
| "loss": 0.0022, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.5989352262644189, |
| "grad_norm": 0.0029339243192225695, |
| "learning_rate": 4.4469899083632994e-05, |
| "loss": 0.0023, |
| "step": 3825 |
| }, |
| { |
| "epoch": 0.6028498355864085, |
| "grad_norm": 0.0022904234938323498, |
| "learning_rate": 4.4397401693539036e-05, |
| "loss": 0.0013, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.6067644449083981, |
| "grad_norm": 0.001695298939011991, |
| "learning_rate": 4.432490430344508e-05, |
| "loss": 0.0003, |
| "step": 3875 |
| }, |
| { |
| "epoch": 0.6106790542303878, |
| "grad_norm": 0.3725820779800415, |
| "learning_rate": 4.425240691335112e-05, |
| "loss": 0.0012, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.6145936635523774, |
| "grad_norm": 0.000986219383776188, |
| "learning_rate": 4.417990952325716e-05, |
| "loss": 0.0007, |
| "step": 3925 |
| }, |
| { |
| "epoch": 0.6185082728743672, |
| "grad_norm": 0.016280701383948326, |
| "learning_rate": 4.4107412133163205e-05, |
| "loss": 0.0005, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.6224228821963568, |
| "grad_norm": 0.0007005013758316636, |
| "learning_rate": 4.403491474306925e-05, |
| "loss": 0.0008, |
| "step": 3975 |
| }, |
| { |
| "epoch": 0.6263374915183465, |
| "grad_norm": 0.0015142613556236029, |
| "learning_rate": 4.3962417352975296e-05, |
| "loss": 0.0022, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.6302521008403361, |
| "grad_norm": 0.02496866136789322, |
| "learning_rate": 4.388991996288134e-05, |
| "loss": 0.0081, |
| "step": 4025 |
| }, |
| { |
| "epoch": 0.6341667101623258, |
| "grad_norm": 0.10312812030315399, |
| "learning_rate": 4.381742257278738e-05, |
| "loss": 0.0028, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.6380813194843155, |
| "grad_norm": 0.005419147200882435, |
| "learning_rate": 4.374492518269342e-05, |
| "loss": 0.0005, |
| "step": 4075 |
| }, |
| { |
| "epoch": 0.6419959288063052, |
| "grad_norm": 0.0012350809993222356, |
| "learning_rate": 4.3672427792599465e-05, |
| "loss": 0.0005, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.6459105381282948, |
| "grad_norm": 0.0014117741957306862, |
| "learning_rate": 4.3599930402505514e-05, |
| "loss": 0.0004, |
| "step": 4125 |
| }, |
| { |
| "epoch": 0.6498251474502844, |
| "grad_norm": 0.011549504473805428, |
| "learning_rate": 4.3527433012411556e-05, |
| "loss": 0.0108, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.6537397567722741, |
| "grad_norm": 0.0015101751778274775, |
| "learning_rate": 4.34549356223176e-05, |
| "loss": 0.0023, |
| "step": 4175 |
| }, |
| { |
| "epoch": 0.6576543660942638, |
| "grad_norm": 0.831576406955719, |
| "learning_rate": 4.338243823222364e-05, |
| "loss": 0.003, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.6615689754162535, |
| "grad_norm": 0.003971900790929794, |
| "learning_rate": 4.330994084212969e-05, |
| "loss": 0.0009, |
| "step": 4225 |
| }, |
| { |
| "epoch": 0.6654835847382431, |
| "grad_norm": 0.00122584099881351, |
| "learning_rate": 4.323744345203573e-05, |
| "loss": 0.0004, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.6693981940602328, |
| "grad_norm": 1.1975153684616089, |
| "learning_rate": 4.3164946061941774e-05, |
| "loss": 0.008, |
| "step": 4275 |
| }, |
| { |
| "epoch": 0.6733128033822224, |
| "grad_norm": 0.007587050087749958, |
| "learning_rate": 4.309244867184782e-05, |
| "loss": 0.0008, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.6772274127042122, |
| "grad_norm": 1.974413514137268, |
| "learning_rate": 4.301995128175386e-05, |
| "loss": 0.0003, |
| "step": 4325 |
| }, |
| { |
| "epoch": 0.6811420220262018, |
| "grad_norm": 0.0011919812532141805, |
| "learning_rate": 4.29474538916599e-05, |
| "loss": 0.0055, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.6850566313481915, |
| "grad_norm": 0.0037530860863626003, |
| "learning_rate": 4.287495650156594e-05, |
| "loss": 0.0031, |
| "step": 4375 |
| }, |
| { |
| "epoch": 0.6889712406701811, |
| "grad_norm": 0.0055799526162445545, |
| "learning_rate": 4.2802459111471986e-05, |
| "loss": 0.0009, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.6928858499921707, |
| "grad_norm": 0.7918204069137573, |
| "learning_rate": 4.2729961721378035e-05, |
| "loss": 0.0011, |
| "step": 4425 |
| }, |
| { |
| "epoch": 0.6968004593141605, |
| "grad_norm": 0.021195508539676666, |
| "learning_rate": 4.265746433128408e-05, |
| "loss": 0.0014, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.7007150686361501, |
| "grad_norm": 0.0016733302036300302, |
| "learning_rate": 4.258496694119012e-05, |
| "loss": 0.0016, |
| "step": 4475 |
| }, |
| { |
| "epoch": 0.7046296779581398, |
| "grad_norm": 0.0015721771633252501, |
| "learning_rate": 4.251246955109616e-05, |
| "loss": 0.002, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.7085442872801294, |
| "grad_norm": 0.024684101343154907, |
| "learning_rate": 4.2439972161002204e-05, |
| "loss": 0.0021, |
| "step": 4525 |
| }, |
| { |
| "epoch": 0.7124588966021191, |
| "grad_norm": 0.0010000619804486632, |
| "learning_rate": 4.236747477090825e-05, |
| "loss": 0.001, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.7163735059241088, |
| "grad_norm": 0.0010993380565196276, |
| "learning_rate": 4.2294977380814295e-05, |
| "loss": 0.0024, |
| "step": 4575 |
| }, |
| { |
| "epoch": 0.7202881152460985, |
| "grad_norm": 0.01743653602898121, |
| "learning_rate": 4.222247999072034e-05, |
| "loss": 0.001, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.7242027245680881, |
| "grad_norm": 0.0034048547968268394, |
| "learning_rate": 4.214998260062638e-05, |
| "loss": 0.0012, |
| "step": 4625 |
| }, |
| { |
| "epoch": 0.7281173338900778, |
| "grad_norm": 0.006288307718932629, |
| "learning_rate": 4.207748521053242e-05, |
| "loss": 0.0016, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.7320319432120674, |
| "grad_norm": 0.09262362122535706, |
| "learning_rate": 4.200498782043847e-05, |
| "loss": 0.0062, |
| "step": 4675 |
| }, |
| { |
| "epoch": 0.735946552534057, |
| "grad_norm": 0.0012087648501619697, |
| "learning_rate": 4.193249043034451e-05, |
| "loss": 0.0003, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.7398611618560468, |
| "grad_norm": 2.551692247390747, |
| "learning_rate": 4.185999304025055e-05, |
| "loss": 0.0007, |
| "step": 4725 |
| }, |
| { |
| "epoch": 0.7437757711780364, |
| "grad_norm": 0.003155685495585203, |
| "learning_rate": 4.17874956501566e-05, |
| "loss": 0.0035, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.7476903805000261, |
| "grad_norm": 0.0007522006053477526, |
| "learning_rate": 4.171499826006264e-05, |
| "loss": 0.0003, |
| "step": 4775 |
| }, |
| { |
| "epoch": 0.7516049898220157, |
| "grad_norm": 0.1172158420085907, |
| "learning_rate": 4.164250086996868e-05, |
| "loss": 0.0022, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.7555195991440055, |
| "grad_norm": 0.0018555809510871768, |
| "learning_rate": 4.1570003479874724e-05, |
| "loss": 0.0008, |
| "step": 4825 |
| }, |
| { |
| "epoch": 0.7594342084659951, |
| "grad_norm": 0.014069788157939911, |
| "learning_rate": 4.1497506089780766e-05, |
| "loss": 0.0027, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.7633488177879848, |
| "grad_norm": 0.0070347595028579235, |
| "learning_rate": 4.1425008699686815e-05, |
| "loss": 0.0044, |
| "step": 4875 |
| }, |
| { |
| "epoch": 0.7672634271099744, |
| "grad_norm": 0.005139984656125307, |
| "learning_rate": 4.135251130959286e-05, |
| "loss": 0.0013, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.7711780364319641, |
| "grad_norm": 0.03146003186702728, |
| "learning_rate": 4.12800139194989e-05, |
| "loss": 0.0008, |
| "step": 4925 |
| }, |
| { |
| "epoch": 0.7750926457539538, |
| "grad_norm": 0.0008966127061285079, |
| "learning_rate": 4.120751652940494e-05, |
| "loss": 0.0013, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.7790072550759434, |
| "grad_norm": 0.010651414282619953, |
| "learning_rate": 4.1135019139310984e-05, |
| "loss": 0.0007, |
| "step": 4975 |
| }, |
| { |
| "epoch": 0.7829218643979331, |
| "grad_norm": 0.05222166329622269, |
| "learning_rate": 4.106252174921703e-05, |
| "loss": 0.0025, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.7868364737199227, |
| "grad_norm": 0.008781126700341702, |
| "learning_rate": 4.0990024359123075e-05, |
| "loss": 0.0006, |
| "step": 5025 |
| }, |
| { |
| "epoch": 0.7907510830419124, |
| "grad_norm": 0.0023096187505871058, |
| "learning_rate": 4.091752696902912e-05, |
| "loss": 0.0003, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.794665692363902, |
| "grad_norm": 0.000690230808686465, |
| "learning_rate": 4.084502957893516e-05, |
| "loss": 0.001, |
| "step": 5075 |
| }, |
| { |
| "epoch": 0.7985803016858918, |
| "grad_norm": 0.0017941935220733285, |
| "learning_rate": 4.077253218884121e-05, |
| "loss": 0.0018, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.8024949110078814, |
| "grad_norm": 0.001472643343731761, |
| "learning_rate": 4.070003479874725e-05, |
| "loss": 0.0011, |
| "step": 5125 |
| }, |
| { |
| "epoch": 0.8064095203298711, |
| "grad_norm": 0.050277333706617355, |
| "learning_rate": 4.0627537408653286e-05, |
| "loss": 0.0104, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.8103241296518607, |
| "grad_norm": 0.020627155900001526, |
| "learning_rate": 4.055504001855933e-05, |
| "loss": 0.0019, |
| "step": 5175 |
| }, |
| { |
| "epoch": 0.8142387389738505, |
| "grad_norm": 1.6748356819152832, |
| "learning_rate": 4.048254262846538e-05, |
| "loss": 0.0013, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.8181533482958401, |
| "grad_norm": 0.0005242697079665959, |
| "learning_rate": 4.041004523837142e-05, |
| "loss": 0.0003, |
| "step": 5225 |
| }, |
| { |
| "epoch": 0.8220679576178297, |
| "grad_norm": 0.0004012222634628415, |
| "learning_rate": 4.033754784827746e-05, |
| "loss": 0.0011, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.8259825669398194, |
| "grad_norm": 0.0007638943498022854, |
| "learning_rate": 4.0265050458183504e-05, |
| "loss": 0.0008, |
| "step": 5275 |
| }, |
| { |
| "epoch": 0.829897176261809, |
| "grad_norm": 0.000370625639334321, |
| "learning_rate": 4.019255306808955e-05, |
| "loss": 0.0002, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.8338117855837988, |
| "grad_norm": 0.17966459691524506, |
| "learning_rate": 4.0120055677995596e-05, |
| "loss": 0.0061, |
| "step": 5325 |
| }, |
| { |
| "epoch": 0.8377263949057884, |
| "grad_norm": 0.5298845171928406, |
| "learning_rate": 4.004755828790164e-05, |
| "loss": 0.0021, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.8416410042277781, |
| "grad_norm": 0.010731186717748642, |
| "learning_rate": 3.997506089780768e-05, |
| "loss": 0.0012, |
| "step": 5375 |
| }, |
| { |
| "epoch": 0.8455556135497677, |
| "grad_norm": 0.0006224720855243504, |
| "learning_rate": 3.990256350771372e-05, |
| "loss": 0.0014, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.8494702228717574, |
| "grad_norm": 0.00034521459019742906, |
| "learning_rate": 3.983006611761977e-05, |
| "loss": 0.0005, |
| "step": 5425 |
| }, |
| { |
| "epoch": 0.8533848321937471, |
| "grad_norm": 0.07561736553907394, |
| "learning_rate": 3.9757568727525814e-05, |
| "loss": 0.002, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.8572994415157368, |
| "grad_norm": 0.010748780332505703, |
| "learning_rate": 3.9685071337431856e-05, |
| "loss": 0.0025, |
| "step": 5475 |
| }, |
| { |
| "epoch": 0.8612140508377264, |
| "grad_norm": 0.03456795960664749, |
| "learning_rate": 3.96125739473379e-05, |
| "loss": 0.0079, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.865128660159716, |
| "grad_norm": 0.013776997104287148, |
| "learning_rate": 3.954007655724394e-05, |
| "loss": 0.0015, |
| "step": 5525 |
| }, |
| { |
| "epoch": 0.8690432694817057, |
| "grad_norm": 0.013151598162949085, |
| "learning_rate": 3.946757916714999e-05, |
| "loss": 0.0027, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.8729578788036954, |
| "grad_norm": 0.005265055689960718, |
| "learning_rate": 3.9395081777056025e-05, |
| "loss": 0.0045, |
| "step": 5575 |
| }, |
| { |
| "epoch": 0.8768724881256851, |
| "grad_norm": 0.0019183550029993057, |
| "learning_rate": 3.932258438696207e-05, |
| "loss": 0.0005, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.8807870974476747, |
| "grad_norm": 0.42332738637924194, |
| "learning_rate": 3.9250086996868116e-05, |
| "loss": 0.0037, |
| "step": 5625 |
| }, |
| { |
| "epoch": 0.8847017067696644, |
| "grad_norm": 0.00447813980281353, |
| "learning_rate": 3.917758960677416e-05, |
| "loss": 0.0014, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.888616316091654, |
| "grad_norm": 0.0005977645632810891, |
| "learning_rate": 3.91050922166802e-05, |
| "loss": 0.0005, |
| "step": 5675 |
| }, |
| { |
| "epoch": 0.8925309254136438, |
| "grad_norm": 0.9014317989349365, |
| "learning_rate": 3.903259482658624e-05, |
| "loss": 0.0012, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.8964455347356334, |
| "grad_norm": 1.6808857917785645, |
| "learning_rate": 3.8960097436492285e-05, |
| "loss": 0.0033, |
| "step": 5725 |
| }, |
| { |
| "epoch": 0.9003601440576231, |
| "grad_norm": 0.002373639028519392, |
| "learning_rate": 3.8887600046398334e-05, |
| "loss": 0.0136, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.9042747533796127, |
| "grad_norm": 0.0012994492426514626, |
| "learning_rate": 3.8815102656304376e-05, |
| "loss": 0.0001, |
| "step": 5775 |
| }, |
| { |
| "epoch": 0.9081893627016023, |
| "grad_norm": 0.0006246384000405669, |
| "learning_rate": 3.874260526621042e-05, |
| "loss": 0.0001, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.9121039720235921, |
| "grad_norm": 0.0005325423553586006, |
| "learning_rate": 3.867010787611646e-05, |
| "loss": 0.0005, |
| "step": 5825 |
| }, |
| { |
| "epoch": 0.9160185813455817, |
| "grad_norm": 0.0009510382078588009, |
| "learning_rate": 3.859761048602251e-05, |
| "loss": 0.0032, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.9199331906675714, |
| "grad_norm": 0.012179987505078316, |
| "learning_rate": 3.852511309592855e-05, |
| "loss": 0.0015, |
| "step": 5875 |
| }, |
| { |
| "epoch": 0.923847799989561, |
| "grad_norm": 0.0014047386357560754, |
| "learning_rate": 3.8452615705834594e-05, |
| "loss": 0.0006, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.9277624093115507, |
| "grad_norm": 0.13963516056537628, |
| "learning_rate": 3.8380118315740636e-05, |
| "loss": 0.0012, |
| "step": 5925 |
| }, |
| { |
| "epoch": 0.9316770186335404, |
| "grad_norm": 0.7947016954421997, |
| "learning_rate": 3.830762092564668e-05, |
| "loss": 0.0014, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.9355916279555301, |
| "grad_norm": 0.001768257119692862, |
| "learning_rate": 3.823512353555272e-05, |
| "loss": 0.0004, |
| "step": 5975 |
| }, |
| { |
| "epoch": 0.9395062372775197, |
| "grad_norm": 0.0007245225715450943, |
| "learning_rate": 3.816262614545876e-05, |
| "loss": 0.0001, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.9434208465995094, |
| "grad_norm": 0.016255930066108704, |
| "learning_rate": 3.8090128755364805e-05, |
| "loss": 0.0, |
| "step": 6025 |
| }, |
| { |
| "epoch": 0.947335455921499, |
| "grad_norm": 0.00034742074785754085, |
| "learning_rate": 3.801763136527085e-05, |
| "loss": 0.0003, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.9512500652434887, |
| "grad_norm": 0.0013885988155379891, |
| "learning_rate": 3.7945133975176896e-05, |
| "loss": 0.0032, |
| "step": 6075 |
| }, |
| { |
| "epoch": 0.9551646745654784, |
| "grad_norm": 0.8642656207084656, |
| "learning_rate": 3.787263658508294e-05, |
| "loss": 0.0019, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.959079283887468, |
| "grad_norm": 0.002853901358321309, |
| "learning_rate": 3.780013919498898e-05, |
| "loss": 0.0031, |
| "step": 6125 |
| }, |
| { |
| "epoch": 0.9629938932094577, |
| "grad_norm": 0.6826348304748535, |
| "learning_rate": 3.772764180489502e-05, |
| "loss": 0.0029, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.9669085025314473, |
| "grad_norm": 0.01645534299314022, |
| "learning_rate": 3.765514441480107e-05, |
| "loss": 0.0003, |
| "step": 6175 |
| }, |
| { |
| "epoch": 0.9708231118534371, |
| "grad_norm": 0.001097380998544395, |
| "learning_rate": 3.7582647024707114e-05, |
| "loss": 0.0011, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.9747377211754267, |
| "grad_norm": 0.001092984457500279, |
| "learning_rate": 3.7513049530216915e-05, |
| "loss": 0.0006, |
| "step": 6225 |
| }, |
| { |
| "epoch": 0.9786523304974164, |
| "grad_norm": 0.001488927286118269, |
| "learning_rate": 3.744055214012296e-05, |
| "loss": 0.0018, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.982566939819406, |
| "grad_norm": 0.0012959851883351803, |
| "learning_rate": 3.7368054750029e-05, |
| "loss": 0.0011, |
| "step": 6275 |
| }, |
| { |
| "epoch": 0.9864815491413957, |
| "grad_norm": 0.002524161711335182, |
| "learning_rate": 3.729555735993505e-05, |
| "loss": 0.0039, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.9903961584633854, |
| "grad_norm": 0.0023267928045243025, |
| "learning_rate": 3.722305996984109e-05, |
| "loss": 0.0011, |
| "step": 6325 |
| }, |
| { |
| "epoch": 0.994310767785375, |
| "grad_norm": 0.0007459365879185498, |
| "learning_rate": 3.715056257974713e-05, |
| "loss": 0.0003, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.9982253771073647, |
| "grad_norm": 0.004343962296843529, |
| "learning_rate": 3.7078065189653175e-05, |
| "loss": 0.0018, |
| "step": 6375 |
| }, |
| { |
| "epoch": 0.9999478052090401, |
| "eval_accuracy": 0.9997799951169648, |
| "eval_f1": 0.9997142385928128, |
| "eval_loss": 0.0011581754079088569, |
| "eval_precision": 0.9996766935217872, |
| "eval_recall": 0.9997517864841209, |
| "eval_runtime": 62.9623, |
| "eval_samples_per_second": 608.586, |
| "eval_steps_per_second": 38.039, |
| "step": 6386 |
| }, |
| { |
| "epoch": 1.0021399864293543, |
| "grad_norm": 0.007820851169526577, |
| "learning_rate": 3.700556779955922e-05, |
| "loss": 0.0006, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.006054595751344, |
| "grad_norm": 0.00049219821812585, |
| "learning_rate": 3.693307040946526e-05, |
| "loss": 0.0005, |
| "step": 6425 |
| }, |
| { |
| "epoch": 1.0099692050733338, |
| "grad_norm": 0.0008093062788248062, |
| "learning_rate": 3.68605730193713e-05, |
| "loss": 0.0008, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.0138838143953233, |
| "grad_norm": 0.3265334963798523, |
| "learning_rate": 3.6788075629277344e-05, |
| "loss": 0.0001, |
| "step": 6475 |
| }, |
| { |
| "epoch": 1.017798423717313, |
| "grad_norm": 0.015381108038127422, |
| "learning_rate": 3.671557823918339e-05, |
| "loss": 0.0, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.0217130330393027, |
| "grad_norm": 0.00040746491868048906, |
| "learning_rate": 3.6643080849089435e-05, |
| "loss": 0.0001, |
| "step": 6525 |
| }, |
| { |
| "epoch": 1.0256276423612924, |
| "grad_norm": 2.2102978229522705, |
| "learning_rate": 3.657058345899548e-05, |
| "loss": 0.0022, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.029542251683282, |
| "grad_norm": 0.0007900640484876931, |
| "learning_rate": 3.649808606890152e-05, |
| "loss": 0.0034, |
| "step": 6575 |
| }, |
| { |
| "epoch": 1.0334568610052717, |
| "grad_norm": 0.07358774542808533, |
| "learning_rate": 3.642558867880756e-05, |
| "loss": 0.0004, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.0373714703272614, |
| "grad_norm": 0.0004924671957269311, |
| "learning_rate": 3.635309128871361e-05, |
| "loss": 0.0008, |
| "step": 6625 |
| }, |
| { |
| "epoch": 1.041286079649251, |
| "grad_norm": 0.0007265584426932037, |
| "learning_rate": 3.628059389861965e-05, |
| "loss": 0.0005, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.0452006889712406, |
| "grad_norm": 0.006537444423884153, |
| "learning_rate": 3.6208096508525695e-05, |
| "loss": 0.0031, |
| "step": 6675 |
| }, |
| { |
| "epoch": 1.0491152982932304, |
| "grad_norm": 0.02974896878004074, |
| "learning_rate": 3.613559911843174e-05, |
| "loss": 0.0003, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.05302990761522, |
| "grad_norm": 0.0008949014008976519, |
| "learning_rate": 3.6063101728337786e-05, |
| "loss": 0.0001, |
| "step": 6725 |
| }, |
| { |
| "epoch": 1.0569445169372096, |
| "grad_norm": 5.2669758796691895, |
| "learning_rate": 3.599060433824383e-05, |
| "loss": 0.0038, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.0608591262591993, |
| "grad_norm": 0.0008383537060581148, |
| "learning_rate": 3.591810694814987e-05, |
| "loss": 0.0014, |
| "step": 6775 |
| }, |
| { |
| "epoch": 1.064773735581189, |
| "grad_norm": 0.03583945333957672, |
| "learning_rate": 3.5845609558055906e-05, |
| "loss": 0.002, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.0686883449031788, |
| "grad_norm": 0.0004048035480082035, |
| "learning_rate": 3.5773112167961955e-05, |
| "loss": 0.0002, |
| "step": 6825 |
| }, |
| { |
| "epoch": 1.0726029542251683, |
| "grad_norm": 0.0006589085678569973, |
| "learning_rate": 3.5700614777868e-05, |
| "loss": 0.0, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.076517563547158, |
| "grad_norm": 0.01224551722407341, |
| "learning_rate": 3.562811738777404e-05, |
| "loss": 0.0028, |
| "step": 6875 |
| }, |
| { |
| "epoch": 1.0804321728691477, |
| "grad_norm": 1.1463470458984375, |
| "learning_rate": 3.555561999768008e-05, |
| "loss": 0.0029, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.0843467821911372, |
| "grad_norm": 0.002099130768328905, |
| "learning_rate": 3.548312260758613e-05, |
| "loss": 0.0016, |
| "step": 6925 |
| }, |
| { |
| "epoch": 1.088261391513127, |
| "grad_norm": 0.4577861428260803, |
| "learning_rate": 3.541062521749217e-05, |
| "loss": 0.0009, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.0921760008351167, |
| "grad_norm": 0.08768904209136963, |
| "learning_rate": 3.5338127827398216e-05, |
| "loss": 0.0017, |
| "step": 6975 |
| }, |
| { |
| "epoch": 1.0960906101571064, |
| "grad_norm": 0.002661600476130843, |
| "learning_rate": 3.526563043730426e-05, |
| "loss": 0.0002, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.100005219479096, |
| "grad_norm": 0.0006299412925727665, |
| "learning_rate": 3.51931330472103e-05, |
| "loss": 0.0, |
| "step": 7025 |
| }, |
| { |
| "epoch": 1.1039198288010856, |
| "grad_norm": 0.1650131493806839, |
| "learning_rate": 3.512063565711635e-05, |
| "loss": 0.0002, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.1078344381230754, |
| "grad_norm": 0.37610143423080444, |
| "learning_rate": 3.504813826702239e-05, |
| "loss": 0.0009, |
| "step": 7075 |
| }, |
| { |
| "epoch": 1.111749047445065, |
| "grad_norm": 0.029113056138157845, |
| "learning_rate": 3.4975640876928433e-05, |
| "loss": 0.0012, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.1156636567670546, |
| "grad_norm": 0.004116399679332972, |
| "learning_rate": 3.4903143486834476e-05, |
| "loss": 0.0023, |
| "step": 7125 |
| }, |
| { |
| "epoch": 1.1195782660890443, |
| "grad_norm": 0.015721509233117104, |
| "learning_rate": 3.483064609674052e-05, |
| "loss": 0.0097, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.123492875411034, |
| "grad_norm": 0.01373900007456541, |
| "learning_rate": 3.475814870664657e-05, |
| "loss": 0.0064, |
| "step": 7175 |
| }, |
| { |
| "epoch": 1.1274074847330238, |
| "grad_norm": 0.0016198121011257172, |
| "learning_rate": 3.46856513165526e-05, |
| "loss": 0.0008, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.1313220940550133, |
| "grad_norm": 0.0009071112144738436, |
| "learning_rate": 3.4613153926458645e-05, |
| "loss": 0.0001, |
| "step": 7225 |
| }, |
| { |
| "epoch": 1.135236703377003, |
| "grad_norm": 0.0006360800471156836, |
| "learning_rate": 3.4540656536364694e-05, |
| "loss": 0.0007, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.1391513126989927, |
| "grad_norm": 0.0013603122206404805, |
| "learning_rate": 3.4468159146270736e-05, |
| "loss": 0.0003, |
| "step": 7275 |
| }, |
| { |
| "epoch": 1.1430659220209822, |
| "grad_norm": 0.2531895339488983, |
| "learning_rate": 3.439566175617678e-05, |
| "loss": 0.0019, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.146980531342972, |
| "grad_norm": 0.08225157856941223, |
| "learning_rate": 3.432316436608282e-05, |
| "loss": 0.001, |
| "step": 7325 |
| }, |
| { |
| "epoch": 1.1508951406649617, |
| "grad_norm": 0.0010974809993058443, |
| "learning_rate": 3.425066697598886e-05, |
| "loss": 0.0009, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.1548097499869514, |
| "grad_norm": 0.007975243031978607, |
| "learning_rate": 3.417816958589491e-05, |
| "loss": 0.0001, |
| "step": 7375 |
| }, |
| { |
| "epoch": 1.158724359308941, |
| "grad_norm": 0.0005916508380323648, |
| "learning_rate": 3.4105672195800954e-05, |
| "loss": 0.0001, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.1626389686309306, |
| "grad_norm": 0.003101816400885582, |
| "learning_rate": 3.4033174805706996e-05, |
| "loss": 0.0, |
| "step": 7425 |
| }, |
| { |
| "epoch": 1.1665535779529204, |
| "grad_norm": 0.00036523715243674815, |
| "learning_rate": 3.396067741561304e-05, |
| "loss": 0.0001, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.1704681872749099, |
| "grad_norm": 0.02329937182366848, |
| "learning_rate": 3.388818002551909e-05, |
| "loss": 0.0013, |
| "step": 7475 |
| }, |
| { |
| "epoch": 1.1743827965968996, |
| "grad_norm": 0.5784549117088318, |
| "learning_rate": 3.381568263542513e-05, |
| "loss": 0.0038, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.1782974059188893, |
| "grad_norm": 0.0012637526961043477, |
| "learning_rate": 3.374318524533117e-05, |
| "loss": 0.0015, |
| "step": 7525 |
| }, |
| { |
| "epoch": 1.182212015240879, |
| "grad_norm": 0.019489184021949768, |
| "learning_rate": 3.3670687855237214e-05, |
| "loss": 0.0002, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.1861266245628685, |
| "grad_norm": 0.0006683494430035353, |
| "learning_rate": 3.3598190465143256e-05, |
| "loss": 0.0008, |
| "step": 7575 |
| }, |
| { |
| "epoch": 1.1900412338848583, |
| "grad_norm": 0.027937965467572212, |
| "learning_rate": 3.3525693075049305e-05, |
| "loss": 0.0008, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.193955843206848, |
| "grad_norm": 0.00035219776327721775, |
| "learning_rate": 3.345319568495534e-05, |
| "loss": 0.0002, |
| "step": 7625 |
| }, |
| { |
| "epoch": 1.1978704525288375, |
| "grad_norm": 0.0009345108992420137, |
| "learning_rate": 3.338069829486138e-05, |
| "loss": 0.0032, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.2017850618508272, |
| "grad_norm": 0.05174746736884117, |
| "learning_rate": 3.3308200904767425e-05, |
| "loss": 0.0028, |
| "step": 7675 |
| }, |
| { |
| "epoch": 1.205699671172817, |
| "grad_norm": 0.1187373697757721, |
| "learning_rate": 3.3235703514673474e-05, |
| "loss": 0.0006, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.2096142804948067, |
| "grad_norm": 0.0881095826625824, |
| "learning_rate": 3.3163206124579516e-05, |
| "loss": 0.0018, |
| "step": 7725 |
| }, |
| { |
| "epoch": 1.2135288898167964, |
| "grad_norm": 1.4924030303955078, |
| "learning_rate": 3.309070873448556e-05, |
| "loss": 0.0006, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.217443499138786, |
| "grad_norm": 0.10360655933618546, |
| "learning_rate": 3.30182113443916e-05, |
| "loss": 0.0009, |
| "step": 7775 |
| }, |
| { |
| "epoch": 1.2213581084607756, |
| "grad_norm": 0.0007201316766440868, |
| "learning_rate": 3.294571395429765e-05, |
| "loss": 0.0003, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.2252727177827654, |
| "grad_norm": 0.001118672196753323, |
| "learning_rate": 3.287321656420369e-05, |
| "loss": 0.0007, |
| "step": 7825 |
| }, |
| { |
| "epoch": 1.2291873271047549, |
| "grad_norm": 0.008757601492106915, |
| "learning_rate": 3.2800719174109734e-05, |
| "loss": 0.0038, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.2331019364267446, |
| "grad_norm": 1.405776023864746, |
| "learning_rate": 3.2728221784015777e-05, |
| "loss": 0.0021, |
| "step": 7875 |
| }, |
| { |
| "epoch": 1.2370165457487343, |
| "grad_norm": 0.06606610119342804, |
| "learning_rate": 3.265572439392182e-05, |
| "loss": 0.0001, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.240931155070724, |
| "grad_norm": 0.00046704983105883, |
| "learning_rate": 3.258322700382787e-05, |
| "loss": 0.0001, |
| "step": 7925 |
| }, |
| { |
| "epoch": 1.2448457643927135, |
| "grad_norm": 0.0005030676256865263, |
| "learning_rate": 3.251072961373391e-05, |
| "loss": 0.0009, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.2487603737147033, |
| "grad_norm": 0.004642080515623093, |
| "learning_rate": 3.243823222363995e-05, |
| "loss": 0.0005, |
| "step": 7975 |
| }, |
| { |
| "epoch": 1.252674983036693, |
| "grad_norm": 0.00609723711386323, |
| "learning_rate": 3.2365734833545994e-05, |
| "loss": 0.0018, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.2565895923586825, |
| "grad_norm": 0.003095820778980851, |
| "learning_rate": 3.229323744345204e-05, |
| "loss": 0.0003, |
| "step": 8025 |
| }, |
| { |
| "epoch": 1.2605042016806722, |
| "grad_norm": 0.08622787892818451, |
| "learning_rate": 3.222074005335808e-05, |
| "loss": 0.0001, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.264418811002662, |
| "grad_norm": 0.022611690685153008, |
| "learning_rate": 3.214824266326412e-05, |
| "loss": 0.0009, |
| "step": 8075 |
| }, |
| { |
| "epoch": 1.2683334203246517, |
| "grad_norm": 0.0005983790615573525, |
| "learning_rate": 3.2075745273170163e-05, |
| "loss": 0.0012, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.2722480296466412, |
| "grad_norm": 0.0008185420883819461, |
| "learning_rate": 3.200324788307621e-05, |
| "loss": 0.0008, |
| "step": 8125 |
| }, |
| { |
| "epoch": 1.276162638968631, |
| "grad_norm": 0.0019505377858877182, |
| "learning_rate": 3.1930750492982255e-05, |
| "loss": 0.0003, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.2800772482906206, |
| "grad_norm": 0.005252277944236994, |
| "learning_rate": 3.18582531028883e-05, |
| "loss": 0.0047, |
| "step": 8175 |
| }, |
| { |
| "epoch": 1.2839918576126101, |
| "grad_norm": 0.0010310772340744734, |
| "learning_rate": 3.178575571279434e-05, |
| "loss": 0.0003, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.2879064669345999, |
| "grad_norm": 0.002415160648524761, |
| "learning_rate": 3.171325832270038e-05, |
| "loss": 0.004, |
| "step": 8225 |
| }, |
| { |
| "epoch": 1.2918210762565896, |
| "grad_norm": 0.0005815212498418987, |
| "learning_rate": 3.164076093260643e-05, |
| "loss": 0.0018, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.2957356855785793, |
| "grad_norm": 0.0003597593167796731, |
| "learning_rate": 3.156826354251247e-05, |
| "loss": 0.0003, |
| "step": 8275 |
| }, |
| { |
| "epoch": 1.299650294900569, |
| "grad_norm": 0.004648554138839245, |
| "learning_rate": 3.1495766152418515e-05, |
| "loss": 0.005, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.3035649042225586, |
| "grad_norm": 0.0015794150531291962, |
| "learning_rate": 3.142326876232456e-05, |
| "loss": 0.0006, |
| "step": 8325 |
| }, |
| { |
| "epoch": 1.3074795135445483, |
| "grad_norm": 0.000883117550984025, |
| "learning_rate": 3.1350771372230606e-05, |
| "loss": 0.0022, |
| "step": 8350 |
| }, |
| { |
| "epoch": 1.3113941228665378, |
| "grad_norm": 0.0004549395525828004, |
| "learning_rate": 3.127827398213665e-05, |
| "loss": 0.0, |
| "step": 8375 |
| }, |
| { |
| "epoch": 1.3153087321885275, |
| "grad_norm": 0.00043308446765877306, |
| "learning_rate": 3.120577659204269e-05, |
| "loss": 0.0017, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.3192233415105172, |
| "grad_norm": 0.000435361813288182, |
| "learning_rate": 3.113327920194873e-05, |
| "loss": 0.0004, |
| "step": 8425 |
| }, |
| { |
| "epoch": 1.323137950832507, |
| "grad_norm": 0.000388374668546021, |
| "learning_rate": 3.1060781811854775e-05, |
| "loss": 0.0001, |
| "step": 8450 |
| }, |
| { |
| "epoch": 1.3270525601544967, |
| "grad_norm": 0.0006863649468868971, |
| "learning_rate": 3.098828442176082e-05, |
| "loss": 0.0004, |
| "step": 8475 |
| }, |
| { |
| "epoch": 1.3309671694764862, |
| "grad_norm": 0.00127976608928293, |
| "learning_rate": 3.091578703166686e-05, |
| "loss": 0.0018, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.334881778798476, |
| "grad_norm": 0.036596138030290604, |
| "learning_rate": 3.08432896415729e-05, |
| "loss": 0.0013, |
| "step": 8525 |
| }, |
| { |
| "epoch": 1.3387963881204656, |
| "grad_norm": 0.002909492002800107, |
| "learning_rate": 3.077369214708271e-05, |
| "loss": 0.0003, |
| "step": 8550 |
| }, |
| { |
| "epoch": 1.3427109974424551, |
| "grad_norm": 0.0010424726642668247, |
| "learning_rate": 3.070119475698875e-05, |
| "loss": 0.0024, |
| "step": 8575 |
| }, |
| { |
| "epoch": 1.3466256067644449, |
| "grad_norm": 0.0005914925131946802, |
| "learning_rate": 3.062869736689479e-05, |
| "loss": 0.0004, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.3505402160864346, |
| "grad_norm": 0.00037522296770475805, |
| "learning_rate": 3.0556199976800835e-05, |
| "loss": 0.0001, |
| "step": 8625 |
| }, |
| { |
| "epoch": 1.3544548254084243, |
| "grad_norm": 0.00039554465911351144, |
| "learning_rate": 3.048370258670688e-05, |
| "loss": 0.0003, |
| "step": 8650 |
| }, |
| { |
| "epoch": 1.3583694347304138, |
| "grad_norm": 0.0040852464735507965, |
| "learning_rate": 3.0411205196612923e-05, |
| "loss": 0.0004, |
| "step": 8675 |
| }, |
| { |
| "epoch": 1.3622840440524036, |
| "grad_norm": 0.006642700172960758, |
| "learning_rate": 3.033870780651897e-05, |
| "loss": 0.0014, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.3661986533743933, |
| "grad_norm": 0.003900151466950774, |
| "learning_rate": 3.026621041642501e-05, |
| "loss": 0.005, |
| "step": 8725 |
| }, |
| { |
| "epoch": 1.3701132626963828, |
| "grad_norm": 0.0015803135465830564, |
| "learning_rate": 3.0193713026331057e-05, |
| "loss": 0.0005, |
| "step": 8750 |
| }, |
| { |
| "epoch": 1.3740278720183725, |
| "grad_norm": 0.010884587652981281, |
| "learning_rate": 3.01212156362371e-05, |
| "loss": 0.0009, |
| "step": 8775 |
| }, |
| { |
| "epoch": 1.3779424813403622, |
| "grad_norm": 0.0010327239288017154, |
| "learning_rate": 3.004871824614314e-05, |
| "loss": 0.0002, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.381857090662352, |
| "grad_norm": 0.0019380106823518872, |
| "learning_rate": 2.9976220856049187e-05, |
| "loss": 0.0075, |
| "step": 8825 |
| }, |
| { |
| "epoch": 1.3857716999843417, |
| "grad_norm": 0.0012182651553303003, |
| "learning_rate": 2.9903723465955226e-05, |
| "loss": 0.0053, |
| "step": 8850 |
| }, |
| { |
| "epoch": 1.3896863093063312, |
| "grad_norm": 0.0017849428113549948, |
| "learning_rate": 2.9831226075861268e-05, |
| "loss": 0.0006, |
| "step": 8875 |
| }, |
| { |
| "epoch": 1.393600918628321, |
| "grad_norm": 0.01608388125896454, |
| "learning_rate": 2.9758728685767314e-05, |
| "loss": 0.0011, |
| "step": 8900 |
| }, |
| { |
| "epoch": 1.3975155279503104, |
| "grad_norm": 0.0005828512366861105, |
| "learning_rate": 2.9686231295673356e-05, |
| "loss": 0.0022, |
| "step": 8925 |
| }, |
| { |
| "epoch": 1.4014301372723001, |
| "grad_norm": 0.0004743439785670489, |
| "learning_rate": 2.9613733905579398e-05, |
| "loss": 0.0006, |
| "step": 8950 |
| }, |
| { |
| "epoch": 1.4053447465942899, |
| "grad_norm": 0.0005540683632716537, |
| "learning_rate": 2.9541236515485444e-05, |
| "loss": 0.0003, |
| "step": 8975 |
| }, |
| { |
| "epoch": 1.4092593559162796, |
| "grad_norm": 0.0015846255701035261, |
| "learning_rate": 2.9468739125391486e-05, |
| "loss": 0.003, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.4131739652382693, |
| "grad_norm": 0.0018151472322642803, |
| "learning_rate": 2.939624173529753e-05, |
| "loss": 0.0013, |
| "step": 9025 |
| }, |
| { |
| "epoch": 1.4170885745602588, |
| "grad_norm": 0.019647782668471336, |
| "learning_rate": 2.9323744345203574e-05, |
| "loss": 0.0005, |
| "step": 9050 |
| }, |
| { |
| "epoch": 1.4210031838822486, |
| "grad_norm": 0.0019365083426237106, |
| "learning_rate": 2.925124695510962e-05, |
| "loss": 0.0078, |
| "step": 9075 |
| }, |
| { |
| "epoch": 1.4249177932042383, |
| "grad_norm": 0.018348557874560356, |
| "learning_rate": 2.917874956501566e-05, |
| "loss": 0.0007, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.4288324025262278, |
| "grad_norm": 0.0018460671417415142, |
| "learning_rate": 2.9106252174921704e-05, |
| "loss": 0.0004, |
| "step": 9125 |
| }, |
| { |
| "epoch": 1.4327470118482175, |
| "grad_norm": 0.014430728740990162, |
| "learning_rate": 2.903375478482775e-05, |
| "loss": 0.0065, |
| "step": 9150 |
| }, |
| { |
| "epoch": 1.4366616211702072, |
| "grad_norm": 0.004876923281699419, |
| "learning_rate": 2.896125739473379e-05, |
| "loss": 0.0005, |
| "step": 9175 |
| }, |
| { |
| "epoch": 1.440576230492197, |
| "grad_norm": 0.012378478422760963, |
| "learning_rate": 2.8888760004639837e-05, |
| "loss": 0.0011, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.4444908398141865, |
| "grad_norm": 0.0017155319219455123, |
| "learning_rate": 2.881626261454588e-05, |
| "loss": 0.0005, |
| "step": 9225 |
| }, |
| { |
| "epoch": 1.4484054491361762, |
| "grad_norm": 0.0008338566403836012, |
| "learning_rate": 2.874376522445192e-05, |
| "loss": 0.0002, |
| "step": 9250 |
| }, |
| { |
| "epoch": 1.452320058458166, |
| "grad_norm": 0.18289905786514282, |
| "learning_rate": 2.867126783435796e-05, |
| "loss": 0.0012, |
| "step": 9275 |
| }, |
| { |
| "epoch": 1.4562346677801554, |
| "grad_norm": 0.0008503763237968087, |
| "learning_rate": 2.8598770444264006e-05, |
| "loss": 0.0005, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.4601492771021451, |
| "grad_norm": 0.0007721242727711797, |
| "learning_rate": 2.852627305417005e-05, |
| "loss": 0.0001, |
| "step": 9325 |
| }, |
| { |
| "epoch": 1.4640638864241349, |
| "grad_norm": 0.006053832825273275, |
| "learning_rate": 2.8453775664076094e-05, |
| "loss": 0.0004, |
| "step": 9350 |
| }, |
| { |
| "epoch": 1.4679784957461246, |
| "grad_norm": 0.002682841382920742, |
| "learning_rate": 2.8381278273982136e-05, |
| "loss": 0.0011, |
| "step": 9375 |
| }, |
| { |
| "epoch": 1.4718931050681143, |
| "grad_norm": 0.0006761788972653449, |
| "learning_rate": 2.8308780883888182e-05, |
| "loss": 0.0006, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.4758077143901038, |
| "grad_norm": 0.0006122990744188428, |
| "learning_rate": 2.8236283493794224e-05, |
| "loss": 0.0003, |
| "step": 9425 |
| }, |
| { |
| "epoch": 1.4797223237120936, |
| "grad_norm": 0.0022469067480415106, |
| "learning_rate": 2.816378610370027e-05, |
| "loss": 0.0001, |
| "step": 9450 |
| }, |
| { |
| "epoch": 1.483636933034083, |
| "grad_norm": 0.005789736285805702, |
| "learning_rate": 2.8091288713606312e-05, |
| "loss": 0.001, |
| "step": 9475 |
| }, |
| { |
| "epoch": 1.4875515423560728, |
| "grad_norm": 0.0005803314852528274, |
| "learning_rate": 2.8018791323512354e-05, |
| "loss": 0.0001, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.4914661516780625, |
| "grad_norm": 0.00044589489698410034, |
| "learning_rate": 2.79462939334184e-05, |
| "loss": 0.0001, |
| "step": 9525 |
| }, |
| { |
| "epoch": 1.4953807610000522, |
| "grad_norm": 0.00034716431400738657, |
| "learning_rate": 2.7873796543324442e-05, |
| "loss": 0.0004, |
| "step": 9550 |
| }, |
| { |
| "epoch": 1.499295370322042, |
| "grad_norm": 0.034700002521276474, |
| "learning_rate": 2.7801299153230488e-05, |
| "loss": 0.0003, |
| "step": 9575 |
| }, |
| { |
| "epoch": 1.5032099796440317, |
| "grad_norm": 0.00039778611971996725, |
| "learning_rate": 2.772880176313653e-05, |
| "loss": 0.0001, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.5071245889660212, |
| "grad_norm": 0.0003559018950909376, |
| "learning_rate": 2.7656304373042576e-05, |
| "loss": 0.0, |
| "step": 9625 |
| }, |
| { |
| "epoch": 1.5110391982880107, |
| "grad_norm": 0.7171289920806885, |
| "learning_rate": 2.7583806982948618e-05, |
| "loss": 0.001, |
| "step": 9650 |
| }, |
| { |
| "epoch": 1.5149538076100004, |
| "grad_norm": 0.0009173134458251297, |
| "learning_rate": 2.7511309592854657e-05, |
| "loss": 0.0002, |
| "step": 9675 |
| }, |
| { |
| "epoch": 1.5188684169319902, |
| "grad_norm": 0.6568087935447693, |
| "learning_rate": 2.74388122027607e-05, |
| "loss": 0.0032, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.5227830262539799, |
| "grad_norm": 0.03286755084991455, |
| "learning_rate": 2.7366314812666745e-05, |
| "loss": 0.0005, |
| "step": 9725 |
| }, |
| { |
| "epoch": 1.5266976355759696, |
| "grad_norm": 0.0004193273780401796, |
| "learning_rate": 2.7293817422572787e-05, |
| "loss": 0.0004, |
| "step": 9750 |
| }, |
| { |
| "epoch": 1.5306122448979593, |
| "grad_norm": 2.4084434509277344, |
| "learning_rate": 2.7221320032478832e-05, |
| "loss": 0.0061, |
| "step": 9775 |
| }, |
| { |
| "epoch": 1.5345268542199488, |
| "grad_norm": 0.020185716450214386, |
| "learning_rate": 2.7148822642384875e-05, |
| "loss": 0.0068, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.5384414635419386, |
| "grad_norm": 0.6322495937347412, |
| "learning_rate": 2.7076325252290917e-05, |
| "loss": 0.0015, |
| "step": 9825 |
| }, |
| { |
| "epoch": 1.542356072863928, |
| "grad_norm": 0.0004228654725011438, |
| "learning_rate": 2.7003827862196962e-05, |
| "loss": 0.0009, |
| "step": 9850 |
| }, |
| { |
| "epoch": 1.5462706821859178, |
| "grad_norm": 0.0012805104488506913, |
| "learning_rate": 2.6931330472103005e-05, |
| "loss": 0.0002, |
| "step": 9875 |
| }, |
| { |
| "epoch": 1.5501852915079075, |
| "grad_norm": 0.0005116848042234778, |
| "learning_rate": 2.685883308200905e-05, |
| "loss": 0.0006, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.5540999008298972, |
| "grad_norm": 0.8417395353317261, |
| "learning_rate": 2.6786335691915093e-05, |
| "loss": 0.0017, |
| "step": 9925 |
| }, |
| { |
| "epoch": 1.558014510151887, |
| "grad_norm": 0.0006132688722573221, |
| "learning_rate": 2.6713838301821138e-05, |
| "loss": 0.0002, |
| "step": 9950 |
| }, |
| { |
| "epoch": 1.5619291194738765, |
| "grad_norm": 0.001284563448280096, |
| "learning_rate": 2.664134091172718e-05, |
| "loss": 0.0001, |
| "step": 9975 |
| }, |
| { |
| "epoch": 1.5658437287958662, |
| "grad_norm": 0.002453350927680731, |
| "learning_rate": 2.6568843521633226e-05, |
| "loss": 0.0001, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.5697583381178557, |
| "grad_norm": 0.002474565990269184, |
| "learning_rate": 2.6496346131539268e-05, |
| "loss": 0.0, |
| "step": 10025 |
| }, |
| { |
| "epoch": 1.5736729474398454, |
| "grad_norm": 0.0012147346278652549, |
| "learning_rate": 2.642384874144531e-05, |
| "loss": 0.0031, |
| "step": 10050 |
| }, |
| { |
| "epoch": 1.5775875567618352, |
| "grad_norm": 0.0009614901500754058, |
| "learning_rate": 2.635135135135135e-05, |
| "loss": 0.0009, |
| "step": 10075 |
| }, |
| { |
| "epoch": 1.5815021660838249, |
| "grad_norm": 0.00043524886132217944, |
| "learning_rate": 2.6278853961257395e-05, |
| "loss": 0.0001, |
| "step": 10100 |
| }, |
| { |
| "epoch": 1.5854167754058146, |
| "grad_norm": 0.0005262857885099947, |
| "learning_rate": 2.6206356571163437e-05, |
| "loss": 0.0, |
| "step": 10125 |
| }, |
| { |
| "epoch": 1.5893313847278043, |
| "grad_norm": 0.00038553698686882854, |
| "learning_rate": 2.613385918106948e-05, |
| "loss": 0.0013, |
| "step": 10150 |
| }, |
| { |
| "epoch": 1.5932459940497938, |
| "grad_norm": 0.0006603036308661103, |
| "learning_rate": 2.6061361790975525e-05, |
| "loss": 0.0001, |
| "step": 10175 |
| }, |
| { |
| "epoch": 1.5971606033717833, |
| "grad_norm": 0.0011721713235601783, |
| "learning_rate": 2.5988864400881567e-05, |
| "loss": 0.0, |
| "step": 10200 |
| }, |
| { |
| "epoch": 1.601075212693773, |
| "grad_norm": 0.00034801868605427444, |
| "learning_rate": 2.5916367010787613e-05, |
| "loss": 0.0003, |
| "step": 10225 |
| }, |
| { |
| "epoch": 1.6049898220157628, |
| "grad_norm": 0.00029766836087219417, |
| "learning_rate": 2.5843869620693655e-05, |
| "loss": 0.0005, |
| "step": 10250 |
| }, |
| { |
| "epoch": 1.6089044313377525, |
| "grad_norm": 0.8273627161979675, |
| "learning_rate": 2.57713722305997e-05, |
| "loss": 0.004, |
| "step": 10275 |
| }, |
| { |
| "epoch": 1.6128190406597422, |
| "grad_norm": 0.0023189974017441273, |
| "learning_rate": 2.5698874840505743e-05, |
| "loss": 0.0005, |
| "step": 10300 |
| }, |
| { |
| "epoch": 1.616733649981732, |
| "grad_norm": 0.001266616047360003, |
| "learning_rate": 2.562637745041179e-05, |
| "loss": 0.0006, |
| "step": 10325 |
| }, |
| { |
| "epoch": 1.6206482593037215, |
| "grad_norm": 0.0006485527264885604, |
| "learning_rate": 2.555388006031783e-05, |
| "loss": 0.0001, |
| "step": 10350 |
| }, |
| { |
| "epoch": 1.6245628686257112, |
| "grad_norm": 0.01249407883733511, |
| "learning_rate": 2.5481382670223873e-05, |
| "loss": 0.0047, |
| "step": 10375 |
| }, |
| { |
| "epoch": 1.6284774779477007, |
| "grad_norm": 0.0016884652432054281, |
| "learning_rate": 2.540888528012992e-05, |
| "loss": 0.0008, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.6323920872696904, |
| "grad_norm": 0.0009969666134566069, |
| "learning_rate": 2.533638789003596e-05, |
| "loss": 0.0001, |
| "step": 10425 |
| }, |
| { |
| "epoch": 1.6363066965916802, |
| "grad_norm": 0.0008430654415860772, |
| "learning_rate": 2.5263890499942007e-05, |
| "loss": 0.0004, |
| "step": 10450 |
| }, |
| { |
| "epoch": 1.6402213059136699, |
| "grad_norm": 0.0007658881950192153, |
| "learning_rate": 2.519139310984805e-05, |
| "loss": 0.0001, |
| "step": 10475 |
| }, |
| { |
| "epoch": 1.6441359152356596, |
| "grad_norm": 0.0007439135224558413, |
| "learning_rate": 2.5118895719754088e-05, |
| "loss": 0.0, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.648050524557649, |
| "grad_norm": 0.0005683203344233334, |
| "learning_rate": 2.504639832966013e-05, |
| "loss": 0.0001, |
| "step": 10525 |
| }, |
| { |
| "epoch": 1.6519651338796388, |
| "grad_norm": 0.00042879345710389316, |
| "learning_rate": 2.497390093956618e-05, |
| "loss": 0.0, |
| "step": 10550 |
| }, |
| { |
| "epoch": 1.6558797432016283, |
| "grad_norm": 0.0004082492378074676, |
| "learning_rate": 2.490140354947222e-05, |
| "loss": 0.0001, |
| "step": 10575 |
| }, |
| { |
| "epoch": 1.659794352523618, |
| "grad_norm": 0.002024848246946931, |
| "learning_rate": 2.4828906159378263e-05, |
| "loss": 0.0006, |
| "step": 10600 |
| }, |
| { |
| "epoch": 1.6637089618456078, |
| "grad_norm": 0.3372742533683777, |
| "learning_rate": 2.4756408769284306e-05, |
| "loss": 0.0001, |
| "step": 10625 |
| }, |
| { |
| "epoch": 1.6676235711675975, |
| "grad_norm": 0.005234843585640192, |
| "learning_rate": 2.468391137919035e-05, |
| "loss": 0.0, |
| "step": 10650 |
| }, |
| { |
| "epoch": 1.6715381804895872, |
| "grad_norm": 0.0004937741323374212, |
| "learning_rate": 2.4611413989096393e-05, |
| "loss": 0.0005, |
| "step": 10675 |
| }, |
| { |
| "epoch": 1.675452789811577, |
| "grad_norm": 0.0007821121835149825, |
| "learning_rate": 2.4538916599002436e-05, |
| "loss": 0.0013, |
| "step": 10700 |
| }, |
| { |
| "epoch": 1.6793673991335665, |
| "grad_norm": 0.0010803567711263895, |
| "learning_rate": 2.446641920890848e-05, |
| "loss": 0.0004, |
| "step": 10725 |
| }, |
| { |
| "epoch": 1.683282008455556, |
| "grad_norm": 0.0005569527274928987, |
| "learning_rate": 2.4393921818814523e-05, |
| "loss": 0.0002, |
| "step": 10750 |
| }, |
| { |
| "epoch": 1.6871966177775457, |
| "grad_norm": 0.005404625087976456, |
| "learning_rate": 2.432142442872057e-05, |
| "loss": 0.0085, |
| "step": 10775 |
| }, |
| { |
| "epoch": 1.6911112270995354, |
| "grad_norm": 0.001234252005815506, |
| "learning_rate": 2.4248927038626608e-05, |
| "loss": 0.0014, |
| "step": 10800 |
| }, |
| { |
| "epoch": 1.6950258364215252, |
| "grad_norm": 0.0025794110260903835, |
| "learning_rate": 2.4176429648532654e-05, |
| "loss": 0.0014, |
| "step": 10825 |
| }, |
| { |
| "epoch": 1.6989404457435149, |
| "grad_norm": 0.07590831816196442, |
| "learning_rate": 2.4103932258438696e-05, |
| "loss": 0.0023, |
| "step": 10850 |
| }, |
| { |
| "epoch": 1.7028550550655046, |
| "grad_norm": 0.005912380293011665, |
| "learning_rate": 2.403143486834474e-05, |
| "loss": 0.0015, |
| "step": 10875 |
| }, |
| { |
| "epoch": 1.7067696643874941, |
| "grad_norm": 0.010333801619708538, |
| "learning_rate": 2.3958937478250784e-05, |
| "loss": 0.0001, |
| "step": 10900 |
| }, |
| { |
| "epoch": 1.7106842737094838, |
| "grad_norm": 0.01580122299492359, |
| "learning_rate": 2.388644008815683e-05, |
| "loss": 0.0018, |
| "step": 10925 |
| }, |
| { |
| "epoch": 1.7145988830314733, |
| "grad_norm": 0.10874010622501373, |
| "learning_rate": 2.381394269806287e-05, |
| "loss": 0.0012, |
| "step": 10950 |
| }, |
| { |
| "epoch": 1.718513492353463, |
| "grad_norm": 0.016742747277021408, |
| "learning_rate": 2.3741445307968914e-05, |
| "loss": 0.0009, |
| "step": 10975 |
| }, |
| { |
| "epoch": 1.7224281016754528, |
| "grad_norm": 0.012475020252168179, |
| "learning_rate": 2.366894791787496e-05, |
| "loss": 0.0007, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.7263427109974425, |
| "grad_norm": 0.1469310075044632, |
| "learning_rate": 2.3596450527781e-05, |
| "loss": 0.002, |
| "step": 11025 |
| }, |
| { |
| "epoch": 1.7302573203194322, |
| "grad_norm": 0.0017377269687131047, |
| "learning_rate": 2.3523953137687044e-05, |
| "loss": 0.0001, |
| "step": 11050 |
| }, |
| { |
| "epoch": 1.7341719296414217, |
| "grad_norm": 0.003490234026685357, |
| "learning_rate": 2.3451455747593086e-05, |
| "loss": 0.0001, |
| "step": 11075 |
| }, |
| { |
| "epoch": 1.7380865389634115, |
| "grad_norm": 0.008674775250256062, |
| "learning_rate": 2.337895835749913e-05, |
| "loss": 0.0039, |
| "step": 11100 |
| }, |
| { |
| "epoch": 1.742001148285401, |
| "grad_norm": 0.004905765876173973, |
| "learning_rate": 2.3306460967405174e-05, |
| "loss": 0.0023, |
| "step": 11125 |
| }, |
| { |
| "epoch": 1.7459157576073907, |
| "grad_norm": 0.0013971665175631642, |
| "learning_rate": 2.323396357731122e-05, |
| "loss": 0.0002, |
| "step": 11150 |
| }, |
| { |
| "epoch": 1.7498303669293804, |
| "grad_norm": 0.004542670212686062, |
| "learning_rate": 2.3161466187217262e-05, |
| "loss": 0.0001, |
| "step": 11175 |
| }, |
| { |
| "epoch": 1.7537449762513702, |
| "grad_norm": 0.0004924107925035059, |
| "learning_rate": 2.3088968797123307e-05, |
| "loss": 0.0001, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.7576595855733599, |
| "grad_norm": 0.0016612813342362642, |
| "learning_rate": 2.3016471407029346e-05, |
| "loss": 0.0003, |
| "step": 11225 |
| }, |
| { |
| "epoch": 1.7615741948953496, |
| "grad_norm": 0.0002968982153106481, |
| "learning_rate": 2.2943974016935392e-05, |
| "loss": 0.0001, |
| "step": 11250 |
| }, |
| { |
| "epoch": 1.7654888042173391, |
| "grad_norm": 0.00263870763592422, |
| "learning_rate": 2.2871476626841434e-05, |
| "loss": 0.0011, |
| "step": 11275 |
| }, |
| { |
| "epoch": 1.7694034135393286, |
| "grad_norm": 0.00835906621068716, |
| "learning_rate": 2.279897923674748e-05, |
| "loss": 0.0067, |
| "step": 11300 |
| }, |
| { |
| "epoch": 1.7733180228613183, |
| "grad_norm": 0.0007750336080789566, |
| "learning_rate": 2.2726481846653522e-05, |
| "loss": 0.0003, |
| "step": 11325 |
| }, |
| { |
| "epoch": 1.777232632183308, |
| "grad_norm": 0.0028884296771138906, |
| "learning_rate": 2.2653984456559564e-05, |
| "loss": 0.0023, |
| "step": 11350 |
| }, |
| { |
| "epoch": 1.7811472415052978, |
| "grad_norm": 0.042546164244413376, |
| "learning_rate": 2.258148706646561e-05, |
| "loss": 0.0003, |
| "step": 11375 |
| }, |
| { |
| "epoch": 1.7850618508272875, |
| "grad_norm": 0.0007674749358557165, |
| "learning_rate": 2.2508989676371652e-05, |
| "loss": 0.0003, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.7889764601492772, |
| "grad_norm": 0.040151335299015045, |
| "learning_rate": 2.2436492286277694e-05, |
| "loss": 0.0002, |
| "step": 11425 |
| }, |
| { |
| "epoch": 1.7928910694712668, |
| "grad_norm": 0.0003488350484985858, |
| "learning_rate": 2.2363994896183736e-05, |
| "loss": 0.0, |
| "step": 11450 |
| }, |
| { |
| "epoch": 1.7968056787932565, |
| "grad_norm": 0.25811877846717834, |
| "learning_rate": 2.2291497506089782e-05, |
| "loss": 0.0004, |
| "step": 11475 |
| }, |
| { |
| "epoch": 1.800720288115246, |
| "grad_norm": 0.00024293421301990747, |
| "learning_rate": 2.2219000115995824e-05, |
| "loss": 0.0003, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.8046348974372357, |
| "grad_norm": 0.004234221298247576, |
| "learning_rate": 2.214650272590187e-05, |
| "loss": 0.0029, |
| "step": 11525 |
| }, |
| { |
| "epoch": 1.8085495067592254, |
| "grad_norm": 0.0003131197008769959, |
| "learning_rate": 2.2074005335807912e-05, |
| "loss": 0.0002, |
| "step": 11550 |
| }, |
| { |
| "epoch": 1.8124641160812152, |
| "grad_norm": 0.05105828866362572, |
| "learning_rate": 2.2001507945713958e-05, |
| "loss": 0.0008, |
| "step": 11575 |
| }, |
| { |
| "epoch": 1.8163787254032049, |
| "grad_norm": 0.014320386573672295, |
| "learning_rate": 2.192901055562e-05, |
| "loss": 0.0009, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.8202933347251944, |
| "grad_norm": 0.0003410752979107201, |
| "learning_rate": 2.1856513165526042e-05, |
| "loss": 0.0007, |
| "step": 11625 |
| }, |
| { |
| "epoch": 1.8242079440471841, |
| "grad_norm": 0.0003042153548449278, |
| "learning_rate": 2.1784015775432085e-05, |
| "loss": 0.0006, |
| "step": 11650 |
| }, |
| { |
| "epoch": 1.8281225533691736, |
| "grad_norm": 0.1060762032866478, |
| "learning_rate": 2.1711518385338127e-05, |
| "loss": 0.0001, |
| "step": 11675 |
| }, |
| { |
| "epoch": 1.8320371626911633, |
| "grad_norm": 0.0008619217551313341, |
| "learning_rate": 2.1639020995244172e-05, |
| "loss": 0.0006, |
| "step": 11700 |
| }, |
| { |
| "epoch": 1.835951772013153, |
| "grad_norm": 0.0005810207221657038, |
| "learning_rate": 2.1566523605150215e-05, |
| "loss": 0.0005, |
| "step": 11725 |
| }, |
| { |
| "epoch": 1.8398663813351428, |
| "grad_norm": 0.005664344877004623, |
| "learning_rate": 2.149402621505626e-05, |
| "loss": 0.0026, |
| "step": 11750 |
| }, |
| { |
| "epoch": 1.8437809906571325, |
| "grad_norm": 4.294293403625488, |
| "learning_rate": 2.1421528824962302e-05, |
| "loss": 0.0007, |
| "step": 11775 |
| }, |
| { |
| "epoch": 1.8476955999791222, |
| "grad_norm": 0.040877822786569595, |
| "learning_rate": 2.1349031434868348e-05, |
| "loss": 0.0021, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.8516102093011118, |
| "grad_norm": 0.003679527435451746, |
| "learning_rate": 2.127653404477439e-05, |
| "loss": 0.0006, |
| "step": 11825 |
| }, |
| { |
| "epoch": 1.8555248186231013, |
| "grad_norm": 0.003342527663335204, |
| "learning_rate": 2.1204036654680433e-05, |
| "loss": 0.0002, |
| "step": 11850 |
| }, |
| { |
| "epoch": 1.859439427945091, |
| "grad_norm": 0.000454226101282984, |
| "learning_rate": 2.1131539264586475e-05, |
| "loss": 0.0003, |
| "step": 11875 |
| }, |
| { |
| "epoch": 1.8633540372670807, |
| "grad_norm": 0.00024604357895441353, |
| "learning_rate": 2.105904187449252e-05, |
| "loss": 0.0004, |
| "step": 11900 |
| }, |
| { |
| "epoch": 1.8672686465890704, |
| "grad_norm": 0.00022296722454484552, |
| "learning_rate": 2.0986544484398563e-05, |
| "loss": 0.0003, |
| "step": 11925 |
| }, |
| { |
| "epoch": 1.8711832559110602, |
| "grad_norm": 0.0013281836872920394, |
| "learning_rate": 2.0914047094304605e-05, |
| "loss": 0.0012, |
| "step": 11950 |
| }, |
| { |
| "epoch": 1.8750978652330499, |
| "grad_norm": 0.00042916362872347236, |
| "learning_rate": 2.084154970421065e-05, |
| "loss": 0.0006, |
| "step": 11975 |
| }, |
| { |
| "epoch": 1.8790124745550394, |
| "grad_norm": 0.0013623477425426245, |
| "learning_rate": 2.0769052314116693e-05, |
| "loss": 0.0014, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.8829270838770291, |
| "grad_norm": 0.0005729927215725183, |
| "learning_rate": 2.069655492402274e-05, |
| "loss": 0.0047, |
| "step": 12025 |
| }, |
| { |
| "epoch": 1.8868416931990186, |
| "grad_norm": 0.0161959920078516, |
| "learning_rate": 2.0624057533928777e-05, |
| "loss": 0.0002, |
| "step": 12050 |
| }, |
| { |
| "epoch": 1.8907563025210083, |
| "grad_norm": 0.05182856693863869, |
| "learning_rate": 2.0551560143834823e-05, |
| "loss": 0.0011, |
| "step": 12075 |
| }, |
| { |
| "epoch": 1.894670911842998, |
| "grad_norm": 0.0009345468715764582, |
| "learning_rate": 2.0479062753740865e-05, |
| "loss": 0.0004, |
| "step": 12100 |
| }, |
| { |
| "epoch": 1.8985855211649878, |
| "grad_norm": 0.004085169639438391, |
| "learning_rate": 2.040656536364691e-05, |
| "loss": 0.0009, |
| "step": 12125 |
| }, |
| { |
| "epoch": 1.9025001304869775, |
| "grad_norm": 0.003939950373023748, |
| "learning_rate": 2.0334067973552953e-05, |
| "loss": 0.0002, |
| "step": 12150 |
| }, |
| { |
| "epoch": 1.906414739808967, |
| "grad_norm": 0.0006880080327391624, |
| "learning_rate": 2.0261570583459e-05, |
| "loss": 0.0001, |
| "step": 12175 |
| }, |
| { |
| "epoch": 1.9103293491309568, |
| "grad_norm": 0.01777348481118679, |
| "learning_rate": 2.018907319336504e-05, |
| "loss": 0.0001, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.9142439584529463, |
| "grad_norm": 0.0002502555726096034, |
| "learning_rate": 2.0116575803271083e-05, |
| "loss": 0.0005, |
| "step": 12225 |
| }, |
| { |
| "epoch": 1.918158567774936, |
| "grad_norm": 0.0007615393842570484, |
| "learning_rate": 2.0044078413177125e-05, |
| "loss": 0.0022, |
| "step": 12250 |
| }, |
| { |
| "epoch": 1.9220731770969257, |
| "grad_norm": 0.008713331073522568, |
| "learning_rate": 1.9971581023083167e-05, |
| "loss": 0.001, |
| "step": 12275 |
| }, |
| { |
| "epoch": 1.9259877864189154, |
| "grad_norm": 0.003203247208148241, |
| "learning_rate": 1.9899083632989213e-05, |
| "loss": 0.0031, |
| "step": 12300 |
| }, |
| { |
| "epoch": 1.9299023957409052, |
| "grad_norm": 0.02553451806306839, |
| "learning_rate": 1.9826586242895255e-05, |
| "loss": 0.001, |
| "step": 12325 |
| }, |
| { |
| "epoch": 1.933817005062895, |
| "grad_norm": 0.045750390738248825, |
| "learning_rate": 1.97540888528013e-05, |
| "loss": 0.0007, |
| "step": 12350 |
| }, |
| { |
| "epoch": 1.9377316143848844, |
| "grad_norm": 0.0004758847935590893, |
| "learning_rate": 1.9681591462707343e-05, |
| "loss": 0.001, |
| "step": 12375 |
| }, |
| { |
| "epoch": 1.941646223706874, |
| "grad_norm": 0.0024788689333945513, |
| "learning_rate": 1.960909407261339e-05, |
| "loss": 0.0003, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.9455608330288636, |
| "grad_norm": 0.0014538065297529101, |
| "learning_rate": 1.953659668251943e-05, |
| "loss": 0.0007, |
| "step": 12425 |
| }, |
| { |
| "epoch": 1.9494754423508533, |
| "grad_norm": 0.00023535569198429585, |
| "learning_rate": 1.9464099292425473e-05, |
| "loss": 0.0003, |
| "step": 12450 |
| }, |
| { |
| "epoch": 1.953390051672843, |
| "grad_norm": 0.0002048378373729065, |
| "learning_rate": 1.9391601902331515e-05, |
| "loss": 0.0001, |
| "step": 12475 |
| }, |
| { |
| "epoch": 1.9573046609948328, |
| "grad_norm": 0.0004028423863928765, |
| "learning_rate": 1.931910451223756e-05, |
| "loss": 0.0008, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.9612192703168225, |
| "grad_norm": 0.0021086076740175486, |
| "learning_rate": 1.9246607122143603e-05, |
| "loss": 0.0002, |
| "step": 12525 |
| }, |
| { |
| "epoch": 1.965133879638812, |
| "grad_norm": 0.00085318653145805, |
| "learning_rate": 1.9174109732049646e-05, |
| "loss": 0.0001, |
| "step": 12550 |
| }, |
| { |
| "epoch": 1.9690484889608018, |
| "grad_norm": 0.00021198119793552905, |
| "learning_rate": 1.910161234195569e-05, |
| "loss": 0.0007, |
| "step": 12575 |
| }, |
| { |
| "epoch": 1.9729630982827913, |
| "grad_norm": 0.00025199473020620644, |
| "learning_rate": 1.9029114951861733e-05, |
| "loss": 0.0011, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.976877707604781, |
| "grad_norm": 0.0007640988333150744, |
| "learning_rate": 1.895661756176778e-05, |
| "loss": 0.0003, |
| "step": 12625 |
| }, |
| { |
| "epoch": 1.9807923169267707, |
| "grad_norm": 0.013913657516241074, |
| "learning_rate": 1.888412017167382e-05, |
| "loss": 0.0001, |
| "step": 12650 |
| }, |
| { |
| "epoch": 1.9847069262487604, |
| "grad_norm": 0.00018586177611723542, |
| "learning_rate": 1.8811622781579863e-05, |
| "loss": 0.001, |
| "step": 12675 |
| }, |
| { |
| "epoch": 1.9886215355707502, |
| "grad_norm": 0.00032623313018120825, |
| "learning_rate": 1.8739125391485906e-05, |
| "loss": 0.0, |
| "step": 12700 |
| }, |
| { |
| "epoch": 1.9925361448927397, |
| "grad_norm": 0.00017907471919897944, |
| "learning_rate": 1.866662800139195e-05, |
| "loss": 0.0, |
| "step": 12725 |
| }, |
| { |
| "epoch": 1.9964507542147294, |
| "grad_norm": 1.226132869720459, |
| "learning_rate": 1.8594130611297994e-05, |
| "loss": 0.0007, |
| "step": 12750 |
| }, |
| { |
| "epoch": 1.9998956104180803, |
| "eval_accuracy": 0.9998184512550563, |
| "eval_f1": 0.9998301719182735, |
| "eval_loss": 0.0009782494744285941, |
| "eval_precision": 0.999817110608891, |
| "eval_recall": 0.9998432335689185, |
| "eval_runtime": 63.1773, |
| "eval_samples_per_second": 606.515, |
| "eval_steps_per_second": 37.909, |
| "step": 12772 |
| }, |
| { |
| "epoch": 2.000365363536719, |
| "grad_norm": 0.0005716659361496568, |
| "learning_rate": 1.852163322120404e-05, |
| "loss": 0.0, |
| "step": 12775 |
| }, |
| { |
| "epoch": 2.0042799728587086, |
| "grad_norm": 0.0003549535758793354, |
| "learning_rate": 1.844913583111008e-05, |
| "loss": 0.0016, |
| "step": 12800 |
| }, |
| { |
| "epoch": 2.0081945821806984, |
| "grad_norm": 0.012497562915086746, |
| "learning_rate": 1.8376638441016124e-05, |
| "loss": 0.0003, |
| "step": 12825 |
| }, |
| { |
| "epoch": 2.012109191502688, |
| "grad_norm": 0.0003994225990027189, |
| "learning_rate": 1.830414105092217e-05, |
| "loss": 0.0022, |
| "step": 12850 |
| }, |
| { |
| "epoch": 2.016023800824678, |
| "grad_norm": 0.0007454080041497946, |
| "learning_rate": 1.823164366082821e-05, |
| "loss": 0.0001, |
| "step": 12875 |
| }, |
| { |
| "epoch": 2.0199384101466675, |
| "grad_norm": 0.0001763895561452955, |
| "learning_rate": 1.8159146270734254e-05, |
| "loss": 0.0001, |
| "step": 12900 |
| }, |
| { |
| "epoch": 2.023853019468657, |
| "grad_norm": 1.3950115442276, |
| "learning_rate": 1.8086648880640296e-05, |
| "loss": 0.0002, |
| "step": 12925 |
| }, |
| { |
| "epoch": 2.0277676287906465, |
| "grad_norm": 0.00019921216880902648, |
| "learning_rate": 1.801415149054634e-05, |
| "loss": 0.0003, |
| "step": 12950 |
| }, |
| { |
| "epoch": 2.0316822381126363, |
| "grad_norm": 0.00017710919200908393, |
| "learning_rate": 1.7941654100452384e-05, |
| "loss": 0.0001, |
| "step": 12975 |
| }, |
| { |
| "epoch": 2.035596847434626, |
| "grad_norm": 0.0029750317335128784, |
| "learning_rate": 1.7872056605962187e-05, |
| "loss": 0.0048, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.0395114567566157, |
| "grad_norm": 0.006306216586381197, |
| "learning_rate": 1.779955921586823e-05, |
| "loss": 0.0001, |
| "step": 13025 |
| }, |
| { |
| "epoch": 2.0434260660786054, |
| "grad_norm": 0.0116845378652215, |
| "learning_rate": 1.7727061825774275e-05, |
| "loss": 0.0001, |
| "step": 13050 |
| }, |
| { |
| "epoch": 2.047340675400595, |
| "grad_norm": 0.6511118412017822, |
| "learning_rate": 1.7654564435680314e-05, |
| "loss": 0.0003, |
| "step": 13075 |
| }, |
| { |
| "epoch": 2.051255284722585, |
| "grad_norm": 0.036821216344833374, |
| "learning_rate": 1.758206704558636e-05, |
| "loss": 0.0007, |
| "step": 13100 |
| }, |
| { |
| "epoch": 2.055169894044574, |
| "grad_norm": 0.002877579303458333, |
| "learning_rate": 1.7509569655492402e-05, |
| "loss": 0.0001, |
| "step": 13125 |
| }, |
| { |
| "epoch": 2.059084503366564, |
| "grad_norm": 0.3921562433242798, |
| "learning_rate": 1.7437072265398448e-05, |
| "loss": 0.0015, |
| "step": 13150 |
| }, |
| { |
| "epoch": 2.0629991126885536, |
| "grad_norm": 0.0009924178011715412, |
| "learning_rate": 1.736457487530449e-05, |
| "loss": 0.0001, |
| "step": 13175 |
| }, |
| { |
| "epoch": 2.0669137220105434, |
| "grad_norm": 0.00017288805975113064, |
| "learning_rate": 1.7292077485210536e-05, |
| "loss": 0.0, |
| "step": 13200 |
| }, |
| { |
| "epoch": 2.070828331332533, |
| "grad_norm": 0.002701199846342206, |
| "learning_rate": 1.7222479990720336e-05, |
| "loss": 0.0002, |
| "step": 13225 |
| }, |
| { |
| "epoch": 2.074742940654523, |
| "grad_norm": 0.0008174364338628948, |
| "learning_rate": 1.7149982600626378e-05, |
| "loss": 0.0002, |
| "step": 13250 |
| }, |
| { |
| "epoch": 2.0786575499765125, |
| "grad_norm": 0.0007628415478393435, |
| "learning_rate": 1.707748521053242e-05, |
| "loss": 0.0026, |
| "step": 13275 |
| }, |
| { |
| "epoch": 2.082572159298502, |
| "grad_norm": 0.002812017919495702, |
| "learning_rate": 1.7004987820438466e-05, |
| "loss": 0.0005, |
| "step": 13300 |
| }, |
| { |
| "epoch": 2.0864867686204915, |
| "grad_norm": 0.0008141273865476251, |
| "learning_rate": 1.6932490430344508e-05, |
| "loss": 0.0011, |
| "step": 13325 |
| }, |
| { |
| "epoch": 2.0904013779424813, |
| "grad_norm": 0.001382953836582601, |
| "learning_rate": 1.685999304025055e-05, |
| "loss": 0.0001, |
| "step": 13350 |
| }, |
| { |
| "epoch": 2.094315987264471, |
| "grad_norm": 0.0003401144640520215, |
| "learning_rate": 1.6787495650156596e-05, |
| "loss": 0.0007, |
| "step": 13375 |
| }, |
| { |
| "epoch": 2.0982305965864607, |
| "grad_norm": 0.0013442619238048792, |
| "learning_rate": 1.6714998260062638e-05, |
| "loss": 0.0003, |
| "step": 13400 |
| }, |
| { |
| "epoch": 2.1021452059084504, |
| "grad_norm": 0.0014413492754101753, |
| "learning_rate": 1.6642500869968684e-05, |
| "loss": 0.0, |
| "step": 13425 |
| }, |
| { |
| "epoch": 2.10605981523044, |
| "grad_norm": 0.0010517615592107177, |
| "learning_rate": 1.6570003479874726e-05, |
| "loss": 0.0001, |
| "step": 13450 |
| }, |
| { |
| "epoch": 2.10997442455243, |
| "grad_norm": 0.0006386275636032224, |
| "learning_rate": 1.6497506089780768e-05, |
| "loss": 0.0001, |
| "step": 13475 |
| }, |
| { |
| "epoch": 2.113889033874419, |
| "grad_norm": 0.06532581895589828, |
| "learning_rate": 1.642500869968681e-05, |
| "loss": 0.0, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.117803643196409, |
| "grad_norm": 0.6693994402885437, |
| "learning_rate": 1.6352511309592856e-05, |
| "loss": 0.0003, |
| "step": 13525 |
| }, |
| { |
| "epoch": 2.1217182525183986, |
| "grad_norm": 0.00038365976070053875, |
| "learning_rate": 1.62800139194989e-05, |
| "loss": 0.001, |
| "step": 13550 |
| }, |
| { |
| "epoch": 2.1256328618403884, |
| "grad_norm": 0.00022832312970422208, |
| "learning_rate": 1.6207516529404944e-05, |
| "loss": 0.0005, |
| "step": 13575 |
| }, |
| { |
| "epoch": 2.129547471162378, |
| "grad_norm": 0.0010044885566458106, |
| "learning_rate": 1.6135019139310986e-05, |
| "loss": 0.0002, |
| "step": 13600 |
| }, |
| { |
| "epoch": 2.133462080484368, |
| "grad_norm": 0.0077380407601594925, |
| "learning_rate": 1.606252174921703e-05, |
| "loss": 0.0006, |
| "step": 13625 |
| }, |
| { |
| "epoch": 2.1373766898063575, |
| "grad_norm": 0.004086634609848261, |
| "learning_rate": 1.5990024359123074e-05, |
| "loss": 0.0, |
| "step": 13650 |
| }, |
| { |
| "epoch": 2.141291299128347, |
| "grad_norm": 0.0014021744718775153, |
| "learning_rate": 1.5917526969029113e-05, |
| "loss": 0.0004, |
| "step": 13675 |
| }, |
| { |
| "epoch": 2.1452059084503365, |
| "grad_norm": 0.0007703950395807624, |
| "learning_rate": 1.584502957893516e-05, |
| "loss": 0.0001, |
| "step": 13700 |
| }, |
| { |
| "epoch": 2.1491205177723263, |
| "grad_norm": 0.041545968502759933, |
| "learning_rate": 1.57725321888412e-05, |
| "loss": 0.0131, |
| "step": 13725 |
| }, |
| { |
| "epoch": 2.153035127094316, |
| "grad_norm": 0.005660552531480789, |
| "learning_rate": 1.5700034798747246e-05, |
| "loss": 0.0033, |
| "step": 13750 |
| }, |
| { |
| "epoch": 2.1569497364163057, |
| "grad_norm": 0.00475983927026391, |
| "learning_rate": 1.562753740865329e-05, |
| "loss": 0.0003, |
| "step": 13775 |
| }, |
| { |
| "epoch": 2.1608643457382954, |
| "grad_norm": 0.8047095537185669, |
| "learning_rate": 1.5555040018559334e-05, |
| "loss": 0.0003, |
| "step": 13800 |
| }, |
| { |
| "epoch": 2.164778955060285, |
| "grad_norm": 0.0028548124246299267, |
| "learning_rate": 1.5482542628465376e-05, |
| "loss": 0.0002, |
| "step": 13825 |
| }, |
| { |
| "epoch": 2.1686935643822745, |
| "grad_norm": 0.01292176079005003, |
| "learning_rate": 1.5410045238371422e-05, |
| "loss": 0.0004, |
| "step": 13850 |
| }, |
| { |
| "epoch": 2.172608173704264, |
| "grad_norm": 0.00023661291925236583, |
| "learning_rate": 1.533754784827746e-05, |
| "loss": 0.0003, |
| "step": 13875 |
| }, |
| { |
| "epoch": 2.176522783026254, |
| "grad_norm": 0.0002082917490042746, |
| "learning_rate": 1.5265050458183507e-05, |
| "loss": 0.0, |
| "step": 13900 |
| }, |
| { |
| "epoch": 2.1804373923482436, |
| "grad_norm": 0.0005662673502229154, |
| "learning_rate": 1.5192553068089549e-05, |
| "loss": 0.0001, |
| "step": 13925 |
| }, |
| { |
| "epoch": 2.1843520016702334, |
| "grad_norm": 0.00044558930676430464, |
| "learning_rate": 1.5120055677995593e-05, |
| "loss": 0.0, |
| "step": 13950 |
| }, |
| { |
| "epoch": 2.188266610992223, |
| "grad_norm": 0.00019830386736430228, |
| "learning_rate": 1.5047558287901637e-05, |
| "loss": 0.0015, |
| "step": 13975 |
| }, |
| { |
| "epoch": 2.192181220314213, |
| "grad_norm": 0.00026091316249221563, |
| "learning_rate": 1.497506089780768e-05, |
| "loss": 0.0003, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.196095829636202, |
| "grad_norm": 0.00025246432051062584, |
| "learning_rate": 1.4902563507713724e-05, |
| "loss": 0.0, |
| "step": 14025 |
| }, |
| { |
| "epoch": 2.200010438958192, |
| "grad_norm": 0.13300319015979767, |
| "learning_rate": 1.4830066117619767e-05, |
| "loss": 0.001, |
| "step": 14050 |
| }, |
| { |
| "epoch": 2.2039250482801815, |
| "grad_norm": 0.00025016642757691443, |
| "learning_rate": 1.4757568727525809e-05, |
| "loss": 0.0001, |
| "step": 14075 |
| }, |
| { |
| "epoch": 2.2078396576021713, |
| "grad_norm": 0.0002115444076480344, |
| "learning_rate": 1.4685071337431853e-05, |
| "loss": 0.0001, |
| "step": 14100 |
| }, |
| { |
| "epoch": 2.211754266924161, |
| "grad_norm": 0.028121890500187874, |
| "learning_rate": 1.4612573947337895e-05, |
| "loss": 0.0009, |
| "step": 14125 |
| }, |
| { |
| "epoch": 2.2156688762461507, |
| "grad_norm": 0.00024847922031767666, |
| "learning_rate": 1.4540076557243939e-05, |
| "loss": 0.0002, |
| "step": 14150 |
| }, |
| { |
| "epoch": 2.2195834855681404, |
| "grad_norm": 0.0006722984835505486, |
| "learning_rate": 1.4467579167149983e-05, |
| "loss": 0.0006, |
| "step": 14175 |
| }, |
| { |
| "epoch": 2.22349809489013, |
| "grad_norm": 0.004081141669303179, |
| "learning_rate": 1.4395081777056027e-05, |
| "loss": 0.0001, |
| "step": 14200 |
| }, |
| { |
| "epoch": 2.2274127042121195, |
| "grad_norm": 0.0002169485087506473, |
| "learning_rate": 1.432258438696207e-05, |
| "loss": 0.0, |
| "step": 14225 |
| }, |
| { |
| "epoch": 2.231327313534109, |
| "grad_norm": 0.0004130221204832196, |
| "learning_rate": 1.4250086996868115e-05, |
| "loss": 0.0, |
| "step": 14250 |
| }, |
| { |
| "epoch": 2.235241922856099, |
| "grad_norm": 0.00018515564443077892, |
| "learning_rate": 1.4177589606774159e-05, |
| "loss": 0.0, |
| "step": 14275 |
| }, |
| { |
| "epoch": 2.2391565321780886, |
| "grad_norm": 0.00019148353021591902, |
| "learning_rate": 1.41050922166802e-05, |
| "loss": 0.0001, |
| "step": 14300 |
| }, |
| { |
| "epoch": 2.2430711415000784, |
| "grad_norm": 0.0020026888232678175, |
| "learning_rate": 1.4032594826586243e-05, |
| "loss": 0.0, |
| "step": 14325 |
| }, |
| { |
| "epoch": 2.246985750822068, |
| "grad_norm": 0.00482906075194478, |
| "learning_rate": 1.3960097436492287e-05, |
| "loss": 0.0, |
| "step": 14350 |
| }, |
| { |
| "epoch": 2.250900360144058, |
| "grad_norm": 0.00026496723876334727, |
| "learning_rate": 1.3887600046398331e-05, |
| "loss": 0.0, |
| "step": 14375 |
| }, |
| { |
| "epoch": 2.2548149694660475, |
| "grad_norm": 0.0002499364491086453, |
| "learning_rate": 1.3815102656304373e-05, |
| "loss": 0.0004, |
| "step": 14400 |
| }, |
| { |
| "epoch": 2.258729578788037, |
| "grad_norm": 0.00017081611440517008, |
| "learning_rate": 1.3742605266210417e-05, |
| "loss": 0.0, |
| "step": 14425 |
| }, |
| { |
| "epoch": 2.2626441881100265, |
| "grad_norm": 0.00017463510448578745, |
| "learning_rate": 1.3670107876116461e-05, |
| "loss": 0.0, |
| "step": 14450 |
| }, |
| { |
| "epoch": 2.2665587974320163, |
| "grad_norm": 0.0022245387081056833, |
| "learning_rate": 1.3597610486022505e-05, |
| "loss": 0.0091, |
| "step": 14475 |
| }, |
| { |
| "epoch": 2.270473406754006, |
| "grad_norm": 0.007297486532479525, |
| "learning_rate": 1.3525113095928546e-05, |
| "loss": 0.0004, |
| "step": 14500 |
| }, |
| { |
| "epoch": 2.2743880160759957, |
| "grad_norm": 0.005688577424734831, |
| "learning_rate": 1.345261570583459e-05, |
| "loss": 0.0002, |
| "step": 14525 |
| }, |
| { |
| "epoch": 2.2783026253979854, |
| "grad_norm": 0.004135147202759981, |
| "learning_rate": 1.3380118315740633e-05, |
| "loss": 0.0024, |
| "step": 14550 |
| }, |
| { |
| "epoch": 2.282217234719975, |
| "grad_norm": 0.002322606975212693, |
| "learning_rate": 1.3307620925646677e-05, |
| "loss": 0.0005, |
| "step": 14575 |
| }, |
| { |
| "epoch": 2.2861318440419645, |
| "grad_norm": 0.002005909802392125, |
| "learning_rate": 1.3235123535552721e-05, |
| "loss": 0.001, |
| "step": 14600 |
| }, |
| { |
| "epoch": 2.290046453363954, |
| "grad_norm": 0.0009009299101307988, |
| "learning_rate": 1.3162626145458765e-05, |
| "loss": 0.0009, |
| "step": 14625 |
| }, |
| { |
| "epoch": 2.293961062685944, |
| "grad_norm": 0.009488469921052456, |
| "learning_rate": 1.3090128755364809e-05, |
| "loss": 0.0001, |
| "step": 14650 |
| }, |
| { |
| "epoch": 2.2978756720079336, |
| "grad_norm": 0.017507528886198997, |
| "learning_rate": 1.3017631365270851e-05, |
| "loss": 0.0038, |
| "step": 14675 |
| }, |
| { |
| "epoch": 2.3017902813299234, |
| "grad_norm": 0.0015606528613716364, |
| "learning_rate": 1.2945133975176894e-05, |
| "loss": 0.0003, |
| "step": 14700 |
| }, |
| { |
| "epoch": 2.305704890651913, |
| "grad_norm": 0.0004906764370389283, |
| "learning_rate": 1.2872636585082937e-05, |
| "loss": 0.0006, |
| "step": 14725 |
| }, |
| { |
| "epoch": 2.309619499973903, |
| "grad_norm": 0.001650349353440106, |
| "learning_rate": 1.280013919498898e-05, |
| "loss": 0.0001, |
| "step": 14750 |
| }, |
| { |
| "epoch": 2.313534109295892, |
| "grad_norm": 0.00038060618680901825, |
| "learning_rate": 1.2727641804895024e-05, |
| "loss": 0.0005, |
| "step": 14775 |
| }, |
| { |
| "epoch": 2.317448718617882, |
| "grad_norm": 0.001028302125632763, |
| "learning_rate": 1.2655144414801068e-05, |
| "loss": 0.0001, |
| "step": 14800 |
| }, |
| { |
| "epoch": 2.3213633279398715, |
| "grad_norm": 0.007792349439114332, |
| "learning_rate": 1.2582647024707112e-05, |
| "loss": 0.0002, |
| "step": 14825 |
| }, |
| { |
| "epoch": 2.3252779372618613, |
| "grad_norm": 0.01641431264579296, |
| "learning_rate": 1.2510149634613155e-05, |
| "loss": 0.0001, |
| "step": 14850 |
| }, |
| { |
| "epoch": 2.329192546583851, |
| "grad_norm": 0.0006683383253403008, |
| "learning_rate": 1.2437652244519198e-05, |
| "loss": 0.0022, |
| "step": 14875 |
| }, |
| { |
| "epoch": 2.3331071559058407, |
| "grad_norm": 0.0009384675067849457, |
| "learning_rate": 1.2365154854425242e-05, |
| "loss": 0.0017, |
| "step": 14900 |
| }, |
| { |
| "epoch": 2.3370217652278304, |
| "grad_norm": 0.00037563694058917463, |
| "learning_rate": 1.2292657464331286e-05, |
| "loss": 0.0011, |
| "step": 14925 |
| }, |
| { |
| "epoch": 2.3409363745498197, |
| "grad_norm": 0.00020698497246485204, |
| "learning_rate": 1.222016007423733e-05, |
| "loss": 0.0008, |
| "step": 14950 |
| }, |
| { |
| "epoch": 2.3448509838718095, |
| "grad_norm": 0.0001723883324302733, |
| "learning_rate": 1.2147662684143372e-05, |
| "loss": 0.0001, |
| "step": 14975 |
| }, |
| { |
| "epoch": 2.348765593193799, |
| "grad_norm": 0.06703776121139526, |
| "learning_rate": 1.2075165294049416e-05, |
| "loss": 0.0001, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.352680202515789, |
| "grad_norm": 0.00018680775247048587, |
| "learning_rate": 1.2002667903955458e-05, |
| "loss": 0.0, |
| "step": 15025 |
| }, |
| { |
| "epoch": 2.3565948118377786, |
| "grad_norm": 0.0001830816181609407, |
| "learning_rate": 1.1930170513861502e-05, |
| "loss": 0.0, |
| "step": 15050 |
| }, |
| { |
| "epoch": 2.3605094211597684, |
| "grad_norm": 0.00020273331028874964, |
| "learning_rate": 1.1857673123767544e-05, |
| "loss": 0.0001, |
| "step": 15075 |
| }, |
| { |
| "epoch": 2.364424030481758, |
| "grad_norm": 0.00020694882550742477, |
| "learning_rate": 1.1785175733673588e-05, |
| "loss": 0.0003, |
| "step": 15100 |
| }, |
| { |
| "epoch": 2.3683386398037474, |
| "grad_norm": 0.0007374598644673824, |
| "learning_rate": 1.1712678343579632e-05, |
| "loss": 0.0061, |
| "step": 15125 |
| }, |
| { |
| "epoch": 2.372253249125737, |
| "grad_norm": 0.01134900189936161, |
| "learning_rate": 1.1640180953485676e-05, |
| "loss": 0.0002, |
| "step": 15150 |
| }, |
| { |
| "epoch": 2.376167858447727, |
| "grad_norm": 0.000648992951028049, |
| "learning_rate": 1.1567683563391718e-05, |
| "loss": 0.0001, |
| "step": 15175 |
| }, |
| { |
| "epoch": 2.3800824677697165, |
| "grad_norm": 0.0004858991305809468, |
| "learning_rate": 1.1495186173297762e-05, |
| "loss": 0.0, |
| "step": 15200 |
| }, |
| { |
| "epoch": 2.3839970770917063, |
| "grad_norm": 0.0870414674282074, |
| "learning_rate": 1.1422688783203806e-05, |
| "loss": 0.0002, |
| "step": 15225 |
| }, |
| { |
| "epoch": 2.387911686413696, |
| "grad_norm": 0.0006449563661590219, |
| "learning_rate": 1.135019139310985e-05, |
| "loss": 0.0, |
| "step": 15250 |
| }, |
| { |
| "epoch": 2.3918262957356857, |
| "grad_norm": 0.00027997951838187873, |
| "learning_rate": 1.1277694003015892e-05, |
| "loss": 0.0001, |
| "step": 15275 |
| }, |
| { |
| "epoch": 2.395740905057675, |
| "grad_norm": 0.9710797667503357, |
| "learning_rate": 1.1205196612921936e-05, |
| "loss": 0.0, |
| "step": 15300 |
| }, |
| { |
| "epoch": 2.3996555143796647, |
| "grad_norm": 0.00019451680418569595, |
| "learning_rate": 1.1132699222827978e-05, |
| "loss": 0.0, |
| "step": 15325 |
| }, |
| { |
| "epoch": 2.4035701237016545, |
| "grad_norm": 0.0015565232606604695, |
| "learning_rate": 1.1060201832734022e-05, |
| "loss": 0.0038, |
| "step": 15350 |
| }, |
| { |
| "epoch": 2.407484733023644, |
| "grad_norm": 0.00032800339977256954, |
| "learning_rate": 1.0987704442640064e-05, |
| "loss": 0.0002, |
| "step": 15375 |
| }, |
| { |
| "epoch": 2.411399342345634, |
| "grad_norm": 0.017102686688303947, |
| "learning_rate": 1.0915207052546108e-05, |
| "loss": 0.0005, |
| "step": 15400 |
| }, |
| { |
| "epoch": 2.4153139516676236, |
| "grad_norm": 0.001418459229171276, |
| "learning_rate": 1.0842709662452152e-05, |
| "loss": 0.0, |
| "step": 15425 |
| }, |
| { |
| "epoch": 2.4192285609896134, |
| "grad_norm": 0.015620172023773193, |
| "learning_rate": 1.0770212272358196e-05, |
| "loss": 0.0001, |
| "step": 15450 |
| }, |
| { |
| "epoch": 2.423143170311603, |
| "grad_norm": 0.0006858358392491937, |
| "learning_rate": 1.0697714882264238e-05, |
| "loss": 0.0018, |
| "step": 15475 |
| }, |
| { |
| "epoch": 2.427057779633593, |
| "grad_norm": 0.0004693476075772196, |
| "learning_rate": 1.0625217492170282e-05, |
| "loss": 0.0001, |
| "step": 15500 |
| }, |
| { |
| "epoch": 2.430972388955582, |
| "grad_norm": 0.34811919927597046, |
| "learning_rate": 1.0552720102076326e-05, |
| "loss": 0.0002, |
| "step": 15525 |
| }, |
| { |
| "epoch": 2.434886998277572, |
| "grad_norm": 0.0014850205043330789, |
| "learning_rate": 1.048022271198237e-05, |
| "loss": 0.0001, |
| "step": 15550 |
| }, |
| { |
| "epoch": 2.4388016075995615, |
| "grad_norm": 0.000363474857294932, |
| "learning_rate": 1.0407725321888412e-05, |
| "loss": 0.0004, |
| "step": 15575 |
| }, |
| { |
| "epoch": 2.4427162169215513, |
| "grad_norm": 0.0010670394403859973, |
| "learning_rate": 1.0335227931794456e-05, |
| "loss": 0.0004, |
| "step": 15600 |
| }, |
| { |
| "epoch": 2.446630826243541, |
| "grad_norm": 0.14377856254577637, |
| "learning_rate": 1.0262730541700499e-05, |
| "loss": 0.0019, |
| "step": 15625 |
| }, |
| { |
| "epoch": 2.4505454355655307, |
| "grad_norm": 0.0004735889961011708, |
| "learning_rate": 1.0190233151606542e-05, |
| "loss": 0.0001, |
| "step": 15650 |
| }, |
| { |
| "epoch": 2.4544600448875205, |
| "grad_norm": 0.0004282770969439298, |
| "learning_rate": 1.0117735761512585e-05, |
| "loss": 0.0026, |
| "step": 15675 |
| }, |
| { |
| "epoch": 2.4583746542095097, |
| "grad_norm": 0.007325559854507446, |
| "learning_rate": 1.0045238371418629e-05, |
| "loss": 0.0007, |
| "step": 15700 |
| }, |
| { |
| "epoch": 2.4622892635314995, |
| "grad_norm": 0.000777337234467268, |
| "learning_rate": 9.972740981324673e-06, |
| "loss": 0.0009, |
| "step": 15725 |
| }, |
| { |
| "epoch": 2.466203872853489, |
| "grad_norm": 0.003926098812371492, |
| "learning_rate": 9.900243591230716e-06, |
| "loss": 0.0004, |
| "step": 15750 |
| }, |
| { |
| "epoch": 2.470118482175479, |
| "grad_norm": 0.00045515818055719137, |
| "learning_rate": 9.82774620113676e-06, |
| "loss": 0.0001, |
| "step": 15775 |
| }, |
| { |
| "epoch": 2.4740330914974686, |
| "grad_norm": 0.01267548743635416, |
| "learning_rate": 9.755248811042803e-06, |
| "loss": 0.0002, |
| "step": 15800 |
| }, |
| { |
| "epoch": 2.4779477008194584, |
| "grad_norm": 0.0004507755220402032, |
| "learning_rate": 9.682751420948847e-06, |
| "loss": 0.0008, |
| "step": 15825 |
| }, |
| { |
| "epoch": 2.481862310141448, |
| "grad_norm": 0.07765714824199677, |
| "learning_rate": 9.61025403085489e-06, |
| "loss": 0.0001, |
| "step": 15850 |
| }, |
| { |
| "epoch": 2.4857769194634374, |
| "grad_norm": 0.0010181193938478827, |
| "learning_rate": 9.537756640760934e-06, |
| "loss": 0.0002, |
| "step": 15875 |
| }, |
| { |
| "epoch": 2.489691528785427, |
| "grad_norm": 0.00023663626052439213, |
| "learning_rate": 9.465259250666977e-06, |
| "loss": 0.0011, |
| "step": 15900 |
| }, |
| { |
| "epoch": 2.493606138107417, |
| "grad_norm": 0.010522628203034401, |
| "learning_rate": 9.392761860573019e-06, |
| "loss": 0.0002, |
| "step": 15925 |
| }, |
| { |
| "epoch": 2.4975207474294066, |
| "grad_norm": 0.0006732672336511314, |
| "learning_rate": 9.320264470479063e-06, |
| "loss": 0.0001, |
| "step": 15950 |
| }, |
| { |
| "epoch": 2.5014353567513963, |
| "grad_norm": 0.0013339362340047956, |
| "learning_rate": 9.247767080385107e-06, |
| "loss": 0.0, |
| "step": 15975 |
| }, |
| { |
| "epoch": 2.505349966073386, |
| "grad_norm": 0.0003018657735083252, |
| "learning_rate": 9.175269690291149e-06, |
| "loss": 0.0, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.5092645753953757, |
| "grad_norm": 0.0003551984846126288, |
| "learning_rate": 9.102772300197193e-06, |
| "loss": 0.0, |
| "step": 16025 |
| }, |
| { |
| "epoch": 2.513179184717365, |
| "grad_norm": 0.00026321958284825087, |
| "learning_rate": 9.030274910103237e-06, |
| "loss": 0.0, |
| "step": 16050 |
| }, |
| { |
| "epoch": 2.5170937940393547, |
| "grad_norm": 0.00023617663828190416, |
| "learning_rate": 8.95777752000928e-06, |
| "loss": 0.0001, |
| "step": 16075 |
| }, |
| { |
| "epoch": 2.5210084033613445, |
| "grad_norm": 0.0029212015215307474, |
| "learning_rate": 8.885280129915323e-06, |
| "loss": 0.0, |
| "step": 16100 |
| }, |
| { |
| "epoch": 2.524923012683334, |
| "grad_norm": 0.00017464791017118841, |
| "learning_rate": 8.812782739821367e-06, |
| "loss": 0.0, |
| "step": 16125 |
| }, |
| { |
| "epoch": 2.528837622005324, |
| "grad_norm": 0.00018289768195245415, |
| "learning_rate": 8.74028534972741e-06, |
| "loss": 0.0002, |
| "step": 16150 |
| }, |
| { |
| "epoch": 2.5327522313273136, |
| "grad_norm": 0.00490641500800848, |
| "learning_rate": 8.667787959633455e-06, |
| "loss": 0.0, |
| "step": 16175 |
| }, |
| { |
| "epoch": 2.5366668406493034, |
| "grad_norm": 0.0014596167020499706, |
| "learning_rate": 8.595290569539497e-06, |
| "loss": 0.0, |
| "step": 16200 |
| }, |
| { |
| "epoch": 2.5405814499712926, |
| "grad_norm": 0.00016923531075008214, |
| "learning_rate": 8.522793179445541e-06, |
| "loss": 0.0001, |
| "step": 16225 |
| }, |
| { |
| "epoch": 2.5444960592932824, |
| "grad_norm": 0.001988182310014963, |
| "learning_rate": 8.450295789351583e-06, |
| "loss": 0.0001, |
| "step": 16250 |
| }, |
| { |
| "epoch": 2.548410668615272, |
| "grad_norm": 0.0008615644765086472, |
| "learning_rate": 8.377798399257627e-06, |
| "loss": 0.0001, |
| "step": 16275 |
| }, |
| { |
| "epoch": 2.552325277937262, |
| "grad_norm": 0.0002167491620639339, |
| "learning_rate": 8.30530100916367e-06, |
| "loss": 0.0, |
| "step": 16300 |
| }, |
| { |
| "epoch": 2.5562398872592516, |
| "grad_norm": 0.000143597528222017, |
| "learning_rate": 8.232803619069713e-06, |
| "loss": 0.0001, |
| "step": 16325 |
| }, |
| { |
| "epoch": 2.5601544965812413, |
| "grad_norm": 0.0002638675505295396, |
| "learning_rate": 8.160306228975757e-06, |
| "loss": 0.0, |
| "step": 16350 |
| }, |
| { |
| "epoch": 2.564069105903231, |
| "grad_norm": 0.0011918079107999802, |
| "learning_rate": 8.087808838881801e-06, |
| "loss": 0.001, |
| "step": 16375 |
| }, |
| { |
| "epoch": 2.5679837152252203, |
| "grad_norm": 0.00031527673127129674, |
| "learning_rate": 8.015311448787843e-06, |
| "loss": 0.0, |
| "step": 16400 |
| }, |
| { |
| "epoch": 2.5718983245472105, |
| "grad_norm": 0.0002970160567201674, |
| "learning_rate": 7.942814058693887e-06, |
| "loss": 0.0, |
| "step": 16425 |
| }, |
| { |
| "epoch": 2.5758129338691997, |
| "grad_norm": 0.00032033000024966896, |
| "learning_rate": 7.870316668599931e-06, |
| "loss": 0.0, |
| "step": 16450 |
| }, |
| { |
| "epoch": 2.5797275431911895, |
| "grad_norm": 0.00021383754210546613, |
| "learning_rate": 7.797819278505975e-06, |
| "loss": 0.0, |
| "step": 16475 |
| }, |
| { |
| "epoch": 2.583642152513179, |
| "grad_norm": 0.0025132743176072836, |
| "learning_rate": 7.725321888412019e-06, |
| "loss": 0.0016, |
| "step": 16500 |
| }, |
| { |
| "epoch": 2.587556761835169, |
| "grad_norm": 0.0004337320278864354, |
| "learning_rate": 7.652824498318061e-06, |
| "loss": 0.0, |
| "step": 16525 |
| }, |
| { |
| "epoch": 2.5914713711571586, |
| "grad_norm": 0.000609175069257617, |
| "learning_rate": 7.580327108224104e-06, |
| "loss": 0.0016, |
| "step": 16550 |
| }, |
| { |
| "epoch": 2.595385980479148, |
| "grad_norm": 0.00021605034999083728, |
| "learning_rate": 7.507829718130148e-06, |
| "loss": 0.0, |
| "step": 16575 |
| }, |
| { |
| "epoch": 2.599300589801138, |
| "grad_norm": 0.0006186183891259134, |
| "learning_rate": 7.435332328036191e-06, |
| "loss": 0.0, |
| "step": 16600 |
| }, |
| { |
| "epoch": 2.6032151991231274, |
| "grad_norm": 0.0002476814261171967, |
| "learning_rate": 7.362834937942234e-06, |
| "loss": 0.0006, |
| "step": 16625 |
| }, |
| { |
| "epoch": 2.607129808445117, |
| "grad_norm": 0.0007107394631020725, |
| "learning_rate": 7.2903375478482775e-06, |
| "loss": 0.0, |
| "step": 16650 |
| }, |
| { |
| "epoch": 2.611044417767107, |
| "grad_norm": 0.00029517774237319827, |
| "learning_rate": 7.217840157754321e-06, |
| "loss": 0.0, |
| "step": 16675 |
| }, |
| { |
| "epoch": 2.6149590270890966, |
| "grad_norm": 0.0001818942982936278, |
| "learning_rate": 7.145342767660365e-06, |
| "loss": 0.0, |
| "step": 16700 |
| }, |
| { |
| "epoch": 2.6188736364110863, |
| "grad_norm": 0.0003430229553487152, |
| "learning_rate": 7.072845377566408e-06, |
| "loss": 0.0029, |
| "step": 16725 |
| }, |
| { |
| "epoch": 2.6227882457330756, |
| "grad_norm": 0.01995168998837471, |
| "learning_rate": 7.0003479874724515e-06, |
| "loss": 0.004, |
| "step": 16750 |
| }, |
| { |
| "epoch": 2.6267028550550657, |
| "grad_norm": 0.003800376318395138, |
| "learning_rate": 6.927850597378495e-06, |
| "loss": 0.0002, |
| "step": 16775 |
| }, |
| { |
| "epoch": 2.630617464377055, |
| "grad_norm": 0.0005610916996374726, |
| "learning_rate": 6.8553532072845385e-06, |
| "loss": 0.0009, |
| "step": 16800 |
| }, |
| { |
| "epoch": 2.6345320736990447, |
| "grad_norm": 0.0002944047737400979, |
| "learning_rate": 6.782855817190581e-06, |
| "loss": 0.0002, |
| "step": 16825 |
| }, |
| { |
| "epoch": 2.6384466830210345, |
| "grad_norm": 0.0002466263249516487, |
| "learning_rate": 6.710358427096625e-06, |
| "loss": 0.0, |
| "step": 16850 |
| }, |
| { |
| "epoch": 2.642361292343024, |
| "grad_norm": 0.0001500146317994222, |
| "learning_rate": 6.637861037002669e-06, |
| "loss": 0.0, |
| "step": 16875 |
| }, |
| { |
| "epoch": 2.646275901665014, |
| "grad_norm": 0.0007112550083547831, |
| "learning_rate": 6.565363646908712e-06, |
| "loss": 0.0001, |
| "step": 16900 |
| }, |
| { |
| "epoch": 2.6501905109870036, |
| "grad_norm": 0.00017001846572384238, |
| "learning_rate": 6.492866256814755e-06, |
| "loss": 0.0, |
| "step": 16925 |
| }, |
| { |
| "epoch": 2.6541051203089934, |
| "grad_norm": 0.0007822296465747058, |
| "learning_rate": 6.420368866720798e-06, |
| "loss": 0.0, |
| "step": 16950 |
| }, |
| { |
| "epoch": 2.6580197296309827, |
| "grad_norm": 0.0006506266072392464, |
| "learning_rate": 6.347871476626842e-06, |
| "loss": 0.0011, |
| "step": 16975 |
| }, |
| { |
| "epoch": 2.6619343389529724, |
| "grad_norm": 0.001398293417878449, |
| "learning_rate": 6.275374086532886e-06, |
| "loss": 0.0009, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.665848948274962, |
| "grad_norm": 0.01400019507855177, |
| "learning_rate": 6.202876696438929e-06, |
| "loss": 0.0007, |
| "step": 17025 |
| }, |
| { |
| "epoch": 2.669763557596952, |
| "grad_norm": 0.001175655866973102, |
| "learning_rate": 6.130379306344972e-06, |
| "loss": 0.0008, |
| "step": 17050 |
| }, |
| { |
| "epoch": 2.6736781669189416, |
| "grad_norm": 0.004671004135161638, |
| "learning_rate": 6.057881916251015e-06, |
| "loss": 0.0001, |
| "step": 17075 |
| }, |
| { |
| "epoch": 2.6775927762409313, |
| "grad_norm": 0.03513360768556595, |
| "learning_rate": 5.985384526157058e-06, |
| "loss": 0.0001, |
| "step": 17100 |
| }, |
| { |
| "epoch": 2.681507385562921, |
| "grad_norm": 0.0003586947568692267, |
| "learning_rate": 5.912887136063102e-06, |
| "loss": 0.0, |
| "step": 17125 |
| }, |
| { |
| "epoch": 2.6854219948849103, |
| "grad_norm": 0.00038099908852018416, |
| "learning_rate": 5.840389745969145e-06, |
| "loss": 0.0, |
| "step": 17150 |
| }, |
| { |
| "epoch": 2.6893366042069, |
| "grad_norm": 0.00031486572697758675, |
| "learning_rate": 5.767892355875189e-06, |
| "loss": 0.0001, |
| "step": 17175 |
| }, |
| { |
| "epoch": 2.6932512135288897, |
| "grad_norm": 0.0003432184748817235, |
| "learning_rate": 5.695394965781233e-06, |
| "loss": 0.0001, |
| "step": 17200 |
| }, |
| { |
| "epoch": 2.6971658228508795, |
| "grad_norm": 0.0004650696355383843, |
| "learning_rate": 5.622897575687275e-06, |
| "loss": 0.0001, |
| "step": 17225 |
| }, |
| { |
| "epoch": 2.701080432172869, |
| "grad_norm": 0.0003299444215372205, |
| "learning_rate": 5.550400185593319e-06, |
| "loss": 0.0006, |
| "step": 17250 |
| }, |
| { |
| "epoch": 2.704995041494859, |
| "grad_norm": 0.0003300994576420635, |
| "learning_rate": 5.477902795499362e-06, |
| "loss": 0.0, |
| "step": 17275 |
| }, |
| { |
| "epoch": 2.7089096508168486, |
| "grad_norm": 0.0005288653774186969, |
| "learning_rate": 5.405405405405406e-06, |
| "loss": 0.001, |
| "step": 17300 |
| }, |
| { |
| "epoch": 2.712824260138838, |
| "grad_norm": 0.00020216924895066768, |
| "learning_rate": 5.332908015311449e-06, |
| "loss": 0.0, |
| "step": 17325 |
| }, |
| { |
| "epoch": 2.7167388694608277, |
| "grad_norm": 0.006004292517900467, |
| "learning_rate": 5.260410625217493e-06, |
| "loss": 0.0, |
| "step": 17350 |
| }, |
| { |
| "epoch": 2.7206534787828174, |
| "grad_norm": 0.0001754688419168815, |
| "learning_rate": 5.187913235123536e-06, |
| "loss": 0.0, |
| "step": 17375 |
| }, |
| { |
| "epoch": 2.724568088104807, |
| "grad_norm": 0.012972986325621605, |
| "learning_rate": 5.115415845029579e-06, |
| "loss": 0.0, |
| "step": 17400 |
| }, |
| { |
| "epoch": 2.728482697426797, |
| "grad_norm": 0.00019060824706684798, |
| "learning_rate": 5.042918454935622e-06, |
| "loss": 0.0, |
| "step": 17425 |
| }, |
| { |
| "epoch": 2.7323973067487866, |
| "grad_norm": 0.0002203083859058097, |
| "learning_rate": 4.970421064841666e-06, |
| "loss": 0.0001, |
| "step": 17450 |
| }, |
| { |
| "epoch": 2.7363119160707763, |
| "grad_norm": 0.00014681309403385967, |
| "learning_rate": 4.897923674747709e-06, |
| "loss": 0.0, |
| "step": 17475 |
| }, |
| { |
| "epoch": 2.7402265253927656, |
| "grad_norm": 0.0002081810962408781, |
| "learning_rate": 4.825426284653753e-06, |
| "loss": 0.0009, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.7441411347147557, |
| "grad_norm": 0.0002039974497165531, |
| "learning_rate": 4.752928894559796e-06, |
| "loss": 0.0001, |
| "step": 17525 |
| }, |
| { |
| "epoch": 2.748055744036745, |
| "grad_norm": 0.0001363355404464528, |
| "learning_rate": 4.680431504465839e-06, |
| "loss": 0.0, |
| "step": 17550 |
| }, |
| { |
| "epoch": 2.7519703533587347, |
| "grad_norm": 0.0013490230776369572, |
| "learning_rate": 4.6079341143718824e-06, |
| "loss": 0.0004, |
| "step": 17575 |
| }, |
| { |
| "epoch": 2.7558849626807245, |
| "grad_norm": 0.00017140705313067883, |
| "learning_rate": 4.535436724277926e-06, |
| "loss": 0.0005, |
| "step": 17600 |
| }, |
| { |
| "epoch": 2.759799572002714, |
| "grad_norm": 0.00012424413580447435, |
| "learning_rate": 4.4629393341839695e-06, |
| "loss": 0.0, |
| "step": 17625 |
| }, |
| { |
| "epoch": 2.763714181324704, |
| "grad_norm": 0.000487282668473199, |
| "learning_rate": 4.390441944090013e-06, |
| "loss": 0.0001, |
| "step": 17650 |
| }, |
| { |
| "epoch": 2.767628790646693, |
| "grad_norm": 0.0003270190500188619, |
| "learning_rate": 4.3179445539960565e-06, |
| "loss": 0.0004, |
| "step": 17675 |
| }, |
| { |
| "epoch": 2.7715433999686834, |
| "grad_norm": 0.00023671095550525934, |
| "learning_rate": 4.2454471639020995e-06, |
| "loss": 0.0, |
| "step": 17700 |
| }, |
| { |
| "epoch": 2.7754580092906727, |
| "grad_norm": 0.0011756513267755508, |
| "learning_rate": 4.172949773808143e-06, |
| "loss": 0.0067, |
| "step": 17725 |
| }, |
| { |
| "epoch": 2.7793726186126624, |
| "grad_norm": 0.0004914366290904582, |
| "learning_rate": 4.1004523837141865e-06, |
| "loss": 0.0005, |
| "step": 17750 |
| }, |
| { |
| "epoch": 2.783287227934652, |
| "grad_norm": 0.0011064461432397366, |
| "learning_rate": 4.02795499362023e-06, |
| "loss": 0.0, |
| "step": 17775 |
| }, |
| { |
| "epoch": 2.787201837256642, |
| "grad_norm": 0.0006191087886691093, |
| "learning_rate": 3.9554576035262736e-06, |
| "loss": 0.001, |
| "step": 17800 |
| }, |
| { |
| "epoch": 2.7911164465786316, |
| "grad_norm": 0.005341960582882166, |
| "learning_rate": 3.882960213432317e-06, |
| "loss": 0.0001, |
| "step": 17825 |
| }, |
| { |
| "epoch": 2.795031055900621, |
| "grad_norm": 0.004248717799782753, |
| "learning_rate": 3.81046282333836e-06, |
| "loss": 0.0003, |
| "step": 17850 |
| }, |
| { |
| "epoch": 2.798945665222611, |
| "grad_norm": 0.02371644414961338, |
| "learning_rate": 3.7379654332444032e-06, |
| "loss": 0.0001, |
| "step": 17875 |
| }, |
| { |
| "epoch": 2.8028602745446003, |
| "grad_norm": 0.0007812991389073431, |
| "learning_rate": 3.6654680431504467e-06, |
| "loss": 0.0004, |
| "step": 17900 |
| }, |
| { |
| "epoch": 2.80677488386659, |
| "grad_norm": 0.00031172268791124225, |
| "learning_rate": 3.59297065305649e-06, |
| "loss": 0.0002, |
| "step": 17925 |
| }, |
| { |
| "epoch": 2.8106894931885797, |
| "grad_norm": 0.012311534956097603, |
| "learning_rate": 3.5204732629625337e-06, |
| "loss": 0.0001, |
| "step": 17950 |
| }, |
| { |
| "epoch": 2.8146041025105695, |
| "grad_norm": 0.0008552991203032434, |
| "learning_rate": 3.4479758728685772e-06, |
| "loss": 0.0003, |
| "step": 17975 |
| }, |
| { |
| "epoch": 2.818518711832559, |
| "grad_norm": 0.011254767887294292, |
| "learning_rate": 3.3754784827746203e-06, |
| "loss": 0.0001, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.822433321154549, |
| "grad_norm": 0.003312336513772607, |
| "learning_rate": 3.302981092680664e-06, |
| "loss": 0.005, |
| "step": 18025 |
| }, |
| { |
| "epoch": 2.8263479304765387, |
| "grad_norm": 0.0033339662477374077, |
| "learning_rate": 3.230483702586707e-06, |
| "loss": 0.0001, |
| "step": 18050 |
| }, |
| { |
| "epoch": 2.830262539798528, |
| "grad_norm": 0.0307988952845335, |
| "learning_rate": 3.157986312492751e-06, |
| "loss": 0.0001, |
| "step": 18075 |
| }, |
| { |
| "epoch": 2.8341771491205177, |
| "grad_norm": 0.001144499285146594, |
| "learning_rate": 3.085488922398794e-06, |
| "loss": 0.0, |
| "step": 18100 |
| }, |
| { |
| "epoch": 2.8380917584425074, |
| "grad_norm": 0.0007567739812657237, |
| "learning_rate": 3.012991532304837e-06, |
| "loss": 0.0001, |
| "step": 18125 |
| }, |
| { |
| "epoch": 2.842006367764497, |
| "grad_norm": 0.0014737301971763372, |
| "learning_rate": 2.9404941422108805e-06, |
| "loss": 0.0001, |
| "step": 18150 |
| }, |
| { |
| "epoch": 2.845920977086487, |
| "grad_norm": 0.0020669877994805574, |
| "learning_rate": 2.867996752116924e-06, |
| "loss": 0.0005, |
| "step": 18175 |
| }, |
| { |
| "epoch": 2.8498355864084766, |
| "grad_norm": 0.0025932856369763613, |
| "learning_rate": 2.795499362022967e-06, |
| "loss": 0.0001, |
| "step": 18200 |
| }, |
| { |
| "epoch": 2.8537501957304663, |
| "grad_norm": 0.0018630975391715765, |
| "learning_rate": 2.7230019719290106e-06, |
| "loss": 0.0, |
| "step": 18225 |
| }, |
| { |
| "epoch": 2.8576648050524556, |
| "grad_norm": 0.011522402986884117, |
| "learning_rate": 2.650504581835054e-06, |
| "loss": 0.0021, |
| "step": 18250 |
| }, |
| { |
| "epoch": 2.8615794143744453, |
| "grad_norm": 0.020472779870033264, |
| "learning_rate": 2.578007191741097e-06, |
| "loss": 0.0001, |
| "step": 18275 |
| }, |
| { |
| "epoch": 2.865494023696435, |
| "grad_norm": 0.06026843190193176, |
| "learning_rate": 2.5055098016471406e-06, |
| "loss": 0.0001, |
| "step": 18300 |
| }, |
| { |
| "epoch": 2.8694086330184247, |
| "grad_norm": 0.0006733342306688428, |
| "learning_rate": 2.433012411553184e-06, |
| "loss": 0.0, |
| "step": 18325 |
| }, |
| { |
| "epoch": 2.8733232423404145, |
| "grad_norm": 0.0009708734578453004, |
| "learning_rate": 2.3605150214592277e-06, |
| "loss": 0.0001, |
| "step": 18350 |
| }, |
| { |
| "epoch": 2.877237851662404, |
| "grad_norm": 0.00023784795484971255, |
| "learning_rate": 2.288017631365271e-06, |
| "loss": 0.0001, |
| "step": 18375 |
| }, |
| { |
| "epoch": 2.881152460984394, |
| "grad_norm": 0.004968983121216297, |
| "learning_rate": 2.2155202412713147e-06, |
| "loss": 0.0001, |
| "step": 18400 |
| }, |
| { |
| "epoch": 2.885067070306383, |
| "grad_norm": 0.0008029749151319265, |
| "learning_rate": 2.1430228511773577e-06, |
| "loss": 0.0, |
| "step": 18425 |
| }, |
| { |
| "epoch": 2.888981679628373, |
| "grad_norm": 0.0008586676558479667, |
| "learning_rate": 2.0705254610834012e-06, |
| "loss": 0.0, |
| "step": 18450 |
| }, |
| { |
| "epoch": 2.8928962889503627, |
| "grad_norm": 0.0015144862700253725, |
| "learning_rate": 1.9980280709894447e-06, |
| "loss": 0.0005, |
| "step": 18475 |
| }, |
| { |
| "epoch": 2.8968108982723524, |
| "grad_norm": 0.003726179013028741, |
| "learning_rate": 1.925530680895488e-06, |
| "loss": 0.006, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.900725507594342, |
| "grad_norm": 0.00036417951923795044, |
| "learning_rate": 1.8530332908015313e-06, |
| "loss": 0.0, |
| "step": 18525 |
| }, |
| { |
| "epoch": 2.904640116916332, |
| "grad_norm": 0.0008425221894867718, |
| "learning_rate": 1.7805359007075746e-06, |
| "loss": 0.0006, |
| "step": 18550 |
| }, |
| { |
| "epoch": 2.9085547262383216, |
| "grad_norm": 0.12718600034713745, |
| "learning_rate": 1.7080385106136181e-06, |
| "loss": 0.001, |
| "step": 18575 |
| }, |
| { |
| "epoch": 2.912469335560311, |
| "grad_norm": 0.0010318702552467585, |
| "learning_rate": 1.6355411205196614e-06, |
| "loss": 0.0013, |
| "step": 18600 |
| }, |
| { |
| "epoch": 2.916383944882301, |
| "grad_norm": 0.0004336585116107017, |
| "learning_rate": 1.5630437304257047e-06, |
| "loss": 0.0003, |
| "step": 18625 |
| }, |
| { |
| "epoch": 2.9202985542042903, |
| "grad_norm": 0.000422166776843369, |
| "learning_rate": 1.4905463403317482e-06, |
| "loss": 0.0001, |
| "step": 18650 |
| }, |
| { |
| "epoch": 2.92421316352628, |
| "grad_norm": 0.0006406558677554131, |
| "learning_rate": 1.4180489502377915e-06, |
| "loss": 0.0, |
| "step": 18675 |
| }, |
| { |
| "epoch": 2.9281277728482697, |
| "grad_norm": 0.0005850115558132529, |
| "learning_rate": 1.3455515601438348e-06, |
| "loss": 0.0001, |
| "step": 18700 |
| }, |
| { |
| "epoch": 2.9320423821702595, |
| "grad_norm": 0.0158847626298666, |
| "learning_rate": 1.2730541700498783e-06, |
| "loss": 0.0001, |
| "step": 18725 |
| }, |
| { |
| "epoch": 2.935956991492249, |
| "grad_norm": 0.0014247479848563671, |
| "learning_rate": 1.2005567799559216e-06, |
| "loss": 0.0015, |
| "step": 18750 |
| }, |
| { |
| "epoch": 2.9398716008142385, |
| "grad_norm": 0.0002501108101569116, |
| "learning_rate": 1.128059389861965e-06, |
| "loss": 0.0001, |
| "step": 18775 |
| }, |
| { |
| "epoch": 2.9437862101362287, |
| "grad_norm": 0.0004493577580433339, |
| "learning_rate": 1.0555619997680084e-06, |
| "loss": 0.0002, |
| "step": 18800 |
| }, |
| { |
| "epoch": 2.947700819458218, |
| "grad_norm": 0.00032207099138759077, |
| "learning_rate": 9.830646096740517e-07, |
| "loss": 0.0001, |
| "step": 18825 |
| }, |
| { |
| "epoch": 2.9516154287802077, |
| "grad_norm": 0.002576634753495455, |
| "learning_rate": 9.105672195800951e-07, |
| "loss": 0.0001, |
| "step": 18850 |
| }, |
| { |
| "epoch": 2.9555300381021974, |
| "grad_norm": 0.0007969861035235226, |
| "learning_rate": 8.380698294861385e-07, |
| "loss": 0.0, |
| "step": 18875 |
| }, |
| { |
| "epoch": 2.959444647424187, |
| "grad_norm": 0.0008164517930708826, |
| "learning_rate": 7.655724393921819e-07, |
| "loss": 0.0, |
| "step": 18900 |
| }, |
| { |
| "epoch": 2.963359256746177, |
| "grad_norm": 0.000589414150454104, |
| "learning_rate": 6.930750492982253e-07, |
| "loss": 0.0002, |
| "step": 18925 |
| }, |
| { |
| "epoch": 2.967273866068166, |
| "grad_norm": 0.009339476004242897, |
| "learning_rate": 6.205776592042687e-07, |
| "loss": 0.0, |
| "step": 18950 |
| }, |
| { |
| "epoch": 2.9711884753901563, |
| "grad_norm": 0.0037412915844470263, |
| "learning_rate": 5.480802691103121e-07, |
| "loss": 0.0, |
| "step": 18975 |
| }, |
| { |
| "epoch": 2.9751030847121456, |
| "grad_norm": 0.00445817643776536, |
| "learning_rate": 4.7558287901635545e-07, |
| "loss": 0.0001, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.9790176940341353, |
| "grad_norm": 0.001451736083254218, |
| "learning_rate": 4.0308548892239885e-07, |
| "loss": 0.001, |
| "step": 19025 |
| }, |
| { |
| "epoch": 2.982932303356125, |
| "grad_norm": 0.000640546262729913, |
| "learning_rate": 3.305880988284422e-07, |
| "loss": 0.0001, |
| "step": 19050 |
| }, |
| { |
| "epoch": 2.9868469126781148, |
| "grad_norm": 0.2126484215259552, |
| "learning_rate": 2.580907087344856e-07, |
| "loss": 0.0008, |
| "step": 19075 |
| }, |
| { |
| "epoch": 2.9907615220001045, |
| "grad_norm": 0.0003385374147910625, |
| "learning_rate": 1.8559331864052894e-07, |
| "loss": 0.0, |
| "step": 19100 |
| }, |
| { |
| "epoch": 2.994676131322094, |
| "grad_norm": 0.00034946645610034466, |
| "learning_rate": 1.1309592854657233e-07, |
| "loss": 0.0, |
| "step": 19125 |
| }, |
| { |
| "epoch": 2.998590740644084, |
| "grad_norm": 0.0007842128979973495, |
| "learning_rate": 4.0598538452615705e-08, |
| "loss": 0.0016, |
| "step": 19150 |
| }, |
| { |
| "epoch": 2.9998434156271205, |
| "eval_accuracy": 0.9999096727919246, |
| "eval_f1": 0.9998709961870392, |
| "eval_loss": 0.0005143894231878221, |
| "eval_precision": 0.9998563016620019, |
| "eval_recall": 0.999885691144003, |
| "eval_runtime": 66.002, |
| "eval_samples_per_second": 580.558, |
| "eval_steps_per_second": 36.287, |
| "step": 19158 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 19158, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.01 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 2 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.003595204150272e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|