| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1033, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0009682885499878964, |
| "grad_norm": 0.6142507791519165, |
| "learning_rate": 0.0, |
| "loss": 0.7025314569473267, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0019365770999757927, |
| "grad_norm": 0.6211322546005249, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.6656137704849243, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.002904865649963689, |
| "grad_norm": 0.6215519905090332, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.6469869017601013, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0038731541999515854, |
| "grad_norm": 0.6204696297645569, |
| "learning_rate": 3e-06, |
| "loss": 0.6729673147201538, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.004841442749939482, |
| "grad_norm": 0.5724360942840576, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.6311185956001282, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.005809731299927378, |
| "grad_norm": 0.6253241896629333, |
| "learning_rate": 5e-06, |
| "loss": 0.6582703590393066, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.006778019849915275, |
| "grad_norm": 0.6960524320602417, |
| "learning_rate": 6e-06, |
| "loss": 0.6846659183502197, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.007746308399903171, |
| "grad_norm": 0.669350802898407, |
| "learning_rate": 7e-06, |
| "loss": 0.690190315246582, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.008714596949891068, |
| "grad_norm": 0.603227436542511, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.6193867325782776, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.009682885499878963, |
| "grad_norm": 0.5855698585510254, |
| "learning_rate": 9e-06, |
| "loss": 0.5652514696121216, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01065117404986686, |
| "grad_norm": 0.652574360370636, |
| "learning_rate": 1e-05, |
| "loss": 0.5543577671051025, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.011619462599854757, |
| "grad_norm": 0.6981928944587708, |
| "learning_rate": 9.990224828934506e-06, |
| "loss": 0.5915582180023193, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.012587751149842653, |
| "grad_norm": 0.6892595291137695, |
| "learning_rate": 9.980449657869014e-06, |
| "loss": 0.5405319333076477, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.01355603969983055, |
| "grad_norm": 0.6135081648826599, |
| "learning_rate": 9.97067448680352e-06, |
| "loss": 0.535025954246521, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.014524328249818447, |
| "grad_norm": 0.6271191239356995, |
| "learning_rate": 9.960899315738027e-06, |
| "loss": 0.5299935340881348, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.015492616799806342, |
| "grad_norm": 0.5334578156471252, |
| "learning_rate": 9.951124144672532e-06, |
| "loss": 0.4963300824165344, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.01646090534979424, |
| "grad_norm": 0.5006250143051147, |
| "learning_rate": 9.94134897360704e-06, |
| "loss": 0.49313884973526, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.017429193899782137, |
| "grad_norm": 0.3994006812572479, |
| "learning_rate": 9.931573802541545e-06, |
| "loss": 0.4737316966056824, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.01839748244977003, |
| "grad_norm": 0.28798240423202515, |
| "learning_rate": 9.921798631476052e-06, |
| "loss": 0.4324286878108978, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.019365770999757927, |
| "grad_norm": 0.26087912917137146, |
| "learning_rate": 9.912023460410558e-06, |
| "loss": 0.43165814876556396, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.020334059549745823, |
| "grad_norm": 0.2219318300485611, |
| "learning_rate": 9.902248289345065e-06, |
| "loss": 0.44213879108428955, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.02130234809973372, |
| "grad_norm": 0.2282973825931549, |
| "learning_rate": 9.89247311827957e-06, |
| "loss": 0.45201411843299866, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.022270636649721617, |
| "grad_norm": 0.2014036774635315, |
| "learning_rate": 9.882697947214078e-06, |
| "loss": 0.41472718119621277, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.023238925199709513, |
| "grad_norm": 0.21150773763656616, |
| "learning_rate": 9.872922776148584e-06, |
| "loss": 0.41946664452552795, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.02420721374969741, |
| "grad_norm": 0.253612220287323, |
| "learning_rate": 9.863147605083089e-06, |
| "loss": 0.42294907569885254, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.025175502299685307, |
| "grad_norm": 0.21278540790081024, |
| "learning_rate": 9.853372434017596e-06, |
| "loss": 0.4383317828178406, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.026143790849673203, |
| "grad_norm": 0.24685898423194885, |
| "learning_rate": 9.843597262952102e-06, |
| "loss": 0.46812987327575684, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0271120793996611, |
| "grad_norm": 0.18908213078975677, |
| "learning_rate": 9.83382209188661e-06, |
| "loss": 0.40582185983657837, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.028080367949648997, |
| "grad_norm": 0.20694321393966675, |
| "learning_rate": 9.824046920821115e-06, |
| "loss": 0.39531630277633667, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.029048656499636893, |
| "grad_norm": 0.1862354725599289, |
| "learning_rate": 9.814271749755622e-06, |
| "loss": 0.4046899676322937, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.030016945049624787, |
| "grad_norm": 0.21069619059562683, |
| "learning_rate": 9.804496578690128e-06, |
| "loss": 0.39400529861450195, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.030985233599612683, |
| "grad_norm": 0.1739916056394577, |
| "learning_rate": 9.794721407624635e-06, |
| "loss": 0.40542545914649963, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.03195352214960058, |
| "grad_norm": 0.16906821727752686, |
| "learning_rate": 9.78494623655914e-06, |
| "loss": 0.37384384870529175, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.03292181069958848, |
| "grad_norm": 0.17224127054214478, |
| "learning_rate": 9.775171065493648e-06, |
| "loss": 0.3819228410720825, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.03389009924957637, |
| "grad_norm": 0.16344308853149414, |
| "learning_rate": 9.765395894428153e-06, |
| "loss": 0.41173726320266724, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.034858387799564274, |
| "grad_norm": 0.17100028693675995, |
| "learning_rate": 9.75562072336266e-06, |
| "loss": 0.39287662506103516, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.03582667634955217, |
| "grad_norm": 0.15641067922115326, |
| "learning_rate": 9.745845552297166e-06, |
| "loss": 0.3993951976299286, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.03679496489954006, |
| "grad_norm": 0.15000952780246735, |
| "learning_rate": 9.736070381231672e-06, |
| "loss": 0.37331604957580566, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.03776325344952796, |
| "grad_norm": 0.16917653381824493, |
| "learning_rate": 9.726295210166179e-06, |
| "loss": 0.45155206322669983, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.03873154199951585, |
| "grad_norm": 0.1585894376039505, |
| "learning_rate": 9.716520039100685e-06, |
| "loss": 0.3733840882778168, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03969983054950375, |
| "grad_norm": 0.14439353346824646, |
| "learning_rate": 9.706744868035192e-06, |
| "loss": 0.3886685073375702, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.04066811909949165, |
| "grad_norm": 0.14183790981769562, |
| "learning_rate": 9.696969696969698e-06, |
| "loss": 0.3821936547756195, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.04163640764947955, |
| "grad_norm": 0.16753076016902924, |
| "learning_rate": 9.687194525904205e-06, |
| "loss": 0.4522704780101776, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.04260469619946744, |
| "grad_norm": 0.1615847498178482, |
| "learning_rate": 9.67741935483871e-06, |
| "loss": 0.3997975289821625, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.04357298474945534, |
| "grad_norm": 0.1554916650056839, |
| "learning_rate": 9.667644183773218e-06, |
| "loss": 0.3691978454589844, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.04454127329944323, |
| "grad_norm": 0.16841153800487518, |
| "learning_rate": 9.657869012707723e-06, |
| "loss": 0.41484490036964417, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.045509561849431134, |
| "grad_norm": 0.1278965324163437, |
| "learning_rate": 9.64809384164223e-06, |
| "loss": 0.35556912422180176, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.04647785039941903, |
| "grad_norm": 0.14681562781333923, |
| "learning_rate": 9.638318670576736e-06, |
| "loss": 0.40769901871681213, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.04744613894940692, |
| "grad_norm": 0.14918000996112823, |
| "learning_rate": 9.628543499511243e-06, |
| "loss": 0.3773626387119293, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.04841442749939482, |
| "grad_norm": 0.13720250129699707, |
| "learning_rate": 9.618768328445749e-06, |
| "loss": 0.3663005232810974, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04938271604938271, |
| "grad_norm": 0.14886170625686646, |
| "learning_rate": 9.608993157380255e-06, |
| "loss": 0.4067220985889435, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.05035100459937061, |
| "grad_norm": 0.14274443686008453, |
| "learning_rate": 9.599217986314762e-06, |
| "loss": 0.3832167685031891, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.05131929314935851, |
| "grad_norm": 0.15536513924598694, |
| "learning_rate": 9.589442815249267e-06, |
| "loss": 0.4395195245742798, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.05228758169934641, |
| "grad_norm": 0.1393464207649231, |
| "learning_rate": 9.579667644183775e-06, |
| "loss": 0.3716701567173004, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0532558702493343, |
| "grad_norm": 0.1450338065624237, |
| "learning_rate": 9.56989247311828e-06, |
| "loss": 0.362257719039917, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0542241587993222, |
| "grad_norm": 0.14616632461547852, |
| "learning_rate": 9.560117302052788e-06, |
| "loss": 0.4077686369419098, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.05519244734931009, |
| "grad_norm": 0.1374523639678955, |
| "learning_rate": 9.550342130987293e-06, |
| "loss": 0.3961605429649353, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.056160735899297994, |
| "grad_norm": 0.1394190788269043, |
| "learning_rate": 9.5405669599218e-06, |
| "loss": 0.3706665635108948, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.05712902444928589, |
| "grad_norm": 0.11874961853027344, |
| "learning_rate": 9.530791788856306e-06, |
| "loss": 0.30775582790374756, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.05809731299927379, |
| "grad_norm": 0.1349610835313797, |
| "learning_rate": 9.521016617790813e-06, |
| "loss": 0.3784869313240051, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.05906560154926168, |
| "grad_norm": 0.13463151454925537, |
| "learning_rate": 9.511241446725319e-06, |
| "loss": 0.3922792077064514, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.06003389009924957, |
| "grad_norm": 0.12930694222450256, |
| "learning_rate": 9.501466275659824e-06, |
| "loss": 0.37777647376060486, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.06100217864923747, |
| "grad_norm": 0.13075940310955048, |
| "learning_rate": 9.491691104594332e-06, |
| "loss": 0.38546550273895264, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.06197046719922537, |
| "grad_norm": 0.13507235050201416, |
| "learning_rate": 9.481915933528837e-06, |
| "loss": 0.3709413707256317, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.06293875574921326, |
| "grad_norm": 0.1276707798242569, |
| "learning_rate": 9.472140762463345e-06, |
| "loss": 0.3352872431278229, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.06390704429920116, |
| "grad_norm": 0.11911962181329727, |
| "learning_rate": 9.46236559139785e-06, |
| "loss": 0.3386506140232086, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.06487533284918906, |
| "grad_norm": 0.12956929206848145, |
| "learning_rate": 9.452590420332357e-06, |
| "loss": 0.3754933774471283, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.06584362139917696, |
| "grad_norm": 0.13890986144542694, |
| "learning_rate": 9.442815249266863e-06, |
| "loss": 0.4128858745098114, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.06681190994916485, |
| "grad_norm": 0.11394089460372925, |
| "learning_rate": 9.43304007820137e-06, |
| "loss": 0.3089035153388977, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.06778019849915275, |
| "grad_norm": 0.1245599314570427, |
| "learning_rate": 9.423264907135876e-06, |
| "loss": 0.32959863543510437, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.06874848704914065, |
| "grad_norm": 0.14015918970108032, |
| "learning_rate": 9.413489736070383e-06, |
| "loss": 0.3884163498878479, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.06971677559912855, |
| "grad_norm": 0.1232111006975174, |
| "learning_rate": 9.403714565004889e-06, |
| "loss": 0.3284456431865692, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.07068506414911643, |
| "grad_norm": 0.13799598813056946, |
| "learning_rate": 9.393939393939396e-06, |
| "loss": 0.3356078565120697, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.07165335269910433, |
| "grad_norm": 0.1208195611834526, |
| "learning_rate": 9.384164222873902e-06, |
| "loss": 0.3578157126903534, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.07262164124909223, |
| "grad_norm": 0.11411258578300476, |
| "learning_rate": 9.374389051808407e-06, |
| "loss": 0.31658506393432617, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.07358992979908012, |
| "grad_norm": 0.11944623291492462, |
| "learning_rate": 9.364613880742913e-06, |
| "loss": 0.36029356718063354, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.07455821834906802, |
| "grad_norm": 0.12852734327316284, |
| "learning_rate": 9.35483870967742e-06, |
| "loss": 0.3340183198451996, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.07552650689905592, |
| "grad_norm": 0.11775587499141693, |
| "learning_rate": 9.345063538611926e-06, |
| "loss": 0.3083425760269165, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.07649479544904382, |
| "grad_norm": 0.13742083311080933, |
| "learning_rate": 9.335288367546433e-06, |
| "loss": 0.3448983132839203, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0774630839990317, |
| "grad_norm": 0.12395518273115158, |
| "learning_rate": 9.325513196480938e-06, |
| "loss": 0.39519673585891724, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0784313725490196, |
| "grad_norm": 0.1322673112154007, |
| "learning_rate": 9.315738025415446e-06, |
| "loss": 0.3364032506942749, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0793996610990075, |
| "grad_norm": 0.13429760932922363, |
| "learning_rate": 9.305962854349951e-06, |
| "loss": 0.3513767719268799, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.08036794964899541, |
| "grad_norm": 0.11903934180736542, |
| "learning_rate": 9.296187683284459e-06, |
| "loss": 0.3263617157936096, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.0813362381989833, |
| "grad_norm": 0.11960665881633759, |
| "learning_rate": 9.286412512218964e-06, |
| "loss": 0.3449134826660156, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0823045267489712, |
| "grad_norm": 0.12201559543609619, |
| "learning_rate": 9.27663734115347e-06, |
| "loss": 0.32846352458000183, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.0832728152989591, |
| "grad_norm": 0.12875522673130035, |
| "learning_rate": 9.266862170087977e-06, |
| "loss": 0.37101566791534424, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.08424110384894698, |
| "grad_norm": 0.11964312195777893, |
| "learning_rate": 9.257086999022483e-06, |
| "loss": 0.31812378764152527, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.08520939239893488, |
| "grad_norm": 0.15213125944137573, |
| "learning_rate": 9.24731182795699e-06, |
| "loss": 0.3813604414463043, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.08617768094892278, |
| "grad_norm": 0.1251516193151474, |
| "learning_rate": 9.237536656891495e-06, |
| "loss": 0.3027239441871643, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.08714596949891068, |
| "grad_norm": 0.11938060075044632, |
| "learning_rate": 9.227761485826003e-06, |
| "loss": 0.37067025899887085, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.08811425804889857, |
| "grad_norm": 0.13240274786949158, |
| "learning_rate": 9.217986314760508e-06, |
| "loss": 0.33599379658699036, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.08908254659888647, |
| "grad_norm": 0.13307581841945648, |
| "learning_rate": 9.208211143695016e-06, |
| "loss": 0.37259358167648315, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.09005083514887437, |
| "grad_norm": 0.12138372659683228, |
| "learning_rate": 9.198435972629521e-06, |
| "loss": 0.3587302565574646, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.09101912369886227, |
| "grad_norm": 0.1167801097035408, |
| "learning_rate": 9.188660801564028e-06, |
| "loss": 0.33125776052474976, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.09198741224885015, |
| "grad_norm": 0.1275295913219452, |
| "learning_rate": 9.178885630498534e-06, |
| "loss": 0.30975601077079773, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.09295570079883805, |
| "grad_norm": 0.13747365772724152, |
| "learning_rate": 9.16911045943304e-06, |
| "loss": 0.3368357717990875, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.09392398934882595, |
| "grad_norm": 0.12222792208194733, |
| "learning_rate": 9.159335288367547e-06, |
| "loss": 0.31197813153266907, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.09489227789881384, |
| "grad_norm": 0.1364426612854004, |
| "learning_rate": 9.149560117302052e-06, |
| "loss": 0.32897326350212097, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.09586056644880174, |
| "grad_norm": 0.14532364904880524, |
| "learning_rate": 9.13978494623656e-06, |
| "loss": 0.3669801652431488, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.09682885499878964, |
| "grad_norm": 0.13844749331474304, |
| "learning_rate": 9.130009775171065e-06, |
| "loss": 0.36969247460365295, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.09779714354877754, |
| "grad_norm": 0.12275300920009613, |
| "learning_rate": 9.120234604105573e-06, |
| "loss": 0.3295097053050995, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.09876543209876543, |
| "grad_norm": 0.13520191609859467, |
| "learning_rate": 9.110459433040078e-06, |
| "loss": 0.3737986087799072, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.09973372064875333, |
| "grad_norm": 0.13066619634628296, |
| "learning_rate": 9.100684261974585e-06, |
| "loss": 0.3033255934715271, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.10070200919874123, |
| "grad_norm": 0.11890687793493271, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 0.34380683302879333, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.10167029774872913, |
| "grad_norm": 0.13914473354816437, |
| "learning_rate": 9.081133919843598e-06, |
| "loss": 0.35355979204177856, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.10263858629871701, |
| "grad_norm": 0.120316281914711, |
| "learning_rate": 9.071358748778104e-06, |
| "loss": 0.3205440044403076, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.10360687484870491, |
| "grad_norm": 0.16602486371994019, |
| "learning_rate": 9.061583577712611e-06, |
| "loss": 0.3168993890285492, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.10457516339869281, |
| "grad_norm": 0.12726294994354248, |
| "learning_rate": 9.051808406647117e-06, |
| "loss": 0.3384619653224945, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.10554345194868071, |
| "grad_norm": 0.1322595477104187, |
| "learning_rate": 9.042033235581622e-06, |
| "loss": 0.3112761974334717, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.1065117404986686, |
| "grad_norm": 0.1453908234834671, |
| "learning_rate": 9.03225806451613e-06, |
| "loss": 0.2989104390144348, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1074800290486565, |
| "grad_norm": 0.12833762168884277, |
| "learning_rate": 9.022482893450635e-06, |
| "loss": 0.3298214077949524, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1084483175986444, |
| "grad_norm": 0.12525658309459686, |
| "learning_rate": 9.012707722385142e-06, |
| "loss": 0.32383644580841064, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.10941660614863229, |
| "grad_norm": 0.1307019740343094, |
| "learning_rate": 9.002932551319648e-06, |
| "loss": 0.3317619264125824, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.11038489469862019, |
| "grad_norm": 0.13214020431041718, |
| "learning_rate": 8.993157380254155e-06, |
| "loss": 0.2884703576564789, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.11135318324860809, |
| "grad_norm": 0.12866291403770447, |
| "learning_rate": 8.983382209188661e-06, |
| "loss": 0.351254940032959, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.11232147179859599, |
| "grad_norm": 0.13350999355316162, |
| "learning_rate": 8.973607038123168e-06, |
| "loss": 0.30035918951034546, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.11328976034858387, |
| "grad_norm": 0.11388203501701355, |
| "learning_rate": 8.963831867057674e-06, |
| "loss": 0.2958531081676483, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.11425804889857177, |
| "grad_norm": 0.14289627969264984, |
| "learning_rate": 8.954056695992181e-06, |
| "loss": 0.32053035497665405, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.11522633744855967, |
| "grad_norm": 0.13894328474998474, |
| "learning_rate": 8.944281524926687e-06, |
| "loss": 0.30120429396629333, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.11619462599854757, |
| "grad_norm": 0.12141028046607971, |
| "learning_rate": 8.934506353861194e-06, |
| "loss": 0.31479382514953613, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.11716291454853546, |
| "grad_norm": 0.12900526821613312, |
| "learning_rate": 8.9247311827957e-06, |
| "loss": 0.36297452449798584, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.11813120309852336, |
| "grad_norm": 0.12149893492460251, |
| "learning_rate": 8.914956011730205e-06, |
| "loss": 0.2906142473220825, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.11909949164851126, |
| "grad_norm": 0.16426807641983032, |
| "learning_rate": 8.905180840664712e-06, |
| "loss": 0.31878572702407837, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.12006778019849915, |
| "grad_norm": 0.11868342757225037, |
| "learning_rate": 8.895405669599218e-06, |
| "loss": 0.3231990933418274, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.12103606874848705, |
| "grad_norm": 0.13657772541046143, |
| "learning_rate": 8.885630498533725e-06, |
| "loss": 0.29259440302848816, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.12200435729847495, |
| "grad_norm": 0.1253119558095932, |
| "learning_rate": 8.87585532746823e-06, |
| "loss": 0.2838287353515625, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.12297264584846285, |
| "grad_norm": 0.1284995973110199, |
| "learning_rate": 8.866080156402738e-06, |
| "loss": 0.3066769242286682, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.12394093439845073, |
| "grad_norm": 0.11573974788188934, |
| "learning_rate": 8.856304985337244e-06, |
| "loss": 0.3010478615760803, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.12490922294843863, |
| "grad_norm": 0.12995308637619019, |
| "learning_rate": 8.846529814271751e-06, |
| "loss": 0.3350738286972046, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.12587751149842652, |
| "grad_norm": 0.13642707467079163, |
| "learning_rate": 8.836754643206256e-06, |
| "loss": 0.31808528304100037, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.12684580004841442, |
| "grad_norm": 0.1310724914073944, |
| "learning_rate": 8.826979472140764e-06, |
| "loss": 0.3332287669181824, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.12781408859840232, |
| "grad_norm": 0.12466035038232803, |
| "learning_rate": 8.81720430107527e-06, |
| "loss": 0.3196363151073456, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.12878237714839022, |
| "grad_norm": 0.13588100671768188, |
| "learning_rate": 8.807429130009777e-06, |
| "loss": 0.31376099586486816, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.12975066569837812, |
| "grad_norm": 0.13263723254203796, |
| "learning_rate": 8.797653958944282e-06, |
| "loss": 0.3171752393245697, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.13071895424836602, |
| "grad_norm": 0.13374009728431702, |
| "learning_rate": 8.787878787878788e-06, |
| "loss": 0.2953280210494995, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.13168724279835392, |
| "grad_norm": 0.13743482530117035, |
| "learning_rate": 8.778103616813295e-06, |
| "loss": 0.3488181531429291, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.1326555313483418, |
| "grad_norm": 0.13955242931842804, |
| "learning_rate": 8.7683284457478e-06, |
| "loss": 0.331007182598114, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.1336238198983297, |
| "grad_norm": 0.14186261594295502, |
| "learning_rate": 8.758553274682308e-06, |
| "loss": 0.36398252844810486, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.1345921084483176, |
| "grad_norm": 0.1471295952796936, |
| "learning_rate": 8.748778103616813e-06, |
| "loss": 0.336472749710083, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.1355603969983055, |
| "grad_norm": 0.11482029408216476, |
| "learning_rate": 8.73900293255132e-06, |
| "loss": 0.3006575405597687, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1365286855482934, |
| "grad_norm": 0.13504621386528015, |
| "learning_rate": 8.729227761485826e-06, |
| "loss": 0.3569592535495758, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.1374969740982813, |
| "grad_norm": 0.15188293159008026, |
| "learning_rate": 8.719452590420334e-06, |
| "loss": 0.3333485424518585, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.1384652626482692, |
| "grad_norm": 0.13065899908542633, |
| "learning_rate": 8.70967741935484e-06, |
| "loss": 0.3319074511528015, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.1394335511982571, |
| "grad_norm": 0.1272367686033249, |
| "learning_rate": 8.699902248289346e-06, |
| "loss": 0.3033870756626129, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.14040183974824497, |
| "grad_norm": 0.1433865875005722, |
| "learning_rate": 8.690127077223852e-06, |
| "loss": 0.30503055453300476, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.14137012829823287, |
| "grad_norm": 0.12748195230960846, |
| "learning_rate": 8.68035190615836e-06, |
| "loss": 0.3041837811470032, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.14233841684822077, |
| "grad_norm": 0.13291986286640167, |
| "learning_rate": 8.670576735092865e-06, |
| "loss": 0.3430430591106415, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.14330670539820867, |
| "grad_norm": 0.12126651406288147, |
| "learning_rate": 8.66080156402737e-06, |
| "loss": 0.33859869837760925, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.14427499394819657, |
| "grad_norm": 0.15293890237808228, |
| "learning_rate": 8.651026392961878e-06, |
| "loss": 0.33767563104629517, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.14524328249818447, |
| "grad_norm": 0.13613349199295044, |
| "learning_rate": 8.641251221896383e-06, |
| "loss": 0.29907119274139404, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.14621157104817237, |
| "grad_norm": 0.12802888453006744, |
| "learning_rate": 8.63147605083089e-06, |
| "loss": 0.3123582601547241, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.14717985959816024, |
| "grad_norm": 0.16021937131881714, |
| "learning_rate": 8.621700879765396e-06, |
| "loss": 0.37841248512268066, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.14814814814814814, |
| "grad_norm": 0.12790994346141815, |
| "learning_rate": 8.611925708699903e-06, |
| "loss": 0.27514874935150146, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.14911643669813604, |
| "grad_norm": 0.13345623016357422, |
| "learning_rate": 8.602150537634409e-06, |
| "loss": 0.3330199718475342, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.15008472524812394, |
| "grad_norm": 0.13262543082237244, |
| "learning_rate": 8.592375366568916e-06, |
| "loss": 0.2858338952064514, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.15105301379811184, |
| "grad_norm": 0.16613167524337769, |
| "learning_rate": 8.582600195503422e-06, |
| "loss": 0.3251619040966034, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.15202130234809974, |
| "grad_norm": 0.14091891050338745, |
| "learning_rate": 8.57282502443793e-06, |
| "loss": 0.3126198649406433, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.15298959089808764, |
| "grad_norm": 0.12945963442325592, |
| "learning_rate": 8.563049853372435e-06, |
| "loss": 0.3045946955680847, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.1539578794480755, |
| "grad_norm": 0.13346253335475922, |
| "learning_rate": 8.553274682306942e-06, |
| "loss": 0.3187895119190216, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.1549261679980634, |
| "grad_norm": 0.1281236708164215, |
| "learning_rate": 8.543499511241448e-06, |
| "loss": 0.2990340292453766, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1558944565480513, |
| "grad_norm": 0.13074296712875366, |
| "learning_rate": 8.533724340175953e-06, |
| "loss": 0.3452467620372772, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.1568627450980392, |
| "grad_norm": 0.13953045010566711, |
| "learning_rate": 8.52394916911046e-06, |
| "loss": 0.32909831404685974, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.1578310336480271, |
| "grad_norm": 0.14059635996818542, |
| "learning_rate": 8.514173998044966e-06, |
| "loss": 0.27773308753967285, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.158799322198015, |
| "grad_norm": 0.1397535353899002, |
| "learning_rate": 8.504398826979473e-06, |
| "loss": 0.3393952250480652, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.15976761074800291, |
| "grad_norm": 0.13574957847595215, |
| "learning_rate": 8.494623655913979e-06, |
| "loss": 0.32174286246299744, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.16073589929799081, |
| "grad_norm": 0.13975924253463745, |
| "learning_rate": 8.484848484848486e-06, |
| "loss": 0.30419760942459106, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.1617041878479787, |
| "grad_norm": 0.1427648961544037, |
| "learning_rate": 8.475073313782992e-06, |
| "loss": 0.3033597469329834, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.1626724763979666, |
| "grad_norm": 0.14715762436389923, |
| "learning_rate": 8.465298142717499e-06, |
| "loss": 0.32338106632232666, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.1636407649479545, |
| "grad_norm": 0.1452789306640625, |
| "learning_rate": 8.455522971652005e-06, |
| "loss": 0.345528781414032, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.1646090534979424, |
| "grad_norm": 0.13867947459220886, |
| "learning_rate": 8.445747800586512e-06, |
| "loss": 0.32734549045562744, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1655773420479303, |
| "grad_norm": 0.137126162648201, |
| "learning_rate": 8.435972629521018e-06, |
| "loss": 0.32425397634506226, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.1665456305979182, |
| "grad_norm": 0.15507617592811584, |
| "learning_rate": 8.426197458455525e-06, |
| "loss": 0.3226757049560547, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.1675139191479061, |
| "grad_norm": 0.12315394729375839, |
| "learning_rate": 8.41642228739003e-06, |
| "loss": 0.3322482407093048, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.16848220769789396, |
| "grad_norm": 0.14539486169815063, |
| "learning_rate": 8.406647116324536e-06, |
| "loss": 0.3431966304779053, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.16945049624788186, |
| "grad_norm": 0.1458021104335785, |
| "learning_rate": 8.396871945259043e-06, |
| "loss": 0.3117983937263489, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.17041878479786976, |
| "grad_norm": 0.126032292842865, |
| "learning_rate": 8.387096774193549e-06, |
| "loss": 0.304436057806015, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.17138707334785766, |
| "grad_norm": 0.15044239163398743, |
| "learning_rate": 8.377321603128056e-06, |
| "loss": 0.3327201306819916, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.17235536189784556, |
| "grad_norm": 0.12567083537578583, |
| "learning_rate": 8.367546432062562e-06, |
| "loss": 0.32488536834716797, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.17332365044783346, |
| "grad_norm": 0.1399868130683899, |
| "learning_rate": 8.357771260997069e-06, |
| "loss": 0.28393518924713135, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.17429193899782136, |
| "grad_norm": 0.12733778357505798, |
| "learning_rate": 8.347996089931575e-06, |
| "loss": 0.28177058696746826, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.17526022754780926, |
| "grad_norm": 0.15660876035690308, |
| "learning_rate": 8.338220918866082e-06, |
| "loss": 0.31686797738075256, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.17622851609779713, |
| "grad_norm": 0.1510598510503769, |
| "learning_rate": 8.328445747800587e-06, |
| "loss": 0.2910916209220886, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.17719680464778503, |
| "grad_norm": 0.13863040506839752, |
| "learning_rate": 8.318670576735095e-06, |
| "loss": 0.3391288220882416, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.17816509319777293, |
| "grad_norm": 0.1262752115726471, |
| "learning_rate": 8.3088954056696e-06, |
| "loss": 0.26286113262176514, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.17913338174776083, |
| "grad_norm": 0.1450256109237671, |
| "learning_rate": 8.299120234604106e-06, |
| "loss": 0.33761149644851685, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.18010167029774873, |
| "grad_norm": 0.145137757062912, |
| "learning_rate": 8.289345063538613e-06, |
| "loss": 0.299782931804657, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.18106995884773663, |
| "grad_norm": 0.14223727583885193, |
| "learning_rate": 8.279569892473119e-06, |
| "loss": 0.28673055768013, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.18203824739772453, |
| "grad_norm": 0.14671868085861206, |
| "learning_rate": 8.269794721407626e-06, |
| "loss": 0.3334650993347168, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.1830065359477124, |
| "grad_norm": 0.14561879634857178, |
| "learning_rate": 8.260019550342132e-06, |
| "loss": 0.3131367564201355, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.1839748244977003, |
| "grad_norm": 0.13915206491947174, |
| "learning_rate": 8.250244379276639e-06, |
| "loss": 0.285634845495224, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1849431130476882, |
| "grad_norm": 0.13024187088012695, |
| "learning_rate": 8.240469208211144e-06, |
| "loss": 0.3532557487487793, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.1859114015976761, |
| "grad_norm": 0.1433749794960022, |
| "learning_rate": 8.230694037145652e-06, |
| "loss": 0.3235865831375122, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.186879690147664, |
| "grad_norm": 0.1517333984375, |
| "learning_rate": 8.220918866080157e-06, |
| "loss": 0.2964053153991699, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.1878479786976519, |
| "grad_norm": 0.139493927359581, |
| "learning_rate": 8.211143695014665e-06, |
| "loss": 0.2810608148574829, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.1888162672476398, |
| "grad_norm": 0.12849940359592438, |
| "learning_rate": 8.20136852394917e-06, |
| "loss": 0.2949499189853668, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.18978455579762768, |
| "grad_norm": 0.14101386070251465, |
| "learning_rate": 8.191593352883677e-06, |
| "loss": 0.28787606954574585, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.19075284434761558, |
| "grad_norm": 0.13321508467197418, |
| "learning_rate": 8.181818181818183e-06, |
| "loss": 0.31819286942481995, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.19172113289760348, |
| "grad_norm": 0.1368619203567505, |
| "learning_rate": 8.172043010752689e-06, |
| "loss": 0.2770519256591797, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.19268942144759138, |
| "grad_norm": 0.14590312540531158, |
| "learning_rate": 8.162267839687196e-06, |
| "loss": 0.331787645816803, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.19365770999757928, |
| "grad_norm": 0.14525046944618225, |
| "learning_rate": 8.152492668621701e-06, |
| "loss": 0.3185243308544159, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.19462599854756718, |
| "grad_norm": 0.14318214356899261, |
| "learning_rate": 8.142717497556209e-06, |
| "loss": 0.2779344320297241, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.19559428709755508, |
| "grad_norm": 0.13709904253482819, |
| "learning_rate": 8.132942326490714e-06, |
| "loss": 0.2518289387226105, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.19656257564754298, |
| "grad_norm": 0.1377800703048706, |
| "learning_rate": 8.12316715542522e-06, |
| "loss": 0.2550484836101532, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.19753086419753085, |
| "grad_norm": 0.15116380155086517, |
| "learning_rate": 8.113391984359727e-06, |
| "loss": 0.3201026916503906, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.19849915274751875, |
| "grad_norm": 0.13895870745182037, |
| "learning_rate": 8.103616813294233e-06, |
| "loss": 0.314879834651947, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.19946744129750665, |
| "grad_norm": 0.1607581377029419, |
| "learning_rate": 8.09384164222874e-06, |
| "loss": 0.35479456186294556, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.20043572984749455, |
| "grad_norm": 0.14690084755420685, |
| "learning_rate": 8.084066471163246e-06, |
| "loss": 0.35220852494239807, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.20140401839748245, |
| "grad_norm": 0.14206227660179138, |
| "learning_rate": 8.074291300097751e-06, |
| "loss": 0.30387187004089355, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.20237230694747035, |
| "grad_norm": 0.15204882621765137, |
| "learning_rate": 8.064516129032258e-06, |
| "loss": 0.2756717801094055, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.20334059549745825, |
| "grad_norm": 0.1398657113313675, |
| "learning_rate": 8.054740957966764e-06, |
| "loss": 0.2777653932571411, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.20430888404744613, |
| "grad_norm": 0.13735412061214447, |
| "learning_rate": 8.044965786901271e-06, |
| "loss": 0.28360527753829956, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.20527717259743403, |
| "grad_norm": 0.15420980751514435, |
| "learning_rate": 8.035190615835777e-06, |
| "loss": 0.32784318923950195, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.20624546114742193, |
| "grad_norm": 0.14892657101154327, |
| "learning_rate": 8.025415444770284e-06, |
| "loss": 0.33523041009902954, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.20721374969740983, |
| "grad_norm": 0.13076002895832062, |
| "learning_rate": 8.01564027370479e-06, |
| "loss": 0.2862524092197418, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.20818203824739773, |
| "grad_norm": 0.1415518820285797, |
| "learning_rate": 8.005865102639297e-06, |
| "loss": 0.33531394600868225, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.20915032679738563, |
| "grad_norm": 0.1702524572610855, |
| "learning_rate": 7.996089931573803e-06, |
| "loss": 0.32485491037368774, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.21011861534737353, |
| "grad_norm": 0.1344050168991089, |
| "learning_rate": 7.98631476050831e-06, |
| "loss": 0.3258602023124695, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.21108690389736143, |
| "grad_norm": 0.1570902317762375, |
| "learning_rate": 7.976539589442815e-06, |
| "loss": 0.32586684823036194, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.2120551924473493, |
| "grad_norm": 0.15897458791732788, |
| "learning_rate": 7.966764418377323e-06, |
| "loss": 0.2798767685890198, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.2130234809973372, |
| "grad_norm": 0.15497955679893494, |
| "learning_rate": 7.956989247311828e-06, |
| "loss": 0.3338768482208252, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2139917695473251, |
| "grad_norm": 0.14507335424423218, |
| "learning_rate": 7.947214076246334e-06, |
| "loss": 0.2613910138607025, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.214960058097313, |
| "grad_norm": 0.1506527066230774, |
| "learning_rate": 7.937438905180841e-06, |
| "loss": 0.2877991795539856, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.2159283466473009, |
| "grad_norm": 0.14218902587890625, |
| "learning_rate": 7.927663734115347e-06, |
| "loss": 0.28079351782798767, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.2168966351972888, |
| "grad_norm": 0.14527355134487152, |
| "learning_rate": 7.917888563049854e-06, |
| "loss": 0.29178351163864136, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.2178649237472767, |
| "grad_norm": 0.1565907746553421, |
| "learning_rate": 7.90811339198436e-06, |
| "loss": 0.29092147946357727, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.21883321229726457, |
| "grad_norm": 0.16128268837928772, |
| "learning_rate": 7.898338220918867e-06, |
| "loss": 0.30649298429489136, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.21980150084725247, |
| "grad_norm": 0.13981635868549347, |
| "learning_rate": 7.888563049853372e-06, |
| "loss": 0.2914465069770813, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.22076978939724037, |
| "grad_norm": 0.13276293873786926, |
| "learning_rate": 7.87878787878788e-06, |
| "loss": 0.29372796416282654, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.22173807794722827, |
| "grad_norm": 0.14917345345020294, |
| "learning_rate": 7.869012707722385e-06, |
| "loss": 0.30806928873062134, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.22270636649721617, |
| "grad_norm": 0.15436047315597534, |
| "learning_rate": 7.859237536656893e-06, |
| "loss": 0.32170775532722473, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.22367465504720407, |
| "grad_norm": 0.14901861548423767, |
| "learning_rate": 7.849462365591398e-06, |
| "loss": 0.2876305878162384, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.22464294359719197, |
| "grad_norm": 0.13269929587841034, |
| "learning_rate": 7.839687194525904e-06, |
| "loss": 0.34557828307151794, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.22561123214717985, |
| "grad_norm": 0.14736007153987885, |
| "learning_rate": 7.829912023460411e-06, |
| "loss": 0.3261890411376953, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.22657952069716775, |
| "grad_norm": 0.157369464635849, |
| "learning_rate": 7.820136852394917e-06, |
| "loss": 0.33243319392204285, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.22754780924715565, |
| "grad_norm": 0.15422044694423676, |
| "learning_rate": 7.810361681329424e-06, |
| "loss": 0.32125651836395264, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.22851609779714355, |
| "grad_norm": 0.15290172398090363, |
| "learning_rate": 7.80058651026393e-06, |
| "loss": 0.3197525143623352, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.22948438634713145, |
| "grad_norm": 0.13229885697364807, |
| "learning_rate": 7.790811339198437e-06, |
| "loss": 0.3051709532737732, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.23045267489711935, |
| "grad_norm": 0.15573133528232574, |
| "learning_rate": 7.781036168132942e-06, |
| "loss": 0.3190789520740509, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.23142096344710725, |
| "grad_norm": 0.1598438322544098, |
| "learning_rate": 7.77126099706745e-06, |
| "loss": 0.32250896096229553, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.23238925199709515, |
| "grad_norm": 0.13183802366256714, |
| "learning_rate": 7.761485826001955e-06, |
| "loss": 0.31600791215896606, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.23335754054708302, |
| "grad_norm": 0.14657722413539886, |
| "learning_rate": 7.751710654936462e-06, |
| "loss": 0.2864221930503845, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.23432582909707092, |
| "grad_norm": 0.1653253585100174, |
| "learning_rate": 7.741935483870968e-06, |
| "loss": 0.30967211723327637, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 0.15613378584384918, |
| "learning_rate": 7.732160312805475e-06, |
| "loss": 0.30234426259994507, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.23626240619704672, |
| "grad_norm": 0.1411314755678177, |
| "learning_rate": 7.722385141739981e-06, |
| "loss": 0.28815943002700806, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.23723069474703462, |
| "grad_norm": 0.14803080260753632, |
| "learning_rate": 7.712609970674486e-06, |
| "loss": 0.2975384294986725, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.23819898329702252, |
| "grad_norm": 0.1584216207265854, |
| "learning_rate": 7.702834799608994e-06, |
| "loss": 0.2974746525287628, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.23916727184701042, |
| "grad_norm": 0.16107121109962463, |
| "learning_rate": 7.6930596285435e-06, |
| "loss": 0.31581875681877136, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.2401355603969983, |
| "grad_norm": 0.17307540774345398, |
| "learning_rate": 7.683284457478007e-06, |
| "loss": 0.29687726497650146, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.2411038489469862, |
| "grad_norm": 0.16493360698223114, |
| "learning_rate": 7.673509286412512e-06, |
| "loss": 0.35423439741134644, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.2420721374969741, |
| "grad_norm": 0.14273418486118317, |
| "learning_rate": 7.66373411534702e-06, |
| "loss": 0.29487237334251404, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.243040426046962, |
| "grad_norm": 0.164155974984169, |
| "learning_rate": 7.653958944281525e-06, |
| "loss": 0.32345789670944214, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.2440087145969499, |
| "grad_norm": 0.15766294300556183, |
| "learning_rate": 7.644183773216032e-06, |
| "loss": 0.319865882396698, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.2449770031469378, |
| "grad_norm": 0.15514512360095978, |
| "learning_rate": 7.634408602150538e-06, |
| "loss": 0.3162165880203247, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.2459452916969257, |
| "grad_norm": 0.15435358881950378, |
| "learning_rate": 7.624633431085044e-06, |
| "loss": 0.28262361884117126, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.24691358024691357, |
| "grad_norm": 0.14835764467716217, |
| "learning_rate": 7.614858260019551e-06, |
| "loss": 0.28154870867729187, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.24788186879690147, |
| "grad_norm": 0.15369164943695068, |
| "learning_rate": 7.605083088954057e-06, |
| "loss": 0.3451993465423584, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.24885015734688937, |
| "grad_norm": 0.13362520933151245, |
| "learning_rate": 7.5953079178885636e-06, |
| "loss": 0.2882372736930847, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.24981844589687727, |
| "grad_norm": 0.14538030326366425, |
| "learning_rate": 7.58553274682307e-06, |
| "loss": 0.30620044469833374, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.25078673444686517, |
| "grad_norm": 0.1679297834634781, |
| "learning_rate": 7.5757575757575764e-06, |
| "loss": 0.3071752190589905, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.25175502299685304, |
| "grad_norm": 0.1505117118358612, |
| "learning_rate": 7.565982404692083e-06, |
| "loss": 0.2810661792755127, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.25272331154684097, |
| "grad_norm": 0.13865773379802704, |
| "learning_rate": 7.556207233626589e-06, |
| "loss": 0.2779511511325836, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.25369160009682884, |
| "grad_norm": 0.14810754358768463, |
| "learning_rate": 7.546432062561096e-06, |
| "loss": 0.3234580457210541, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.25465988864681677, |
| "grad_norm": 0.15836334228515625, |
| "learning_rate": 7.536656891495602e-06, |
| "loss": 0.3174368739128113, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.25562817719680464, |
| "grad_norm": 0.15845665335655212, |
| "learning_rate": 7.526881720430108e-06, |
| "loss": 0.292019784450531, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.25659646574679257, |
| "grad_norm": 0.1532326340675354, |
| "learning_rate": 7.517106549364614e-06, |
| "loss": 0.3122391700744629, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.25756475429678044, |
| "grad_norm": 0.16675175726413727, |
| "learning_rate": 7.507331378299121e-06, |
| "loss": 0.33715298771858215, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.2585330428467683, |
| "grad_norm": 0.1525373011827469, |
| "learning_rate": 7.497556207233627e-06, |
| "loss": 0.32337823510169983, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.25950133139675624, |
| "grad_norm": 0.15557681024074554, |
| "learning_rate": 7.4877810361681334e-06, |
| "loss": 0.33496809005737305, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.2604696199467441, |
| "grad_norm": 0.1552857607603073, |
| "learning_rate": 7.47800586510264e-06, |
| "loss": 0.29575905203819275, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.26143790849673204, |
| "grad_norm": 0.17039579153060913, |
| "learning_rate": 7.468230694037146e-06, |
| "loss": 0.3094739317893982, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2624061970467199, |
| "grad_norm": 0.15926915407180786, |
| "learning_rate": 7.458455522971653e-06, |
| "loss": 0.30116045475006104, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.26337448559670784, |
| "grad_norm": 0.15641555190086365, |
| "learning_rate": 7.448680351906159e-06, |
| "loss": 0.33569908142089844, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.2643427741466957, |
| "grad_norm": 0.14819961786270142, |
| "learning_rate": 7.438905180840666e-06, |
| "loss": 0.3124736249446869, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.2653110626966836, |
| "grad_norm": 0.15690119564533234, |
| "learning_rate": 7.429130009775172e-06, |
| "loss": 0.2757438123226166, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.2662793512466715, |
| "grad_norm": 0.1666852980852127, |
| "learning_rate": 7.4193548387096784e-06, |
| "loss": 0.3246593177318573, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.2672476397966594, |
| "grad_norm": 0.16269199550151825, |
| "learning_rate": 7.409579667644185e-06, |
| "loss": 0.2908874750137329, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.2682159283466473, |
| "grad_norm": 0.1640820950269699, |
| "learning_rate": 7.3998044965786904e-06, |
| "loss": 0.3241088390350342, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.2691842168966352, |
| "grad_norm": 0.15672433376312256, |
| "learning_rate": 7.390029325513197e-06, |
| "loss": 0.2804832458496094, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.2701525054466231, |
| "grad_norm": 0.14417491853237152, |
| "learning_rate": 7.380254154447703e-06, |
| "loss": 0.29370468854904175, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.271120793996611, |
| "grad_norm": 0.14853185415267944, |
| "learning_rate": 7.37047898338221e-06, |
| "loss": 0.278653621673584, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.27208908254659886, |
| "grad_norm": 0.1435244232416153, |
| "learning_rate": 7.360703812316716e-06, |
| "loss": 0.2943509817123413, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.2730573710965868, |
| "grad_norm": 0.16490313410758972, |
| "learning_rate": 7.350928641251223e-06, |
| "loss": 0.32479339838027954, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.27402565964657466, |
| "grad_norm": 0.14916065335273743, |
| "learning_rate": 7.341153470185729e-06, |
| "loss": 0.3356713652610779, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.2749939481965626, |
| "grad_norm": 0.158106729388237, |
| "learning_rate": 7.3313782991202354e-06, |
| "loss": 0.3492435812950134, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.27596223674655046, |
| "grad_norm": 0.15584231913089752, |
| "learning_rate": 7.321603128054742e-06, |
| "loss": 0.30297964811325073, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.2769305252965384, |
| "grad_norm": 0.15248778462409973, |
| "learning_rate": 7.311827956989248e-06, |
| "loss": 0.28723299503326416, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.27789881384652626, |
| "grad_norm": 0.14938265085220337, |
| "learning_rate": 7.302052785923755e-06, |
| "loss": 0.2916105389595032, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.2788671023965142, |
| "grad_norm": 0.15402287244796753, |
| "learning_rate": 7.292277614858261e-06, |
| "loss": 0.29120731353759766, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.27983539094650206, |
| "grad_norm": 0.15380002558231354, |
| "learning_rate": 7.282502443792767e-06, |
| "loss": 0.34908658266067505, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.28080367949648993, |
| "grad_norm": 0.15176504850387573, |
| "learning_rate": 7.272727272727273e-06, |
| "loss": 0.346072793006897, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.28177196804647786, |
| "grad_norm": 0.1672578603029251, |
| "learning_rate": 7.26295210166178e-06, |
| "loss": 0.31465622782707214, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.28274025659646573, |
| "grad_norm": 0.14658679068088531, |
| "learning_rate": 7.253176930596286e-06, |
| "loss": 0.28865766525268555, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.28370854514645366, |
| "grad_norm": 0.1515662670135498, |
| "learning_rate": 7.2434017595307925e-06, |
| "loss": 0.30215001106262207, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.28467683369644153, |
| "grad_norm": 0.18654093146324158, |
| "learning_rate": 7.233626588465299e-06, |
| "loss": 0.3126724064350128, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.28564512224642946, |
| "grad_norm": 0.1485200673341751, |
| "learning_rate": 7.223851417399805e-06, |
| "loss": 0.2654643654823303, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.28661341079641733, |
| "grad_norm": 0.1476060450077057, |
| "learning_rate": 7.214076246334312e-06, |
| "loss": 0.2976668179035187, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.2875816993464052, |
| "grad_norm": 0.14893072843551636, |
| "learning_rate": 7.204301075268818e-06, |
| "loss": 0.3222036361694336, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.28854998789639313, |
| "grad_norm": 0.15406127274036407, |
| "learning_rate": 7.194525904203325e-06, |
| "loss": 0.33120018243789673, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.289518276446381, |
| "grad_norm": 0.17568016052246094, |
| "learning_rate": 7.184750733137831e-06, |
| "loss": 0.3500976860523224, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.29048656499636893, |
| "grad_norm": 0.15244793891906738, |
| "learning_rate": 7.1749755620723375e-06, |
| "loss": 0.29483211040496826, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.2914548535463568, |
| "grad_norm": 0.14039957523345947, |
| "learning_rate": 7.165200391006844e-06, |
| "loss": 0.30703267455101013, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.29242314209634473, |
| "grad_norm": 0.16928645968437195, |
| "learning_rate": 7.1554252199413495e-06, |
| "loss": 0.285969078540802, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.2933914306463326, |
| "grad_norm": 0.15592513978481293, |
| "learning_rate": 7.145650048875856e-06, |
| "loss": 0.2873114347457886, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.2943597191963205, |
| "grad_norm": 0.1448688954114914, |
| "learning_rate": 7.135874877810362e-06, |
| "loss": 0.256163626909256, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.2953280077463084, |
| "grad_norm": 0.1630263477563858, |
| "learning_rate": 7.126099706744869e-06, |
| "loss": 0.2792564630508423, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "grad_norm": 0.17319388687610626, |
| "learning_rate": 7.116324535679375e-06, |
| "loss": 0.2806742489337921, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.2972645848462842, |
| "grad_norm": 0.15814098715782166, |
| "learning_rate": 7.106549364613882e-06, |
| "loss": 0.2914005517959595, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.2982328733962721, |
| "grad_norm": 0.16322992742061615, |
| "learning_rate": 7.096774193548388e-06, |
| "loss": 0.293082594871521, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.29920116194626, |
| "grad_norm": 0.1633518785238266, |
| "learning_rate": 7.0869990224828945e-06, |
| "loss": 0.2866649925708771, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.3001694504962479, |
| "grad_norm": 0.16669867932796478, |
| "learning_rate": 7.077223851417401e-06, |
| "loss": 0.28914347290992737, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.30113773904623575, |
| "grad_norm": 0.15272612869739532, |
| "learning_rate": 7.067448680351907e-06, |
| "loss": 0.3084270656108856, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.3021060275962237, |
| "grad_norm": 0.15269719064235687, |
| "learning_rate": 7.057673509286414e-06, |
| "loss": 0.30022960901260376, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.30307431614621155, |
| "grad_norm": 0.17254814505577087, |
| "learning_rate": 7.04789833822092e-06, |
| "loss": 0.32925137877464294, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.3040426046961995, |
| "grad_norm": 0.1512719690799713, |
| "learning_rate": 7.038123167155427e-06, |
| "loss": 0.302681028842926, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.30501089324618735, |
| "grad_norm": 0.14636491239070892, |
| "learning_rate": 7.028347996089932e-06, |
| "loss": 0.28604596853256226, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.3059791817961753, |
| "grad_norm": 0.17897070944309235, |
| "learning_rate": 7.018572825024439e-06, |
| "loss": 0.3204849362373352, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.30694747034616315, |
| "grad_norm": 0.1784772127866745, |
| "learning_rate": 7.008797653958945e-06, |
| "loss": 0.26697760820388794, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.307915758896151, |
| "grad_norm": 0.15881852805614471, |
| "learning_rate": 6.9990224828934515e-06, |
| "loss": 0.2558300495147705, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.30888404744613895, |
| "grad_norm": 0.15870684385299683, |
| "learning_rate": 6.989247311827958e-06, |
| "loss": 0.3025914132595062, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.3098523359961268, |
| "grad_norm": 0.1527319699525833, |
| "learning_rate": 6.979472140762464e-06, |
| "loss": 0.3081514835357666, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.31082062454611475, |
| "grad_norm": 0.1624557226896286, |
| "learning_rate": 6.969696969696971e-06, |
| "loss": 0.30399930477142334, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.3117889130961026, |
| "grad_norm": 0.1645076870918274, |
| "learning_rate": 6.959921798631477e-06, |
| "loss": 0.294676810503006, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.31275720164609055, |
| "grad_norm": 0.14686374366283417, |
| "learning_rate": 6.950146627565984e-06, |
| "loss": 0.2830040752887726, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.3137254901960784, |
| "grad_norm": 0.15746936202049255, |
| "learning_rate": 6.94037145650049e-06, |
| "loss": 0.3081029951572418, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.31469377874606635, |
| "grad_norm": 0.14588280022144318, |
| "learning_rate": 6.9305962854349965e-06, |
| "loss": 0.2718695104122162, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.3156620672960542, |
| "grad_norm": 0.16646429896354675, |
| "learning_rate": 6.920821114369503e-06, |
| "loss": 0.31487900018692017, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.3166303558460421, |
| "grad_norm": 0.1697472333908081, |
| "learning_rate": 6.911045943304009e-06, |
| "loss": 0.29448121786117554, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.31759864439603, |
| "grad_norm": 0.16235950589179993, |
| "learning_rate": 6.901270772238515e-06, |
| "loss": 0.2658935487270355, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.3185669329460179, |
| "grad_norm": 0.167832151055336, |
| "learning_rate": 6.891495601173021e-06, |
| "loss": 0.28193116188049316, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.31953522149600583, |
| "grad_norm": 0.15145322680473328, |
| "learning_rate": 6.881720430107528e-06, |
| "loss": 0.2826996445655823, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3205035100459937, |
| "grad_norm": 0.19785556197166443, |
| "learning_rate": 6.871945259042033e-06, |
| "loss": 0.31135326623916626, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.32147179859598163, |
| "grad_norm": 0.1549469530582428, |
| "learning_rate": 6.86217008797654e-06, |
| "loss": 0.3094058930873871, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.3224400871459695, |
| "grad_norm": 0.15144820511341095, |
| "learning_rate": 6.852394916911046e-06, |
| "loss": 0.26982536911964417, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.3234083756959574, |
| "grad_norm": 0.15371711552143097, |
| "learning_rate": 6.842619745845553e-06, |
| "loss": 0.29452741146087646, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.3243766642459453, |
| "grad_norm": 0.15917186439037323, |
| "learning_rate": 6.832844574780059e-06, |
| "loss": 0.29580235481262207, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.3253449527959332, |
| "grad_norm": 0.1550978273153305, |
| "learning_rate": 6.823069403714565e-06, |
| "loss": 0.36159706115722656, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.3263132413459211, |
| "grad_norm": 0.14809750020503998, |
| "learning_rate": 6.813294232649071e-06, |
| "loss": 0.2756076753139496, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.327281529895909, |
| "grad_norm": 0.15484212338924408, |
| "learning_rate": 6.8035190615835775e-06, |
| "loss": 0.26915088295936584, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.3282498184458969, |
| "grad_norm": 0.16279636323451996, |
| "learning_rate": 6.793743890518084e-06, |
| "loss": 0.3175829350948334, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.3292181069958848, |
| "grad_norm": 0.16756120324134827, |
| "learning_rate": 6.78396871945259e-06, |
| "loss": 0.3034948706626892, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.33018639554587265, |
| "grad_norm": 0.1840161681175232, |
| "learning_rate": 6.774193548387097e-06, |
| "loss": 0.32814380526542664, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.3311546840958606, |
| "grad_norm": 0.17000938951969147, |
| "learning_rate": 6.764418377321603e-06, |
| "loss": 0.2874530553817749, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.33212297264584845, |
| "grad_norm": 0.16385532915592194, |
| "learning_rate": 6.75464320625611e-06, |
| "loss": 0.3138440251350403, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.3330912611958364, |
| "grad_norm": 0.18285442888736725, |
| "learning_rate": 6.744868035190616e-06, |
| "loss": 0.30819112062454224, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.33405954974582425, |
| "grad_norm": 0.16300350427627563, |
| "learning_rate": 6.7350928641251225e-06, |
| "loss": 0.291953444480896, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.3350278382958122, |
| "grad_norm": 0.18186615407466888, |
| "learning_rate": 6.725317693059629e-06, |
| "loss": 0.27950209379196167, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.33599612684580005, |
| "grad_norm": 0.16226314008235931, |
| "learning_rate": 6.715542521994135e-06, |
| "loss": 0.29765087366104126, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.3369644153957879, |
| "grad_norm": 0.18730367720127106, |
| "learning_rate": 6.705767350928642e-06, |
| "loss": 0.3281936049461365, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.33793270394577585, |
| "grad_norm": 0.16875147819519043, |
| "learning_rate": 6.695992179863147e-06, |
| "loss": 0.3038584291934967, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.3389009924957637, |
| "grad_norm": 0.17776557803153992, |
| "learning_rate": 6.686217008797654e-06, |
| "loss": 0.2937045395374298, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.33986928104575165, |
| "grad_norm": 0.14246642589569092, |
| "learning_rate": 6.67644183773216e-06, |
| "loss": 0.3092482388019562, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.3408375695957395, |
| "grad_norm": 0.15377755463123322, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.2588135898113251, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.34180585814572745, |
| "grad_norm": 0.1557725965976715, |
| "learning_rate": 6.656891495601173e-06, |
| "loss": 0.2831732928752899, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.3427741466957153, |
| "grad_norm": 0.15106302499771118, |
| "learning_rate": 6.6471163245356795e-06, |
| "loss": 0.26685526967048645, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.3437424352457032, |
| "grad_norm": 0.17710185050964355, |
| "learning_rate": 6.637341153470186e-06, |
| "loss": 0.3084600567817688, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.3447107237956911, |
| "grad_norm": 0.14838555455207825, |
| "learning_rate": 6.627565982404692e-06, |
| "loss": 0.2909257113933563, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.345679012345679, |
| "grad_norm": 0.1700345277786255, |
| "learning_rate": 6.617790811339199e-06, |
| "loss": 0.3314460217952728, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.3466473008956669, |
| "grad_norm": 0.17465804517269135, |
| "learning_rate": 6.608015640273705e-06, |
| "loss": 0.30570876598358154, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.3476155894456548, |
| "grad_norm": 0.1821223646402359, |
| "learning_rate": 6.598240469208212e-06, |
| "loss": 0.3378984034061432, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.3485838779956427, |
| "grad_norm": 0.15884800255298615, |
| "learning_rate": 6.588465298142718e-06, |
| "loss": 0.2953569293022156, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3495521665456306, |
| "grad_norm": 0.15600639581680298, |
| "learning_rate": 6.5786901270772245e-06, |
| "loss": 0.36609965562820435, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.3505204550956185, |
| "grad_norm": 0.15438097715377808, |
| "learning_rate": 6.56891495601173e-06, |
| "loss": 0.33403828740119934, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.3514887436456064, |
| "grad_norm": 0.1604045182466507, |
| "learning_rate": 6.5591397849462365e-06, |
| "loss": 0.2991517186164856, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.35245703219559427, |
| "grad_norm": 0.17102058231830597, |
| "learning_rate": 6.549364613880743e-06, |
| "loss": 0.28116142749786377, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.3534253207455822, |
| "grad_norm": 0.15797586739063263, |
| "learning_rate": 6.539589442815249e-06, |
| "loss": 0.30658936500549316, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.35439360929557007, |
| "grad_norm": 0.1578051745891571, |
| "learning_rate": 6.529814271749756e-06, |
| "loss": 0.3066115379333496, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.355361897845558, |
| "grad_norm": 0.15831097960472107, |
| "learning_rate": 6.520039100684262e-06, |
| "loss": 0.30893969535827637, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.35633018639554587, |
| "grad_norm": 0.16711507737636566, |
| "learning_rate": 6.510263929618769e-06, |
| "loss": 0.29801586270332336, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.3572984749455338, |
| "grad_norm": 0.1717497706413269, |
| "learning_rate": 6.500488758553275e-06, |
| "loss": 0.3415631949901581, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.35826676349552167, |
| "grad_norm": 0.1789737045764923, |
| "learning_rate": 6.4907135874877815e-06, |
| "loss": 0.3342001140117645, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.35923505204550954, |
| "grad_norm": 0.16474243998527527, |
| "learning_rate": 6.480938416422288e-06, |
| "loss": 0.29570648074150085, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.36020334059549747, |
| "grad_norm": 0.17131595313549042, |
| "learning_rate": 6.471163245356794e-06, |
| "loss": 0.33989042043685913, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.36117162914548534, |
| "grad_norm": 0.1660817712545395, |
| "learning_rate": 6.461388074291301e-06, |
| "loss": 0.28438785672187805, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.36213991769547327, |
| "grad_norm": 0.16126903891563416, |
| "learning_rate": 6.451612903225806e-06, |
| "loss": 0.30980294942855835, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.36310820624546114, |
| "grad_norm": 0.1648305058479309, |
| "learning_rate": 6.441837732160313e-06, |
| "loss": 0.325278103351593, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.36407649479544907, |
| "grad_norm": 0.17030373215675354, |
| "learning_rate": 6.432062561094819e-06, |
| "loss": 0.3066975176334381, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.36504478334543694, |
| "grad_norm": 0.17431139945983887, |
| "learning_rate": 6.422287390029326e-06, |
| "loss": 0.28780531883239746, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.3660130718954248, |
| "grad_norm": 0.1605546921491623, |
| "learning_rate": 6.412512218963832e-06, |
| "loss": 0.2759549915790558, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.36698136044541274, |
| "grad_norm": 0.16246621310710907, |
| "learning_rate": 6.4027370478983385e-06, |
| "loss": 0.2838786840438843, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.3679496489954006, |
| "grad_norm": 0.173859640955925, |
| "learning_rate": 6.392961876832845e-06, |
| "loss": 0.28778478503227234, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.36891793754538854, |
| "grad_norm": 0.17580649256706238, |
| "learning_rate": 6.383186705767351e-06, |
| "loss": 0.29734641313552856, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.3698862260953764, |
| "grad_norm": 0.21752490103244781, |
| "learning_rate": 6.373411534701858e-06, |
| "loss": 0.331564724445343, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.37085451464536434, |
| "grad_norm": 0.1802123785018921, |
| "learning_rate": 6.363636363636364e-06, |
| "loss": 0.2878391742706299, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.3718228031953522, |
| "grad_norm": 0.16118982434272766, |
| "learning_rate": 6.353861192570871e-06, |
| "loss": 0.29216498136520386, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.3727910917453401, |
| "grad_norm": 0.18390141427516937, |
| "learning_rate": 6.344086021505377e-06, |
| "loss": 0.3013034164905548, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.373759380295328, |
| "grad_norm": 0.17186126112937927, |
| "learning_rate": 6.3343108504398835e-06, |
| "loss": 0.2939417362213135, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.3747276688453159, |
| "grad_norm": 0.1863613873720169, |
| "learning_rate": 6.324535679374389e-06, |
| "loss": 0.3011291027069092, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.3756959573953038, |
| "grad_norm": 0.16492682695388794, |
| "learning_rate": 6.3147605083088955e-06, |
| "loss": 0.29598119854927063, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.3766642459452917, |
| "grad_norm": 0.1751633882522583, |
| "learning_rate": 6.304985337243402e-06, |
| "loss": 0.3110932409763336, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.3776325344952796, |
| "grad_norm": 0.1898571252822876, |
| "learning_rate": 6.295210166177908e-06, |
| "loss": 0.30633416771888733, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3786008230452675, |
| "grad_norm": 0.1563596874475479, |
| "learning_rate": 6.285434995112415e-06, |
| "loss": 0.2938535809516907, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.37956911159525536, |
| "grad_norm": 0.18046635389328003, |
| "learning_rate": 6.275659824046921e-06, |
| "loss": 0.2856330871582031, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.3805374001452433, |
| "grad_norm": 0.15708908438682556, |
| "learning_rate": 6.265884652981428e-06, |
| "loss": 0.2904341518878937, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.38150568869523116, |
| "grad_norm": 0.17719998955726624, |
| "learning_rate": 6.256109481915934e-06, |
| "loss": 0.28807759284973145, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.3824739772452191, |
| "grad_norm": 0.15774236619472504, |
| "learning_rate": 6.2463343108504405e-06, |
| "loss": 0.2675943076610565, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.38344226579520696, |
| "grad_norm": 0.15558338165283203, |
| "learning_rate": 6.236559139784947e-06, |
| "loss": 0.2567376494407654, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.3844105543451949, |
| "grad_norm": 0.1891474723815918, |
| "learning_rate": 6.226783968719453e-06, |
| "loss": 0.2961275279521942, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.38537884289518276, |
| "grad_norm": 0.17778225243091583, |
| "learning_rate": 6.21700879765396e-06, |
| "loss": 0.3132587671279907, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.3863471314451707, |
| "grad_norm": 0.1902502477169037, |
| "learning_rate": 6.207233626588466e-06, |
| "loss": 0.31374305486679077, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.38731541999515856, |
| "grad_norm": 0.1710149198770523, |
| "learning_rate": 6.197458455522972e-06, |
| "loss": 0.34003812074661255, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.38828370854514643, |
| "grad_norm": 0.16460557281970978, |
| "learning_rate": 6.187683284457478e-06, |
| "loss": 0.2728930115699768, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.38925199709513436, |
| "grad_norm": 0.17229019105434418, |
| "learning_rate": 6.177908113391985e-06, |
| "loss": 0.2700308561325073, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.39022028564512223, |
| "grad_norm": 0.18431095778942108, |
| "learning_rate": 6.168132942326491e-06, |
| "loss": 0.2867494821548462, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.39118857419511016, |
| "grad_norm": 0.17898224294185638, |
| "learning_rate": 6.1583577712609975e-06, |
| "loss": 0.26027926802635193, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.39215686274509803, |
| "grad_norm": 0.1536150425672531, |
| "learning_rate": 6.148582600195504e-06, |
| "loss": 0.2740130126476288, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.39312515129508596, |
| "grad_norm": 0.46492230892181396, |
| "learning_rate": 6.13880742913001e-06, |
| "loss": 0.2832326889038086, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.39409343984507383, |
| "grad_norm": 0.18063224852085114, |
| "learning_rate": 6.129032258064517e-06, |
| "loss": 0.2683679163455963, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.3950617283950617, |
| "grad_norm": 0.18479417264461517, |
| "learning_rate": 6.119257086999023e-06, |
| "loss": 0.2960650324821472, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.39603001694504963, |
| "grad_norm": 0.16542568802833557, |
| "learning_rate": 6.10948191593353e-06, |
| "loss": 0.28208404779434204, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.3969983054950375, |
| "grad_norm": 0.1611918956041336, |
| "learning_rate": 6.099706744868036e-06, |
| "loss": 0.2653481364250183, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.39796659404502543, |
| "grad_norm": 0.17886482179164886, |
| "learning_rate": 6.0899315738025425e-06, |
| "loss": 0.33219113945961, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.3989348825950133, |
| "grad_norm": 0.16463807225227356, |
| "learning_rate": 6.080156402737049e-06, |
| "loss": 0.28929123282432556, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.39990317114500123, |
| "grad_norm": 0.18031014502048492, |
| "learning_rate": 6.0703812316715545e-06, |
| "loss": 0.27609509229660034, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.4008714596949891, |
| "grad_norm": 0.18028417229652405, |
| "learning_rate": 6.060606060606061e-06, |
| "loss": 0.29474079608917236, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.401839748244977, |
| "grad_norm": 0.16485083103179932, |
| "learning_rate": 6.050830889540567e-06, |
| "loss": 0.33132994174957275, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.4028080367949649, |
| "grad_norm": 0.17660938203334808, |
| "learning_rate": 6.041055718475074e-06, |
| "loss": 0.2553951144218445, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.4037763253449528, |
| "grad_norm": 0.18007521331310272, |
| "learning_rate": 6.03128054740958e-06, |
| "loss": 0.2640475034713745, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.4047446138949407, |
| "grad_norm": 0.16710299253463745, |
| "learning_rate": 6.021505376344087e-06, |
| "loss": 0.26302963495254517, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.4057129024449286, |
| "grad_norm": 0.1827956736087799, |
| "learning_rate": 6.011730205278593e-06, |
| "loss": 0.3405194878578186, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.4066811909949165, |
| "grad_norm": 0.1711130291223526, |
| "learning_rate": 6.0019550342130995e-06, |
| "loss": 0.280174195766449, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4076494795449044, |
| "grad_norm": 0.16884659230709076, |
| "learning_rate": 5.992179863147606e-06, |
| "loss": 0.26946425437927246, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.40861776809489225, |
| "grad_norm": 0.17745757102966309, |
| "learning_rate": 5.982404692082112e-06, |
| "loss": 0.3392980396747589, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.4095860566448802, |
| "grad_norm": 0.1780301034450531, |
| "learning_rate": 5.972629521016619e-06, |
| "loss": 0.30674225091934204, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.41055434519486805, |
| "grad_norm": 0.17808158695697784, |
| "learning_rate": 5.962854349951125e-06, |
| "loss": 0.3345290720462799, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.411522633744856, |
| "grad_norm": 0.16129203140735626, |
| "learning_rate": 5.953079178885631e-06, |
| "loss": 0.2831481695175171, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.41249092229484385, |
| "grad_norm": 0.18456275761127472, |
| "learning_rate": 5.943304007820137e-06, |
| "loss": 0.3257300853729248, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.4134592108448318, |
| "grad_norm": 0.18435759842395782, |
| "learning_rate": 5.933528836754644e-06, |
| "loss": 0.26924797892570496, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.41442749939481965, |
| "grad_norm": 0.1941821128129959, |
| "learning_rate": 5.92375366568915e-06, |
| "loss": 0.3252018392086029, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.4153957879448075, |
| "grad_norm": 0.17482848465442657, |
| "learning_rate": 5.9139784946236566e-06, |
| "loss": 0.33910396695137024, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.41636407649479545, |
| "grad_norm": 0.18026143312454224, |
| "learning_rate": 5.904203323558163e-06, |
| "loss": 0.2899131178855896, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4173323650447833, |
| "grad_norm": 0.18868599832057953, |
| "learning_rate": 5.894428152492669e-06, |
| "loss": 0.26209527254104614, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.41830065359477125, |
| "grad_norm": 0.172159805893898, |
| "learning_rate": 5.884652981427176e-06, |
| "loss": 0.2784045338630676, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.4192689421447591, |
| "grad_norm": 0.19189684092998505, |
| "learning_rate": 5.874877810361682e-06, |
| "loss": 0.3449173867702484, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.42023723069474705, |
| "grad_norm": 0.18038828670978546, |
| "learning_rate": 5.865102639296189e-06, |
| "loss": 0.260070264339447, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.4212055192447349, |
| "grad_norm": 0.17879043519496918, |
| "learning_rate": 5.855327468230695e-06, |
| "loss": 0.2970094382762909, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.42217380779472286, |
| "grad_norm": 0.19369956851005554, |
| "learning_rate": 5.8455522971652016e-06, |
| "loss": 0.262788325548172, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.4231420963447107, |
| "grad_norm": 0.1980774849653244, |
| "learning_rate": 5.835777126099708e-06, |
| "loss": 0.3415115475654602, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.4241103848946986, |
| "grad_norm": 0.1517505943775177, |
| "learning_rate": 5.8260019550342136e-06, |
| "loss": 0.2550700902938843, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.42507867344468653, |
| "grad_norm": 0.16468308866024017, |
| "learning_rate": 5.81622678396872e-06, |
| "loss": 0.3277415633201599, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.4260469619946744, |
| "grad_norm": 0.1632845550775528, |
| "learning_rate": 5.806451612903226e-06, |
| "loss": 0.2696504294872284, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.42701525054466233, |
| "grad_norm": 0.17740678787231445, |
| "learning_rate": 5.796676441837733e-06, |
| "loss": 0.3146612048149109, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.4279835390946502, |
| "grad_norm": 0.1720811426639557, |
| "learning_rate": 5.786901270772239e-06, |
| "loss": 0.293180376291275, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.42895182764463813, |
| "grad_norm": 0.16457650065422058, |
| "learning_rate": 5.777126099706746e-06, |
| "loss": 0.25529271364212036, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.429920116194626, |
| "grad_norm": 0.18886499106884003, |
| "learning_rate": 5.767350928641252e-06, |
| "loss": 0.2667441964149475, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.4308884047446139, |
| "grad_norm": 0.16837763786315918, |
| "learning_rate": 5.7575757575757586e-06, |
| "loss": 0.2874595820903778, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.4318566932946018, |
| "grad_norm": 0.19567479193210602, |
| "learning_rate": 5.747800586510265e-06, |
| "loss": 0.2736223042011261, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.4328249818445897, |
| "grad_norm": 0.18101078271865845, |
| "learning_rate": 5.738025415444771e-06, |
| "loss": 0.3007189631462097, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.4337932703945776, |
| "grad_norm": 0.17572757601737976, |
| "learning_rate": 5.728250244379278e-06, |
| "loss": 0.3632327914237976, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.4347615589445655, |
| "grad_norm": 0.17773869633674622, |
| "learning_rate": 5.718475073313784e-06, |
| "loss": 0.3204823434352875, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.4357298474945534, |
| "grad_norm": 0.1703418791294098, |
| "learning_rate": 5.708699902248291e-06, |
| "loss": 0.31934505701065063, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4366981360445413, |
| "grad_norm": 0.16851919889450073, |
| "learning_rate": 5.698924731182796e-06, |
| "loss": 0.33900323510169983, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.43766642459452915, |
| "grad_norm": 0.16920781135559082, |
| "learning_rate": 5.689149560117303e-06, |
| "loss": 0.2747448980808258, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.4386347131445171, |
| "grad_norm": 0.20053993165493011, |
| "learning_rate": 5.679374389051809e-06, |
| "loss": 0.28275251388549805, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.43960300169450495, |
| "grad_norm": 0.17686837911605835, |
| "learning_rate": 5.6695992179863156e-06, |
| "loss": 0.26753419637680054, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.4405712902444929, |
| "grad_norm": 0.20442141592502594, |
| "learning_rate": 5.659824046920822e-06, |
| "loss": 0.32636407017707825, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.44153957879448075, |
| "grad_norm": 0.1751495897769928, |
| "learning_rate": 5.6500488758553284e-06, |
| "loss": 0.29740267992019653, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.4425078673444687, |
| "grad_norm": 0.17008022964000702, |
| "learning_rate": 5.640273704789835e-06, |
| "loss": 0.2965855002403259, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.44347615589445655, |
| "grad_norm": 0.1770244538784027, |
| "learning_rate": 5.630498533724341e-06, |
| "loss": 0.39362120628356934, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.17790165543556213, |
| "learning_rate": 5.620723362658846e-06, |
| "loss": 0.2864190340042114, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.44541273299443235, |
| "grad_norm": 0.17405082285404205, |
| "learning_rate": 5.6109481915933524e-06, |
| "loss": 0.2946798801422119, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4463810215444202, |
| "grad_norm": 0.16010600328445435, |
| "learning_rate": 5.601173020527859e-06, |
| "loss": 0.32160502672195435, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.44734931009440815, |
| "grad_norm": 0.1997617781162262, |
| "learning_rate": 5.591397849462365e-06, |
| "loss": 0.32814455032348633, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.448317598644396, |
| "grad_norm": 0.17624011635780334, |
| "learning_rate": 5.581622678396872e-06, |
| "loss": 0.2808952331542969, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.44928588719438395, |
| "grad_norm": 0.16722382605075836, |
| "learning_rate": 5.571847507331378e-06, |
| "loss": 0.26833376288414, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.4502541757443718, |
| "grad_norm": 0.16350014507770538, |
| "learning_rate": 5.562072336265885e-06, |
| "loss": 0.2904164791107178, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.4512224642943597, |
| "grad_norm": 0.15504086017608643, |
| "learning_rate": 5.552297165200391e-06, |
| "loss": 0.3124706745147705, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.4521907528443476, |
| "grad_norm": 0.17865699529647827, |
| "learning_rate": 5.5425219941348974e-06, |
| "loss": 0.30932655930519104, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.4531590413943355, |
| "grad_norm": 0.179380863904953, |
| "learning_rate": 5.532746823069404e-06, |
| "loss": 0.3099682033061981, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.4541273299443234, |
| "grad_norm": 0.1848987489938736, |
| "learning_rate": 5.52297165200391e-06, |
| "loss": 0.310943603515625, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.4550956184943113, |
| "grad_norm": 0.17355690896511078, |
| "learning_rate": 5.513196480938417e-06, |
| "loss": 0.27683690190315247, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.4560639070442992, |
| "grad_norm": 0.18208661675453186, |
| "learning_rate": 5.503421309872923e-06, |
| "loss": 0.26567360758781433, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.4570321955942871, |
| "grad_norm": 0.17654170095920563, |
| "learning_rate": 5.493646138807429e-06, |
| "loss": 0.29490426182746887, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.45800048414427497, |
| "grad_norm": 0.1757243424654007, |
| "learning_rate": 5.483870967741935e-06, |
| "loss": 0.30711159110069275, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.4589687726942629, |
| "grad_norm": 0.17413422465324402, |
| "learning_rate": 5.474095796676442e-06, |
| "loss": 0.28973209857940674, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.45993706124425077, |
| "grad_norm": 0.20302073657512665, |
| "learning_rate": 5.464320625610948e-06, |
| "loss": 0.3249307870864868, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.4609053497942387, |
| "grad_norm": 0.17959873378276825, |
| "learning_rate": 5.4545454545454545e-06, |
| "loss": 0.29579484462738037, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.46187363834422657, |
| "grad_norm": 0.17562335729599, |
| "learning_rate": 5.444770283479961e-06, |
| "loss": 0.3038690984249115, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.4628419268942145, |
| "grad_norm": 0.16495366394519806, |
| "learning_rate": 5.434995112414467e-06, |
| "loss": 0.281146377325058, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.46381021544420237, |
| "grad_norm": 0.17205455899238586, |
| "learning_rate": 5.425219941348974e-06, |
| "loss": 0.2786451280117035, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.4647785039941903, |
| "grad_norm": 0.19133879244327545, |
| "learning_rate": 5.41544477028348e-06, |
| "loss": 0.3336411416530609, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.46574679254417817, |
| "grad_norm": 0.18153399229049683, |
| "learning_rate": 5.405669599217987e-06, |
| "loss": 0.28267285227775574, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.46671508109416604, |
| "grad_norm": 0.16732986271381378, |
| "learning_rate": 5.395894428152493e-06, |
| "loss": 0.2745664119720459, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.46768336964415397, |
| "grad_norm": 0.19961762428283691, |
| "learning_rate": 5.3861192570869995e-06, |
| "loss": 0.2916579246520996, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.46865165819414184, |
| "grad_norm": 0.18672992289066315, |
| "learning_rate": 5.376344086021506e-06, |
| "loss": 0.2882307767868042, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.46961994674412977, |
| "grad_norm": 0.16605433821678162, |
| "learning_rate": 5.3665689149560115e-06, |
| "loss": 0.32832133769989014, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 0.1809573769569397, |
| "learning_rate": 5.356793743890518e-06, |
| "loss": 0.28796786069869995, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.47155652384410557, |
| "grad_norm": 0.15820080041885376, |
| "learning_rate": 5.347018572825024e-06, |
| "loss": 0.24655906856060028, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.47252481239409344, |
| "grad_norm": 0.183393657207489, |
| "learning_rate": 5.337243401759531e-06, |
| "loss": 0.3693656027317047, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.4734931009440813, |
| "grad_norm": 0.17333702743053436, |
| "learning_rate": 5.327468230694037e-06, |
| "loss": 0.2813875079154968, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.47446138949406924, |
| "grad_norm": 0.18470393121242523, |
| "learning_rate": 5.317693059628544e-06, |
| "loss": 0.32118356227874756, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4754296780440571, |
| "grad_norm": 0.17366191744804382, |
| "learning_rate": 5.30791788856305e-06, |
| "loss": 0.27578046917915344, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.47639796659404504, |
| "grad_norm": 0.16945011913776398, |
| "learning_rate": 5.2981427174975565e-06, |
| "loss": 0.3115886151790619, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.4773662551440329, |
| "grad_norm": 0.20388440787792206, |
| "learning_rate": 5.288367546432063e-06, |
| "loss": 0.309696227312088, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.47833454369402084, |
| "grad_norm": 0.156901016831398, |
| "learning_rate": 5.278592375366569e-06, |
| "loss": 0.27146872878074646, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.4793028322440087, |
| "grad_norm": 0.20242440700531006, |
| "learning_rate": 5.268817204301076e-06, |
| "loss": 0.33286309242248535, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.4802711207939966, |
| "grad_norm": 0.20036989450454712, |
| "learning_rate": 5.259042033235582e-06, |
| "loss": 0.285398006439209, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.4812394093439845, |
| "grad_norm": 0.16521663963794708, |
| "learning_rate": 5.249266862170089e-06, |
| "loss": 0.27880388498306274, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.4822076978939724, |
| "grad_norm": 0.16702234745025635, |
| "learning_rate": 5.239491691104594e-06, |
| "loss": 0.29399362206459045, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.4831759864439603, |
| "grad_norm": 0.18302516639232635, |
| "learning_rate": 5.229716520039101e-06, |
| "loss": 0.2757553160190582, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.4841442749939482, |
| "grad_norm": 0.17423763871192932, |
| "learning_rate": 5.219941348973607e-06, |
| "loss": 0.2870354950428009, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4851125635439361, |
| "grad_norm": 0.19603262841701508, |
| "learning_rate": 5.2101661779081135e-06, |
| "loss": 0.2726498246192932, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.486080852093924, |
| "grad_norm": 0.1614205241203308, |
| "learning_rate": 5.20039100684262e-06, |
| "loss": 0.25111639499664307, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.48704914064391186, |
| "grad_norm": 0.17319105565547943, |
| "learning_rate": 5.190615835777126e-06, |
| "loss": 0.27468031644821167, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.4880174291938998, |
| "grad_norm": 0.16882063448429108, |
| "learning_rate": 5.180840664711633e-06, |
| "loss": 0.27068573236465454, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.48898571774388766, |
| "grad_norm": 0.18153499066829681, |
| "learning_rate": 5.171065493646139e-06, |
| "loss": 0.28188517689704895, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.4899540062938756, |
| "grad_norm": 0.1816774159669876, |
| "learning_rate": 5.161290322580646e-06, |
| "loss": 0.32222485542297363, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.49092229484386346, |
| "grad_norm": 0.16442593932151794, |
| "learning_rate": 5.151515151515152e-06, |
| "loss": 0.30542707443237305, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.4918905833938514, |
| "grad_norm": 0.1821308732032776, |
| "learning_rate": 5.1417399804496585e-06, |
| "loss": 0.293884813785553, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.49285887194383926, |
| "grad_norm": 0.1683465987443924, |
| "learning_rate": 5.131964809384165e-06, |
| "loss": 0.26638439297676086, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.49382716049382713, |
| "grad_norm": 0.17483524978160858, |
| "learning_rate": 5.1221896383186705e-06, |
| "loss": 0.30652916431427, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.49479544904381506, |
| "grad_norm": 0.1842867136001587, |
| "learning_rate": 5.112414467253177e-06, |
| "loss": 0.364931583404541, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.49576373759380293, |
| "grad_norm": 0.19743406772613525, |
| "learning_rate": 5.102639296187683e-06, |
| "loss": 0.2590721547603607, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.49673202614379086, |
| "grad_norm": 0.18802092969417572, |
| "learning_rate": 5.09286412512219e-06, |
| "loss": 0.31060951948165894, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.49770031469377873, |
| "grad_norm": 0.16384844481945038, |
| "learning_rate": 5.083088954056696e-06, |
| "loss": 0.27959296107292175, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.49866860324376666, |
| "grad_norm": 0.2127850353717804, |
| "learning_rate": 5.073313782991203e-06, |
| "loss": 0.3346613049507141, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.49963689179375453, |
| "grad_norm": 0.17491693794727325, |
| "learning_rate": 5.063538611925709e-06, |
| "loss": 0.2960091531276703, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.5006051803437425, |
| "grad_norm": 0.1880018264055252, |
| "learning_rate": 5.0537634408602155e-06, |
| "loss": 0.2997010350227356, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.5015734688937303, |
| "grad_norm": 0.1748742163181305, |
| "learning_rate": 5.043988269794722e-06, |
| "loss": 0.2931768596172333, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.5025417574437182, |
| "grad_norm": 0.15878638625144958, |
| "learning_rate": 5.034213098729228e-06, |
| "loss": 0.254057914018631, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.5035100459937061, |
| "grad_norm": 0.2069050818681717, |
| "learning_rate": 5.024437927663735e-06, |
| "loss": 0.2735084295272827, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5044783345436941, |
| "grad_norm": 0.16623827815055847, |
| "learning_rate": 5.014662756598241e-06, |
| "loss": 0.25306957960128784, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.5054466230936819, |
| "grad_norm": 0.1891428381204605, |
| "learning_rate": 5.004887585532748e-06, |
| "loss": 0.2810228765010834, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.5064149116436698, |
| "grad_norm": 0.2315511256456375, |
| "learning_rate": 4.995112414467253e-06, |
| "loss": 0.2733577489852905, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.5073832001936577, |
| "grad_norm": 0.16957992315292358, |
| "learning_rate": 4.98533724340176e-06, |
| "loss": 0.27292630076408386, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.5083514887436456, |
| "grad_norm": 0.17816272377967834, |
| "learning_rate": 4.975562072336266e-06, |
| "loss": 0.27049022912979126, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.5093197772936335, |
| "grad_norm": 0.17525239288806915, |
| "learning_rate": 4.9657869012707725e-06, |
| "loss": 0.2759566903114319, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.5102880658436214, |
| "grad_norm": 0.18764440715312958, |
| "learning_rate": 4.956011730205279e-06, |
| "loss": 0.27127569913864136, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.5112563543936093, |
| "grad_norm": 0.18698008358478546, |
| "learning_rate": 4.946236559139785e-06, |
| "loss": 0.2902853786945343, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.5122246429435972, |
| "grad_norm": 0.17745737731456757, |
| "learning_rate": 4.936461388074292e-06, |
| "loss": 0.32079097628593445, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.5131929314935851, |
| "grad_norm": 0.17994803190231323, |
| "learning_rate": 4.926686217008798e-06, |
| "loss": 0.27671536803245544, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.514161220043573, |
| "grad_norm": 0.1736883968114853, |
| "learning_rate": 4.916911045943305e-06, |
| "loss": 0.29842981696128845, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.5151295085935609, |
| "grad_norm": 0.17682136595249176, |
| "learning_rate": 4.907135874877811e-06, |
| "loss": 0.28436222672462463, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.5160977971435488, |
| "grad_norm": 0.18292061984539032, |
| "learning_rate": 4.8973607038123175e-06, |
| "loss": 0.2722223401069641, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.5170660856935366, |
| "grad_norm": 0.1844838410615921, |
| "learning_rate": 4.887585532746824e-06, |
| "loss": 0.26570263504981995, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.5180343742435246, |
| "grad_norm": 0.18923698365688324, |
| "learning_rate": 4.87781036168133e-06, |
| "loss": 0.3637017607688904, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.5190026627935125, |
| "grad_norm": 0.16404788196086884, |
| "learning_rate": 4.868035190615836e-06, |
| "loss": 0.28690028190612793, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.5199709513435004, |
| "grad_norm": 0.1970244199037552, |
| "learning_rate": 4.858260019550342e-06, |
| "loss": 0.2881229519844055, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.5209392398934882, |
| "grad_norm": 0.1616058647632599, |
| "learning_rate": 4.848484848484849e-06, |
| "loss": 0.2817743122577667, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.5219075284434762, |
| "grad_norm": 0.18213775753974915, |
| "learning_rate": 4.838709677419355e-06, |
| "loss": 0.2646360695362091, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.5228758169934641, |
| "grad_norm": 0.1883658468723297, |
| "learning_rate": 4.828934506353862e-06, |
| "loss": 0.32929307222366333, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.523844105543452, |
| "grad_norm": 0.1898542195558548, |
| "learning_rate": 4.819159335288368e-06, |
| "loss": 0.27511003613471985, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.5248123940934398, |
| "grad_norm": 0.1817118525505066, |
| "learning_rate": 4.8093841642228745e-06, |
| "loss": 0.27474260330200195, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.5257806826434277, |
| "grad_norm": 0.19033664464950562, |
| "learning_rate": 4.799608993157381e-06, |
| "loss": 0.32937076687812805, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.5267489711934157, |
| "grad_norm": 0.18128858506679535, |
| "learning_rate": 4.789833822091887e-06, |
| "loss": 0.3000837564468384, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.5277172597434036, |
| "grad_norm": 0.18828479945659637, |
| "learning_rate": 4.780058651026394e-06, |
| "loss": 0.3411107063293457, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.5286855482933914, |
| "grad_norm": 0.21484431624412537, |
| "learning_rate": 4.7702834799609e-06, |
| "loss": 0.32155299186706543, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.5296538368433793, |
| "grad_norm": 0.19658254086971283, |
| "learning_rate": 4.760508308895407e-06, |
| "loss": 0.2874881327152252, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.5306221253933672, |
| "grad_norm": 0.19206486642360687, |
| "learning_rate": 4.750733137829912e-06, |
| "loss": 0.31940093636512756, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.5315904139433552, |
| "grad_norm": 0.2160305678844452, |
| "learning_rate": 4.740957966764419e-06, |
| "loss": 0.3076990246772766, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.532558702493343, |
| "grad_norm": 0.18269337713718414, |
| "learning_rate": 4.731182795698925e-06, |
| "loss": 0.27614516019821167, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5335269910433309, |
| "grad_norm": 0.18244397640228271, |
| "learning_rate": 4.7214076246334315e-06, |
| "loss": 0.28050702810287476, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.5344952795933188, |
| "grad_norm": 0.16129615902900696, |
| "learning_rate": 4.711632453567938e-06, |
| "loss": 0.28339844942092896, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.5354635681433068, |
| "grad_norm": 0.1605842411518097, |
| "learning_rate": 4.701857282502444e-06, |
| "loss": 0.28020599484443665, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.5364318566932946, |
| "grad_norm": 0.17767396569252014, |
| "learning_rate": 4.692082111436951e-06, |
| "loss": 0.26483970880508423, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.5374001452432825, |
| "grad_norm": 0.17699919641017914, |
| "learning_rate": 4.682306940371456e-06, |
| "loss": 0.27966004610061646, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.5383684337932704, |
| "grad_norm": 0.19072790443897247, |
| "learning_rate": 4.672531769305963e-06, |
| "loss": 0.282270222902298, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.5393367223432582, |
| "grad_norm": 0.1869659274816513, |
| "learning_rate": 4.662756598240469e-06, |
| "loss": 0.3432008624076843, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.5403050108932462, |
| "grad_norm": 0.18851327896118164, |
| "learning_rate": 4.652981427174976e-06, |
| "loss": 0.2940416932106018, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.5412732994432341, |
| "grad_norm": 0.20195099711418152, |
| "learning_rate": 4.643206256109482e-06, |
| "loss": 0.30535370111465454, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.542241587993222, |
| "grad_norm": 0.17963868379592896, |
| "learning_rate": 4.6334310850439885e-06, |
| "loss": 0.3085969388484955, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5432098765432098, |
| "grad_norm": 0.170511856675148, |
| "learning_rate": 4.623655913978495e-06, |
| "loss": 0.3072543442249298, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.5441781650931977, |
| "grad_norm": 0.18112339079380035, |
| "learning_rate": 4.613880742913001e-06, |
| "loss": 0.3005993366241455, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.5451464536431857, |
| "grad_norm": 0.18734918534755707, |
| "learning_rate": 4.604105571847508e-06, |
| "loss": 0.2741018533706665, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.5461147421931736, |
| "grad_norm": 0.18844076991081238, |
| "learning_rate": 4.594330400782014e-06, |
| "loss": 0.27082327008247375, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.5470830307431614, |
| "grad_norm": 0.18848098814487457, |
| "learning_rate": 4.58455522971652e-06, |
| "loss": 0.2900712490081787, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.5480513192931493, |
| "grad_norm": 0.18217670917510986, |
| "learning_rate": 4.574780058651026e-06, |
| "loss": 0.2818305492401123, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.5490196078431373, |
| "grad_norm": 0.1847630739212036, |
| "learning_rate": 4.565004887585533e-06, |
| "loss": 0.3052092492580414, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.5499878963931252, |
| "grad_norm": 0.17965678870677948, |
| "learning_rate": 4.555229716520039e-06, |
| "loss": 0.37061765789985657, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.550956184943113, |
| "grad_norm": 0.182081401348114, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 0.2812265157699585, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.5519244734931009, |
| "grad_norm": 0.1826234757900238, |
| "learning_rate": 4.535679374389052e-06, |
| "loss": 0.33616483211517334, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5528927620430888, |
| "grad_norm": 0.18337081372737885, |
| "learning_rate": 4.525904203323558e-06, |
| "loss": 0.26936668157577515, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.5538610505930768, |
| "grad_norm": 0.19079728424549103, |
| "learning_rate": 4.516129032258065e-06, |
| "loss": 0.31582286953926086, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.5548293391430646, |
| "grad_norm": 0.19277691841125488, |
| "learning_rate": 4.506353861192571e-06, |
| "loss": 0.26570555567741394, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.5557976276930525, |
| "grad_norm": 0.1885417103767395, |
| "learning_rate": 4.496578690127078e-06, |
| "loss": 0.283278226852417, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.5567659162430404, |
| "grad_norm": 0.1837887167930603, |
| "learning_rate": 4.486803519061584e-06, |
| "loss": 0.2855049967765808, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.5577342047930284, |
| "grad_norm": 0.1967337280511856, |
| "learning_rate": 4.4770283479960905e-06, |
| "loss": 0.2932886481285095, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.5587024933430162, |
| "grad_norm": 0.17725642025470734, |
| "learning_rate": 4.467253176930597e-06, |
| "loss": 0.27526989579200745, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.5596707818930041, |
| "grad_norm": 0.17137347161769867, |
| "learning_rate": 4.4574780058651025e-06, |
| "loss": 0.3213641047477722, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.560639070442992, |
| "grad_norm": 0.21623080968856812, |
| "learning_rate": 4.447702834799609e-06, |
| "loss": 0.30579251050949097, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.5616073589929799, |
| "grad_norm": 0.17714564502239227, |
| "learning_rate": 4.437927663734115e-06, |
| "loss": 0.27001863718032837, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.5625756475429678, |
| "grad_norm": 0.19795329868793488, |
| "learning_rate": 4.428152492668622e-06, |
| "loss": 0.3162938356399536, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.5635439360929557, |
| "grad_norm": 0.16567392647266388, |
| "learning_rate": 4.418377321603128e-06, |
| "loss": 0.27828705310821533, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.5645122246429436, |
| "grad_norm": 0.19157780706882477, |
| "learning_rate": 4.408602150537635e-06, |
| "loss": 0.26456013321876526, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.5654805131929315, |
| "grad_norm": 0.18285039067268372, |
| "learning_rate": 4.398826979472141e-06, |
| "loss": 0.27962782979011536, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.5664488017429193, |
| "grad_norm": 0.18198364973068237, |
| "learning_rate": 4.3890518084066475e-06, |
| "loss": 0.32034292817115784, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.5674170902929073, |
| "grad_norm": 0.189778670668602, |
| "learning_rate": 4.379276637341154e-06, |
| "loss": 0.27116918563842773, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.5683853788428952, |
| "grad_norm": 0.19017699360847473, |
| "learning_rate": 4.36950146627566e-06, |
| "loss": 0.28804683685302734, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.5693536673928831, |
| "grad_norm": 0.1705840528011322, |
| "learning_rate": 4.359726295210167e-06, |
| "loss": 0.3060193657875061, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.5703219559428709, |
| "grad_norm": 0.22186465561389923, |
| "learning_rate": 4.349951124144673e-06, |
| "loss": 0.26255226135253906, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.5712902444928589, |
| "grad_norm": 0.16935674846172333, |
| "learning_rate": 4.34017595307918e-06, |
| "loss": 0.25682443380355835, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5722585330428468, |
| "grad_norm": 0.2110513299703598, |
| "learning_rate": 4.330400782013685e-06, |
| "loss": 0.3172002136707306, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.5732268215928347, |
| "grad_norm": 0.17660263180732727, |
| "learning_rate": 4.320625610948192e-06, |
| "loss": 0.2504763603210449, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.5741951101428225, |
| "grad_norm": 0.1752292513847351, |
| "learning_rate": 4.310850439882698e-06, |
| "loss": 0.28053516149520874, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.5751633986928104, |
| "grad_norm": 0.17443427443504333, |
| "learning_rate": 4.3010752688172045e-06, |
| "loss": 0.27481114864349365, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.5761316872427984, |
| "grad_norm": 0.20570909976959229, |
| "learning_rate": 4.291300097751711e-06, |
| "loss": 0.32052427530288696, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.5770999757927863, |
| "grad_norm": 0.17960628867149353, |
| "learning_rate": 4.281524926686217e-06, |
| "loss": 0.30593350529670715, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.5780682643427741, |
| "grad_norm": 0.20899339020252228, |
| "learning_rate": 4.271749755620724e-06, |
| "loss": 0.3231653571128845, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.579036552892762, |
| "grad_norm": 0.17927585542201996, |
| "learning_rate": 4.26197458455523e-06, |
| "loss": 0.23228108882904053, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.5800048414427499, |
| "grad_norm": 0.19766579568386078, |
| "learning_rate": 4.252199413489737e-06, |
| "loss": 0.3512587547302246, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.5809731299927379, |
| "grad_norm": 0.2258554995059967, |
| "learning_rate": 4.242424242424243e-06, |
| "loss": 0.29843974113464355, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5819414185427257, |
| "grad_norm": 0.19223785400390625, |
| "learning_rate": 4.2326490713587495e-06, |
| "loss": 0.27962884306907654, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.5829097070927136, |
| "grad_norm": 0.17844106256961823, |
| "learning_rate": 4.222873900293256e-06, |
| "loss": 0.27644073963165283, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.5838779956427015, |
| "grad_norm": 0.1867385059595108, |
| "learning_rate": 4.213098729227762e-06, |
| "loss": 0.27366524934768677, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.5848462841926895, |
| "grad_norm": 0.17379915714263916, |
| "learning_rate": 4.203323558162268e-06, |
| "loss": 0.30100804567337036, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.5858145727426773, |
| "grad_norm": 0.1838119775056839, |
| "learning_rate": 4.193548387096774e-06, |
| "loss": 0.3351133167743683, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.5867828612926652, |
| "grad_norm": 0.19593499600887299, |
| "learning_rate": 4.183773216031281e-06, |
| "loss": 0.28100982308387756, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.5877511498426531, |
| "grad_norm": 0.16322395205497742, |
| "learning_rate": 4.173998044965787e-06, |
| "loss": 0.26457294821739197, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.588719438392641, |
| "grad_norm": 0.1786675602197647, |
| "learning_rate": 4.164222873900294e-06, |
| "loss": 0.2559005618095398, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.5896877269426289, |
| "grad_norm": 0.19520226120948792, |
| "learning_rate": 4.1544477028348e-06, |
| "loss": 0.2999897003173828, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.5906560154926168, |
| "grad_norm": 0.17103256285190582, |
| "learning_rate": 4.1446725317693065e-06, |
| "loss": 0.30779922008514404, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5916243040426047, |
| "grad_norm": 0.17526350915431976, |
| "learning_rate": 4.134897360703813e-06, |
| "loss": 0.29173529148101807, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "grad_norm": 0.18206097185611725, |
| "learning_rate": 4.125122189638319e-06, |
| "loss": 0.29199522733688354, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.5935608811425805, |
| "grad_norm": 0.1679670661687851, |
| "learning_rate": 4.115347018572826e-06, |
| "loss": 0.25542762875556946, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.5945291696925684, |
| "grad_norm": 0.19803665578365326, |
| "learning_rate": 4.105571847507332e-06, |
| "loss": 0.2858905792236328, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.5954974582425563, |
| "grad_norm": 0.17995841801166534, |
| "learning_rate": 4.095796676441839e-06, |
| "loss": 0.27671483159065247, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.5964657467925442, |
| "grad_norm": 0.18616031110286713, |
| "learning_rate": 4.086021505376344e-06, |
| "loss": 0.2712816596031189, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.597434035342532, |
| "grad_norm": 0.19008490443229675, |
| "learning_rate": 4.076246334310851e-06, |
| "loss": 0.2625333368778229, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.59840232389252, |
| "grad_norm": 0.1998487263917923, |
| "learning_rate": 4.066471163245357e-06, |
| "loss": 0.28343838453292847, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.5993706124425079, |
| "grad_norm": 0.17429369688034058, |
| "learning_rate": 4.0566959921798636e-06, |
| "loss": 0.2731628715991974, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.6003389009924958, |
| "grad_norm": 0.19498169422149658, |
| "learning_rate": 4.04692082111437e-06, |
| "loss": 0.29789942502975464, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6013071895424836, |
| "grad_norm": 0.178371399641037, |
| "learning_rate": 4.0371456500488756e-06, |
| "loss": 0.28699758648872375, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.6022754780924715, |
| "grad_norm": 0.1959543526172638, |
| "learning_rate": 4.027370478983382e-06, |
| "loss": 0.32473817467689514, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.6032437666424595, |
| "grad_norm": 0.18459352850914001, |
| "learning_rate": 4.017595307917888e-06, |
| "loss": 0.2685423493385315, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.6042120551924474, |
| "grad_norm": 0.18294654786586761, |
| "learning_rate": 4.007820136852395e-06, |
| "loss": 0.28354576230049133, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.6051803437424352, |
| "grad_norm": 0.19509679079055786, |
| "learning_rate": 3.998044965786901e-06, |
| "loss": 0.30655306577682495, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.6061486322924231, |
| "grad_norm": 0.18222194910049438, |
| "learning_rate": 3.988269794721408e-06, |
| "loss": 0.26319050788879395, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.6071169208424111, |
| "grad_norm": 0.21766740083694458, |
| "learning_rate": 3.978494623655914e-06, |
| "loss": 0.29476338624954224, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.608085209392399, |
| "grad_norm": 0.1838199496269226, |
| "learning_rate": 3.9687194525904206e-06, |
| "loss": 0.313698947429657, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.6090534979423868, |
| "grad_norm": 0.18570809066295624, |
| "learning_rate": 3.958944281524927e-06, |
| "loss": 0.3509555160999298, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.6100217864923747, |
| "grad_norm": 0.19644515216350555, |
| "learning_rate": 3.949169110459433e-06, |
| "loss": 0.2718711197376251, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6109900750423626, |
| "grad_norm": 0.1909233182668686, |
| "learning_rate": 3.93939393939394e-06, |
| "loss": 0.3205246925354004, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.6119583635923506, |
| "grad_norm": 0.18373022973537445, |
| "learning_rate": 3.929618768328446e-06, |
| "loss": 0.295777827501297, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.6129266521423384, |
| "grad_norm": 0.18277910351753235, |
| "learning_rate": 3.919843597262952e-06, |
| "loss": 0.3180069625377655, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.6138949406923263, |
| "grad_norm": 0.19421808421611786, |
| "learning_rate": 3.910068426197458e-06, |
| "loss": 0.2791898250579834, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.6148632292423142, |
| "grad_norm": 0.17601901292800903, |
| "learning_rate": 3.900293255131965e-06, |
| "loss": 0.26764553785324097, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.615831517792302, |
| "grad_norm": 0.1744976043701172, |
| "learning_rate": 3.890518084066471e-06, |
| "loss": 0.307162344455719, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.61679980634229, |
| "grad_norm": 0.1944838911294937, |
| "learning_rate": 3.8807429130009776e-06, |
| "loss": 0.2940749228000641, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.6177680948922779, |
| "grad_norm": 0.29076093435287476, |
| "learning_rate": 3.870967741935484e-06, |
| "loss": 0.32644060254096985, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.6187363834422658, |
| "grad_norm": 0.18829455971717834, |
| "learning_rate": 3.8611925708699904e-06, |
| "loss": 0.28472450375556946, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.6197046719922537, |
| "grad_norm": 0.1949450969696045, |
| "learning_rate": 3.851417399804497e-06, |
| "loss": 0.2577253580093384, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6206729605422416, |
| "grad_norm": 0.1973968893289566, |
| "learning_rate": 3.841642228739003e-06, |
| "loss": 0.28368428349494934, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.6216412490922295, |
| "grad_norm": 0.1733219027519226, |
| "learning_rate": 3.83186705767351e-06, |
| "loss": 0.26086172461509705, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.6226095376422174, |
| "grad_norm": 0.20539860427379608, |
| "learning_rate": 3.822091886608016e-06, |
| "loss": 0.3593149483203888, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.6235778261922053, |
| "grad_norm": 0.18563023209571838, |
| "learning_rate": 3.812316715542522e-06, |
| "loss": 0.3003098964691162, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.6245461147421931, |
| "grad_norm": 0.19810666143894196, |
| "learning_rate": 3.8025415444770286e-06, |
| "loss": 0.2925172448158264, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.6255144032921811, |
| "grad_norm": 0.2321307510137558, |
| "learning_rate": 3.792766373411535e-06, |
| "loss": 0.25980299711227417, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.626482691842169, |
| "grad_norm": 0.16675977408885956, |
| "learning_rate": 3.7829912023460414e-06, |
| "loss": 0.258143812417984, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.6274509803921569, |
| "grad_norm": 0.18522602319717407, |
| "learning_rate": 3.773216031280548e-06, |
| "loss": 0.3249315619468689, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.6284192689421447, |
| "grad_norm": 0.17373818159103394, |
| "learning_rate": 3.763440860215054e-06, |
| "loss": 0.289806991815567, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.6293875574921327, |
| "grad_norm": 0.18944744765758514, |
| "learning_rate": 3.7536656891495603e-06, |
| "loss": 0.30416756868362427, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6303558460421206, |
| "grad_norm": 0.19680985808372498, |
| "learning_rate": 3.7438905180840667e-06, |
| "loss": 0.28972989320755005, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.6313241345921085, |
| "grad_norm": 0.2205217033624649, |
| "learning_rate": 3.734115347018573e-06, |
| "loss": 0.28554368019104004, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.6322924231420963, |
| "grad_norm": 0.172973170876503, |
| "learning_rate": 3.7243401759530796e-06, |
| "loss": 0.331814169883728, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.6332607116920842, |
| "grad_norm": 0.1913972645998001, |
| "learning_rate": 3.714565004887586e-06, |
| "loss": 0.27782005071640015, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.6342290002420722, |
| "grad_norm": 0.19561362266540527, |
| "learning_rate": 3.7047898338220924e-06, |
| "loss": 0.3030650019645691, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.63519728879206, |
| "grad_norm": 0.19253604114055634, |
| "learning_rate": 3.6950146627565984e-06, |
| "loss": 0.29422512650489807, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.6361655773420479, |
| "grad_norm": 0.19124586880207062, |
| "learning_rate": 3.685239491691105e-06, |
| "loss": 0.26767367124557495, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.6371338658920358, |
| "grad_norm": 0.2221280336380005, |
| "learning_rate": 3.6754643206256113e-06, |
| "loss": 0.3479483723640442, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.6381021544420237, |
| "grad_norm": 0.20241160690784454, |
| "learning_rate": 3.6656891495601177e-06, |
| "loss": 0.2787402868270874, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.6390704429920117, |
| "grad_norm": 0.19073940813541412, |
| "learning_rate": 3.655913978494624e-06, |
| "loss": 0.29317712783813477, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.6400387315419995, |
| "grad_norm": 0.20870280265808105, |
| "learning_rate": 3.6461388074291306e-06, |
| "loss": 0.3079635202884674, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.6410070200919874, |
| "grad_norm": 0.18194538354873657, |
| "learning_rate": 3.6363636363636366e-06, |
| "loss": 0.26036518812179565, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.6419753086419753, |
| "grad_norm": 0.19380781054496765, |
| "learning_rate": 3.626588465298143e-06, |
| "loss": 0.32105469703674316, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.6429435971919633, |
| "grad_norm": 0.18779927492141724, |
| "learning_rate": 3.6168132942326494e-06, |
| "loss": 0.23958516120910645, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.6439118857419511, |
| "grad_norm": 0.16800741851329803, |
| "learning_rate": 3.607038123167156e-06, |
| "loss": 0.3183926045894623, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.644880174291939, |
| "grad_norm": 0.18218325078487396, |
| "learning_rate": 3.5972629521016623e-06, |
| "loss": 0.36072227358818054, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.6458484628419269, |
| "grad_norm": 0.1973208338022232, |
| "learning_rate": 3.5874877810361687e-06, |
| "loss": 0.31081509590148926, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.6468167513919147, |
| "grad_norm": 0.17719313502311707, |
| "learning_rate": 3.5777126099706747e-06, |
| "loss": 0.3088850677013397, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.6477850399419027, |
| "grad_norm": 0.22201496362686157, |
| "learning_rate": 3.567937438905181e-06, |
| "loss": 0.2832217812538147, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.6487533284918906, |
| "grad_norm": 0.2052207589149475, |
| "learning_rate": 3.5581622678396876e-06, |
| "loss": 0.2777295708656311, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.6497216170418785, |
| "grad_norm": 0.17530739307403564, |
| "learning_rate": 3.548387096774194e-06, |
| "loss": 0.3057093620300293, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.6506899055918663, |
| "grad_norm": 0.20253078639507294, |
| "learning_rate": 3.5386119257087004e-06, |
| "loss": 0.2525123059749603, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.6516581941418542, |
| "grad_norm": 0.19099098443984985, |
| "learning_rate": 3.528836754643207e-06, |
| "loss": 0.26486071944236755, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.6526264826918422, |
| "grad_norm": 0.19429947435855865, |
| "learning_rate": 3.5190615835777133e-06, |
| "loss": 0.27915486693382263, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.6535947712418301, |
| "grad_norm": 0.19641940295696259, |
| "learning_rate": 3.5092864125122193e-06, |
| "loss": 0.2952028214931488, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.654563059791818, |
| "grad_norm": 0.18606482446193695, |
| "learning_rate": 3.4995112414467257e-06, |
| "loss": 0.26710712909698486, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.6555313483418058, |
| "grad_norm": 0.18616363406181335, |
| "learning_rate": 3.489736070381232e-06, |
| "loss": 0.2896000146865845, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.6564996368917938, |
| "grad_norm": 0.18305549025535583, |
| "learning_rate": 3.4799608993157386e-06, |
| "loss": 0.27804529666900635, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.6574679254417817, |
| "grad_norm": 0.19162502884864807, |
| "learning_rate": 3.470185728250245e-06, |
| "loss": 0.3180793821811676, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.6584362139917695, |
| "grad_norm": 0.17288638651371002, |
| "learning_rate": 3.4604105571847514e-06, |
| "loss": 0.27254006266593933, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.6594045025417574, |
| "grad_norm": 0.20115594565868378, |
| "learning_rate": 3.4506353861192575e-06, |
| "loss": 0.326080858707428, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.6603727910917453, |
| "grad_norm": 0.20309938490390778, |
| "learning_rate": 3.440860215053764e-06, |
| "loss": 0.29796141386032104, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.6613410796417333, |
| "grad_norm": 0.20176127552986145, |
| "learning_rate": 3.43108504398827e-06, |
| "loss": 0.2814856469631195, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.6623093681917211, |
| "grad_norm": 0.21620069444179535, |
| "learning_rate": 3.4213098729227763e-06, |
| "loss": 0.36335426568984985, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.663277656741709, |
| "grad_norm": 0.20982684195041656, |
| "learning_rate": 3.4115347018572823e-06, |
| "loss": 0.2819657027721405, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.6642459452916969, |
| "grad_norm": 0.18432947993278503, |
| "learning_rate": 3.4017595307917887e-06, |
| "loss": 0.32050448656082153, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.6652142338416849, |
| "grad_norm": 0.17828144133090973, |
| "learning_rate": 3.391984359726295e-06, |
| "loss": 0.3236311376094818, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.6661825223916727, |
| "grad_norm": 0.1964399665594101, |
| "learning_rate": 3.3822091886608016e-06, |
| "loss": 0.32314908504486084, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.6671508109416606, |
| "grad_norm": 0.19078870117664337, |
| "learning_rate": 3.372434017595308e-06, |
| "loss": 0.27393656969070435, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.6681190994916485, |
| "grad_norm": 0.19160780310630798, |
| "learning_rate": 3.3626588465298145e-06, |
| "loss": 0.3088667690753937, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.6690873880416364, |
| "grad_norm": 0.18718208372592926, |
| "learning_rate": 3.352883675464321e-06, |
| "loss": 0.28694334626197815, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.6700556765916243, |
| "grad_norm": 0.19036638736724854, |
| "learning_rate": 3.343108504398827e-06, |
| "loss": 0.2764681279659271, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.6710239651416122, |
| "grad_norm": 0.17227678000926971, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.2784879207611084, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.6719922536916001, |
| "grad_norm": 0.20473547279834747, |
| "learning_rate": 3.3235581622678398e-06, |
| "loss": 0.2824912667274475, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.672960542241588, |
| "grad_norm": 0.1921864002943039, |
| "learning_rate": 3.313782991202346e-06, |
| "loss": 0.2795690894126892, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.6739288307915758, |
| "grad_norm": 0.2057105302810669, |
| "learning_rate": 3.3040078201368526e-06, |
| "loss": 0.27492284774780273, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.6748971193415638, |
| "grad_norm": 0.2041766345500946, |
| "learning_rate": 3.294232649071359e-06, |
| "loss": 0.30277037620544434, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.6758654078915517, |
| "grad_norm": 0.19042398035526276, |
| "learning_rate": 3.284457478005865e-06, |
| "loss": 0.31011852622032166, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.6768336964415396, |
| "grad_norm": 0.18352696299552917, |
| "learning_rate": 3.2746823069403715e-06, |
| "loss": 0.28382012248039246, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.6778019849915274, |
| "grad_norm": 0.2007741928100586, |
| "learning_rate": 3.264907135874878e-06, |
| "loss": 0.28195974230766296, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6787702735415154, |
| "grad_norm": 0.20310088992118835, |
| "learning_rate": 3.2551319648093843e-06, |
| "loss": 0.2988584637641907, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.6797385620915033, |
| "grad_norm": 0.20353393256664276, |
| "learning_rate": 3.2453567937438908e-06, |
| "loss": 0.24649690091609955, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.6807068506414912, |
| "grad_norm": 0.1926201432943344, |
| "learning_rate": 3.235581622678397e-06, |
| "loss": 0.2895974814891815, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.681675139191479, |
| "grad_norm": 0.19565631449222565, |
| "learning_rate": 3.225806451612903e-06, |
| "loss": 0.2735288441181183, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.6826434277414669, |
| "grad_norm": 0.20555929839611053, |
| "learning_rate": 3.2160312805474096e-06, |
| "loss": 0.2749082148075104, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.6836117162914549, |
| "grad_norm": 0.19519391655921936, |
| "learning_rate": 3.206256109481916e-06, |
| "loss": 0.35463032126426697, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.6845800048414428, |
| "grad_norm": 0.19124329090118408, |
| "learning_rate": 3.1964809384164225e-06, |
| "loss": 0.2960769832134247, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.6855482933914306, |
| "grad_norm": 0.19353725016117096, |
| "learning_rate": 3.186705767350929e-06, |
| "loss": 0.29588258266448975, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.6865165819414185, |
| "grad_norm": 0.1908576339483261, |
| "learning_rate": 3.1769305962854353e-06, |
| "loss": 0.32410839200019836, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.6874848704914064, |
| "grad_norm": 0.19978390634059906, |
| "learning_rate": 3.1671554252199418e-06, |
| "loss": 0.26154428720474243, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.6884531590413944, |
| "grad_norm": 0.17735745012760162, |
| "learning_rate": 3.1573802541544478e-06, |
| "loss": 0.2741011083126068, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.6894214475913822, |
| "grad_norm": 0.19261346757411957, |
| "learning_rate": 3.147605083088954e-06, |
| "loss": 0.29346680641174316, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.6903897361413701, |
| "grad_norm": 0.18815375864505768, |
| "learning_rate": 3.1378299120234606e-06, |
| "loss": 0.317450612783432, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.691358024691358, |
| "grad_norm": 0.1747797578573227, |
| "learning_rate": 3.128054740957967e-06, |
| "loss": 0.26710936427116394, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.692326313241346, |
| "grad_norm": 0.1850060522556305, |
| "learning_rate": 3.1182795698924735e-06, |
| "loss": 0.3440788984298706, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.6932946017913338, |
| "grad_norm": 0.19904842972755432, |
| "learning_rate": 3.10850439882698e-06, |
| "loss": 0.27237698435783386, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.6942628903413217, |
| "grad_norm": 0.19219987094402313, |
| "learning_rate": 3.098729227761486e-06, |
| "loss": 0.2665986716747284, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.6952311788913096, |
| "grad_norm": 0.1957559734582901, |
| "learning_rate": 3.0889540566959923e-06, |
| "loss": 0.28654614090919495, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.6961994674412975, |
| "grad_norm": 0.2007106989622116, |
| "learning_rate": 3.0791788856304988e-06, |
| "loss": 0.30569547414779663, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.6971677559912854, |
| "grad_norm": 0.21884313225746155, |
| "learning_rate": 3.069403714565005e-06, |
| "loss": 0.2851307690143585, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.6981360445412733, |
| "grad_norm": 0.18904490768909454, |
| "learning_rate": 3.0596285434995116e-06, |
| "loss": 0.32544124126434326, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.6991043330912612, |
| "grad_norm": 0.22827713191509247, |
| "learning_rate": 3.049853372434018e-06, |
| "loss": 0.2876453101634979, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.7000726216412491, |
| "grad_norm": 0.18982501327991486, |
| "learning_rate": 3.0400782013685245e-06, |
| "loss": 0.28896069526672363, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.701040910191237, |
| "grad_norm": 0.208974227309227, |
| "learning_rate": 3.0303030303030305e-06, |
| "loss": 0.26989954710006714, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.7020091987412249, |
| "grad_norm": 0.19682757556438446, |
| "learning_rate": 3.020527859237537e-06, |
| "loss": 0.316387414932251, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.7029774872912128, |
| "grad_norm": 0.1741049438714981, |
| "learning_rate": 3.0107526881720433e-06, |
| "loss": 0.26443612575531006, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.7039457758412007, |
| "grad_norm": 0.2087400257587433, |
| "learning_rate": 3.0009775171065498e-06, |
| "loss": 0.2930486500263214, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.7049140643911885, |
| "grad_norm": 0.19682444632053375, |
| "learning_rate": 2.991202346041056e-06, |
| "loss": 0.2777274250984192, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 0.18029047548770905, |
| "learning_rate": 2.9814271749755626e-06, |
| "loss": 0.30682748556137085, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.7068506414911644, |
| "grad_norm": 0.21413344144821167, |
| "learning_rate": 2.9716520039100686e-06, |
| "loss": 0.2852901220321655, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7078189300411523, |
| "grad_norm": 0.20641835033893585, |
| "learning_rate": 2.961876832844575e-06, |
| "loss": 0.30264589190483093, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.7087872185911401, |
| "grad_norm": 0.20583511888980865, |
| "learning_rate": 2.9521016617790815e-06, |
| "loss": 0.31246519088745117, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.709755507141128, |
| "grad_norm": 0.19352665543556213, |
| "learning_rate": 2.942326490713588e-06, |
| "loss": 0.25201672315597534, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.710723795691116, |
| "grad_norm": 0.19948013126850128, |
| "learning_rate": 2.9325513196480943e-06, |
| "loss": 0.2469996213912964, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.7116920842411039, |
| "grad_norm": 0.20024363696575165, |
| "learning_rate": 2.9227761485826008e-06, |
| "loss": 0.28980398178100586, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.7126603727910917, |
| "grad_norm": 0.19101053476333618, |
| "learning_rate": 2.9130009775171068e-06, |
| "loss": 0.27129659056663513, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.7136286613410796, |
| "grad_norm": 0.19807986915111542, |
| "learning_rate": 2.903225806451613e-06, |
| "loss": 0.2989445924758911, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.7145969498910676, |
| "grad_norm": 0.2047462910413742, |
| "learning_rate": 2.8934506353861196e-06, |
| "loss": 0.29249265789985657, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.7155652384410555, |
| "grad_norm": 0.21451207995414734, |
| "learning_rate": 2.883675464320626e-06, |
| "loss": 0.308368980884552, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.7165335269910433, |
| "grad_norm": 0.18969380855560303, |
| "learning_rate": 2.8739002932551325e-06, |
| "loss": 0.30544131994247437, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7175018155410312, |
| "grad_norm": 0.21949923038482666, |
| "learning_rate": 2.864125122189639e-06, |
| "loss": 0.2871190011501312, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.7184701040910191, |
| "grad_norm": 0.18441982567310333, |
| "learning_rate": 2.8543499511241454e-06, |
| "loss": 0.34001511335372925, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.7194383926410071, |
| "grad_norm": 0.20495833456516266, |
| "learning_rate": 2.8445747800586514e-06, |
| "loss": 0.31153956055641174, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.7204066811909949, |
| "grad_norm": 0.17847374081611633, |
| "learning_rate": 2.8347996089931578e-06, |
| "loss": 0.2785325348377228, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.7213749697409828, |
| "grad_norm": 0.20845407247543335, |
| "learning_rate": 2.8250244379276642e-06, |
| "loss": 0.28710830211639404, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.7223432582909707, |
| "grad_norm": 0.20801788568496704, |
| "learning_rate": 2.8152492668621706e-06, |
| "loss": 0.2709939181804657, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.7233115468409586, |
| "grad_norm": 0.17509667575359344, |
| "learning_rate": 2.8054740957966762e-06, |
| "loss": 0.24158413708209991, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.7242798353909465, |
| "grad_norm": 0.2237170934677124, |
| "learning_rate": 2.7956989247311827e-06, |
| "loss": 0.26651033759117126, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.7252481239409344, |
| "grad_norm": 0.1964648962020874, |
| "learning_rate": 2.785923753665689e-06, |
| "loss": 0.26544153690338135, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.7262164124909223, |
| "grad_norm": 0.1828320175409317, |
| "learning_rate": 2.7761485826001955e-06, |
| "loss": 0.24963009357452393, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7271847010409102, |
| "grad_norm": 0.17765893042087555, |
| "learning_rate": 2.766373411534702e-06, |
| "loss": 0.2530496418476105, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.7281529895908981, |
| "grad_norm": 0.17918957769870758, |
| "learning_rate": 2.7565982404692084e-06, |
| "loss": 0.2385520339012146, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.729121278140886, |
| "grad_norm": 0.1830013394355774, |
| "learning_rate": 2.7468230694037144e-06, |
| "loss": 0.26376885175704956, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.7300895666908739, |
| "grad_norm": 0.20502547919750214, |
| "learning_rate": 2.737047898338221e-06, |
| "loss": 0.2629661560058594, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.7310578552408618, |
| "grad_norm": 0.19126304984092712, |
| "learning_rate": 2.7272727272727272e-06, |
| "loss": 0.27432548999786377, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.7320261437908496, |
| "grad_norm": 0.1837206333875656, |
| "learning_rate": 2.7174975562072337e-06, |
| "loss": 0.2646147906780243, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.7329944323408376, |
| "grad_norm": 0.22238245606422424, |
| "learning_rate": 2.70772238514174e-06, |
| "loss": 0.29708367586135864, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.7339627208908255, |
| "grad_norm": 0.19030597805976868, |
| "learning_rate": 2.6979472140762465e-06, |
| "loss": 0.3007453680038452, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.7349310094408134, |
| "grad_norm": 0.18150079250335693, |
| "learning_rate": 2.688172043010753e-06, |
| "loss": 0.28624916076660156, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.7358992979908012, |
| "grad_norm": 0.21237732470035553, |
| "learning_rate": 2.678396871945259e-06, |
| "loss": 0.31297317147254944, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.7368675865407892, |
| "grad_norm": 0.2071557343006134, |
| "learning_rate": 2.6686217008797654e-06, |
| "loss": 0.25083449482917786, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.7378358750907771, |
| "grad_norm": 0.18313196301460266, |
| "learning_rate": 2.658846529814272e-06, |
| "loss": 0.28581753373146057, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.738804163640765, |
| "grad_norm": 0.20016784965991974, |
| "learning_rate": 2.6490713587487782e-06, |
| "loss": 0.28767916560173035, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.7397724521907528, |
| "grad_norm": 0.1874615103006363, |
| "learning_rate": 2.6392961876832847e-06, |
| "loss": 0.28244420886039734, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.7407407407407407, |
| "grad_norm": 0.21257996559143066, |
| "learning_rate": 2.629521016617791e-06, |
| "loss": 0.2639189064502716, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.7417090292907287, |
| "grad_norm": 0.21034327149391174, |
| "learning_rate": 2.619745845552297e-06, |
| "loss": 0.2773539125919342, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.7426773178407166, |
| "grad_norm": 0.21635524928569794, |
| "learning_rate": 2.6099706744868035e-06, |
| "loss": 0.283179372549057, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.7436456063907044, |
| "grad_norm": 0.19200022518634796, |
| "learning_rate": 2.60019550342131e-06, |
| "loss": 0.2603984475135803, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.7446138949406923, |
| "grad_norm": 0.20428141951560974, |
| "learning_rate": 2.5904203323558164e-06, |
| "loss": 0.3322230577468872, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.7455821834906802, |
| "grad_norm": 0.17995081841945648, |
| "learning_rate": 2.580645161290323e-06, |
| "loss": 0.26364511251449585, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.7465504720406682, |
| "grad_norm": 0.19678199291229248, |
| "learning_rate": 2.5708699902248292e-06, |
| "loss": 0.3625681698322296, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.747518760590656, |
| "grad_norm": 0.183084636926651, |
| "learning_rate": 2.5610948191593352e-06, |
| "loss": 0.2772168517112732, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.7484870491406439, |
| "grad_norm": 0.2048066258430481, |
| "learning_rate": 2.5513196480938417e-06, |
| "loss": 0.30713188648223877, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.7494553376906318, |
| "grad_norm": 0.21669703722000122, |
| "learning_rate": 2.541544477028348e-06, |
| "loss": 0.3376876413822174, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.7504236262406198, |
| "grad_norm": 0.16890452802181244, |
| "learning_rate": 2.5317693059628545e-06, |
| "loss": 0.28936320543289185, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.7513919147906076, |
| "grad_norm": 0.2113950401544571, |
| "learning_rate": 2.521994134897361e-06, |
| "loss": 0.3068625330924988, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.7523602033405955, |
| "grad_norm": 0.19548510015010834, |
| "learning_rate": 2.5122189638318674e-06, |
| "loss": 0.2764047384262085, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.7533284918905834, |
| "grad_norm": 0.19676341116428375, |
| "learning_rate": 2.502443792766374e-06, |
| "loss": 0.32238852977752686, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.7542967804405712, |
| "grad_norm": 0.20870518684387207, |
| "learning_rate": 2.49266862170088e-06, |
| "loss": 0.2966168224811554, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.7552650689905592, |
| "grad_norm": 0.19091863930225372, |
| "learning_rate": 2.4828934506353862e-06, |
| "loss": 0.260260671377182, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.7562333575405471, |
| "grad_norm": 0.18716365098953247, |
| "learning_rate": 2.4731182795698927e-06, |
| "loss": 0.2716587781906128, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.757201646090535, |
| "grad_norm": 0.19777894020080566, |
| "learning_rate": 2.463343108504399e-06, |
| "loss": 0.2737089693546295, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.7581699346405228, |
| "grad_norm": 0.1986621618270874, |
| "learning_rate": 2.4535679374389055e-06, |
| "loss": 0.27934715151786804, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.7591382231905107, |
| "grad_norm": 0.2001214176416397, |
| "learning_rate": 2.443792766373412e-06, |
| "loss": 0.29675161838531494, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.7601065117404987, |
| "grad_norm": 0.17941324412822723, |
| "learning_rate": 2.434017595307918e-06, |
| "loss": 0.2796166241168976, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.7610748002904866, |
| "grad_norm": 0.18563294410705566, |
| "learning_rate": 2.4242424242424244e-06, |
| "loss": 0.2594640851020813, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.7620430888404744, |
| "grad_norm": 0.1819997876882553, |
| "learning_rate": 2.414467253176931e-06, |
| "loss": 0.28631582856178284, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.7630113773904623, |
| "grad_norm": 0.2092135101556778, |
| "learning_rate": 2.4046920821114372e-06, |
| "loss": 0.29993587732315063, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.7639796659404503, |
| "grad_norm": 0.20817267894744873, |
| "learning_rate": 2.3949169110459437e-06, |
| "loss": 0.2964945435523987, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.7649479544904382, |
| "grad_norm": 0.18305228650569916, |
| "learning_rate": 2.38514173998045e-06, |
| "loss": 0.2470388114452362, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.765916243040426, |
| "grad_norm": 0.18974260985851288, |
| "learning_rate": 2.375366568914956e-06, |
| "loss": 0.26321178674697876, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.7668845315904139, |
| "grad_norm": 0.22661836445331573, |
| "learning_rate": 2.3655913978494625e-06, |
| "loss": 0.28920090198516846, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.7678528201404018, |
| "grad_norm": 0.21956227719783783, |
| "learning_rate": 2.355816226783969e-06, |
| "loss": 0.2883264422416687, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.7688211086903898, |
| "grad_norm": 0.21458660066127777, |
| "learning_rate": 2.3460410557184754e-06, |
| "loss": 0.3575912117958069, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.7697893972403776, |
| "grad_norm": 0.19066624343395233, |
| "learning_rate": 2.3362658846529814e-06, |
| "loss": 0.25565528869628906, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.7707576857903655, |
| "grad_norm": 0.19037111103534698, |
| "learning_rate": 2.326490713587488e-06, |
| "loss": 0.26588374376296997, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.7717259743403534, |
| "grad_norm": 0.1706329733133316, |
| "learning_rate": 2.3167155425219943e-06, |
| "loss": 0.2640436887741089, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.7726942628903414, |
| "grad_norm": 0.203688383102417, |
| "learning_rate": 2.3069403714565007e-06, |
| "loss": 0.272479772567749, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.7736625514403292, |
| "grad_norm": 0.21687336266040802, |
| "learning_rate": 2.297165200391007e-06, |
| "loss": 0.2606707811355591, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.7746308399903171, |
| "grad_norm": 0.18459083139896393, |
| "learning_rate": 2.287390029325513e-06, |
| "loss": 0.2953495979309082, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.775599128540305, |
| "grad_norm": 0.2097976803779602, |
| "learning_rate": 2.2776148582600195e-06, |
| "loss": 0.29703575372695923, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.7765674170902929, |
| "grad_norm": 0.20715487003326416, |
| "learning_rate": 2.267839687194526e-06, |
| "loss": 0.2804234027862549, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.7775357056402808, |
| "grad_norm": 0.21985439956188202, |
| "learning_rate": 2.2580645161290324e-06, |
| "loss": 0.29094335436820984, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.7785039941902687, |
| "grad_norm": 0.17857959866523743, |
| "learning_rate": 2.248289345063539e-06, |
| "loss": 0.2993057668209076, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.7794722827402566, |
| "grad_norm": 0.20267243683338165, |
| "learning_rate": 2.2385141739980453e-06, |
| "loss": 0.28471803665161133, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.7804405712902445, |
| "grad_norm": 0.18737877905368805, |
| "learning_rate": 2.2287390029325513e-06, |
| "loss": 0.27943700551986694, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.7814088598402323, |
| "grad_norm": 0.17687441408634186, |
| "learning_rate": 2.2189638318670577e-06, |
| "loss": 0.2751350402832031, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.7823771483902203, |
| "grad_norm": 0.20583491027355194, |
| "learning_rate": 2.209188660801564e-06, |
| "loss": 0.28236058354377747, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.7833454369402082, |
| "grad_norm": 0.22925525903701782, |
| "learning_rate": 2.1994134897360705e-06, |
| "loss": 0.2999430000782013, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.7843137254901961, |
| "grad_norm": 0.1996539980173111, |
| "learning_rate": 2.189638318670577e-06, |
| "loss": 0.29116010665893555, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.7852820140401839, |
| "grad_norm": 0.19890666007995605, |
| "learning_rate": 2.1798631476050834e-06, |
| "loss": 0.2903507947921753, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.7862503025901719, |
| "grad_norm": 0.1992999017238617, |
| "learning_rate": 2.17008797653959e-06, |
| "loss": 0.2690543532371521, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.7872185911401598, |
| "grad_norm": 0.1835276484489441, |
| "learning_rate": 2.160312805474096e-06, |
| "loss": 0.28388747572898865, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.7881868796901477, |
| "grad_norm": 0.236952006816864, |
| "learning_rate": 2.1505376344086023e-06, |
| "loss": 0.2714405953884125, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.7891551682401355, |
| "grad_norm": 0.19345760345458984, |
| "learning_rate": 2.1407624633431087e-06, |
| "loss": 0.2626250982284546, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.7901234567901234, |
| "grad_norm": 0.20259200036525726, |
| "learning_rate": 2.130987292277615e-06, |
| "loss": 0.29853078722953796, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.7910917453401114, |
| "grad_norm": 0.1846383810043335, |
| "learning_rate": 2.1212121212121216e-06, |
| "loss": 0.27077630162239075, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.7920600338900993, |
| "grad_norm": 0.21752354502677917, |
| "learning_rate": 2.111436950146628e-06, |
| "loss": 0.28987622261047363, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.7930283224400871, |
| "grad_norm": 0.18915565311908722, |
| "learning_rate": 2.101661779081134e-06, |
| "loss": 0.2888622581958771, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.793996610990075, |
| "grad_norm": 0.2110828459262848, |
| "learning_rate": 2.0918866080156404e-06, |
| "loss": 0.24480582773685455, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.7949648995400629, |
| "grad_norm": 0.19739995896816254, |
| "learning_rate": 2.082111436950147e-06, |
| "loss": 0.26558613777160645, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.7959331880900509, |
| "grad_norm": 0.17837020754814148, |
| "learning_rate": 2.0723362658846533e-06, |
| "loss": 0.2380271553993225, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.7969014766400387, |
| "grad_norm": 0.2132730782032013, |
| "learning_rate": 2.0625610948191597e-06, |
| "loss": 0.2731876075267792, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.7978697651900266, |
| "grad_norm": 0.18625319004058838, |
| "learning_rate": 2.052785923753666e-06, |
| "loss": 0.2940404415130615, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.7988380537400145, |
| "grad_norm": 0.18981625139713287, |
| "learning_rate": 2.043010752688172e-06, |
| "loss": 0.25833550095558167, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.7998063422900025, |
| "grad_norm": 0.19009682536125183, |
| "learning_rate": 2.0332355816226786e-06, |
| "loss": 0.26862984895706177, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.8007746308399903, |
| "grad_norm": 0.17396694421768188, |
| "learning_rate": 2.023460410557185e-06, |
| "loss": 0.2869129180908203, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.8017429193899782, |
| "grad_norm": 0.19141492247581482, |
| "learning_rate": 2.013685239491691e-06, |
| "loss": 0.32933974266052246, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.8027112079399661, |
| "grad_norm": 0.22585217654705048, |
| "learning_rate": 2.0039100684261974e-06, |
| "loss": 0.25727906823158264, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.803679496489954, |
| "grad_norm": 0.20204074680805206, |
| "learning_rate": 1.994134897360704e-06, |
| "loss": 0.28683584928512573, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8046477850399419, |
| "grad_norm": 0.1816793978214264, |
| "learning_rate": 1.9843597262952103e-06, |
| "loss": 0.2783251702785492, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.8056160735899298, |
| "grad_norm": 0.19098123908042908, |
| "learning_rate": 1.9745845552297167e-06, |
| "loss": 0.28205838799476624, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.8065843621399177, |
| "grad_norm": 0.2102154642343521, |
| "learning_rate": 1.964809384164223e-06, |
| "loss": 0.32708585262298584, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.8075526506899056, |
| "grad_norm": 0.2377101480960846, |
| "learning_rate": 1.955034213098729e-06, |
| "loss": 0.3074392080307007, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.8085209392398935, |
| "grad_norm": 0.21340312063694, |
| "learning_rate": 1.9452590420332356e-06, |
| "loss": 0.28936126828193665, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.8094892277898814, |
| "grad_norm": 0.19761207699775696, |
| "learning_rate": 1.935483870967742e-06, |
| "loss": 0.30385932326316833, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.8104575163398693, |
| "grad_norm": 0.17896802723407745, |
| "learning_rate": 1.9257086999022484e-06, |
| "loss": 0.2657051682472229, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.8114258048898572, |
| "grad_norm": 0.19170638918876648, |
| "learning_rate": 1.915933528836755e-06, |
| "loss": 0.3132804036140442, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.812394093439845, |
| "grad_norm": 0.18938247859477997, |
| "learning_rate": 1.906158357771261e-06, |
| "loss": 0.260288804769516, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.813362381989833, |
| "grad_norm": 0.18173451721668243, |
| "learning_rate": 1.8963831867057675e-06, |
| "loss": 0.2886829078197479, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.8143306705398209, |
| "grad_norm": 0.1915765106678009, |
| "learning_rate": 1.886608015640274e-06, |
| "loss": 0.30934807658195496, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.8152989590898088, |
| "grad_norm": 0.2193581461906433, |
| "learning_rate": 1.8768328445747801e-06, |
| "loss": 0.29573243856430054, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.8162672476397966, |
| "grad_norm": 0.1817786544561386, |
| "learning_rate": 1.8670576735092866e-06, |
| "loss": 0.2668893337249756, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.8172355361897845, |
| "grad_norm": 0.19725021719932556, |
| "learning_rate": 1.857282502443793e-06, |
| "loss": 0.3286668062210083, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.8182038247397725, |
| "grad_norm": 0.20280499756336212, |
| "learning_rate": 1.8475073313782992e-06, |
| "loss": 0.26897329092025757, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.8191721132897604, |
| "grad_norm": 0.19977053999900818, |
| "learning_rate": 1.8377321603128056e-06, |
| "loss": 0.27279871702194214, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.8201404018397482, |
| "grad_norm": 0.19068841636180878, |
| "learning_rate": 1.827956989247312e-06, |
| "loss": 0.254513144493103, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.8211086903897361, |
| "grad_norm": 0.2015547901391983, |
| "learning_rate": 1.8181818181818183e-06, |
| "loss": 0.29649272561073303, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.8220769789397241, |
| "grad_norm": 0.18814009428024292, |
| "learning_rate": 1.8084066471163247e-06, |
| "loss": 0.2868715524673462, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.823045267489712, |
| "grad_norm": 0.19368094205856323, |
| "learning_rate": 1.7986314760508311e-06, |
| "loss": 0.2806050777435303, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8240135560396998, |
| "grad_norm": 0.20298543572425842, |
| "learning_rate": 1.7888563049853374e-06, |
| "loss": 0.26234903931617737, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.8249818445896877, |
| "grad_norm": 0.1959095001220703, |
| "learning_rate": 1.7790811339198438e-06, |
| "loss": 0.28573155403137207, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.8259501331396756, |
| "grad_norm": 0.20691703259944916, |
| "learning_rate": 1.7693059628543502e-06, |
| "loss": 0.2719816565513611, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.8269184216896636, |
| "grad_norm": 0.21501125395298004, |
| "learning_rate": 1.7595307917888567e-06, |
| "loss": 0.29406917095184326, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.8278867102396514, |
| "grad_norm": 0.17245161533355713, |
| "learning_rate": 1.7497556207233629e-06, |
| "loss": 0.2694648206233978, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.8288549987896393, |
| "grad_norm": 0.18521907925605774, |
| "learning_rate": 1.7399804496578693e-06, |
| "loss": 0.2755904793739319, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.8298232873396272, |
| "grad_norm": 0.20708146691322327, |
| "learning_rate": 1.7302052785923757e-06, |
| "loss": 0.2739972472190857, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.830791575889615, |
| "grad_norm": 0.2165932059288025, |
| "learning_rate": 1.720430107526882e-06, |
| "loss": 0.30347809195518494, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.831759864439603, |
| "grad_norm": 0.2044944018125534, |
| "learning_rate": 1.7106549364613882e-06, |
| "loss": 0.30577352643013, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.8327281529895909, |
| "grad_norm": 0.23014850914478302, |
| "learning_rate": 1.7008797653958944e-06, |
| "loss": 0.2837938070297241, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.8336964415395788, |
| "grad_norm": 0.170841246843338, |
| "learning_rate": 1.6911045943304008e-06, |
| "loss": 0.27039510011672974, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.8346647300895667, |
| "grad_norm": 0.2066902071237564, |
| "learning_rate": 1.6813294232649072e-06, |
| "loss": 0.3122199773788452, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.8356330186395546, |
| "grad_norm": 0.21400435268878937, |
| "learning_rate": 1.6715542521994134e-06, |
| "loss": 0.2904992997646332, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.8366013071895425, |
| "grad_norm": 0.23855531215667725, |
| "learning_rate": 1.6617790811339199e-06, |
| "loss": 0.2858680486679077, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.8375695957395304, |
| "grad_norm": 0.20174764096736908, |
| "learning_rate": 1.6520039100684263e-06, |
| "loss": 0.2764103412628174, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.8385378842895183, |
| "grad_norm": 0.1859450787305832, |
| "learning_rate": 1.6422287390029325e-06, |
| "loss": 0.2620023488998413, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.8395061728395061, |
| "grad_norm": 0.18559077382087708, |
| "learning_rate": 1.632453567937439e-06, |
| "loss": 0.2956124544143677, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.8404744613894941, |
| "grad_norm": 0.1958460807800293, |
| "learning_rate": 1.6226783968719454e-06, |
| "loss": 0.24393334984779358, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.841442749939482, |
| "grad_norm": 0.20028391480445862, |
| "learning_rate": 1.6129032258064516e-06, |
| "loss": 0.2675096392631531, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.8424110384894699, |
| "grad_norm": 0.18042640388011932, |
| "learning_rate": 1.603128054740958e-06, |
| "loss": 0.2402784675359726, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.8433793270394577, |
| "grad_norm": 0.21275922656059265, |
| "learning_rate": 1.5933528836754645e-06, |
| "loss": 0.2840040922164917, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.8443476155894457, |
| "grad_norm": 0.19365417957305908, |
| "learning_rate": 1.5835777126099709e-06, |
| "loss": 0.28499388694763184, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.8453159041394336, |
| "grad_norm": 0.1794516146183014, |
| "learning_rate": 1.573802541544477e-06, |
| "loss": 0.24146252870559692, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.8462841926894215, |
| "grad_norm": 0.2163521647453308, |
| "learning_rate": 1.5640273704789835e-06, |
| "loss": 0.3129892349243164, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.8472524812394093, |
| "grad_norm": 0.1975439339876175, |
| "learning_rate": 1.55425219941349e-06, |
| "loss": 0.2796524167060852, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.8482207697893972, |
| "grad_norm": 0.2034914195537567, |
| "learning_rate": 1.5444770283479962e-06, |
| "loss": 0.279870867729187, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.8491890583393852, |
| "grad_norm": 0.19650639593601227, |
| "learning_rate": 1.5347018572825026e-06, |
| "loss": 0.2665901184082031, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.8501573468893731, |
| "grad_norm": 0.2097690999507904, |
| "learning_rate": 1.524926686217009e-06, |
| "loss": 0.27686014771461487, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.8511256354393609, |
| "grad_norm": 0.2037818878889084, |
| "learning_rate": 1.5151515151515152e-06, |
| "loss": 0.3026971220970154, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.8520939239893488, |
| "grad_norm": 0.20769764482975006, |
| "learning_rate": 1.5053763440860217e-06, |
| "loss": 0.27736592292785645, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.8530622125393367, |
| "grad_norm": 0.1871424913406372, |
| "learning_rate": 1.495601173020528e-06, |
| "loss": 0.2646699547767639, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.8540305010893247, |
| "grad_norm": 0.17274564504623413, |
| "learning_rate": 1.4858260019550343e-06, |
| "loss": 0.2835018038749695, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.8549987896393125, |
| "grad_norm": 0.19041228294372559, |
| "learning_rate": 1.4760508308895407e-06, |
| "loss": 0.2666222155094147, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.8559670781893004, |
| "grad_norm": 0.2032071202993393, |
| "learning_rate": 1.4662756598240472e-06, |
| "loss": 0.2845078706741333, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.8569353667392883, |
| "grad_norm": 0.19567905366420746, |
| "learning_rate": 1.4565004887585534e-06, |
| "loss": 0.2767939567565918, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.8579036552892763, |
| "grad_norm": 0.21539276838302612, |
| "learning_rate": 1.4467253176930598e-06, |
| "loss": 0.28917932510375977, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.8588719438392641, |
| "grad_norm": 0.19841663539409637, |
| "learning_rate": 1.4369501466275662e-06, |
| "loss": 0.2754652202129364, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.859840232389252, |
| "grad_norm": 0.1980779618024826, |
| "learning_rate": 1.4271749755620727e-06, |
| "loss": 0.24697673320770264, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.8608085209392399, |
| "grad_norm": 0.21110616624355316, |
| "learning_rate": 1.4173998044965789e-06, |
| "loss": 0.2851879894733429, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.8617768094892277, |
| "grad_norm": 0.19414329528808594, |
| "learning_rate": 1.4076246334310853e-06, |
| "loss": 0.25482916831970215, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.8627450980392157, |
| "grad_norm": 0.21367470920085907, |
| "learning_rate": 1.3978494623655913e-06, |
| "loss": 0.2666151821613312, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.8637133865892036, |
| "grad_norm": 0.1971525102853775, |
| "learning_rate": 1.3880742913000978e-06, |
| "loss": 0.28804174065589905, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.8646816751391915, |
| "grad_norm": 0.196051225066185, |
| "learning_rate": 1.3782991202346042e-06, |
| "loss": 0.2798953354358673, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.8656499636891793, |
| "grad_norm": 0.19818323850631714, |
| "learning_rate": 1.3685239491691104e-06, |
| "loss": 0.251752108335495, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.8666182522391672, |
| "grad_norm": 0.19199031591415405, |
| "learning_rate": 1.3587487781036168e-06, |
| "loss": 0.27647408843040466, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.8675865407891552, |
| "grad_norm": 0.22557084262371063, |
| "learning_rate": 1.3489736070381233e-06, |
| "loss": 0.3452335000038147, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.8685548293391431, |
| "grad_norm": 0.21375709772109985, |
| "learning_rate": 1.3391984359726295e-06, |
| "loss": 0.31028902530670166, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.869523117889131, |
| "grad_norm": 0.2083037942647934, |
| "learning_rate": 1.329423264907136e-06, |
| "loss": 0.27808475494384766, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.8704914064391188, |
| "grad_norm": 0.19114944338798523, |
| "learning_rate": 1.3196480938416423e-06, |
| "loss": 0.2660242021083832, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.8714596949891068, |
| "grad_norm": 0.2077726423740387, |
| "learning_rate": 1.3098729227761485e-06, |
| "loss": 0.3196616470813751, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.8724279835390947, |
| "grad_norm": 0.19043967127799988, |
| "learning_rate": 1.300097751710655e-06, |
| "loss": 0.2752097547054291, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.8733962720890825, |
| "grad_norm": 0.1956516057252884, |
| "learning_rate": 1.2903225806451614e-06, |
| "loss": 0.2782442271709442, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.8743645606390704, |
| "grad_norm": 0.21374346315860748, |
| "learning_rate": 1.2805474095796676e-06, |
| "loss": 0.3336328864097595, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.8753328491890583, |
| "grad_norm": 0.17390403151512146, |
| "learning_rate": 1.270772238514174e-06, |
| "loss": 0.28889116644859314, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.8763011377390463, |
| "grad_norm": 0.1946377009153366, |
| "learning_rate": 1.2609970674486805e-06, |
| "loss": 0.26131391525268555, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.8772694262890341, |
| "grad_norm": 0.19059988856315613, |
| "learning_rate": 1.251221896383187e-06, |
| "loss": 0.27641037106513977, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.878237714839022, |
| "grad_norm": 0.21638603508472443, |
| "learning_rate": 1.2414467253176931e-06, |
| "loss": 0.2549016773700714, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.8792060033890099, |
| "grad_norm": 0.18561683595180511, |
| "learning_rate": 1.2316715542521995e-06, |
| "loss": 0.2516704797744751, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.8801742919389978, |
| "grad_norm": 0.18754595518112183, |
| "learning_rate": 1.221896383186706e-06, |
| "loss": 0.24743372201919556, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.8811425804889857, |
| "grad_norm": 0.17627929151058197, |
| "learning_rate": 1.2121212121212122e-06, |
| "loss": 0.2443106323480606, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.8821108690389736, |
| "grad_norm": 0.21671797335147858, |
| "learning_rate": 1.2023460410557186e-06, |
| "loss": 0.2789687514305115, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.8830791575889615, |
| "grad_norm": 0.18491996824741364, |
| "learning_rate": 1.192570869990225e-06, |
| "loss": 0.280285507440567, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.8840474461389494, |
| "grad_norm": 0.19200359284877777, |
| "learning_rate": 1.1827956989247313e-06, |
| "loss": 0.3697912096977234, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.8850157346889373, |
| "grad_norm": 0.19149360060691833, |
| "learning_rate": 1.1730205278592377e-06, |
| "loss": 0.2533896565437317, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.8859840232389252, |
| "grad_norm": 0.1858339011669159, |
| "learning_rate": 1.163245356793744e-06, |
| "loss": 0.2724184989929199, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.8869523117889131, |
| "grad_norm": 0.18043696880340576, |
| "learning_rate": 1.1534701857282503e-06, |
| "loss": 0.3179680109024048, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.887920600338901, |
| "grad_norm": 0.2031916230916977, |
| "learning_rate": 1.1436950146627566e-06, |
| "loss": 0.2644922733306885, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.2100847363471985, |
| "learning_rate": 1.133919843597263e-06, |
| "loss": 0.2834533154964447, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.8898571774388768, |
| "grad_norm": 0.20932041108608246, |
| "learning_rate": 1.1241446725317694e-06, |
| "loss": 0.2602953314781189, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.8908254659888647, |
| "grad_norm": 0.1940714567899704, |
| "learning_rate": 1.1143695014662756e-06, |
| "loss": 0.29519274830818176, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.8917937545388526, |
| "grad_norm": 0.20699529349803925, |
| "learning_rate": 1.104594330400782e-06, |
| "loss": 0.2826448678970337, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.8927620430888404, |
| "grad_norm": 0.18003135919570923, |
| "learning_rate": 1.0948191593352885e-06, |
| "loss": 0.3036431670188904, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.8937303316388284, |
| "grad_norm": 0.2626630961894989, |
| "learning_rate": 1.085043988269795e-06, |
| "loss": 0.2694006860256195, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.8946986201888163, |
| "grad_norm": 0.21386921405792236, |
| "learning_rate": 1.0752688172043011e-06, |
| "loss": 0.2830575704574585, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.8956669087388042, |
| "grad_norm": 0.20465651154518127, |
| "learning_rate": 1.0654936461388076e-06, |
| "loss": 0.2928478419780731, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.896635197288792, |
| "grad_norm": 0.218974307179451, |
| "learning_rate": 1.055718475073314e-06, |
| "loss": 0.265733003616333, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.8976034858387799, |
| "grad_norm": 0.18097904324531555, |
| "learning_rate": 1.0459433040078202e-06, |
| "loss": 0.297993540763855, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.8985717743887679, |
| "grad_norm": 0.18121756613254547, |
| "learning_rate": 1.0361681329423266e-06, |
| "loss": 0.3206055760383606, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.8995400629387558, |
| "grad_norm": 0.18943090736865997, |
| "learning_rate": 1.026392961876833e-06, |
| "loss": 0.3015185594558716, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.9005083514887436, |
| "grad_norm": 0.19779494404792786, |
| "learning_rate": 1.0166177908113393e-06, |
| "loss": 0.27546051144599915, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.9014766400387315, |
| "grad_norm": 0.1858789324760437, |
| "learning_rate": 1.0068426197458455e-06, |
| "loss": 0.2784835994243622, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.9024449285887194, |
| "grad_norm": 0.19459734857082367, |
| "learning_rate": 9.97067448680352e-07, |
| "loss": 0.28571465611457825, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.9034132171387074, |
| "grad_norm": 0.18275073170661926, |
| "learning_rate": 9.872922776148584e-07, |
| "loss": 0.2614639401435852, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.9043815056886952, |
| "grad_norm": 0.19755122065544128, |
| "learning_rate": 9.775171065493646e-07, |
| "loss": 0.3016014099121094, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.9053497942386831, |
| "grad_norm": 0.21569618582725525, |
| "learning_rate": 9.67741935483871e-07, |
| "loss": 0.29818177223205566, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.906318082788671, |
| "grad_norm": 0.18675316870212555, |
| "learning_rate": 9.579667644183774e-07, |
| "loss": 0.3368891477584839, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.907286371338659, |
| "grad_norm": 0.19871239364147186, |
| "learning_rate": 9.481915933528838e-07, |
| "loss": 0.3153863549232483, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.9082546598886468, |
| "grad_norm": 0.22014066576957703, |
| "learning_rate": 9.384164222873901e-07, |
| "loss": 0.2810421884059906, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.9092229484386347, |
| "grad_norm": 0.19278523325920105, |
| "learning_rate": 9.286412512218965e-07, |
| "loss": 0.2553982138633728, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.9101912369886226, |
| "grad_norm": 0.20471501350402832, |
| "learning_rate": 9.188660801564028e-07, |
| "loss": 0.3324427902698517, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.9111595255386105, |
| "grad_norm": 0.19074149429798126, |
| "learning_rate": 9.090909090909091e-07, |
| "loss": 0.2935166656970978, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.9121278140885984, |
| "grad_norm": 0.19555461406707764, |
| "learning_rate": 8.993157380254156e-07, |
| "loss": 0.27848702669143677, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.9130961026385863, |
| "grad_norm": 0.1958128958940506, |
| "learning_rate": 8.895405669599219e-07, |
| "loss": 0.3214573860168457, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.9140643911885742, |
| "grad_norm": 0.20188724994659424, |
| "learning_rate": 8.797653958944283e-07, |
| "loss": 0.29266253113746643, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.9150326797385621, |
| "grad_norm": 0.2061896175146103, |
| "learning_rate": 8.699902248289346e-07, |
| "loss": 0.26876091957092285, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.9160009682885499, |
| "grad_norm": 0.18365229666233063, |
| "learning_rate": 8.60215053763441e-07, |
| "loss": 0.24429546296596527, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.9169692568385379, |
| "grad_norm": 0.2009628713130951, |
| "learning_rate": 8.504398826979472e-07, |
| "loss": 0.2813577651977539, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.9179375453885258, |
| "grad_norm": 0.21519120037555695, |
| "learning_rate": 8.406647116324536e-07, |
| "loss": 0.29421091079711914, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.9189058339385137, |
| "grad_norm": 0.19519393146038055, |
| "learning_rate": 8.308895405669599e-07, |
| "loss": 0.27097785472869873, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.9198741224885015, |
| "grad_norm": 0.18344323337078094, |
| "learning_rate": 8.211143695014663e-07, |
| "loss": 0.27933016419410706, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.9208424110384895, |
| "grad_norm": 0.18683570623397827, |
| "learning_rate": 8.113391984359727e-07, |
| "loss": 0.28024059534072876, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.9218106995884774, |
| "grad_norm": 0.2764555513858795, |
| "learning_rate": 8.01564027370479e-07, |
| "loss": 0.2519608438014984, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.9227789881384653, |
| "grad_norm": 0.20227362215518951, |
| "learning_rate": 7.917888563049854e-07, |
| "loss": 0.2634407877922058, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.9237472766884531, |
| "grad_norm": 0.20687641203403473, |
| "learning_rate": 7.820136852394918e-07, |
| "loss": 0.2730368375778198, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.924715565238441, |
| "grad_norm": 0.18547162413597107, |
| "learning_rate": 7.722385141739981e-07, |
| "loss": 0.26113927364349365, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.925683853788429, |
| "grad_norm": 0.1972709447145462, |
| "learning_rate": 7.624633431085045e-07, |
| "loss": 0.3210276663303375, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.9266521423384169, |
| "grad_norm": 0.22296936810016632, |
| "learning_rate": 7.526881720430108e-07, |
| "loss": 0.2896474301815033, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.9276204308884047, |
| "grad_norm": 0.1758430004119873, |
| "learning_rate": 7.429130009775172e-07, |
| "loss": 0.25095510482788086, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.9285887194383926, |
| "grad_norm": 0.20484335720539093, |
| "learning_rate": 7.331378299120236e-07, |
| "loss": 0.27182087302207947, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.9295570079883806, |
| "grad_norm": 0.18908201158046722, |
| "learning_rate": 7.233626588465299e-07, |
| "loss": 0.2869470417499542, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.9305252965383685, |
| "grad_norm": 0.20601920783519745, |
| "learning_rate": 7.135874877810363e-07, |
| "loss": 0.31839150190353394, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.9314935850883563, |
| "grad_norm": 0.2003796547651291, |
| "learning_rate": 7.038123167155427e-07, |
| "loss": 0.28072643280029297, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.9324618736383442, |
| "grad_norm": 0.21452200412750244, |
| "learning_rate": 6.940371456500489e-07, |
| "loss": 0.3070773780345917, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.9334301621883321, |
| "grad_norm": 0.20407654345035553, |
| "learning_rate": 6.842619745845552e-07, |
| "loss": 0.28470179438591003, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.9343984507383201, |
| "grad_norm": 0.21125538647174835, |
| "learning_rate": 6.744868035190616e-07, |
| "loss": 0.29014891386032104, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.9353667392883079, |
| "grad_norm": 0.18405841290950775, |
| "learning_rate": 6.64711632453568e-07, |
| "loss": 0.2623524069786072, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.9363350278382958, |
| "grad_norm": 0.2125682681798935, |
| "learning_rate": 6.549364613880743e-07, |
| "loss": 0.3087378144264221, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.9373033163882837, |
| "grad_norm": 0.20235757529735565, |
| "learning_rate": 6.451612903225807e-07, |
| "loss": 0.2936643660068512, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.9382716049382716, |
| "grad_norm": 0.1939656287431717, |
| "learning_rate": 6.35386119257087e-07, |
| "loss": 0.2780473828315735, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.9392398934882595, |
| "grad_norm": 0.20643159747123718, |
| "learning_rate": 6.256109481915935e-07, |
| "loss": 0.2650626003742218, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.9402081820382474, |
| "grad_norm": 0.1930253654718399, |
| "learning_rate": 6.158357771260998e-07, |
| "loss": 0.305324912071228, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 0.19949081540107727, |
| "learning_rate": 6.060606060606061e-07, |
| "loss": 0.27924615144729614, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.9421447591382232, |
| "grad_norm": 0.1923617720603943, |
| "learning_rate": 5.962854349951125e-07, |
| "loss": 0.33369550108909607, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.9431130476882111, |
| "grad_norm": 0.1924324929714203, |
| "learning_rate": 5.865102639296188e-07, |
| "loss": 0.2702648937702179, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.944081336238199, |
| "grad_norm": 0.189810648560524, |
| "learning_rate": 5.767350928641252e-07, |
| "loss": 0.2990330457687378, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.9450496247881869, |
| "grad_norm": 0.2015506476163864, |
| "learning_rate": 5.669599217986315e-07, |
| "loss": 0.30142831802368164, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.9460179133381748, |
| "grad_norm": 0.21465028822422028, |
| "learning_rate": 5.571847507331378e-07, |
| "loss": 0.2767145037651062, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.9469862018881626, |
| "grad_norm": 0.19279153645038605, |
| "learning_rate": 5.474095796676442e-07, |
| "loss": 0.24644437432289124, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.9479544904381506, |
| "grad_norm": 0.20867611467838287, |
| "learning_rate": 5.376344086021506e-07, |
| "loss": 0.27333688735961914, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.9489227789881385, |
| "grad_norm": 0.19024871289730072, |
| "learning_rate": 5.27859237536657e-07, |
| "loss": 0.2586132884025574, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.9498910675381264, |
| "grad_norm": 0.18523293733596802, |
| "learning_rate": 5.180840664711633e-07, |
| "loss": 0.2814341187477112, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.9508593560881142, |
| "grad_norm": 0.19874310493469238, |
| "learning_rate": 5.083088954056696e-07, |
| "loss": 0.27490949630737305, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.9518276446381021, |
| "grad_norm": 0.21202170848846436, |
| "learning_rate": 4.98533724340176e-07, |
| "loss": 0.2904297411441803, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.9527959331880901, |
| "grad_norm": 0.2094363272190094, |
| "learning_rate": 4.887585532746823e-07, |
| "loss": 0.27371150255203247, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.953764221738078, |
| "grad_norm": 0.18295787274837494, |
| "learning_rate": 4.789833822091887e-07, |
| "loss": 0.2708626985549927, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.9547325102880658, |
| "grad_norm": 0.2100997418165207, |
| "learning_rate": 4.6920821114369504e-07, |
| "loss": 0.26008886098861694, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.9557007988380537, |
| "grad_norm": 0.20343877375125885, |
| "learning_rate": 4.594330400782014e-07, |
| "loss": 0.2885707914829254, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.9566690873880417, |
| "grad_norm": 0.2062508761882782, |
| "learning_rate": 4.496578690127078e-07, |
| "loss": 0.2915845215320587, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.9576373759380296, |
| "grad_norm": 0.21393194794654846, |
| "learning_rate": 4.3988269794721416e-07, |
| "loss": 0.3045470714569092, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.9586056644880174, |
| "grad_norm": 0.20916247367858887, |
| "learning_rate": 4.301075268817205e-07, |
| "loss": 0.3206391930580139, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.9595739530380053, |
| "grad_norm": 0.20829743146896362, |
| "learning_rate": 4.203323558162268e-07, |
| "loss": 0.281288743019104, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.9605422415879932, |
| "grad_norm": 0.21254244446754456, |
| "learning_rate": 4.1055718475073313e-07, |
| "loss": 0.29028719663619995, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.9615105301379812, |
| "grad_norm": 0.20817913115024567, |
| "learning_rate": 4.007820136852395e-07, |
| "loss": 0.3223232626914978, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.962478818687969, |
| "grad_norm": 0.19397568702697754, |
| "learning_rate": 3.910068426197459e-07, |
| "loss": 0.2968447208404541, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.9634471072379569, |
| "grad_norm": 0.1994376927614212, |
| "learning_rate": 3.8123167155425226e-07, |
| "loss": 0.2874579131603241, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.9644153957879448, |
| "grad_norm": 0.20042456686496735, |
| "learning_rate": 3.714565004887586e-07, |
| "loss": 0.25470271706581116, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.9653836843379328, |
| "grad_norm": 0.21064911782741547, |
| "learning_rate": 3.6168132942326495e-07, |
| "loss": 0.25948402285575867, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.9663519728879206, |
| "grad_norm": 0.19920513033866882, |
| "learning_rate": 3.5190615835777133e-07, |
| "loss": 0.2682594358921051, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.9673202614379085, |
| "grad_norm": 0.1974617838859558, |
| "learning_rate": 3.421309872922776e-07, |
| "loss": 0.2706855833530426, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.9682885499878964, |
| "grad_norm": 0.19910918176174164, |
| "learning_rate": 3.32355816226784e-07, |
| "loss": 0.2881166338920593, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9692568385378842, |
| "grad_norm": 0.19066068530082703, |
| "learning_rate": 3.2258064516129035e-07, |
| "loss": 0.2593529224395752, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.9702251270878722, |
| "grad_norm": 0.21316994726657867, |
| "learning_rate": 3.128054740957967e-07, |
| "loss": 0.2673231363296509, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.9711934156378601, |
| "grad_norm": 0.1907181590795517, |
| "learning_rate": 3.0303030303030305e-07, |
| "loss": 0.2969304323196411, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.972161704187848, |
| "grad_norm": 0.2059427797794342, |
| "learning_rate": 2.932551319648094e-07, |
| "loss": 0.2977202832698822, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.9731299927378358, |
| "grad_norm": 0.19578853249549866, |
| "learning_rate": 2.8347996089931575e-07, |
| "loss": 0.2898738980293274, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.9740982812878237, |
| "grad_norm": 0.19571205973625183, |
| "learning_rate": 2.737047898338221e-07, |
| "loss": 0.2661632299423218, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.9750665698378117, |
| "grad_norm": 0.21246828138828278, |
| "learning_rate": 2.639296187683285e-07, |
| "loss": 0.26930439472198486, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.9760348583877996, |
| "grad_norm": 0.20583873987197876, |
| "learning_rate": 2.541544477028348e-07, |
| "loss": 0.2842969298362732, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.9770031469377874, |
| "grad_norm": 0.18972201645374298, |
| "learning_rate": 2.4437927663734114e-07, |
| "loss": 0.28072866797447205, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.9779714354877753, |
| "grad_norm": 0.2141742706298828, |
| "learning_rate": 2.3460410557184752e-07, |
| "loss": 0.28579071164131165, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.9789397240377633, |
| "grad_norm": 0.2145223468542099, |
| "learning_rate": 2.248289345063539e-07, |
| "loss": 0.29237863421440125, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.9799080125877512, |
| "grad_norm": 0.18137916922569275, |
| "learning_rate": 2.1505376344086024e-07, |
| "loss": 0.27613335847854614, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.980876301137739, |
| "grad_norm": 0.18069401383399963, |
| "learning_rate": 2.0527859237536657e-07, |
| "loss": 0.255997896194458, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.9818445896877269, |
| "grad_norm": 0.1869657039642334, |
| "learning_rate": 1.9550342130987294e-07, |
| "loss": 0.25478553771972656, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.9828128782377148, |
| "grad_norm": 0.22846192121505737, |
| "learning_rate": 1.857282502443793e-07, |
| "loss": 0.2954884171485901, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.9837811667877028, |
| "grad_norm": 0.20486541092395782, |
| "learning_rate": 1.7595307917888567e-07, |
| "loss": 0.2752358317375183, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.9847494553376906, |
| "grad_norm": 0.20248091220855713, |
| "learning_rate": 1.66177908113392e-07, |
| "loss": 0.27697792649269104, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.9857177438876785, |
| "grad_norm": 0.2098854035139084, |
| "learning_rate": 1.5640273704789836e-07, |
| "loss": 0.30580762028694153, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.9866860324376664, |
| "grad_norm": 0.19671432673931122, |
| "learning_rate": 1.466275659824047e-07, |
| "loss": 0.2934240698814392, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.9876543209876543, |
| "grad_norm": 0.2045270800590515, |
| "learning_rate": 1.3685239491691106e-07, |
| "loss": 0.26624009013175964, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.9886226095376422, |
| "grad_norm": 0.18321022391319275, |
| "learning_rate": 1.270772238514174e-07, |
| "loss": 0.22881919145584106, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.9895908980876301, |
| "grad_norm": 0.1937808096408844, |
| "learning_rate": 1.1730205278592376e-07, |
| "loss": 0.27172714471817017, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.990559186637618, |
| "grad_norm": 0.22623102366924286, |
| "learning_rate": 1.0752688172043012e-07, |
| "loss": 0.3045693039894104, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.9915274751876059, |
| "grad_norm": 0.2050536870956421, |
| "learning_rate": 9.775171065493647e-08, |
| "loss": 0.26483532786369324, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.9924957637375939, |
| "grad_norm": 0.19890232384204865, |
| "learning_rate": 8.797653958944283e-08, |
| "loss": 0.25764352083206177, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.9934640522875817, |
| "grad_norm": 0.198257714509964, |
| "learning_rate": 7.820136852394918e-08, |
| "loss": 0.27279871702194214, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.9944323408375696, |
| "grad_norm": 0.19575795531272888, |
| "learning_rate": 6.842619745845553e-08, |
| "loss": 0.2848638594150543, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.9954006293875575, |
| "grad_norm": 0.19270561635494232, |
| "learning_rate": 5.865102639296188e-08, |
| "loss": 0.26724010705947876, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.9963689179375453, |
| "grad_norm": 0.2059524953365326, |
| "learning_rate": 4.8875855327468235e-08, |
| "loss": 0.28259921073913574, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.9973372064875333, |
| "grad_norm": 0.22036604583263397, |
| "learning_rate": 3.910068426197459e-08, |
| "loss": 0.2710026502609253, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.9983054950375212, |
| "grad_norm": 0.19827115535736084, |
| "learning_rate": 2.932551319648094e-08, |
| "loss": 0.2782309949398041, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.9992737835875091, |
| "grad_norm": 0.19505129754543304, |
| "learning_rate": 1.9550342130987295e-08, |
| "loss": 0.2857624292373657, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.22344279289245605, |
| "learning_rate": 9.775171065493648e-09, |
| "loss": 0.25447842478752136, |
| "step": 1033 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1033, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.907215148242811e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|