{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1033, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009682885499878964, "grad_norm": 0.6142507791519165, "learning_rate": 0.0, "loss": 0.7025314569473267, "step": 1 }, { "epoch": 0.0019365770999757927, "grad_norm": 0.6211322546005249, "learning_rate": 1.0000000000000002e-06, "loss": 0.6656137704849243, "step": 2 }, { "epoch": 0.002904865649963689, "grad_norm": 0.6215519905090332, "learning_rate": 2.0000000000000003e-06, "loss": 0.6469869017601013, "step": 3 }, { "epoch": 0.0038731541999515854, "grad_norm": 0.6204696297645569, "learning_rate": 3e-06, "loss": 0.6729673147201538, "step": 4 }, { "epoch": 0.004841442749939482, "grad_norm": 0.5724360942840576, "learning_rate": 4.000000000000001e-06, "loss": 0.6311185956001282, "step": 5 }, { "epoch": 0.005809731299927378, "grad_norm": 0.6253241896629333, "learning_rate": 5e-06, "loss": 0.6582703590393066, "step": 6 }, { "epoch": 0.006778019849915275, "grad_norm": 0.6960524320602417, "learning_rate": 6e-06, "loss": 0.6846659183502197, "step": 7 }, { "epoch": 0.007746308399903171, "grad_norm": 0.669350802898407, "learning_rate": 7e-06, "loss": 0.690190315246582, "step": 8 }, { "epoch": 0.008714596949891068, "grad_norm": 0.603227436542511, "learning_rate": 8.000000000000001e-06, "loss": 0.6193867325782776, "step": 9 }, { "epoch": 0.009682885499878963, "grad_norm": 0.5855698585510254, "learning_rate": 9e-06, "loss": 0.5652514696121216, "step": 10 }, { "epoch": 0.01065117404986686, "grad_norm": 0.652574360370636, "learning_rate": 1e-05, "loss": 0.5543577671051025, "step": 11 }, { "epoch": 0.011619462599854757, "grad_norm": 0.6981928944587708, "learning_rate": 9.990224828934506e-06, "loss": 0.5915582180023193, "step": 12 }, { "epoch": 0.012587751149842653, "grad_norm": 0.6892595291137695, "learning_rate": 9.980449657869014e-06, "loss": 0.5405319333076477, "step": 13 }, { "epoch": 0.01355603969983055, "grad_norm": 0.6135081648826599, "learning_rate": 9.97067448680352e-06, "loss": 0.535025954246521, "step": 14 }, { "epoch": 0.014524328249818447, "grad_norm": 0.6271191239356995, "learning_rate": 9.960899315738027e-06, "loss": 0.5299935340881348, "step": 15 }, { "epoch": 0.015492616799806342, "grad_norm": 0.5334578156471252, "learning_rate": 9.951124144672532e-06, "loss": 0.4963300824165344, "step": 16 }, { "epoch": 0.01646090534979424, "grad_norm": 0.5006250143051147, "learning_rate": 9.94134897360704e-06, "loss": 0.49313884973526, "step": 17 }, { "epoch": 0.017429193899782137, "grad_norm": 0.3994006812572479, "learning_rate": 9.931573802541545e-06, "loss": 0.4737316966056824, "step": 18 }, { "epoch": 0.01839748244977003, "grad_norm": 0.28798240423202515, "learning_rate": 9.921798631476052e-06, "loss": 0.4324286878108978, "step": 19 }, { "epoch": 0.019365770999757927, "grad_norm": 0.26087912917137146, "learning_rate": 9.912023460410558e-06, "loss": 0.43165814876556396, "step": 20 }, { "epoch": 0.020334059549745823, "grad_norm": 0.2219318300485611, "learning_rate": 9.902248289345065e-06, "loss": 0.44213879108428955, "step": 21 }, { "epoch": 0.02130234809973372, "grad_norm": 0.2282973825931549, "learning_rate": 9.89247311827957e-06, "loss": 0.45201411843299866, "step": 22 }, { "epoch": 0.022270636649721617, "grad_norm": 0.2014036774635315, "learning_rate": 9.882697947214078e-06, "loss": 0.41472718119621277, "step": 23 }, { "epoch": 0.023238925199709513, "grad_norm": 0.21150773763656616, "learning_rate": 9.872922776148584e-06, "loss": 0.41946664452552795, "step": 24 }, { "epoch": 0.02420721374969741, "grad_norm": 0.253612220287323, "learning_rate": 9.863147605083089e-06, "loss": 0.42294907569885254, "step": 25 }, { "epoch": 0.025175502299685307, "grad_norm": 0.21278540790081024, "learning_rate": 9.853372434017596e-06, "loss": 0.4383317828178406, "step": 26 }, { "epoch": 0.026143790849673203, "grad_norm": 0.24685898423194885, "learning_rate": 9.843597262952102e-06, "loss": 0.46812987327575684, "step": 27 }, { "epoch": 0.0271120793996611, "grad_norm": 0.18908213078975677, "learning_rate": 9.83382209188661e-06, "loss": 0.40582185983657837, "step": 28 }, { "epoch": 0.028080367949648997, "grad_norm": 0.20694321393966675, "learning_rate": 9.824046920821115e-06, "loss": 0.39531630277633667, "step": 29 }, { "epoch": 0.029048656499636893, "grad_norm": 0.1862354725599289, "learning_rate": 9.814271749755622e-06, "loss": 0.4046899676322937, "step": 30 }, { "epoch": 0.030016945049624787, "grad_norm": 0.21069619059562683, "learning_rate": 9.804496578690128e-06, "loss": 0.39400529861450195, "step": 31 }, { "epoch": 0.030985233599612683, "grad_norm": 0.1739916056394577, "learning_rate": 9.794721407624635e-06, "loss": 0.40542545914649963, "step": 32 }, { "epoch": 0.03195352214960058, "grad_norm": 0.16906821727752686, "learning_rate": 9.78494623655914e-06, "loss": 0.37384384870529175, "step": 33 }, { "epoch": 0.03292181069958848, "grad_norm": 0.17224127054214478, "learning_rate": 9.775171065493648e-06, "loss": 0.3819228410720825, "step": 34 }, { "epoch": 0.03389009924957637, "grad_norm": 0.16344308853149414, "learning_rate": 9.765395894428153e-06, "loss": 0.41173726320266724, "step": 35 }, { "epoch": 0.034858387799564274, "grad_norm": 0.17100028693675995, "learning_rate": 9.75562072336266e-06, "loss": 0.39287662506103516, "step": 36 }, { "epoch": 0.03582667634955217, "grad_norm": 0.15641067922115326, "learning_rate": 9.745845552297166e-06, "loss": 0.3993951976299286, "step": 37 }, { "epoch": 0.03679496489954006, "grad_norm": 0.15000952780246735, "learning_rate": 9.736070381231672e-06, "loss": 0.37331604957580566, "step": 38 }, { "epoch": 0.03776325344952796, "grad_norm": 0.16917653381824493, "learning_rate": 9.726295210166179e-06, "loss": 0.45155206322669983, "step": 39 }, { "epoch": 0.03873154199951585, "grad_norm": 0.1585894376039505, "learning_rate": 9.716520039100685e-06, "loss": 0.3733840882778168, "step": 40 }, { "epoch": 0.03969983054950375, "grad_norm": 0.14439353346824646, "learning_rate": 9.706744868035192e-06, "loss": 0.3886685073375702, "step": 41 }, { "epoch": 0.04066811909949165, "grad_norm": 0.14183790981769562, "learning_rate": 9.696969696969698e-06, "loss": 0.3821936547756195, "step": 42 }, { "epoch": 0.04163640764947955, "grad_norm": 0.16753076016902924, "learning_rate": 9.687194525904205e-06, "loss": 0.4522704780101776, "step": 43 }, { "epoch": 0.04260469619946744, "grad_norm": 0.1615847498178482, "learning_rate": 9.67741935483871e-06, "loss": 0.3997975289821625, "step": 44 }, { "epoch": 0.04357298474945534, "grad_norm": 0.1554916650056839, "learning_rate": 9.667644183773218e-06, "loss": 0.3691978454589844, "step": 45 }, { "epoch": 0.04454127329944323, "grad_norm": 0.16841153800487518, "learning_rate": 9.657869012707723e-06, "loss": 0.41484490036964417, "step": 46 }, { "epoch": 0.045509561849431134, "grad_norm": 0.1278965324163437, "learning_rate": 9.64809384164223e-06, "loss": 0.35556912422180176, "step": 47 }, { "epoch": 0.04647785039941903, "grad_norm": 0.14681562781333923, "learning_rate": 9.638318670576736e-06, "loss": 0.40769901871681213, "step": 48 }, { "epoch": 0.04744613894940692, "grad_norm": 0.14918000996112823, "learning_rate": 9.628543499511243e-06, "loss": 0.3773626387119293, "step": 49 }, { "epoch": 0.04841442749939482, "grad_norm": 0.13720250129699707, "learning_rate": 9.618768328445749e-06, "loss": 0.3663005232810974, "step": 50 }, { "epoch": 0.04938271604938271, "grad_norm": 0.14886170625686646, "learning_rate": 9.608993157380255e-06, "loss": 0.4067220985889435, "step": 51 }, { "epoch": 0.05035100459937061, "grad_norm": 0.14274443686008453, "learning_rate": 9.599217986314762e-06, "loss": 0.3832167685031891, "step": 52 }, { "epoch": 0.05131929314935851, "grad_norm": 0.15536513924598694, "learning_rate": 9.589442815249267e-06, "loss": 0.4395195245742798, "step": 53 }, { "epoch": 0.05228758169934641, "grad_norm": 0.1393464207649231, "learning_rate": 9.579667644183775e-06, "loss": 0.3716701567173004, "step": 54 }, { "epoch": 0.0532558702493343, "grad_norm": 0.1450338065624237, "learning_rate": 9.56989247311828e-06, "loss": 0.362257719039917, "step": 55 }, { "epoch": 0.0542241587993222, "grad_norm": 0.14616632461547852, "learning_rate": 9.560117302052788e-06, "loss": 0.4077686369419098, "step": 56 }, { "epoch": 0.05519244734931009, "grad_norm": 0.1374523639678955, "learning_rate": 9.550342130987293e-06, "loss": 0.3961605429649353, "step": 57 }, { "epoch": 0.056160735899297994, "grad_norm": 0.1394190788269043, "learning_rate": 9.5405669599218e-06, "loss": 0.3706665635108948, "step": 58 }, { "epoch": 0.05712902444928589, "grad_norm": 0.11874961853027344, "learning_rate": 9.530791788856306e-06, "loss": 0.30775582790374756, "step": 59 }, { "epoch": 0.05809731299927379, "grad_norm": 0.1349610835313797, "learning_rate": 9.521016617790813e-06, "loss": 0.3784869313240051, "step": 60 }, { "epoch": 0.05906560154926168, "grad_norm": 0.13463151454925537, "learning_rate": 9.511241446725319e-06, "loss": 0.3922792077064514, "step": 61 }, { "epoch": 0.06003389009924957, "grad_norm": 0.12930694222450256, "learning_rate": 9.501466275659824e-06, "loss": 0.37777647376060486, "step": 62 }, { "epoch": 0.06100217864923747, "grad_norm": 0.13075940310955048, "learning_rate": 9.491691104594332e-06, "loss": 0.38546550273895264, "step": 63 }, { "epoch": 0.06197046719922537, "grad_norm": 0.13507235050201416, "learning_rate": 9.481915933528837e-06, "loss": 0.3709413707256317, "step": 64 }, { "epoch": 0.06293875574921326, "grad_norm": 0.1276707798242569, "learning_rate": 9.472140762463345e-06, "loss": 0.3352872431278229, "step": 65 }, { "epoch": 0.06390704429920116, "grad_norm": 0.11911962181329727, "learning_rate": 9.46236559139785e-06, "loss": 0.3386506140232086, "step": 66 }, { "epoch": 0.06487533284918906, "grad_norm": 0.12956929206848145, "learning_rate": 9.452590420332357e-06, "loss": 0.3754933774471283, "step": 67 }, { "epoch": 0.06584362139917696, "grad_norm": 0.13890986144542694, "learning_rate": 9.442815249266863e-06, "loss": 0.4128858745098114, "step": 68 }, { "epoch": 0.06681190994916485, "grad_norm": 0.11394089460372925, "learning_rate": 9.43304007820137e-06, "loss": 0.3089035153388977, "step": 69 }, { "epoch": 0.06778019849915275, "grad_norm": 0.1245599314570427, "learning_rate": 9.423264907135876e-06, "loss": 0.32959863543510437, "step": 70 }, { "epoch": 0.06874848704914065, "grad_norm": 0.14015918970108032, "learning_rate": 9.413489736070383e-06, "loss": 0.3884163498878479, "step": 71 }, { "epoch": 0.06971677559912855, "grad_norm": 0.1232111006975174, "learning_rate": 9.403714565004889e-06, "loss": 0.3284456431865692, "step": 72 }, { "epoch": 0.07068506414911643, "grad_norm": 0.13799598813056946, "learning_rate": 9.393939393939396e-06, "loss": 0.3356078565120697, "step": 73 }, { "epoch": 0.07165335269910433, "grad_norm": 0.1208195611834526, "learning_rate": 9.384164222873902e-06, "loss": 0.3578157126903534, "step": 74 }, { "epoch": 0.07262164124909223, "grad_norm": 0.11411258578300476, "learning_rate": 9.374389051808407e-06, "loss": 0.31658506393432617, "step": 75 }, { "epoch": 0.07358992979908012, "grad_norm": 0.11944623291492462, "learning_rate": 9.364613880742913e-06, "loss": 0.36029356718063354, "step": 76 }, { "epoch": 0.07455821834906802, "grad_norm": 0.12852734327316284, "learning_rate": 9.35483870967742e-06, "loss": 0.3340183198451996, "step": 77 }, { "epoch": 0.07552650689905592, "grad_norm": 0.11775587499141693, "learning_rate": 9.345063538611926e-06, "loss": 0.3083425760269165, "step": 78 }, { "epoch": 0.07649479544904382, "grad_norm": 0.13742083311080933, "learning_rate": 9.335288367546433e-06, "loss": 0.3448983132839203, "step": 79 }, { "epoch": 0.0774630839990317, "grad_norm": 0.12395518273115158, "learning_rate": 9.325513196480938e-06, "loss": 0.39519673585891724, "step": 80 }, { "epoch": 0.0784313725490196, "grad_norm": 0.1322673112154007, "learning_rate": 9.315738025415446e-06, "loss": 0.3364032506942749, "step": 81 }, { "epoch": 0.0793996610990075, "grad_norm": 0.13429760932922363, "learning_rate": 9.305962854349951e-06, "loss": 0.3513767719268799, "step": 82 }, { "epoch": 0.08036794964899541, "grad_norm": 0.11903934180736542, "learning_rate": 9.296187683284459e-06, "loss": 0.3263617157936096, "step": 83 }, { "epoch": 0.0813362381989833, "grad_norm": 0.11960665881633759, "learning_rate": 9.286412512218964e-06, "loss": 0.3449134826660156, "step": 84 }, { "epoch": 0.0823045267489712, "grad_norm": 0.12201559543609619, "learning_rate": 9.27663734115347e-06, "loss": 0.32846352458000183, "step": 85 }, { "epoch": 0.0832728152989591, "grad_norm": 0.12875522673130035, "learning_rate": 9.266862170087977e-06, "loss": 0.37101566791534424, "step": 86 }, { "epoch": 0.08424110384894698, "grad_norm": 0.11964312195777893, "learning_rate": 9.257086999022483e-06, "loss": 0.31812378764152527, "step": 87 }, { "epoch": 0.08520939239893488, "grad_norm": 0.15213125944137573, "learning_rate": 9.24731182795699e-06, "loss": 0.3813604414463043, "step": 88 }, { "epoch": 0.08617768094892278, "grad_norm": 0.1251516193151474, "learning_rate": 9.237536656891495e-06, "loss": 0.3027239441871643, "step": 89 }, { "epoch": 0.08714596949891068, "grad_norm": 0.11938060075044632, "learning_rate": 9.227761485826003e-06, "loss": 0.37067025899887085, "step": 90 }, { "epoch": 0.08811425804889857, "grad_norm": 0.13240274786949158, "learning_rate": 9.217986314760508e-06, "loss": 0.33599379658699036, "step": 91 }, { "epoch": 0.08908254659888647, "grad_norm": 0.13307581841945648, "learning_rate": 9.208211143695016e-06, "loss": 0.37259358167648315, "step": 92 }, { "epoch": 0.09005083514887437, "grad_norm": 0.12138372659683228, "learning_rate": 9.198435972629521e-06, "loss": 0.3587302565574646, "step": 93 }, { "epoch": 0.09101912369886227, "grad_norm": 0.1167801097035408, "learning_rate": 9.188660801564028e-06, "loss": 0.33125776052474976, "step": 94 }, { "epoch": 0.09198741224885015, "grad_norm": 0.1275295913219452, "learning_rate": 9.178885630498534e-06, "loss": 0.30975601077079773, "step": 95 }, { "epoch": 0.09295570079883805, "grad_norm": 0.13747365772724152, "learning_rate": 9.16911045943304e-06, "loss": 0.3368357717990875, "step": 96 }, { "epoch": 0.09392398934882595, "grad_norm": 0.12222792208194733, "learning_rate": 9.159335288367547e-06, "loss": 0.31197813153266907, "step": 97 }, { "epoch": 0.09489227789881384, "grad_norm": 0.1364426612854004, "learning_rate": 9.149560117302052e-06, "loss": 0.32897326350212097, "step": 98 }, { "epoch": 0.09586056644880174, "grad_norm": 0.14532364904880524, "learning_rate": 9.13978494623656e-06, "loss": 0.3669801652431488, "step": 99 }, { "epoch": 0.09682885499878964, "grad_norm": 0.13844749331474304, "learning_rate": 9.130009775171065e-06, "loss": 0.36969247460365295, "step": 100 }, { "epoch": 0.09779714354877754, "grad_norm": 0.12275300920009613, "learning_rate": 9.120234604105573e-06, "loss": 0.3295097053050995, "step": 101 }, { "epoch": 0.09876543209876543, "grad_norm": 0.13520191609859467, "learning_rate": 9.110459433040078e-06, "loss": 0.3737986087799072, "step": 102 }, { "epoch": 0.09973372064875333, "grad_norm": 0.13066619634628296, "learning_rate": 9.100684261974585e-06, "loss": 0.3033255934715271, "step": 103 }, { "epoch": 0.10070200919874123, "grad_norm": 0.11890687793493271, "learning_rate": 9.090909090909091e-06, "loss": 0.34380683302879333, "step": 104 }, { "epoch": 0.10167029774872913, "grad_norm": 0.13914473354816437, "learning_rate": 9.081133919843598e-06, "loss": 0.35355979204177856, "step": 105 }, { "epoch": 0.10263858629871701, "grad_norm": 0.120316281914711, "learning_rate": 9.071358748778104e-06, "loss": 0.3205440044403076, "step": 106 }, { "epoch": 0.10360687484870491, "grad_norm": 0.16602486371994019, "learning_rate": 9.061583577712611e-06, "loss": 0.3168993890285492, "step": 107 }, { "epoch": 0.10457516339869281, "grad_norm": 0.12726294994354248, "learning_rate": 9.051808406647117e-06, "loss": 0.3384619653224945, "step": 108 }, { "epoch": 0.10554345194868071, "grad_norm": 0.1322595477104187, "learning_rate": 9.042033235581622e-06, "loss": 0.3112761974334717, "step": 109 }, { "epoch": 0.1065117404986686, "grad_norm": 0.1453908234834671, "learning_rate": 9.03225806451613e-06, "loss": 0.2989104390144348, "step": 110 }, { "epoch": 0.1074800290486565, "grad_norm": 0.12833762168884277, "learning_rate": 9.022482893450635e-06, "loss": 0.3298214077949524, "step": 111 }, { "epoch": 0.1084483175986444, "grad_norm": 0.12525658309459686, "learning_rate": 9.012707722385142e-06, "loss": 0.32383644580841064, "step": 112 }, { "epoch": 0.10941660614863229, "grad_norm": 0.1307019740343094, "learning_rate": 9.002932551319648e-06, "loss": 0.3317619264125824, "step": 113 }, { "epoch": 0.11038489469862019, "grad_norm": 0.13214020431041718, "learning_rate": 8.993157380254155e-06, "loss": 0.2884703576564789, "step": 114 }, { "epoch": 0.11135318324860809, "grad_norm": 0.12866291403770447, "learning_rate": 8.983382209188661e-06, "loss": 0.351254940032959, "step": 115 }, { "epoch": 0.11232147179859599, "grad_norm": 0.13350999355316162, "learning_rate": 8.973607038123168e-06, "loss": 0.30035918951034546, "step": 116 }, { "epoch": 0.11328976034858387, "grad_norm": 0.11388203501701355, "learning_rate": 8.963831867057674e-06, "loss": 0.2958531081676483, "step": 117 }, { "epoch": 0.11425804889857177, "grad_norm": 0.14289627969264984, "learning_rate": 8.954056695992181e-06, "loss": 0.32053035497665405, "step": 118 }, { "epoch": 0.11522633744855967, "grad_norm": 0.13894328474998474, "learning_rate": 8.944281524926687e-06, "loss": 0.30120429396629333, "step": 119 }, { "epoch": 0.11619462599854757, "grad_norm": 0.12141028046607971, "learning_rate": 8.934506353861194e-06, "loss": 0.31479382514953613, "step": 120 }, { "epoch": 0.11716291454853546, "grad_norm": 0.12900526821613312, "learning_rate": 8.9247311827957e-06, "loss": 0.36297452449798584, "step": 121 }, { "epoch": 0.11813120309852336, "grad_norm": 0.12149893492460251, "learning_rate": 8.914956011730205e-06, "loss": 0.2906142473220825, "step": 122 }, { "epoch": 0.11909949164851126, "grad_norm": 0.16426807641983032, "learning_rate": 8.905180840664712e-06, "loss": 0.31878572702407837, "step": 123 }, { "epoch": 0.12006778019849915, "grad_norm": 0.11868342757225037, "learning_rate": 8.895405669599218e-06, "loss": 0.3231990933418274, "step": 124 }, { "epoch": 0.12103606874848705, "grad_norm": 0.13657772541046143, "learning_rate": 8.885630498533725e-06, "loss": 0.29259440302848816, "step": 125 }, { "epoch": 0.12200435729847495, "grad_norm": 0.1253119558095932, "learning_rate": 8.87585532746823e-06, "loss": 0.2838287353515625, "step": 126 }, { "epoch": 0.12297264584846285, "grad_norm": 0.1284995973110199, "learning_rate": 8.866080156402738e-06, "loss": 0.3066769242286682, "step": 127 }, { "epoch": 0.12394093439845073, "grad_norm": 0.11573974788188934, "learning_rate": 8.856304985337244e-06, "loss": 0.3010478615760803, "step": 128 }, { "epoch": 0.12490922294843863, "grad_norm": 0.12995308637619019, "learning_rate": 8.846529814271751e-06, "loss": 0.3350738286972046, "step": 129 }, { "epoch": 0.12587751149842652, "grad_norm": 0.13642707467079163, "learning_rate": 8.836754643206256e-06, "loss": 0.31808528304100037, "step": 130 }, { "epoch": 0.12684580004841442, "grad_norm": 0.1310724914073944, "learning_rate": 8.826979472140764e-06, "loss": 0.3332287669181824, "step": 131 }, { "epoch": 0.12781408859840232, "grad_norm": 0.12466035038232803, "learning_rate": 8.81720430107527e-06, "loss": 0.3196363151073456, "step": 132 }, { "epoch": 0.12878237714839022, "grad_norm": 0.13588100671768188, "learning_rate": 8.807429130009777e-06, "loss": 0.31376099586486816, "step": 133 }, { "epoch": 0.12975066569837812, "grad_norm": 0.13263723254203796, "learning_rate": 8.797653958944282e-06, "loss": 0.3171752393245697, "step": 134 }, { "epoch": 0.13071895424836602, "grad_norm": 0.13374009728431702, "learning_rate": 8.787878787878788e-06, "loss": 0.2953280210494995, "step": 135 }, { "epoch": 0.13168724279835392, "grad_norm": 0.13743482530117035, "learning_rate": 8.778103616813295e-06, "loss": 0.3488181531429291, "step": 136 }, { "epoch": 0.1326555313483418, "grad_norm": 0.13955242931842804, "learning_rate": 8.7683284457478e-06, "loss": 0.331007182598114, "step": 137 }, { "epoch": 0.1336238198983297, "grad_norm": 0.14186261594295502, "learning_rate": 8.758553274682308e-06, "loss": 0.36398252844810486, "step": 138 }, { "epoch": 0.1345921084483176, "grad_norm": 0.1471295952796936, "learning_rate": 8.748778103616813e-06, "loss": 0.336472749710083, "step": 139 }, { "epoch": 0.1355603969983055, "grad_norm": 0.11482029408216476, "learning_rate": 8.73900293255132e-06, "loss": 0.3006575405597687, "step": 140 }, { "epoch": 0.1365286855482934, "grad_norm": 0.13504621386528015, "learning_rate": 8.729227761485826e-06, "loss": 0.3569592535495758, "step": 141 }, { "epoch": 0.1374969740982813, "grad_norm": 0.15188293159008026, "learning_rate": 8.719452590420334e-06, "loss": 0.3333485424518585, "step": 142 }, { "epoch": 0.1384652626482692, "grad_norm": 0.13065899908542633, "learning_rate": 8.70967741935484e-06, "loss": 0.3319074511528015, "step": 143 }, { "epoch": 0.1394335511982571, "grad_norm": 0.1272367686033249, "learning_rate": 8.699902248289346e-06, "loss": 0.3033870756626129, "step": 144 }, { "epoch": 0.14040183974824497, "grad_norm": 0.1433865875005722, "learning_rate": 8.690127077223852e-06, "loss": 0.30503055453300476, "step": 145 }, { "epoch": 0.14137012829823287, "grad_norm": 0.12748195230960846, "learning_rate": 8.68035190615836e-06, "loss": 0.3041837811470032, "step": 146 }, { "epoch": 0.14233841684822077, "grad_norm": 0.13291986286640167, "learning_rate": 8.670576735092865e-06, "loss": 0.3430430591106415, "step": 147 }, { "epoch": 0.14330670539820867, "grad_norm": 0.12126651406288147, "learning_rate": 8.66080156402737e-06, "loss": 0.33859869837760925, "step": 148 }, { "epoch": 0.14427499394819657, "grad_norm": 0.15293890237808228, "learning_rate": 8.651026392961878e-06, "loss": 0.33767563104629517, "step": 149 }, { "epoch": 0.14524328249818447, "grad_norm": 0.13613349199295044, "learning_rate": 8.641251221896383e-06, "loss": 0.29907119274139404, "step": 150 }, { "epoch": 0.14621157104817237, "grad_norm": 0.12802888453006744, "learning_rate": 8.63147605083089e-06, "loss": 0.3123582601547241, "step": 151 }, { "epoch": 0.14717985959816024, "grad_norm": 0.16021937131881714, "learning_rate": 8.621700879765396e-06, "loss": 0.37841248512268066, "step": 152 }, { "epoch": 0.14814814814814814, "grad_norm": 0.12790994346141815, "learning_rate": 8.611925708699903e-06, "loss": 0.27514874935150146, "step": 153 }, { "epoch": 0.14911643669813604, "grad_norm": 0.13345623016357422, "learning_rate": 8.602150537634409e-06, "loss": 0.3330199718475342, "step": 154 }, { "epoch": 0.15008472524812394, "grad_norm": 0.13262543082237244, "learning_rate": 8.592375366568916e-06, "loss": 0.2858338952064514, "step": 155 }, { "epoch": 0.15105301379811184, "grad_norm": 0.16613167524337769, "learning_rate": 8.582600195503422e-06, "loss": 0.3251619040966034, "step": 156 }, { "epoch": 0.15202130234809974, "grad_norm": 0.14091891050338745, "learning_rate": 8.57282502443793e-06, "loss": 0.3126198649406433, "step": 157 }, { "epoch": 0.15298959089808764, "grad_norm": 0.12945963442325592, "learning_rate": 8.563049853372435e-06, "loss": 0.3045946955680847, "step": 158 }, { "epoch": 0.1539578794480755, "grad_norm": 0.13346253335475922, "learning_rate": 8.553274682306942e-06, "loss": 0.3187895119190216, "step": 159 }, { "epoch": 0.1549261679980634, "grad_norm": 0.1281236708164215, "learning_rate": 8.543499511241448e-06, "loss": 0.2990340292453766, "step": 160 }, { "epoch": 0.1558944565480513, "grad_norm": 0.13074296712875366, "learning_rate": 8.533724340175953e-06, "loss": 0.3452467620372772, "step": 161 }, { "epoch": 0.1568627450980392, "grad_norm": 0.13953045010566711, "learning_rate": 8.52394916911046e-06, "loss": 0.32909831404685974, "step": 162 }, { "epoch": 0.1578310336480271, "grad_norm": 0.14059635996818542, "learning_rate": 8.514173998044966e-06, "loss": 0.27773308753967285, "step": 163 }, { "epoch": 0.158799322198015, "grad_norm": 0.1397535353899002, "learning_rate": 8.504398826979473e-06, "loss": 0.3393952250480652, "step": 164 }, { "epoch": 0.15976761074800291, "grad_norm": 0.13574957847595215, "learning_rate": 8.494623655913979e-06, "loss": 0.32174286246299744, "step": 165 }, { "epoch": 0.16073589929799081, "grad_norm": 0.13975924253463745, "learning_rate": 8.484848484848486e-06, "loss": 0.30419760942459106, "step": 166 }, { "epoch": 0.1617041878479787, "grad_norm": 0.1427648961544037, "learning_rate": 8.475073313782992e-06, "loss": 0.3033597469329834, "step": 167 }, { "epoch": 0.1626724763979666, "grad_norm": 0.14715762436389923, "learning_rate": 8.465298142717499e-06, "loss": 0.32338106632232666, "step": 168 }, { "epoch": 0.1636407649479545, "grad_norm": 0.1452789306640625, "learning_rate": 8.455522971652005e-06, "loss": 0.345528781414032, "step": 169 }, { "epoch": 0.1646090534979424, "grad_norm": 0.13867947459220886, "learning_rate": 8.445747800586512e-06, "loss": 0.32734549045562744, "step": 170 }, { "epoch": 0.1655773420479303, "grad_norm": 0.137126162648201, "learning_rate": 8.435972629521018e-06, "loss": 0.32425397634506226, "step": 171 }, { "epoch": 0.1665456305979182, "grad_norm": 0.15507617592811584, "learning_rate": 8.426197458455525e-06, "loss": 0.3226757049560547, "step": 172 }, { "epoch": 0.1675139191479061, "grad_norm": 0.12315394729375839, "learning_rate": 8.41642228739003e-06, "loss": 0.3322482407093048, "step": 173 }, { "epoch": 0.16848220769789396, "grad_norm": 0.14539486169815063, "learning_rate": 8.406647116324536e-06, "loss": 0.3431966304779053, "step": 174 }, { "epoch": 0.16945049624788186, "grad_norm": 0.1458021104335785, "learning_rate": 8.396871945259043e-06, "loss": 0.3117983937263489, "step": 175 }, { "epoch": 0.17041878479786976, "grad_norm": 0.126032292842865, "learning_rate": 8.387096774193549e-06, "loss": 0.304436057806015, "step": 176 }, { "epoch": 0.17138707334785766, "grad_norm": 0.15044239163398743, "learning_rate": 8.377321603128056e-06, "loss": 0.3327201306819916, "step": 177 }, { "epoch": 0.17235536189784556, "grad_norm": 0.12567083537578583, "learning_rate": 8.367546432062562e-06, "loss": 0.32488536834716797, "step": 178 }, { "epoch": 0.17332365044783346, "grad_norm": 0.1399868130683899, "learning_rate": 8.357771260997069e-06, "loss": 0.28393518924713135, "step": 179 }, { "epoch": 0.17429193899782136, "grad_norm": 0.12733778357505798, "learning_rate": 8.347996089931575e-06, "loss": 0.28177058696746826, "step": 180 }, { "epoch": 0.17526022754780926, "grad_norm": 0.15660876035690308, "learning_rate": 8.338220918866082e-06, "loss": 0.31686797738075256, "step": 181 }, { "epoch": 0.17622851609779713, "grad_norm": 0.1510598510503769, "learning_rate": 8.328445747800587e-06, "loss": 0.2910916209220886, "step": 182 }, { "epoch": 0.17719680464778503, "grad_norm": 0.13863040506839752, "learning_rate": 8.318670576735095e-06, "loss": 0.3391288220882416, "step": 183 }, { "epoch": 0.17816509319777293, "grad_norm": 0.1262752115726471, "learning_rate": 8.3088954056696e-06, "loss": 0.26286113262176514, "step": 184 }, { "epoch": 0.17913338174776083, "grad_norm": 0.1450256109237671, "learning_rate": 8.299120234604106e-06, "loss": 0.33761149644851685, "step": 185 }, { "epoch": 0.18010167029774873, "grad_norm": 0.145137757062912, "learning_rate": 8.289345063538613e-06, "loss": 0.299782931804657, "step": 186 }, { "epoch": 0.18106995884773663, "grad_norm": 0.14223727583885193, "learning_rate": 8.279569892473119e-06, "loss": 0.28673055768013, "step": 187 }, { "epoch": 0.18203824739772453, "grad_norm": 0.14671868085861206, "learning_rate": 8.269794721407626e-06, "loss": 0.3334650993347168, "step": 188 }, { "epoch": 0.1830065359477124, "grad_norm": 0.14561879634857178, "learning_rate": 8.260019550342132e-06, "loss": 0.3131367564201355, "step": 189 }, { "epoch": 0.1839748244977003, "grad_norm": 0.13915206491947174, "learning_rate": 8.250244379276639e-06, "loss": 0.285634845495224, "step": 190 }, { "epoch": 0.1849431130476882, "grad_norm": 0.13024187088012695, "learning_rate": 8.240469208211144e-06, "loss": 0.3532557487487793, "step": 191 }, { "epoch": 0.1859114015976761, "grad_norm": 0.1433749794960022, "learning_rate": 8.230694037145652e-06, "loss": 0.3235865831375122, "step": 192 }, { "epoch": 0.186879690147664, "grad_norm": 0.1517333984375, "learning_rate": 8.220918866080157e-06, "loss": 0.2964053153991699, "step": 193 }, { "epoch": 0.1878479786976519, "grad_norm": 0.139493927359581, "learning_rate": 8.211143695014665e-06, "loss": 0.2810608148574829, "step": 194 }, { "epoch": 0.1888162672476398, "grad_norm": 0.12849940359592438, "learning_rate": 8.20136852394917e-06, "loss": 0.2949499189853668, "step": 195 }, { "epoch": 0.18978455579762768, "grad_norm": 0.14101386070251465, "learning_rate": 8.191593352883677e-06, "loss": 0.28787606954574585, "step": 196 }, { "epoch": 0.19075284434761558, "grad_norm": 0.13321508467197418, "learning_rate": 8.181818181818183e-06, "loss": 0.31819286942481995, "step": 197 }, { "epoch": 0.19172113289760348, "grad_norm": 0.1368619203567505, "learning_rate": 8.172043010752689e-06, "loss": 0.2770519256591797, "step": 198 }, { "epoch": 0.19268942144759138, "grad_norm": 0.14590312540531158, "learning_rate": 8.162267839687196e-06, "loss": 0.331787645816803, "step": 199 }, { "epoch": 0.19365770999757928, "grad_norm": 0.14525046944618225, "learning_rate": 8.152492668621701e-06, "loss": 0.3185243308544159, "step": 200 }, { "epoch": 0.19462599854756718, "grad_norm": 0.14318214356899261, "learning_rate": 8.142717497556209e-06, "loss": 0.2779344320297241, "step": 201 }, { "epoch": 0.19559428709755508, "grad_norm": 0.13709904253482819, "learning_rate": 8.132942326490714e-06, "loss": 0.2518289387226105, "step": 202 }, { "epoch": 0.19656257564754298, "grad_norm": 0.1377800703048706, "learning_rate": 8.12316715542522e-06, "loss": 0.2550484836101532, "step": 203 }, { "epoch": 0.19753086419753085, "grad_norm": 0.15116380155086517, "learning_rate": 8.113391984359727e-06, "loss": 0.3201026916503906, "step": 204 }, { "epoch": 0.19849915274751875, "grad_norm": 0.13895870745182037, "learning_rate": 8.103616813294233e-06, "loss": 0.314879834651947, "step": 205 }, { "epoch": 0.19946744129750665, "grad_norm": 0.1607581377029419, "learning_rate": 8.09384164222874e-06, "loss": 0.35479456186294556, "step": 206 }, { "epoch": 0.20043572984749455, "grad_norm": 0.14690084755420685, "learning_rate": 8.084066471163246e-06, "loss": 0.35220852494239807, "step": 207 }, { "epoch": 0.20140401839748245, "grad_norm": 0.14206227660179138, "learning_rate": 8.074291300097751e-06, "loss": 0.30387187004089355, "step": 208 }, { "epoch": 0.20237230694747035, "grad_norm": 0.15204882621765137, "learning_rate": 8.064516129032258e-06, "loss": 0.2756717801094055, "step": 209 }, { "epoch": 0.20334059549745825, "grad_norm": 0.1398657113313675, "learning_rate": 8.054740957966764e-06, "loss": 0.2777653932571411, "step": 210 }, { "epoch": 0.20430888404744613, "grad_norm": 0.13735412061214447, "learning_rate": 8.044965786901271e-06, "loss": 0.28360527753829956, "step": 211 }, { "epoch": 0.20527717259743403, "grad_norm": 0.15420980751514435, "learning_rate": 8.035190615835777e-06, "loss": 0.32784318923950195, "step": 212 }, { "epoch": 0.20624546114742193, "grad_norm": 0.14892657101154327, "learning_rate": 8.025415444770284e-06, "loss": 0.33523041009902954, "step": 213 }, { "epoch": 0.20721374969740983, "grad_norm": 0.13076002895832062, "learning_rate": 8.01564027370479e-06, "loss": 0.2862524092197418, "step": 214 }, { "epoch": 0.20818203824739773, "grad_norm": 0.1415518820285797, "learning_rate": 8.005865102639297e-06, "loss": 0.33531394600868225, "step": 215 }, { "epoch": 0.20915032679738563, "grad_norm": 0.1702524572610855, "learning_rate": 7.996089931573803e-06, "loss": 0.32485491037368774, "step": 216 }, { "epoch": 0.21011861534737353, "grad_norm": 0.1344050168991089, "learning_rate": 7.98631476050831e-06, "loss": 0.3258602023124695, "step": 217 }, { "epoch": 0.21108690389736143, "grad_norm": 0.1570902317762375, "learning_rate": 7.976539589442815e-06, "loss": 0.32586684823036194, "step": 218 }, { "epoch": 0.2120551924473493, "grad_norm": 0.15897458791732788, "learning_rate": 7.966764418377323e-06, "loss": 0.2798767685890198, "step": 219 }, { "epoch": 0.2130234809973372, "grad_norm": 0.15497955679893494, "learning_rate": 7.956989247311828e-06, "loss": 0.3338768482208252, "step": 220 }, { "epoch": 0.2139917695473251, "grad_norm": 0.14507335424423218, "learning_rate": 7.947214076246334e-06, "loss": 0.2613910138607025, "step": 221 }, { "epoch": 0.214960058097313, "grad_norm": 0.1506527066230774, "learning_rate": 7.937438905180841e-06, "loss": 0.2877991795539856, "step": 222 }, { "epoch": 0.2159283466473009, "grad_norm": 0.14218902587890625, "learning_rate": 7.927663734115347e-06, "loss": 0.28079351782798767, "step": 223 }, { "epoch": 0.2168966351972888, "grad_norm": 0.14527355134487152, "learning_rate": 7.917888563049854e-06, "loss": 0.29178351163864136, "step": 224 }, { "epoch": 0.2178649237472767, "grad_norm": 0.1565907746553421, "learning_rate": 7.90811339198436e-06, "loss": 0.29092147946357727, "step": 225 }, { "epoch": 0.21883321229726457, "grad_norm": 0.16128268837928772, "learning_rate": 7.898338220918867e-06, "loss": 0.30649298429489136, "step": 226 }, { "epoch": 0.21980150084725247, "grad_norm": 0.13981635868549347, "learning_rate": 7.888563049853372e-06, "loss": 0.2914465069770813, "step": 227 }, { "epoch": 0.22076978939724037, "grad_norm": 0.13276293873786926, "learning_rate": 7.87878787878788e-06, "loss": 0.29372796416282654, "step": 228 }, { "epoch": 0.22173807794722827, "grad_norm": 0.14917345345020294, "learning_rate": 7.869012707722385e-06, "loss": 0.30806928873062134, "step": 229 }, { "epoch": 0.22270636649721617, "grad_norm": 0.15436047315597534, "learning_rate": 7.859237536656893e-06, "loss": 0.32170775532722473, "step": 230 }, { "epoch": 0.22367465504720407, "grad_norm": 0.14901861548423767, "learning_rate": 7.849462365591398e-06, "loss": 0.2876305878162384, "step": 231 }, { "epoch": 0.22464294359719197, "grad_norm": 0.13269929587841034, "learning_rate": 7.839687194525904e-06, "loss": 0.34557828307151794, "step": 232 }, { "epoch": 0.22561123214717985, "grad_norm": 0.14736007153987885, "learning_rate": 7.829912023460411e-06, "loss": 0.3261890411376953, "step": 233 }, { "epoch": 0.22657952069716775, "grad_norm": 0.157369464635849, "learning_rate": 7.820136852394917e-06, "loss": 0.33243319392204285, "step": 234 }, { "epoch": 0.22754780924715565, "grad_norm": 0.15422044694423676, "learning_rate": 7.810361681329424e-06, "loss": 0.32125651836395264, "step": 235 }, { "epoch": 0.22851609779714355, "grad_norm": 0.15290172398090363, "learning_rate": 7.80058651026393e-06, "loss": 0.3197525143623352, "step": 236 }, { "epoch": 0.22948438634713145, "grad_norm": 0.13229885697364807, "learning_rate": 7.790811339198437e-06, "loss": 0.3051709532737732, "step": 237 }, { "epoch": 0.23045267489711935, "grad_norm": 0.15573133528232574, "learning_rate": 7.781036168132942e-06, "loss": 0.3190789520740509, "step": 238 }, { "epoch": 0.23142096344710725, "grad_norm": 0.1598438322544098, "learning_rate": 7.77126099706745e-06, "loss": 0.32250896096229553, "step": 239 }, { "epoch": 0.23238925199709515, "grad_norm": 0.13183802366256714, "learning_rate": 7.761485826001955e-06, "loss": 0.31600791215896606, "step": 240 }, { "epoch": 0.23335754054708302, "grad_norm": 0.14657722413539886, "learning_rate": 7.751710654936462e-06, "loss": 0.2864221930503845, "step": 241 }, { "epoch": 0.23432582909707092, "grad_norm": 0.1653253585100174, "learning_rate": 7.741935483870968e-06, "loss": 0.30967211723327637, "step": 242 }, { "epoch": 0.23529411764705882, "grad_norm": 0.15613378584384918, "learning_rate": 7.732160312805475e-06, "loss": 0.30234426259994507, "step": 243 }, { "epoch": 0.23626240619704672, "grad_norm": 0.1411314755678177, "learning_rate": 7.722385141739981e-06, "loss": 0.28815943002700806, "step": 244 }, { "epoch": 0.23723069474703462, "grad_norm": 0.14803080260753632, "learning_rate": 7.712609970674486e-06, "loss": 0.2975384294986725, "step": 245 }, { "epoch": 0.23819898329702252, "grad_norm": 0.1584216207265854, "learning_rate": 7.702834799608994e-06, "loss": 0.2974746525287628, "step": 246 }, { "epoch": 0.23916727184701042, "grad_norm": 0.16107121109962463, "learning_rate": 7.6930596285435e-06, "loss": 0.31581875681877136, "step": 247 }, { "epoch": 0.2401355603969983, "grad_norm": 0.17307540774345398, "learning_rate": 7.683284457478007e-06, "loss": 0.29687726497650146, "step": 248 }, { "epoch": 0.2411038489469862, "grad_norm": 0.16493360698223114, "learning_rate": 7.673509286412512e-06, "loss": 0.35423439741134644, "step": 249 }, { "epoch": 0.2420721374969741, "grad_norm": 0.14273418486118317, "learning_rate": 7.66373411534702e-06, "loss": 0.29487237334251404, "step": 250 }, { "epoch": 0.243040426046962, "grad_norm": 0.164155974984169, "learning_rate": 7.653958944281525e-06, "loss": 0.32345789670944214, "step": 251 }, { "epoch": 0.2440087145969499, "grad_norm": 0.15766294300556183, "learning_rate": 7.644183773216032e-06, "loss": 0.319865882396698, "step": 252 }, { "epoch": 0.2449770031469378, "grad_norm": 0.15514512360095978, "learning_rate": 7.634408602150538e-06, "loss": 0.3162165880203247, "step": 253 }, { "epoch": 0.2459452916969257, "grad_norm": 0.15435358881950378, "learning_rate": 7.624633431085044e-06, "loss": 0.28262361884117126, "step": 254 }, { "epoch": 0.24691358024691357, "grad_norm": 0.14835764467716217, "learning_rate": 7.614858260019551e-06, "loss": 0.28154870867729187, "step": 255 }, { "epoch": 0.24788186879690147, "grad_norm": 0.15369164943695068, "learning_rate": 7.605083088954057e-06, "loss": 0.3451993465423584, "step": 256 }, { "epoch": 0.24885015734688937, "grad_norm": 0.13362520933151245, "learning_rate": 7.5953079178885636e-06, "loss": 0.2882372736930847, "step": 257 }, { "epoch": 0.24981844589687727, "grad_norm": 0.14538030326366425, "learning_rate": 7.58553274682307e-06, "loss": 0.30620044469833374, "step": 258 }, { "epoch": 0.25078673444686517, "grad_norm": 0.1679297834634781, "learning_rate": 7.5757575757575764e-06, "loss": 0.3071752190589905, "step": 259 }, { "epoch": 0.25175502299685304, "grad_norm": 0.1505117118358612, "learning_rate": 7.565982404692083e-06, "loss": 0.2810661792755127, "step": 260 }, { "epoch": 0.25272331154684097, "grad_norm": 0.13865773379802704, "learning_rate": 7.556207233626589e-06, "loss": 0.2779511511325836, "step": 261 }, { "epoch": 0.25369160009682884, "grad_norm": 0.14810754358768463, "learning_rate": 7.546432062561096e-06, "loss": 0.3234580457210541, "step": 262 }, { "epoch": 0.25465988864681677, "grad_norm": 0.15836334228515625, "learning_rate": 7.536656891495602e-06, "loss": 0.3174368739128113, "step": 263 }, { "epoch": 0.25562817719680464, "grad_norm": 0.15845665335655212, "learning_rate": 7.526881720430108e-06, "loss": 0.292019784450531, "step": 264 }, { "epoch": 0.25659646574679257, "grad_norm": 0.1532326340675354, "learning_rate": 7.517106549364614e-06, "loss": 0.3122391700744629, "step": 265 }, { "epoch": 0.25756475429678044, "grad_norm": 0.16675175726413727, "learning_rate": 7.507331378299121e-06, "loss": 0.33715298771858215, "step": 266 }, { "epoch": 0.2585330428467683, "grad_norm": 0.1525373011827469, "learning_rate": 7.497556207233627e-06, "loss": 0.32337823510169983, "step": 267 }, { "epoch": 0.25950133139675624, "grad_norm": 0.15557681024074554, "learning_rate": 7.4877810361681334e-06, "loss": 0.33496809005737305, "step": 268 }, { "epoch": 0.2604696199467441, "grad_norm": 0.1552857607603073, "learning_rate": 7.47800586510264e-06, "loss": 0.29575905203819275, "step": 269 }, { "epoch": 0.26143790849673204, "grad_norm": 0.17039579153060913, "learning_rate": 7.468230694037146e-06, "loss": 0.3094739317893982, "step": 270 }, { "epoch": 0.2624061970467199, "grad_norm": 0.15926915407180786, "learning_rate": 7.458455522971653e-06, "loss": 0.30116045475006104, "step": 271 }, { "epoch": 0.26337448559670784, "grad_norm": 0.15641555190086365, "learning_rate": 7.448680351906159e-06, "loss": 0.33569908142089844, "step": 272 }, { "epoch": 0.2643427741466957, "grad_norm": 0.14819961786270142, "learning_rate": 7.438905180840666e-06, "loss": 0.3124736249446869, "step": 273 }, { "epoch": 0.2653110626966836, "grad_norm": 0.15690119564533234, "learning_rate": 7.429130009775172e-06, "loss": 0.2757438123226166, "step": 274 }, { "epoch": 0.2662793512466715, "grad_norm": 0.1666852980852127, "learning_rate": 7.4193548387096784e-06, "loss": 0.3246593177318573, "step": 275 }, { "epoch": 0.2672476397966594, "grad_norm": 0.16269199550151825, "learning_rate": 7.409579667644185e-06, "loss": 0.2908874750137329, "step": 276 }, { "epoch": 0.2682159283466473, "grad_norm": 0.1640820950269699, "learning_rate": 7.3998044965786904e-06, "loss": 0.3241088390350342, "step": 277 }, { "epoch": 0.2691842168966352, "grad_norm": 0.15672433376312256, "learning_rate": 7.390029325513197e-06, "loss": 0.2804832458496094, "step": 278 }, { "epoch": 0.2701525054466231, "grad_norm": 0.14417491853237152, "learning_rate": 7.380254154447703e-06, "loss": 0.29370468854904175, "step": 279 }, { "epoch": 0.271120793996611, "grad_norm": 0.14853185415267944, "learning_rate": 7.37047898338221e-06, "loss": 0.278653621673584, "step": 280 }, { "epoch": 0.27208908254659886, "grad_norm": 0.1435244232416153, "learning_rate": 7.360703812316716e-06, "loss": 0.2943509817123413, "step": 281 }, { "epoch": 0.2730573710965868, "grad_norm": 0.16490313410758972, "learning_rate": 7.350928641251223e-06, "loss": 0.32479339838027954, "step": 282 }, { "epoch": 0.27402565964657466, "grad_norm": 0.14916065335273743, "learning_rate": 7.341153470185729e-06, "loss": 0.3356713652610779, "step": 283 }, { "epoch": 0.2749939481965626, "grad_norm": 0.158106729388237, "learning_rate": 7.3313782991202354e-06, "loss": 0.3492435812950134, "step": 284 }, { "epoch": 0.27596223674655046, "grad_norm": 0.15584231913089752, "learning_rate": 7.321603128054742e-06, "loss": 0.30297964811325073, "step": 285 }, { "epoch": 0.2769305252965384, "grad_norm": 0.15248778462409973, "learning_rate": 7.311827956989248e-06, "loss": 0.28723299503326416, "step": 286 }, { "epoch": 0.27789881384652626, "grad_norm": 0.14938265085220337, "learning_rate": 7.302052785923755e-06, "loss": 0.2916105389595032, "step": 287 }, { "epoch": 0.2788671023965142, "grad_norm": 0.15402287244796753, "learning_rate": 7.292277614858261e-06, "loss": 0.29120731353759766, "step": 288 }, { "epoch": 0.27983539094650206, "grad_norm": 0.15380002558231354, "learning_rate": 7.282502443792767e-06, "loss": 0.34908658266067505, "step": 289 }, { "epoch": 0.28080367949648993, "grad_norm": 0.15176504850387573, "learning_rate": 7.272727272727273e-06, "loss": 0.346072793006897, "step": 290 }, { "epoch": 0.28177196804647786, "grad_norm": 0.1672578603029251, "learning_rate": 7.26295210166178e-06, "loss": 0.31465622782707214, "step": 291 }, { "epoch": 0.28274025659646573, "grad_norm": 0.14658679068088531, "learning_rate": 7.253176930596286e-06, "loss": 0.28865766525268555, "step": 292 }, { "epoch": 0.28370854514645366, "grad_norm": 0.1515662670135498, "learning_rate": 7.2434017595307925e-06, "loss": 0.30215001106262207, "step": 293 }, { "epoch": 0.28467683369644153, "grad_norm": 0.18654093146324158, "learning_rate": 7.233626588465299e-06, "loss": 0.3126724064350128, "step": 294 }, { "epoch": 0.28564512224642946, "grad_norm": 0.1485200673341751, "learning_rate": 7.223851417399805e-06, "loss": 0.2654643654823303, "step": 295 }, { "epoch": 0.28661341079641733, "grad_norm": 0.1476060450077057, "learning_rate": 7.214076246334312e-06, "loss": 0.2976668179035187, "step": 296 }, { "epoch": 0.2875816993464052, "grad_norm": 0.14893072843551636, "learning_rate": 7.204301075268818e-06, "loss": 0.3222036361694336, "step": 297 }, { "epoch": 0.28854998789639313, "grad_norm": 0.15406127274036407, "learning_rate": 7.194525904203325e-06, "loss": 0.33120018243789673, "step": 298 }, { "epoch": 0.289518276446381, "grad_norm": 0.17568016052246094, "learning_rate": 7.184750733137831e-06, "loss": 0.3500976860523224, "step": 299 }, { "epoch": 0.29048656499636893, "grad_norm": 0.15244793891906738, "learning_rate": 7.1749755620723375e-06, "loss": 0.29483211040496826, "step": 300 }, { "epoch": 0.2914548535463568, "grad_norm": 0.14039957523345947, "learning_rate": 7.165200391006844e-06, "loss": 0.30703267455101013, "step": 301 }, { "epoch": 0.29242314209634473, "grad_norm": 0.16928645968437195, "learning_rate": 7.1554252199413495e-06, "loss": 0.285969078540802, "step": 302 }, { "epoch": 0.2933914306463326, "grad_norm": 0.15592513978481293, "learning_rate": 7.145650048875856e-06, "loss": 0.2873114347457886, "step": 303 }, { "epoch": 0.2943597191963205, "grad_norm": 0.1448688954114914, "learning_rate": 7.135874877810362e-06, "loss": 0.256163626909256, "step": 304 }, { "epoch": 0.2953280077463084, "grad_norm": 0.1630263477563858, "learning_rate": 7.126099706744869e-06, "loss": 0.2792564630508423, "step": 305 }, { "epoch": 0.2962962962962963, "grad_norm": 0.17319388687610626, "learning_rate": 7.116324535679375e-06, "loss": 0.2806742489337921, "step": 306 }, { "epoch": 0.2972645848462842, "grad_norm": 0.15814098715782166, "learning_rate": 7.106549364613882e-06, "loss": 0.2914005517959595, "step": 307 }, { "epoch": 0.2982328733962721, "grad_norm": 0.16322992742061615, "learning_rate": 7.096774193548388e-06, "loss": 0.293082594871521, "step": 308 }, { "epoch": 0.29920116194626, "grad_norm": 0.1633518785238266, "learning_rate": 7.0869990224828945e-06, "loss": 0.2866649925708771, "step": 309 }, { "epoch": 0.3001694504962479, "grad_norm": 0.16669867932796478, "learning_rate": 7.077223851417401e-06, "loss": 0.28914347290992737, "step": 310 }, { "epoch": 0.30113773904623575, "grad_norm": 0.15272612869739532, "learning_rate": 7.067448680351907e-06, "loss": 0.3084270656108856, "step": 311 }, { "epoch": 0.3021060275962237, "grad_norm": 0.15269719064235687, "learning_rate": 7.057673509286414e-06, "loss": 0.30022960901260376, "step": 312 }, { "epoch": 0.30307431614621155, "grad_norm": 0.17254814505577087, "learning_rate": 7.04789833822092e-06, "loss": 0.32925137877464294, "step": 313 }, { "epoch": 0.3040426046961995, "grad_norm": 0.1512719690799713, "learning_rate": 7.038123167155427e-06, "loss": 0.302681028842926, "step": 314 }, { "epoch": 0.30501089324618735, "grad_norm": 0.14636491239070892, "learning_rate": 7.028347996089932e-06, "loss": 0.28604596853256226, "step": 315 }, { "epoch": 0.3059791817961753, "grad_norm": 0.17897070944309235, "learning_rate": 7.018572825024439e-06, "loss": 0.3204849362373352, "step": 316 }, { "epoch": 0.30694747034616315, "grad_norm": 0.1784772127866745, "learning_rate": 7.008797653958945e-06, "loss": 0.26697760820388794, "step": 317 }, { "epoch": 0.307915758896151, "grad_norm": 0.15881852805614471, "learning_rate": 6.9990224828934515e-06, "loss": 0.2558300495147705, "step": 318 }, { "epoch": 0.30888404744613895, "grad_norm": 0.15870684385299683, "learning_rate": 6.989247311827958e-06, "loss": 0.3025914132595062, "step": 319 }, { "epoch": 0.3098523359961268, "grad_norm": 0.1527319699525833, "learning_rate": 6.979472140762464e-06, "loss": 0.3081514835357666, "step": 320 }, { "epoch": 0.31082062454611475, "grad_norm": 0.1624557226896286, "learning_rate": 6.969696969696971e-06, "loss": 0.30399930477142334, "step": 321 }, { "epoch": 0.3117889130961026, "grad_norm": 0.1645076870918274, "learning_rate": 6.959921798631477e-06, "loss": 0.294676810503006, "step": 322 }, { "epoch": 0.31275720164609055, "grad_norm": 0.14686374366283417, "learning_rate": 6.950146627565984e-06, "loss": 0.2830040752887726, "step": 323 }, { "epoch": 0.3137254901960784, "grad_norm": 0.15746936202049255, "learning_rate": 6.94037145650049e-06, "loss": 0.3081029951572418, "step": 324 }, { "epoch": 0.31469377874606635, "grad_norm": 0.14588280022144318, "learning_rate": 6.9305962854349965e-06, "loss": 0.2718695104122162, "step": 325 }, { "epoch": 0.3156620672960542, "grad_norm": 0.16646429896354675, "learning_rate": 6.920821114369503e-06, "loss": 0.31487900018692017, "step": 326 }, { "epoch": 0.3166303558460421, "grad_norm": 0.1697472333908081, "learning_rate": 6.911045943304009e-06, "loss": 0.29448121786117554, "step": 327 }, { "epoch": 0.31759864439603, "grad_norm": 0.16235950589179993, "learning_rate": 6.901270772238515e-06, "loss": 0.2658935487270355, "step": 328 }, { "epoch": 0.3185669329460179, "grad_norm": 0.167832151055336, "learning_rate": 6.891495601173021e-06, "loss": 0.28193116188049316, "step": 329 }, { "epoch": 0.31953522149600583, "grad_norm": 0.15145322680473328, "learning_rate": 6.881720430107528e-06, "loss": 0.2826996445655823, "step": 330 }, { "epoch": 0.3205035100459937, "grad_norm": 0.19785556197166443, "learning_rate": 6.871945259042033e-06, "loss": 0.31135326623916626, "step": 331 }, { "epoch": 0.32147179859598163, "grad_norm": 0.1549469530582428, "learning_rate": 6.86217008797654e-06, "loss": 0.3094058930873871, "step": 332 }, { "epoch": 0.3224400871459695, "grad_norm": 0.15144820511341095, "learning_rate": 6.852394916911046e-06, "loss": 0.26982536911964417, "step": 333 }, { "epoch": 0.3234083756959574, "grad_norm": 0.15371711552143097, "learning_rate": 6.842619745845553e-06, "loss": 0.29452741146087646, "step": 334 }, { "epoch": 0.3243766642459453, "grad_norm": 0.15917186439037323, "learning_rate": 6.832844574780059e-06, "loss": 0.29580235481262207, "step": 335 }, { "epoch": 0.3253449527959332, "grad_norm": 0.1550978273153305, "learning_rate": 6.823069403714565e-06, "loss": 0.36159706115722656, "step": 336 }, { "epoch": 0.3263132413459211, "grad_norm": 0.14809750020503998, "learning_rate": 6.813294232649071e-06, "loss": 0.2756076753139496, "step": 337 }, { "epoch": 0.327281529895909, "grad_norm": 0.15484212338924408, "learning_rate": 6.8035190615835775e-06, "loss": 0.26915088295936584, "step": 338 }, { "epoch": 0.3282498184458969, "grad_norm": 0.16279636323451996, "learning_rate": 6.793743890518084e-06, "loss": 0.3175829350948334, "step": 339 }, { "epoch": 0.3292181069958848, "grad_norm": 0.16756120324134827, "learning_rate": 6.78396871945259e-06, "loss": 0.3034948706626892, "step": 340 }, { "epoch": 0.33018639554587265, "grad_norm": 0.1840161681175232, "learning_rate": 6.774193548387097e-06, "loss": 0.32814380526542664, "step": 341 }, { "epoch": 0.3311546840958606, "grad_norm": 0.17000938951969147, "learning_rate": 6.764418377321603e-06, "loss": 0.2874530553817749, "step": 342 }, { "epoch": 0.33212297264584845, "grad_norm": 0.16385532915592194, "learning_rate": 6.75464320625611e-06, "loss": 0.3138440251350403, "step": 343 }, { "epoch": 0.3330912611958364, "grad_norm": 0.18285442888736725, "learning_rate": 6.744868035190616e-06, "loss": 0.30819112062454224, "step": 344 }, { "epoch": 0.33405954974582425, "grad_norm": 0.16300350427627563, "learning_rate": 6.7350928641251225e-06, "loss": 0.291953444480896, "step": 345 }, { "epoch": 0.3350278382958122, "grad_norm": 0.18186615407466888, "learning_rate": 6.725317693059629e-06, "loss": 0.27950209379196167, "step": 346 }, { "epoch": 0.33599612684580005, "grad_norm": 0.16226314008235931, "learning_rate": 6.715542521994135e-06, "loss": 0.29765087366104126, "step": 347 }, { "epoch": 0.3369644153957879, "grad_norm": 0.18730367720127106, "learning_rate": 6.705767350928642e-06, "loss": 0.3281936049461365, "step": 348 }, { "epoch": 0.33793270394577585, "grad_norm": 0.16875147819519043, "learning_rate": 6.695992179863147e-06, "loss": 0.3038584291934967, "step": 349 }, { "epoch": 0.3389009924957637, "grad_norm": 0.17776557803153992, "learning_rate": 6.686217008797654e-06, "loss": 0.2937045395374298, "step": 350 }, { "epoch": 0.33986928104575165, "grad_norm": 0.14246642589569092, "learning_rate": 6.67644183773216e-06, "loss": 0.3092482388019562, "step": 351 }, { "epoch": 0.3408375695957395, "grad_norm": 0.15377755463123322, "learning_rate": 6.666666666666667e-06, "loss": 0.2588135898113251, "step": 352 }, { "epoch": 0.34180585814572745, "grad_norm": 0.1557725965976715, "learning_rate": 6.656891495601173e-06, "loss": 0.2831732928752899, "step": 353 }, { "epoch": 0.3427741466957153, "grad_norm": 0.15106302499771118, "learning_rate": 6.6471163245356795e-06, "loss": 0.26685526967048645, "step": 354 }, { "epoch": 0.3437424352457032, "grad_norm": 0.17710185050964355, "learning_rate": 6.637341153470186e-06, "loss": 0.3084600567817688, "step": 355 }, { "epoch": 0.3447107237956911, "grad_norm": 0.14838555455207825, "learning_rate": 6.627565982404692e-06, "loss": 0.2909257113933563, "step": 356 }, { "epoch": 0.345679012345679, "grad_norm": 0.1700345277786255, "learning_rate": 6.617790811339199e-06, "loss": 0.3314460217952728, "step": 357 }, { "epoch": 0.3466473008956669, "grad_norm": 0.17465804517269135, "learning_rate": 6.608015640273705e-06, "loss": 0.30570876598358154, "step": 358 }, { "epoch": 0.3476155894456548, "grad_norm": 0.1821223646402359, "learning_rate": 6.598240469208212e-06, "loss": 0.3378984034061432, "step": 359 }, { "epoch": 0.3485838779956427, "grad_norm": 0.15884800255298615, "learning_rate": 6.588465298142718e-06, "loss": 0.2953569293022156, "step": 360 }, { "epoch": 0.3495521665456306, "grad_norm": 0.15600639581680298, "learning_rate": 6.5786901270772245e-06, "loss": 0.36609965562820435, "step": 361 }, { "epoch": 0.3505204550956185, "grad_norm": 0.15438097715377808, "learning_rate": 6.56891495601173e-06, "loss": 0.33403828740119934, "step": 362 }, { "epoch": 0.3514887436456064, "grad_norm": 0.1604045182466507, "learning_rate": 6.5591397849462365e-06, "loss": 0.2991517186164856, "step": 363 }, { "epoch": 0.35245703219559427, "grad_norm": 0.17102058231830597, "learning_rate": 6.549364613880743e-06, "loss": 0.28116142749786377, "step": 364 }, { "epoch": 0.3534253207455822, "grad_norm": 0.15797586739063263, "learning_rate": 6.539589442815249e-06, "loss": 0.30658936500549316, "step": 365 }, { "epoch": 0.35439360929557007, "grad_norm": 0.1578051745891571, "learning_rate": 6.529814271749756e-06, "loss": 0.3066115379333496, "step": 366 }, { "epoch": 0.355361897845558, "grad_norm": 0.15831097960472107, "learning_rate": 6.520039100684262e-06, "loss": 0.30893969535827637, "step": 367 }, { "epoch": 0.35633018639554587, "grad_norm": 0.16711507737636566, "learning_rate": 6.510263929618769e-06, "loss": 0.29801586270332336, "step": 368 }, { "epoch": 0.3572984749455338, "grad_norm": 0.1717497706413269, "learning_rate": 6.500488758553275e-06, "loss": 0.3415631949901581, "step": 369 }, { "epoch": 0.35826676349552167, "grad_norm": 0.1789737045764923, "learning_rate": 6.4907135874877815e-06, "loss": 0.3342001140117645, "step": 370 }, { "epoch": 0.35923505204550954, "grad_norm": 0.16474243998527527, "learning_rate": 6.480938416422288e-06, "loss": 0.29570648074150085, "step": 371 }, { "epoch": 0.36020334059549747, "grad_norm": 0.17131595313549042, "learning_rate": 6.471163245356794e-06, "loss": 0.33989042043685913, "step": 372 }, { "epoch": 0.36117162914548534, "grad_norm": 0.1660817712545395, "learning_rate": 6.461388074291301e-06, "loss": 0.28438785672187805, "step": 373 }, { "epoch": 0.36213991769547327, "grad_norm": 0.16126903891563416, "learning_rate": 6.451612903225806e-06, "loss": 0.30980294942855835, "step": 374 }, { "epoch": 0.36310820624546114, "grad_norm": 0.1648305058479309, "learning_rate": 6.441837732160313e-06, "loss": 0.325278103351593, "step": 375 }, { "epoch": 0.36407649479544907, "grad_norm": 0.17030373215675354, "learning_rate": 6.432062561094819e-06, "loss": 0.3066975176334381, "step": 376 }, { "epoch": 0.36504478334543694, "grad_norm": 0.17431139945983887, "learning_rate": 6.422287390029326e-06, "loss": 0.28780531883239746, "step": 377 }, { "epoch": 0.3660130718954248, "grad_norm": 0.1605546921491623, "learning_rate": 6.412512218963832e-06, "loss": 0.2759549915790558, "step": 378 }, { "epoch": 0.36698136044541274, "grad_norm": 0.16246621310710907, "learning_rate": 6.4027370478983385e-06, "loss": 0.2838786840438843, "step": 379 }, { "epoch": 0.3679496489954006, "grad_norm": 0.173859640955925, "learning_rate": 6.392961876832845e-06, "loss": 0.28778478503227234, "step": 380 }, { "epoch": 0.36891793754538854, "grad_norm": 0.17580649256706238, "learning_rate": 6.383186705767351e-06, "loss": 0.29734641313552856, "step": 381 }, { "epoch": 0.3698862260953764, "grad_norm": 0.21752490103244781, "learning_rate": 6.373411534701858e-06, "loss": 0.331564724445343, "step": 382 }, { "epoch": 0.37085451464536434, "grad_norm": 0.1802123785018921, "learning_rate": 6.363636363636364e-06, "loss": 0.2878391742706299, "step": 383 }, { "epoch": 0.3718228031953522, "grad_norm": 0.16118982434272766, "learning_rate": 6.353861192570871e-06, "loss": 0.29216498136520386, "step": 384 }, { "epoch": 0.3727910917453401, "grad_norm": 0.18390141427516937, "learning_rate": 6.344086021505377e-06, "loss": 0.3013034164905548, "step": 385 }, { "epoch": 0.373759380295328, "grad_norm": 0.17186126112937927, "learning_rate": 6.3343108504398835e-06, "loss": 0.2939417362213135, "step": 386 }, { "epoch": 0.3747276688453159, "grad_norm": 0.1863613873720169, "learning_rate": 6.324535679374389e-06, "loss": 0.3011291027069092, "step": 387 }, { "epoch": 0.3756959573953038, "grad_norm": 0.16492682695388794, "learning_rate": 6.3147605083088955e-06, "loss": 0.29598119854927063, "step": 388 }, { "epoch": 0.3766642459452917, "grad_norm": 0.1751633882522583, "learning_rate": 6.304985337243402e-06, "loss": 0.3110932409763336, "step": 389 }, { "epoch": 0.3776325344952796, "grad_norm": 0.1898571252822876, "learning_rate": 6.295210166177908e-06, "loss": 0.30633416771888733, "step": 390 }, { "epoch": 0.3786008230452675, "grad_norm": 0.1563596874475479, "learning_rate": 6.285434995112415e-06, "loss": 0.2938535809516907, "step": 391 }, { "epoch": 0.37956911159525536, "grad_norm": 0.18046635389328003, "learning_rate": 6.275659824046921e-06, "loss": 0.2856330871582031, "step": 392 }, { "epoch": 0.3805374001452433, "grad_norm": 0.15708908438682556, "learning_rate": 6.265884652981428e-06, "loss": 0.2904341518878937, "step": 393 }, { "epoch": 0.38150568869523116, "grad_norm": 0.17719998955726624, "learning_rate": 6.256109481915934e-06, "loss": 0.28807759284973145, "step": 394 }, { "epoch": 0.3824739772452191, "grad_norm": 0.15774236619472504, "learning_rate": 6.2463343108504405e-06, "loss": 0.2675943076610565, "step": 395 }, { "epoch": 0.38344226579520696, "grad_norm": 0.15558338165283203, "learning_rate": 6.236559139784947e-06, "loss": 0.2567376494407654, "step": 396 }, { "epoch": 0.3844105543451949, "grad_norm": 0.1891474723815918, "learning_rate": 6.226783968719453e-06, "loss": 0.2961275279521942, "step": 397 }, { "epoch": 0.38537884289518276, "grad_norm": 0.17778225243091583, "learning_rate": 6.21700879765396e-06, "loss": 0.3132587671279907, "step": 398 }, { "epoch": 0.3863471314451707, "grad_norm": 0.1902502477169037, "learning_rate": 6.207233626588466e-06, "loss": 0.31374305486679077, "step": 399 }, { "epoch": 0.38731541999515856, "grad_norm": 0.1710149198770523, "learning_rate": 6.197458455522972e-06, "loss": 0.34003812074661255, "step": 400 }, { "epoch": 0.38828370854514643, "grad_norm": 0.16460557281970978, "learning_rate": 6.187683284457478e-06, "loss": 0.2728930115699768, "step": 401 }, { "epoch": 0.38925199709513436, "grad_norm": 0.17229019105434418, "learning_rate": 6.177908113391985e-06, "loss": 0.2700308561325073, "step": 402 }, { "epoch": 0.39022028564512223, "grad_norm": 0.18431095778942108, "learning_rate": 6.168132942326491e-06, "loss": 0.2867494821548462, "step": 403 }, { "epoch": 0.39118857419511016, "grad_norm": 0.17898224294185638, "learning_rate": 6.1583577712609975e-06, "loss": 0.26027926802635193, "step": 404 }, { "epoch": 0.39215686274509803, "grad_norm": 0.1536150425672531, "learning_rate": 6.148582600195504e-06, "loss": 0.2740130126476288, "step": 405 }, { "epoch": 0.39312515129508596, "grad_norm": 0.46492230892181396, "learning_rate": 6.13880742913001e-06, "loss": 0.2832326889038086, "step": 406 }, { "epoch": 0.39409343984507383, "grad_norm": 0.18063224852085114, "learning_rate": 6.129032258064517e-06, "loss": 0.2683679163455963, "step": 407 }, { "epoch": 0.3950617283950617, "grad_norm": 0.18479417264461517, "learning_rate": 6.119257086999023e-06, "loss": 0.2960650324821472, "step": 408 }, { "epoch": 0.39603001694504963, "grad_norm": 0.16542568802833557, "learning_rate": 6.10948191593353e-06, "loss": 0.28208404779434204, "step": 409 }, { "epoch": 0.3969983054950375, "grad_norm": 0.1611918956041336, "learning_rate": 6.099706744868036e-06, "loss": 0.2653481364250183, "step": 410 }, { "epoch": 0.39796659404502543, "grad_norm": 0.17886482179164886, "learning_rate": 6.0899315738025425e-06, "loss": 0.33219113945961, "step": 411 }, { "epoch": 0.3989348825950133, "grad_norm": 0.16463807225227356, "learning_rate": 6.080156402737049e-06, "loss": 0.28929123282432556, "step": 412 }, { "epoch": 0.39990317114500123, "grad_norm": 0.18031014502048492, "learning_rate": 6.0703812316715545e-06, "loss": 0.27609509229660034, "step": 413 }, { "epoch": 0.4008714596949891, "grad_norm": 0.18028417229652405, "learning_rate": 6.060606060606061e-06, "loss": 0.29474079608917236, "step": 414 }, { "epoch": 0.401839748244977, "grad_norm": 0.16485083103179932, "learning_rate": 6.050830889540567e-06, "loss": 0.33132994174957275, "step": 415 }, { "epoch": 0.4028080367949649, "grad_norm": 0.17660938203334808, "learning_rate": 6.041055718475074e-06, "loss": 0.2553951144218445, "step": 416 }, { "epoch": 0.4037763253449528, "grad_norm": 0.18007521331310272, "learning_rate": 6.03128054740958e-06, "loss": 0.2640475034713745, "step": 417 }, { "epoch": 0.4047446138949407, "grad_norm": 0.16710299253463745, "learning_rate": 6.021505376344087e-06, "loss": 0.26302963495254517, "step": 418 }, { "epoch": 0.4057129024449286, "grad_norm": 0.1827956736087799, "learning_rate": 6.011730205278593e-06, "loss": 0.3405194878578186, "step": 419 }, { "epoch": 0.4066811909949165, "grad_norm": 0.1711130291223526, "learning_rate": 6.0019550342130995e-06, "loss": 0.280174195766449, "step": 420 }, { "epoch": 0.4076494795449044, "grad_norm": 0.16884659230709076, "learning_rate": 5.992179863147606e-06, "loss": 0.26946425437927246, "step": 421 }, { "epoch": 0.40861776809489225, "grad_norm": 0.17745757102966309, "learning_rate": 5.982404692082112e-06, "loss": 0.3392980396747589, "step": 422 }, { "epoch": 0.4095860566448802, "grad_norm": 0.1780301034450531, "learning_rate": 5.972629521016619e-06, "loss": 0.30674225091934204, "step": 423 }, { "epoch": 0.41055434519486805, "grad_norm": 0.17808158695697784, "learning_rate": 5.962854349951125e-06, "loss": 0.3345290720462799, "step": 424 }, { "epoch": 0.411522633744856, "grad_norm": 0.16129203140735626, "learning_rate": 5.953079178885631e-06, "loss": 0.2831481695175171, "step": 425 }, { "epoch": 0.41249092229484385, "grad_norm": 0.18456275761127472, "learning_rate": 5.943304007820137e-06, "loss": 0.3257300853729248, "step": 426 }, { "epoch": 0.4134592108448318, "grad_norm": 0.18435759842395782, "learning_rate": 5.933528836754644e-06, "loss": 0.26924797892570496, "step": 427 }, { "epoch": 0.41442749939481965, "grad_norm": 0.1941821128129959, "learning_rate": 5.92375366568915e-06, "loss": 0.3252018392086029, "step": 428 }, { "epoch": 0.4153957879448075, "grad_norm": 0.17482848465442657, "learning_rate": 5.9139784946236566e-06, "loss": 0.33910396695137024, "step": 429 }, { "epoch": 0.41636407649479545, "grad_norm": 0.18026143312454224, "learning_rate": 5.904203323558163e-06, "loss": 0.2899131178855896, "step": 430 }, { "epoch": 0.4173323650447833, "grad_norm": 0.18868599832057953, "learning_rate": 5.894428152492669e-06, "loss": 0.26209527254104614, "step": 431 }, { "epoch": 0.41830065359477125, "grad_norm": 0.172159805893898, "learning_rate": 5.884652981427176e-06, "loss": 0.2784045338630676, "step": 432 }, { "epoch": 0.4192689421447591, "grad_norm": 0.19189684092998505, "learning_rate": 5.874877810361682e-06, "loss": 0.3449173867702484, "step": 433 }, { "epoch": 0.42023723069474705, "grad_norm": 0.18038828670978546, "learning_rate": 5.865102639296189e-06, "loss": 0.260070264339447, "step": 434 }, { "epoch": 0.4212055192447349, "grad_norm": 0.17879043519496918, "learning_rate": 5.855327468230695e-06, "loss": 0.2970094382762909, "step": 435 }, { "epoch": 0.42217380779472286, "grad_norm": 0.19369956851005554, "learning_rate": 5.8455522971652016e-06, "loss": 0.262788325548172, "step": 436 }, { "epoch": 0.4231420963447107, "grad_norm": 0.1980774849653244, "learning_rate": 5.835777126099708e-06, "loss": 0.3415115475654602, "step": 437 }, { "epoch": 0.4241103848946986, "grad_norm": 0.1517505943775177, "learning_rate": 5.8260019550342136e-06, "loss": 0.2550700902938843, "step": 438 }, { "epoch": 0.42507867344468653, "grad_norm": 0.16468308866024017, "learning_rate": 5.81622678396872e-06, "loss": 0.3277415633201599, "step": 439 }, { "epoch": 0.4260469619946744, "grad_norm": 0.1632845550775528, "learning_rate": 5.806451612903226e-06, "loss": 0.2696504294872284, "step": 440 }, { "epoch": 0.42701525054466233, "grad_norm": 0.17740678787231445, "learning_rate": 5.796676441837733e-06, "loss": 0.3146612048149109, "step": 441 }, { "epoch": 0.4279835390946502, "grad_norm": 0.1720811426639557, "learning_rate": 5.786901270772239e-06, "loss": 0.293180376291275, "step": 442 }, { "epoch": 0.42895182764463813, "grad_norm": 0.16457650065422058, "learning_rate": 5.777126099706746e-06, "loss": 0.25529271364212036, "step": 443 }, { "epoch": 0.429920116194626, "grad_norm": 0.18886499106884003, "learning_rate": 5.767350928641252e-06, "loss": 0.2667441964149475, "step": 444 }, { "epoch": 0.4308884047446139, "grad_norm": 0.16837763786315918, "learning_rate": 5.7575757575757586e-06, "loss": 0.2874595820903778, "step": 445 }, { "epoch": 0.4318566932946018, "grad_norm": 0.19567479193210602, "learning_rate": 5.747800586510265e-06, "loss": 0.2736223042011261, "step": 446 }, { "epoch": 0.4328249818445897, "grad_norm": 0.18101078271865845, "learning_rate": 5.738025415444771e-06, "loss": 0.3007189631462097, "step": 447 }, { "epoch": 0.4337932703945776, "grad_norm": 0.17572757601737976, "learning_rate": 5.728250244379278e-06, "loss": 0.3632327914237976, "step": 448 }, { "epoch": 0.4347615589445655, "grad_norm": 0.17773869633674622, "learning_rate": 5.718475073313784e-06, "loss": 0.3204823434352875, "step": 449 }, { "epoch": 0.4357298474945534, "grad_norm": 0.1703418791294098, "learning_rate": 5.708699902248291e-06, "loss": 0.31934505701065063, "step": 450 }, { "epoch": 0.4366981360445413, "grad_norm": 0.16851919889450073, "learning_rate": 5.698924731182796e-06, "loss": 0.33900323510169983, "step": 451 }, { "epoch": 0.43766642459452915, "grad_norm": 0.16920781135559082, "learning_rate": 5.689149560117303e-06, "loss": 0.2747448980808258, "step": 452 }, { "epoch": 0.4386347131445171, "grad_norm": 0.20053993165493011, "learning_rate": 5.679374389051809e-06, "loss": 0.28275251388549805, "step": 453 }, { "epoch": 0.43960300169450495, "grad_norm": 0.17686837911605835, "learning_rate": 5.6695992179863156e-06, "loss": 0.26753419637680054, "step": 454 }, { "epoch": 0.4405712902444929, "grad_norm": 0.20442141592502594, "learning_rate": 5.659824046920822e-06, "loss": 0.32636407017707825, "step": 455 }, { "epoch": 0.44153957879448075, "grad_norm": 0.1751495897769928, "learning_rate": 5.6500488758553284e-06, "loss": 0.29740267992019653, "step": 456 }, { "epoch": 0.4425078673444687, "grad_norm": 0.17008022964000702, "learning_rate": 5.640273704789835e-06, "loss": 0.2965855002403259, "step": 457 }, { "epoch": 0.44347615589445655, "grad_norm": 0.1770244538784027, "learning_rate": 5.630498533724341e-06, "loss": 0.39362120628356934, "step": 458 }, { "epoch": 0.4444444444444444, "grad_norm": 0.17790165543556213, "learning_rate": 5.620723362658846e-06, "loss": 0.2864190340042114, "step": 459 }, { "epoch": 0.44541273299443235, "grad_norm": 0.17405082285404205, "learning_rate": 5.6109481915933524e-06, "loss": 0.2946798801422119, "step": 460 }, { "epoch": 0.4463810215444202, "grad_norm": 0.16010600328445435, "learning_rate": 5.601173020527859e-06, "loss": 0.32160502672195435, "step": 461 }, { "epoch": 0.44734931009440815, "grad_norm": 0.1997617781162262, "learning_rate": 5.591397849462365e-06, "loss": 0.32814455032348633, "step": 462 }, { "epoch": 0.448317598644396, "grad_norm": 0.17624011635780334, "learning_rate": 5.581622678396872e-06, "loss": 0.2808952331542969, "step": 463 }, { "epoch": 0.44928588719438395, "grad_norm": 0.16722382605075836, "learning_rate": 5.571847507331378e-06, "loss": 0.26833376288414, "step": 464 }, { "epoch": 0.4502541757443718, "grad_norm": 0.16350014507770538, "learning_rate": 5.562072336265885e-06, "loss": 0.2904164791107178, "step": 465 }, { "epoch": 0.4512224642943597, "grad_norm": 0.15504086017608643, "learning_rate": 5.552297165200391e-06, "loss": 0.3124706745147705, "step": 466 }, { "epoch": 0.4521907528443476, "grad_norm": 0.17865699529647827, "learning_rate": 5.5425219941348974e-06, "loss": 0.30932655930519104, "step": 467 }, { "epoch": 0.4531590413943355, "grad_norm": 0.179380863904953, "learning_rate": 5.532746823069404e-06, "loss": 0.3099682033061981, "step": 468 }, { "epoch": 0.4541273299443234, "grad_norm": 0.1848987489938736, "learning_rate": 5.52297165200391e-06, "loss": 0.310943603515625, "step": 469 }, { "epoch": 0.4550956184943113, "grad_norm": 0.17355690896511078, "learning_rate": 5.513196480938417e-06, "loss": 0.27683690190315247, "step": 470 }, { "epoch": 0.4560639070442992, "grad_norm": 0.18208661675453186, "learning_rate": 5.503421309872923e-06, "loss": 0.26567360758781433, "step": 471 }, { "epoch": 0.4570321955942871, "grad_norm": 0.17654170095920563, "learning_rate": 5.493646138807429e-06, "loss": 0.29490426182746887, "step": 472 }, { "epoch": 0.45800048414427497, "grad_norm": 0.1757243424654007, "learning_rate": 5.483870967741935e-06, "loss": 0.30711159110069275, "step": 473 }, { "epoch": 0.4589687726942629, "grad_norm": 0.17413422465324402, "learning_rate": 5.474095796676442e-06, "loss": 0.28973209857940674, "step": 474 }, { "epoch": 0.45993706124425077, "grad_norm": 0.20302073657512665, "learning_rate": 5.464320625610948e-06, "loss": 0.3249307870864868, "step": 475 }, { "epoch": 0.4609053497942387, "grad_norm": 0.17959873378276825, "learning_rate": 5.4545454545454545e-06, "loss": 0.29579484462738037, "step": 476 }, { "epoch": 0.46187363834422657, "grad_norm": 0.17562335729599, "learning_rate": 5.444770283479961e-06, "loss": 0.3038690984249115, "step": 477 }, { "epoch": 0.4628419268942145, "grad_norm": 0.16495366394519806, "learning_rate": 5.434995112414467e-06, "loss": 0.281146377325058, "step": 478 }, { "epoch": 0.46381021544420237, "grad_norm": 0.17205455899238586, "learning_rate": 5.425219941348974e-06, "loss": 0.2786451280117035, "step": 479 }, { "epoch": 0.4647785039941903, "grad_norm": 0.19133879244327545, "learning_rate": 5.41544477028348e-06, "loss": 0.3336411416530609, "step": 480 }, { "epoch": 0.46574679254417817, "grad_norm": 0.18153399229049683, "learning_rate": 5.405669599217987e-06, "loss": 0.28267285227775574, "step": 481 }, { "epoch": 0.46671508109416604, "grad_norm": 0.16732986271381378, "learning_rate": 5.395894428152493e-06, "loss": 0.2745664119720459, "step": 482 }, { "epoch": 0.46768336964415397, "grad_norm": 0.19961762428283691, "learning_rate": 5.3861192570869995e-06, "loss": 0.2916579246520996, "step": 483 }, { "epoch": 0.46865165819414184, "grad_norm": 0.18672992289066315, "learning_rate": 5.376344086021506e-06, "loss": 0.2882307767868042, "step": 484 }, { "epoch": 0.46961994674412977, "grad_norm": 0.16605433821678162, "learning_rate": 5.3665689149560115e-06, "loss": 0.32832133769989014, "step": 485 }, { "epoch": 0.47058823529411764, "grad_norm": 0.1809573769569397, "learning_rate": 5.356793743890518e-06, "loss": 0.28796786069869995, "step": 486 }, { "epoch": 0.47155652384410557, "grad_norm": 0.15820080041885376, "learning_rate": 5.347018572825024e-06, "loss": 0.24655906856060028, "step": 487 }, { "epoch": 0.47252481239409344, "grad_norm": 0.183393657207489, "learning_rate": 5.337243401759531e-06, "loss": 0.3693656027317047, "step": 488 }, { "epoch": 0.4734931009440813, "grad_norm": 0.17333702743053436, "learning_rate": 5.327468230694037e-06, "loss": 0.2813875079154968, "step": 489 }, { "epoch": 0.47446138949406924, "grad_norm": 0.18470393121242523, "learning_rate": 5.317693059628544e-06, "loss": 0.32118356227874756, "step": 490 }, { "epoch": 0.4754296780440571, "grad_norm": 0.17366191744804382, "learning_rate": 5.30791788856305e-06, "loss": 0.27578046917915344, "step": 491 }, { "epoch": 0.47639796659404504, "grad_norm": 0.16945011913776398, "learning_rate": 5.2981427174975565e-06, "loss": 0.3115886151790619, "step": 492 }, { "epoch": 0.4773662551440329, "grad_norm": 0.20388440787792206, "learning_rate": 5.288367546432063e-06, "loss": 0.309696227312088, "step": 493 }, { "epoch": 0.47833454369402084, "grad_norm": 0.156901016831398, "learning_rate": 5.278592375366569e-06, "loss": 0.27146872878074646, "step": 494 }, { "epoch": 0.4793028322440087, "grad_norm": 0.20242440700531006, "learning_rate": 5.268817204301076e-06, "loss": 0.33286309242248535, "step": 495 }, { "epoch": 0.4802711207939966, "grad_norm": 0.20036989450454712, "learning_rate": 5.259042033235582e-06, "loss": 0.285398006439209, "step": 496 }, { "epoch": 0.4812394093439845, "grad_norm": 0.16521663963794708, "learning_rate": 5.249266862170089e-06, "loss": 0.27880388498306274, "step": 497 }, { "epoch": 0.4822076978939724, "grad_norm": 0.16702234745025635, "learning_rate": 5.239491691104594e-06, "loss": 0.29399362206459045, "step": 498 }, { "epoch": 0.4831759864439603, "grad_norm": 0.18302516639232635, "learning_rate": 5.229716520039101e-06, "loss": 0.2757553160190582, "step": 499 }, { "epoch": 0.4841442749939482, "grad_norm": 0.17423763871192932, "learning_rate": 5.219941348973607e-06, "loss": 0.2870354950428009, "step": 500 }, { "epoch": 0.4851125635439361, "grad_norm": 0.19603262841701508, "learning_rate": 5.2101661779081135e-06, "loss": 0.2726498246192932, "step": 501 }, { "epoch": 0.486080852093924, "grad_norm": 0.1614205241203308, "learning_rate": 5.20039100684262e-06, "loss": 0.25111639499664307, "step": 502 }, { "epoch": 0.48704914064391186, "grad_norm": 0.17319105565547943, "learning_rate": 5.190615835777126e-06, "loss": 0.27468031644821167, "step": 503 }, { "epoch": 0.4880174291938998, "grad_norm": 0.16882063448429108, "learning_rate": 5.180840664711633e-06, "loss": 0.27068573236465454, "step": 504 }, { "epoch": 0.48898571774388766, "grad_norm": 0.18153499066829681, "learning_rate": 5.171065493646139e-06, "loss": 0.28188517689704895, "step": 505 }, { "epoch": 0.4899540062938756, "grad_norm": 0.1816774159669876, "learning_rate": 5.161290322580646e-06, "loss": 0.32222485542297363, "step": 506 }, { "epoch": 0.49092229484386346, "grad_norm": 0.16442593932151794, "learning_rate": 5.151515151515152e-06, "loss": 0.30542707443237305, "step": 507 }, { "epoch": 0.4918905833938514, "grad_norm": 0.1821308732032776, "learning_rate": 5.1417399804496585e-06, "loss": 0.293884813785553, "step": 508 }, { "epoch": 0.49285887194383926, "grad_norm": 0.1683465987443924, "learning_rate": 5.131964809384165e-06, "loss": 0.26638439297676086, "step": 509 }, { "epoch": 0.49382716049382713, "grad_norm": 0.17483524978160858, "learning_rate": 5.1221896383186705e-06, "loss": 0.30652916431427, "step": 510 }, { "epoch": 0.49479544904381506, "grad_norm": 0.1842867136001587, "learning_rate": 5.112414467253177e-06, "loss": 0.364931583404541, "step": 511 }, { "epoch": 0.49576373759380293, "grad_norm": 0.19743406772613525, "learning_rate": 5.102639296187683e-06, "loss": 0.2590721547603607, "step": 512 }, { "epoch": 0.49673202614379086, "grad_norm": 0.18802092969417572, "learning_rate": 5.09286412512219e-06, "loss": 0.31060951948165894, "step": 513 }, { "epoch": 0.49770031469377873, "grad_norm": 0.16384844481945038, "learning_rate": 5.083088954056696e-06, "loss": 0.27959296107292175, "step": 514 }, { "epoch": 0.49866860324376666, "grad_norm": 0.2127850353717804, "learning_rate": 5.073313782991203e-06, "loss": 0.3346613049507141, "step": 515 }, { "epoch": 0.49963689179375453, "grad_norm": 0.17491693794727325, "learning_rate": 5.063538611925709e-06, "loss": 0.2960091531276703, "step": 516 }, { "epoch": 0.5006051803437425, "grad_norm": 0.1880018264055252, "learning_rate": 5.0537634408602155e-06, "loss": 0.2997010350227356, "step": 517 }, { "epoch": 0.5015734688937303, "grad_norm": 0.1748742163181305, "learning_rate": 5.043988269794722e-06, "loss": 0.2931768596172333, "step": 518 }, { "epoch": 0.5025417574437182, "grad_norm": 0.15878638625144958, "learning_rate": 5.034213098729228e-06, "loss": 0.254057914018631, "step": 519 }, { "epoch": 0.5035100459937061, "grad_norm": 0.2069050818681717, "learning_rate": 5.024437927663735e-06, "loss": 0.2735084295272827, "step": 520 }, { "epoch": 0.5044783345436941, "grad_norm": 0.16623827815055847, "learning_rate": 5.014662756598241e-06, "loss": 0.25306957960128784, "step": 521 }, { "epoch": 0.5054466230936819, "grad_norm": 0.1891428381204605, "learning_rate": 5.004887585532748e-06, "loss": 0.2810228765010834, "step": 522 }, { "epoch": 0.5064149116436698, "grad_norm": 0.2315511256456375, "learning_rate": 4.995112414467253e-06, "loss": 0.2733577489852905, "step": 523 }, { "epoch": 0.5073832001936577, "grad_norm": 0.16957992315292358, "learning_rate": 4.98533724340176e-06, "loss": 0.27292630076408386, "step": 524 }, { "epoch": 0.5083514887436456, "grad_norm": 0.17816272377967834, "learning_rate": 4.975562072336266e-06, "loss": 0.27049022912979126, "step": 525 }, { "epoch": 0.5093197772936335, "grad_norm": 0.17525239288806915, "learning_rate": 4.9657869012707725e-06, "loss": 0.2759566903114319, "step": 526 }, { "epoch": 0.5102880658436214, "grad_norm": 0.18764440715312958, "learning_rate": 4.956011730205279e-06, "loss": 0.27127569913864136, "step": 527 }, { "epoch": 0.5112563543936093, "grad_norm": 0.18698008358478546, "learning_rate": 4.946236559139785e-06, "loss": 0.2902853786945343, "step": 528 }, { "epoch": 0.5122246429435972, "grad_norm": 0.17745737731456757, "learning_rate": 4.936461388074292e-06, "loss": 0.32079097628593445, "step": 529 }, { "epoch": 0.5131929314935851, "grad_norm": 0.17994803190231323, "learning_rate": 4.926686217008798e-06, "loss": 0.27671536803245544, "step": 530 }, { "epoch": 0.514161220043573, "grad_norm": 0.1736883968114853, "learning_rate": 4.916911045943305e-06, "loss": 0.29842981696128845, "step": 531 }, { "epoch": 0.5151295085935609, "grad_norm": 0.17682136595249176, "learning_rate": 4.907135874877811e-06, "loss": 0.28436222672462463, "step": 532 }, { "epoch": 0.5160977971435488, "grad_norm": 0.18292061984539032, "learning_rate": 4.8973607038123175e-06, "loss": 0.2722223401069641, "step": 533 }, { "epoch": 0.5170660856935366, "grad_norm": 0.1844838410615921, "learning_rate": 4.887585532746824e-06, "loss": 0.26570263504981995, "step": 534 }, { "epoch": 0.5180343742435246, "grad_norm": 0.18923698365688324, "learning_rate": 4.87781036168133e-06, "loss": 0.3637017607688904, "step": 535 }, { "epoch": 0.5190026627935125, "grad_norm": 0.16404788196086884, "learning_rate": 4.868035190615836e-06, "loss": 0.28690028190612793, "step": 536 }, { "epoch": 0.5199709513435004, "grad_norm": 0.1970244199037552, "learning_rate": 4.858260019550342e-06, "loss": 0.2881229519844055, "step": 537 }, { "epoch": 0.5209392398934882, "grad_norm": 0.1616058647632599, "learning_rate": 4.848484848484849e-06, "loss": 0.2817743122577667, "step": 538 }, { "epoch": 0.5219075284434762, "grad_norm": 0.18213775753974915, "learning_rate": 4.838709677419355e-06, "loss": 0.2646360695362091, "step": 539 }, { "epoch": 0.5228758169934641, "grad_norm": 0.1883658468723297, "learning_rate": 4.828934506353862e-06, "loss": 0.32929307222366333, "step": 540 }, { "epoch": 0.523844105543452, "grad_norm": 0.1898542195558548, "learning_rate": 4.819159335288368e-06, "loss": 0.27511003613471985, "step": 541 }, { "epoch": 0.5248123940934398, "grad_norm": 0.1817118525505066, "learning_rate": 4.8093841642228745e-06, "loss": 0.27474260330200195, "step": 542 }, { "epoch": 0.5257806826434277, "grad_norm": 0.19033664464950562, "learning_rate": 4.799608993157381e-06, "loss": 0.32937076687812805, "step": 543 }, { "epoch": 0.5267489711934157, "grad_norm": 0.18128858506679535, "learning_rate": 4.789833822091887e-06, "loss": 0.3000837564468384, "step": 544 }, { "epoch": 0.5277172597434036, "grad_norm": 0.18828479945659637, "learning_rate": 4.780058651026394e-06, "loss": 0.3411107063293457, "step": 545 }, { "epoch": 0.5286855482933914, "grad_norm": 0.21484431624412537, "learning_rate": 4.7702834799609e-06, "loss": 0.32155299186706543, "step": 546 }, { "epoch": 0.5296538368433793, "grad_norm": 0.19658254086971283, "learning_rate": 4.760508308895407e-06, "loss": 0.2874881327152252, "step": 547 }, { "epoch": 0.5306221253933672, "grad_norm": 0.19206486642360687, "learning_rate": 4.750733137829912e-06, "loss": 0.31940093636512756, "step": 548 }, { "epoch": 0.5315904139433552, "grad_norm": 0.2160305678844452, "learning_rate": 4.740957966764419e-06, "loss": 0.3076990246772766, "step": 549 }, { "epoch": 0.532558702493343, "grad_norm": 0.18269337713718414, "learning_rate": 4.731182795698925e-06, "loss": 0.27614516019821167, "step": 550 }, { "epoch": 0.5335269910433309, "grad_norm": 0.18244397640228271, "learning_rate": 4.7214076246334315e-06, "loss": 0.28050702810287476, "step": 551 }, { "epoch": 0.5344952795933188, "grad_norm": 0.16129615902900696, "learning_rate": 4.711632453567938e-06, "loss": 0.28339844942092896, "step": 552 }, { "epoch": 0.5354635681433068, "grad_norm": 0.1605842411518097, "learning_rate": 4.701857282502444e-06, "loss": 0.28020599484443665, "step": 553 }, { "epoch": 0.5364318566932946, "grad_norm": 0.17767396569252014, "learning_rate": 4.692082111436951e-06, "loss": 0.26483970880508423, "step": 554 }, { "epoch": 0.5374001452432825, "grad_norm": 0.17699919641017914, "learning_rate": 4.682306940371456e-06, "loss": 0.27966004610061646, "step": 555 }, { "epoch": 0.5383684337932704, "grad_norm": 0.19072790443897247, "learning_rate": 4.672531769305963e-06, "loss": 0.282270222902298, "step": 556 }, { "epoch": 0.5393367223432582, "grad_norm": 0.1869659274816513, "learning_rate": 4.662756598240469e-06, "loss": 0.3432008624076843, "step": 557 }, { "epoch": 0.5403050108932462, "grad_norm": 0.18851327896118164, "learning_rate": 4.652981427174976e-06, "loss": 0.2940416932106018, "step": 558 }, { "epoch": 0.5412732994432341, "grad_norm": 0.20195099711418152, "learning_rate": 4.643206256109482e-06, "loss": 0.30535370111465454, "step": 559 }, { "epoch": 0.542241587993222, "grad_norm": 0.17963868379592896, "learning_rate": 4.6334310850439885e-06, "loss": 0.3085969388484955, "step": 560 }, { "epoch": 0.5432098765432098, "grad_norm": 0.170511856675148, "learning_rate": 4.623655913978495e-06, "loss": 0.3072543442249298, "step": 561 }, { "epoch": 0.5441781650931977, "grad_norm": 0.18112339079380035, "learning_rate": 4.613880742913001e-06, "loss": 0.3005993366241455, "step": 562 }, { "epoch": 0.5451464536431857, "grad_norm": 0.18734918534755707, "learning_rate": 4.604105571847508e-06, "loss": 0.2741018533706665, "step": 563 }, { "epoch": 0.5461147421931736, "grad_norm": 0.18844076991081238, "learning_rate": 4.594330400782014e-06, "loss": 0.27082327008247375, "step": 564 }, { "epoch": 0.5470830307431614, "grad_norm": 0.18848098814487457, "learning_rate": 4.58455522971652e-06, "loss": 0.2900712490081787, "step": 565 }, { "epoch": 0.5480513192931493, "grad_norm": 0.18217670917510986, "learning_rate": 4.574780058651026e-06, "loss": 0.2818305492401123, "step": 566 }, { "epoch": 0.5490196078431373, "grad_norm": 0.1847630739212036, "learning_rate": 4.565004887585533e-06, "loss": 0.3052092492580414, "step": 567 }, { "epoch": 0.5499878963931252, "grad_norm": 0.17965678870677948, "learning_rate": 4.555229716520039e-06, "loss": 0.37061765789985657, "step": 568 }, { "epoch": 0.550956184943113, "grad_norm": 0.182081401348114, "learning_rate": 4.5454545454545455e-06, "loss": 0.2812265157699585, "step": 569 }, { "epoch": 0.5519244734931009, "grad_norm": 0.1826234757900238, "learning_rate": 4.535679374389052e-06, "loss": 0.33616483211517334, "step": 570 }, { "epoch": 0.5528927620430888, "grad_norm": 0.18337081372737885, "learning_rate": 4.525904203323558e-06, "loss": 0.26936668157577515, "step": 571 }, { "epoch": 0.5538610505930768, "grad_norm": 0.19079728424549103, "learning_rate": 4.516129032258065e-06, "loss": 0.31582286953926086, "step": 572 }, { "epoch": 0.5548293391430646, "grad_norm": 0.19277691841125488, "learning_rate": 4.506353861192571e-06, "loss": 0.26570555567741394, "step": 573 }, { "epoch": 0.5557976276930525, "grad_norm": 0.1885417103767395, "learning_rate": 4.496578690127078e-06, "loss": 0.283278226852417, "step": 574 }, { "epoch": 0.5567659162430404, "grad_norm": 0.1837887167930603, "learning_rate": 4.486803519061584e-06, "loss": 0.2855049967765808, "step": 575 }, { "epoch": 0.5577342047930284, "grad_norm": 0.1967337280511856, "learning_rate": 4.4770283479960905e-06, "loss": 0.2932886481285095, "step": 576 }, { "epoch": 0.5587024933430162, "grad_norm": 0.17725642025470734, "learning_rate": 4.467253176930597e-06, "loss": 0.27526989579200745, "step": 577 }, { "epoch": 0.5596707818930041, "grad_norm": 0.17137347161769867, "learning_rate": 4.4574780058651025e-06, "loss": 0.3213641047477722, "step": 578 }, { "epoch": 0.560639070442992, "grad_norm": 0.21623080968856812, "learning_rate": 4.447702834799609e-06, "loss": 0.30579251050949097, "step": 579 }, { "epoch": 0.5616073589929799, "grad_norm": 0.17714564502239227, "learning_rate": 4.437927663734115e-06, "loss": 0.27001863718032837, "step": 580 }, { "epoch": 0.5625756475429678, "grad_norm": 0.19795329868793488, "learning_rate": 4.428152492668622e-06, "loss": 0.3162938356399536, "step": 581 }, { "epoch": 0.5635439360929557, "grad_norm": 0.16567392647266388, "learning_rate": 4.418377321603128e-06, "loss": 0.27828705310821533, "step": 582 }, { "epoch": 0.5645122246429436, "grad_norm": 0.19157780706882477, "learning_rate": 4.408602150537635e-06, "loss": 0.26456013321876526, "step": 583 }, { "epoch": 0.5654805131929315, "grad_norm": 0.18285039067268372, "learning_rate": 4.398826979472141e-06, "loss": 0.27962782979011536, "step": 584 }, { "epoch": 0.5664488017429193, "grad_norm": 0.18198364973068237, "learning_rate": 4.3890518084066475e-06, "loss": 0.32034292817115784, "step": 585 }, { "epoch": 0.5674170902929073, "grad_norm": 0.189778670668602, "learning_rate": 4.379276637341154e-06, "loss": 0.27116918563842773, "step": 586 }, { "epoch": 0.5683853788428952, "grad_norm": 0.19017699360847473, "learning_rate": 4.36950146627566e-06, "loss": 0.28804683685302734, "step": 587 }, { "epoch": 0.5693536673928831, "grad_norm": 0.1705840528011322, "learning_rate": 4.359726295210167e-06, "loss": 0.3060193657875061, "step": 588 }, { "epoch": 0.5703219559428709, "grad_norm": 0.22186465561389923, "learning_rate": 4.349951124144673e-06, "loss": 0.26255226135253906, "step": 589 }, { "epoch": 0.5712902444928589, "grad_norm": 0.16935674846172333, "learning_rate": 4.34017595307918e-06, "loss": 0.25682443380355835, "step": 590 }, { "epoch": 0.5722585330428468, "grad_norm": 0.2110513299703598, "learning_rate": 4.330400782013685e-06, "loss": 0.3172002136707306, "step": 591 }, { "epoch": 0.5732268215928347, "grad_norm": 0.17660263180732727, "learning_rate": 4.320625610948192e-06, "loss": 0.2504763603210449, "step": 592 }, { "epoch": 0.5741951101428225, "grad_norm": 0.1752292513847351, "learning_rate": 4.310850439882698e-06, "loss": 0.28053516149520874, "step": 593 }, { "epoch": 0.5751633986928104, "grad_norm": 0.17443427443504333, "learning_rate": 4.3010752688172045e-06, "loss": 0.27481114864349365, "step": 594 }, { "epoch": 0.5761316872427984, "grad_norm": 0.20570909976959229, "learning_rate": 4.291300097751711e-06, "loss": 0.32052427530288696, "step": 595 }, { "epoch": 0.5770999757927863, "grad_norm": 0.17960628867149353, "learning_rate": 4.281524926686217e-06, "loss": 0.30593350529670715, "step": 596 }, { "epoch": 0.5780682643427741, "grad_norm": 0.20899339020252228, "learning_rate": 4.271749755620724e-06, "loss": 0.3231653571128845, "step": 597 }, { "epoch": 0.579036552892762, "grad_norm": 0.17927585542201996, "learning_rate": 4.26197458455523e-06, "loss": 0.23228108882904053, "step": 598 }, { "epoch": 0.5800048414427499, "grad_norm": 0.19766579568386078, "learning_rate": 4.252199413489737e-06, "loss": 0.3512587547302246, "step": 599 }, { "epoch": 0.5809731299927379, "grad_norm": 0.2258554995059967, "learning_rate": 4.242424242424243e-06, "loss": 0.29843974113464355, "step": 600 }, { "epoch": 0.5819414185427257, "grad_norm": 0.19223785400390625, "learning_rate": 4.2326490713587495e-06, "loss": 0.27962884306907654, "step": 601 }, { "epoch": 0.5829097070927136, "grad_norm": 0.17844106256961823, "learning_rate": 4.222873900293256e-06, "loss": 0.27644073963165283, "step": 602 }, { "epoch": 0.5838779956427015, "grad_norm": 0.1867385059595108, "learning_rate": 4.213098729227762e-06, "loss": 0.27366524934768677, "step": 603 }, { "epoch": 0.5848462841926895, "grad_norm": 0.17379915714263916, "learning_rate": 4.203323558162268e-06, "loss": 0.30100804567337036, "step": 604 }, { "epoch": 0.5858145727426773, "grad_norm": 0.1838119775056839, "learning_rate": 4.193548387096774e-06, "loss": 0.3351133167743683, "step": 605 }, { "epoch": 0.5867828612926652, "grad_norm": 0.19593499600887299, "learning_rate": 4.183773216031281e-06, "loss": 0.28100982308387756, "step": 606 }, { "epoch": 0.5877511498426531, "grad_norm": 0.16322395205497742, "learning_rate": 4.173998044965787e-06, "loss": 0.26457294821739197, "step": 607 }, { "epoch": 0.588719438392641, "grad_norm": 0.1786675602197647, "learning_rate": 4.164222873900294e-06, "loss": 0.2559005618095398, "step": 608 }, { "epoch": 0.5896877269426289, "grad_norm": 0.19520226120948792, "learning_rate": 4.1544477028348e-06, "loss": 0.2999897003173828, "step": 609 }, { "epoch": 0.5906560154926168, "grad_norm": 0.17103256285190582, "learning_rate": 4.1446725317693065e-06, "loss": 0.30779922008514404, "step": 610 }, { "epoch": 0.5916243040426047, "grad_norm": 0.17526350915431976, "learning_rate": 4.134897360703813e-06, "loss": 0.29173529148101807, "step": 611 }, { "epoch": 0.5925925925925926, "grad_norm": 0.18206097185611725, "learning_rate": 4.125122189638319e-06, "loss": 0.29199522733688354, "step": 612 }, { "epoch": 0.5935608811425805, "grad_norm": 0.1679670661687851, "learning_rate": 4.115347018572826e-06, "loss": 0.25542762875556946, "step": 613 }, { "epoch": 0.5945291696925684, "grad_norm": 0.19803665578365326, "learning_rate": 4.105571847507332e-06, "loss": 0.2858905792236328, "step": 614 }, { "epoch": 0.5954974582425563, "grad_norm": 0.17995841801166534, "learning_rate": 4.095796676441839e-06, "loss": 0.27671483159065247, "step": 615 }, { "epoch": 0.5964657467925442, "grad_norm": 0.18616031110286713, "learning_rate": 4.086021505376344e-06, "loss": 0.2712816596031189, "step": 616 }, { "epoch": 0.597434035342532, "grad_norm": 0.19008490443229675, "learning_rate": 4.076246334310851e-06, "loss": 0.2625333368778229, "step": 617 }, { "epoch": 0.59840232389252, "grad_norm": 0.1998487263917923, "learning_rate": 4.066471163245357e-06, "loss": 0.28343838453292847, "step": 618 }, { "epoch": 0.5993706124425079, "grad_norm": 0.17429369688034058, "learning_rate": 4.0566959921798636e-06, "loss": 0.2731628715991974, "step": 619 }, { "epoch": 0.6003389009924958, "grad_norm": 0.19498169422149658, "learning_rate": 4.04692082111437e-06, "loss": 0.29789942502975464, "step": 620 }, { "epoch": 0.6013071895424836, "grad_norm": 0.178371399641037, "learning_rate": 4.0371456500488756e-06, "loss": 0.28699758648872375, "step": 621 }, { "epoch": 0.6022754780924715, "grad_norm": 0.1959543526172638, "learning_rate": 4.027370478983382e-06, "loss": 0.32473817467689514, "step": 622 }, { "epoch": 0.6032437666424595, "grad_norm": 0.18459352850914001, "learning_rate": 4.017595307917888e-06, "loss": 0.2685423493385315, "step": 623 }, { "epoch": 0.6042120551924474, "grad_norm": 0.18294654786586761, "learning_rate": 4.007820136852395e-06, "loss": 0.28354576230049133, "step": 624 }, { "epoch": 0.6051803437424352, "grad_norm": 0.19509679079055786, "learning_rate": 3.998044965786901e-06, "loss": 0.30655306577682495, "step": 625 }, { "epoch": 0.6061486322924231, "grad_norm": 0.18222194910049438, "learning_rate": 3.988269794721408e-06, "loss": 0.26319050788879395, "step": 626 }, { "epoch": 0.6071169208424111, "grad_norm": 0.21766740083694458, "learning_rate": 3.978494623655914e-06, "loss": 0.29476338624954224, "step": 627 }, { "epoch": 0.608085209392399, "grad_norm": 0.1838199496269226, "learning_rate": 3.9687194525904206e-06, "loss": 0.313698947429657, "step": 628 }, { "epoch": 0.6090534979423868, "grad_norm": 0.18570809066295624, "learning_rate": 3.958944281524927e-06, "loss": 0.3509555160999298, "step": 629 }, { "epoch": 0.6100217864923747, "grad_norm": 0.19644515216350555, "learning_rate": 3.949169110459433e-06, "loss": 0.2718711197376251, "step": 630 }, { "epoch": 0.6109900750423626, "grad_norm": 0.1909233182668686, "learning_rate": 3.93939393939394e-06, "loss": 0.3205246925354004, "step": 631 }, { "epoch": 0.6119583635923506, "grad_norm": 0.18373022973537445, "learning_rate": 3.929618768328446e-06, "loss": 0.295777827501297, "step": 632 }, { "epoch": 0.6129266521423384, "grad_norm": 0.18277910351753235, "learning_rate": 3.919843597262952e-06, "loss": 0.3180069625377655, "step": 633 }, { "epoch": 0.6138949406923263, "grad_norm": 0.19421808421611786, "learning_rate": 3.910068426197458e-06, "loss": 0.2791898250579834, "step": 634 }, { "epoch": 0.6148632292423142, "grad_norm": 0.17601901292800903, "learning_rate": 3.900293255131965e-06, "loss": 0.26764553785324097, "step": 635 }, { "epoch": 0.615831517792302, "grad_norm": 0.1744976043701172, "learning_rate": 3.890518084066471e-06, "loss": 0.307162344455719, "step": 636 }, { "epoch": 0.61679980634229, "grad_norm": 0.1944838911294937, "learning_rate": 3.8807429130009776e-06, "loss": 0.2940749228000641, "step": 637 }, { "epoch": 0.6177680948922779, "grad_norm": 0.29076093435287476, "learning_rate": 3.870967741935484e-06, "loss": 0.32644060254096985, "step": 638 }, { "epoch": 0.6187363834422658, "grad_norm": 0.18829455971717834, "learning_rate": 3.8611925708699904e-06, "loss": 0.28472450375556946, "step": 639 }, { "epoch": 0.6197046719922537, "grad_norm": 0.1949450969696045, "learning_rate": 3.851417399804497e-06, "loss": 0.2577253580093384, "step": 640 }, { "epoch": 0.6206729605422416, "grad_norm": 0.1973968893289566, "learning_rate": 3.841642228739003e-06, "loss": 0.28368428349494934, "step": 641 }, { "epoch": 0.6216412490922295, "grad_norm": 0.1733219027519226, "learning_rate": 3.83186705767351e-06, "loss": 0.26086172461509705, "step": 642 }, { "epoch": 0.6226095376422174, "grad_norm": 0.20539860427379608, "learning_rate": 3.822091886608016e-06, "loss": 0.3593149483203888, "step": 643 }, { "epoch": 0.6235778261922053, "grad_norm": 0.18563023209571838, "learning_rate": 3.812316715542522e-06, "loss": 0.3003098964691162, "step": 644 }, { "epoch": 0.6245461147421931, "grad_norm": 0.19810666143894196, "learning_rate": 3.8025415444770286e-06, "loss": 0.2925172448158264, "step": 645 }, { "epoch": 0.6255144032921811, "grad_norm": 0.2321307510137558, "learning_rate": 3.792766373411535e-06, "loss": 0.25980299711227417, "step": 646 }, { "epoch": 0.626482691842169, "grad_norm": 0.16675977408885956, "learning_rate": 3.7829912023460414e-06, "loss": 0.258143812417984, "step": 647 }, { "epoch": 0.6274509803921569, "grad_norm": 0.18522602319717407, "learning_rate": 3.773216031280548e-06, "loss": 0.3249315619468689, "step": 648 }, { "epoch": 0.6284192689421447, "grad_norm": 0.17373818159103394, "learning_rate": 3.763440860215054e-06, "loss": 0.289806991815567, "step": 649 }, { "epoch": 0.6293875574921327, "grad_norm": 0.18944744765758514, "learning_rate": 3.7536656891495603e-06, "loss": 0.30416756868362427, "step": 650 }, { "epoch": 0.6303558460421206, "grad_norm": 0.19680985808372498, "learning_rate": 3.7438905180840667e-06, "loss": 0.28972989320755005, "step": 651 }, { "epoch": 0.6313241345921085, "grad_norm": 0.2205217033624649, "learning_rate": 3.734115347018573e-06, "loss": 0.28554368019104004, "step": 652 }, { "epoch": 0.6322924231420963, "grad_norm": 0.172973170876503, "learning_rate": 3.7243401759530796e-06, "loss": 0.331814169883728, "step": 653 }, { "epoch": 0.6332607116920842, "grad_norm": 0.1913972645998001, "learning_rate": 3.714565004887586e-06, "loss": 0.27782005071640015, "step": 654 }, { "epoch": 0.6342290002420722, "grad_norm": 0.19561362266540527, "learning_rate": 3.7047898338220924e-06, "loss": 0.3030650019645691, "step": 655 }, { "epoch": 0.63519728879206, "grad_norm": 0.19253604114055634, "learning_rate": 3.6950146627565984e-06, "loss": 0.29422512650489807, "step": 656 }, { "epoch": 0.6361655773420479, "grad_norm": 0.19124586880207062, "learning_rate": 3.685239491691105e-06, "loss": 0.26767367124557495, "step": 657 }, { "epoch": 0.6371338658920358, "grad_norm": 0.2221280336380005, "learning_rate": 3.6754643206256113e-06, "loss": 0.3479483723640442, "step": 658 }, { "epoch": 0.6381021544420237, "grad_norm": 0.20241160690784454, "learning_rate": 3.6656891495601177e-06, "loss": 0.2787402868270874, "step": 659 }, { "epoch": 0.6390704429920117, "grad_norm": 0.19073940813541412, "learning_rate": 3.655913978494624e-06, "loss": 0.29317712783813477, "step": 660 }, { "epoch": 0.6400387315419995, "grad_norm": 0.20870280265808105, "learning_rate": 3.6461388074291306e-06, "loss": 0.3079635202884674, "step": 661 }, { "epoch": 0.6410070200919874, "grad_norm": 0.18194538354873657, "learning_rate": 3.6363636363636366e-06, "loss": 0.26036518812179565, "step": 662 }, { "epoch": 0.6419753086419753, "grad_norm": 0.19380781054496765, "learning_rate": 3.626588465298143e-06, "loss": 0.32105469703674316, "step": 663 }, { "epoch": 0.6429435971919633, "grad_norm": 0.18779927492141724, "learning_rate": 3.6168132942326494e-06, "loss": 0.23958516120910645, "step": 664 }, { "epoch": 0.6439118857419511, "grad_norm": 0.16800741851329803, "learning_rate": 3.607038123167156e-06, "loss": 0.3183926045894623, "step": 665 }, { "epoch": 0.644880174291939, "grad_norm": 0.18218325078487396, "learning_rate": 3.5972629521016623e-06, "loss": 0.36072227358818054, "step": 666 }, { "epoch": 0.6458484628419269, "grad_norm": 0.1973208338022232, "learning_rate": 3.5874877810361687e-06, "loss": 0.31081509590148926, "step": 667 }, { "epoch": 0.6468167513919147, "grad_norm": 0.17719313502311707, "learning_rate": 3.5777126099706747e-06, "loss": 0.3088850677013397, "step": 668 }, { "epoch": 0.6477850399419027, "grad_norm": 0.22201496362686157, "learning_rate": 3.567937438905181e-06, "loss": 0.2832217812538147, "step": 669 }, { "epoch": 0.6487533284918906, "grad_norm": 0.2052207589149475, "learning_rate": 3.5581622678396876e-06, "loss": 0.2777295708656311, "step": 670 }, { "epoch": 0.6497216170418785, "grad_norm": 0.17530739307403564, "learning_rate": 3.548387096774194e-06, "loss": 0.3057093620300293, "step": 671 }, { "epoch": 0.6506899055918663, "grad_norm": 0.20253078639507294, "learning_rate": 3.5386119257087004e-06, "loss": 0.2525123059749603, "step": 672 }, { "epoch": 0.6516581941418542, "grad_norm": 0.19099098443984985, "learning_rate": 3.528836754643207e-06, "loss": 0.26486071944236755, "step": 673 }, { "epoch": 0.6526264826918422, "grad_norm": 0.19429947435855865, "learning_rate": 3.5190615835777133e-06, "loss": 0.27915486693382263, "step": 674 }, { "epoch": 0.6535947712418301, "grad_norm": 0.19641940295696259, "learning_rate": 3.5092864125122193e-06, "loss": 0.2952028214931488, "step": 675 }, { "epoch": 0.654563059791818, "grad_norm": 0.18606482446193695, "learning_rate": 3.4995112414467257e-06, "loss": 0.26710712909698486, "step": 676 }, { "epoch": 0.6555313483418058, "grad_norm": 0.18616363406181335, "learning_rate": 3.489736070381232e-06, "loss": 0.2896000146865845, "step": 677 }, { "epoch": 0.6564996368917938, "grad_norm": 0.18305549025535583, "learning_rate": 3.4799608993157386e-06, "loss": 0.27804529666900635, "step": 678 }, { "epoch": 0.6574679254417817, "grad_norm": 0.19162502884864807, "learning_rate": 3.470185728250245e-06, "loss": 0.3180793821811676, "step": 679 }, { "epoch": 0.6584362139917695, "grad_norm": 0.17288638651371002, "learning_rate": 3.4604105571847514e-06, "loss": 0.27254006266593933, "step": 680 }, { "epoch": 0.6594045025417574, "grad_norm": 0.20115594565868378, "learning_rate": 3.4506353861192575e-06, "loss": 0.326080858707428, "step": 681 }, { "epoch": 0.6603727910917453, "grad_norm": 0.20309938490390778, "learning_rate": 3.440860215053764e-06, "loss": 0.29796141386032104, "step": 682 }, { "epoch": 0.6613410796417333, "grad_norm": 0.20176127552986145, "learning_rate": 3.43108504398827e-06, "loss": 0.2814856469631195, "step": 683 }, { "epoch": 0.6623093681917211, "grad_norm": 0.21620069444179535, "learning_rate": 3.4213098729227763e-06, "loss": 0.36335426568984985, "step": 684 }, { "epoch": 0.663277656741709, "grad_norm": 0.20982684195041656, "learning_rate": 3.4115347018572823e-06, "loss": 0.2819657027721405, "step": 685 }, { "epoch": 0.6642459452916969, "grad_norm": 0.18432947993278503, "learning_rate": 3.4017595307917887e-06, "loss": 0.32050448656082153, "step": 686 }, { "epoch": 0.6652142338416849, "grad_norm": 0.17828144133090973, "learning_rate": 3.391984359726295e-06, "loss": 0.3236311376094818, "step": 687 }, { "epoch": 0.6661825223916727, "grad_norm": 0.1964399665594101, "learning_rate": 3.3822091886608016e-06, "loss": 0.32314908504486084, "step": 688 }, { "epoch": 0.6671508109416606, "grad_norm": 0.19078870117664337, "learning_rate": 3.372434017595308e-06, "loss": 0.27393656969070435, "step": 689 }, { "epoch": 0.6681190994916485, "grad_norm": 0.19160780310630798, "learning_rate": 3.3626588465298145e-06, "loss": 0.3088667690753937, "step": 690 }, { "epoch": 0.6690873880416364, "grad_norm": 0.18718208372592926, "learning_rate": 3.352883675464321e-06, "loss": 0.28694334626197815, "step": 691 }, { "epoch": 0.6700556765916243, "grad_norm": 0.19036638736724854, "learning_rate": 3.343108504398827e-06, "loss": 0.2764681279659271, "step": 692 }, { "epoch": 0.6710239651416122, "grad_norm": 0.17227678000926971, "learning_rate": 3.3333333333333333e-06, "loss": 0.2784879207611084, "step": 693 }, { "epoch": 0.6719922536916001, "grad_norm": 0.20473547279834747, "learning_rate": 3.3235581622678398e-06, "loss": 0.2824912667274475, "step": 694 }, { "epoch": 0.672960542241588, "grad_norm": 0.1921864002943039, "learning_rate": 3.313782991202346e-06, "loss": 0.2795690894126892, "step": 695 }, { "epoch": 0.6739288307915758, "grad_norm": 0.2057105302810669, "learning_rate": 3.3040078201368526e-06, "loss": 0.27492284774780273, "step": 696 }, { "epoch": 0.6748971193415638, "grad_norm": 0.2041766345500946, "learning_rate": 3.294232649071359e-06, "loss": 0.30277037620544434, "step": 697 }, { "epoch": 0.6758654078915517, "grad_norm": 0.19042398035526276, "learning_rate": 3.284457478005865e-06, "loss": 0.31011852622032166, "step": 698 }, { "epoch": 0.6768336964415396, "grad_norm": 0.18352696299552917, "learning_rate": 3.2746823069403715e-06, "loss": 0.28382012248039246, "step": 699 }, { "epoch": 0.6778019849915274, "grad_norm": 0.2007741928100586, "learning_rate": 3.264907135874878e-06, "loss": 0.28195974230766296, "step": 700 }, { "epoch": 0.6787702735415154, "grad_norm": 0.20310088992118835, "learning_rate": 3.2551319648093843e-06, "loss": 0.2988584637641907, "step": 701 }, { "epoch": 0.6797385620915033, "grad_norm": 0.20353393256664276, "learning_rate": 3.2453567937438908e-06, "loss": 0.24649690091609955, "step": 702 }, { "epoch": 0.6807068506414912, "grad_norm": 0.1926201432943344, "learning_rate": 3.235581622678397e-06, "loss": 0.2895974814891815, "step": 703 }, { "epoch": 0.681675139191479, "grad_norm": 0.19565631449222565, "learning_rate": 3.225806451612903e-06, "loss": 0.2735288441181183, "step": 704 }, { "epoch": 0.6826434277414669, "grad_norm": 0.20555929839611053, "learning_rate": 3.2160312805474096e-06, "loss": 0.2749082148075104, "step": 705 }, { "epoch": 0.6836117162914549, "grad_norm": 0.19519391655921936, "learning_rate": 3.206256109481916e-06, "loss": 0.35463032126426697, "step": 706 }, { "epoch": 0.6845800048414428, "grad_norm": 0.19124329090118408, "learning_rate": 3.1964809384164225e-06, "loss": 0.2960769832134247, "step": 707 }, { "epoch": 0.6855482933914306, "grad_norm": 0.19353725016117096, "learning_rate": 3.186705767350929e-06, "loss": 0.29588258266448975, "step": 708 }, { "epoch": 0.6865165819414185, "grad_norm": 0.1908576339483261, "learning_rate": 3.1769305962854353e-06, "loss": 0.32410839200019836, "step": 709 }, { "epoch": 0.6874848704914064, "grad_norm": 0.19978390634059906, "learning_rate": 3.1671554252199418e-06, "loss": 0.26154428720474243, "step": 710 }, { "epoch": 0.6884531590413944, "grad_norm": 0.17735745012760162, "learning_rate": 3.1573802541544478e-06, "loss": 0.2741011083126068, "step": 711 }, { "epoch": 0.6894214475913822, "grad_norm": 0.19261346757411957, "learning_rate": 3.147605083088954e-06, "loss": 0.29346680641174316, "step": 712 }, { "epoch": 0.6903897361413701, "grad_norm": 0.18815375864505768, "learning_rate": 3.1378299120234606e-06, "loss": 0.317450612783432, "step": 713 }, { "epoch": 0.691358024691358, "grad_norm": 0.1747797578573227, "learning_rate": 3.128054740957967e-06, "loss": 0.26710936427116394, "step": 714 }, { "epoch": 0.692326313241346, "grad_norm": 0.1850060522556305, "learning_rate": 3.1182795698924735e-06, "loss": 0.3440788984298706, "step": 715 }, { "epoch": 0.6932946017913338, "grad_norm": 0.19904842972755432, "learning_rate": 3.10850439882698e-06, "loss": 0.27237698435783386, "step": 716 }, { "epoch": 0.6942628903413217, "grad_norm": 0.19219987094402313, "learning_rate": 3.098729227761486e-06, "loss": 0.2665986716747284, "step": 717 }, { "epoch": 0.6952311788913096, "grad_norm": 0.1957559734582901, "learning_rate": 3.0889540566959923e-06, "loss": 0.28654614090919495, "step": 718 }, { "epoch": 0.6961994674412975, "grad_norm": 0.2007106989622116, "learning_rate": 3.0791788856304988e-06, "loss": 0.30569547414779663, "step": 719 }, { "epoch": 0.6971677559912854, "grad_norm": 0.21884313225746155, "learning_rate": 3.069403714565005e-06, "loss": 0.2851307690143585, "step": 720 }, { "epoch": 0.6981360445412733, "grad_norm": 0.18904490768909454, "learning_rate": 3.0596285434995116e-06, "loss": 0.32544124126434326, "step": 721 }, { "epoch": 0.6991043330912612, "grad_norm": 0.22827713191509247, "learning_rate": 3.049853372434018e-06, "loss": 0.2876453101634979, "step": 722 }, { "epoch": 0.7000726216412491, "grad_norm": 0.18982501327991486, "learning_rate": 3.0400782013685245e-06, "loss": 0.28896069526672363, "step": 723 }, { "epoch": 0.701040910191237, "grad_norm": 0.208974227309227, "learning_rate": 3.0303030303030305e-06, "loss": 0.26989954710006714, "step": 724 }, { "epoch": 0.7020091987412249, "grad_norm": 0.19682757556438446, "learning_rate": 3.020527859237537e-06, "loss": 0.316387414932251, "step": 725 }, { "epoch": 0.7029774872912128, "grad_norm": 0.1741049438714981, "learning_rate": 3.0107526881720433e-06, "loss": 0.26443612575531006, "step": 726 }, { "epoch": 0.7039457758412007, "grad_norm": 0.2087400257587433, "learning_rate": 3.0009775171065498e-06, "loss": 0.2930486500263214, "step": 727 }, { "epoch": 0.7049140643911885, "grad_norm": 0.19682444632053375, "learning_rate": 2.991202346041056e-06, "loss": 0.2777274250984192, "step": 728 }, { "epoch": 0.7058823529411765, "grad_norm": 0.18029047548770905, "learning_rate": 2.9814271749755626e-06, "loss": 0.30682748556137085, "step": 729 }, { "epoch": 0.7068506414911644, "grad_norm": 0.21413344144821167, "learning_rate": 2.9716520039100686e-06, "loss": 0.2852901220321655, "step": 730 }, { "epoch": 0.7078189300411523, "grad_norm": 0.20641835033893585, "learning_rate": 2.961876832844575e-06, "loss": 0.30264589190483093, "step": 731 }, { "epoch": 0.7087872185911401, "grad_norm": 0.20583511888980865, "learning_rate": 2.9521016617790815e-06, "loss": 0.31246519088745117, "step": 732 }, { "epoch": 0.709755507141128, "grad_norm": 0.19352665543556213, "learning_rate": 2.942326490713588e-06, "loss": 0.25201672315597534, "step": 733 }, { "epoch": 0.710723795691116, "grad_norm": 0.19948013126850128, "learning_rate": 2.9325513196480943e-06, "loss": 0.2469996213912964, "step": 734 }, { "epoch": 0.7116920842411039, "grad_norm": 0.20024363696575165, "learning_rate": 2.9227761485826008e-06, "loss": 0.28980398178100586, "step": 735 }, { "epoch": 0.7126603727910917, "grad_norm": 0.19101053476333618, "learning_rate": 2.9130009775171068e-06, "loss": 0.27129659056663513, "step": 736 }, { "epoch": 0.7136286613410796, "grad_norm": 0.19807986915111542, "learning_rate": 2.903225806451613e-06, "loss": 0.2989445924758911, "step": 737 }, { "epoch": 0.7145969498910676, "grad_norm": 0.2047462910413742, "learning_rate": 2.8934506353861196e-06, "loss": 0.29249265789985657, "step": 738 }, { "epoch": 0.7155652384410555, "grad_norm": 0.21451207995414734, "learning_rate": 2.883675464320626e-06, "loss": 0.308368980884552, "step": 739 }, { "epoch": 0.7165335269910433, "grad_norm": 0.18969380855560303, "learning_rate": 2.8739002932551325e-06, "loss": 0.30544131994247437, "step": 740 }, { "epoch": 0.7175018155410312, "grad_norm": 0.21949923038482666, "learning_rate": 2.864125122189639e-06, "loss": 0.2871190011501312, "step": 741 }, { "epoch": 0.7184701040910191, "grad_norm": 0.18441982567310333, "learning_rate": 2.8543499511241454e-06, "loss": 0.34001511335372925, "step": 742 }, { "epoch": 0.7194383926410071, "grad_norm": 0.20495833456516266, "learning_rate": 2.8445747800586514e-06, "loss": 0.31153956055641174, "step": 743 }, { "epoch": 0.7204066811909949, "grad_norm": 0.17847374081611633, "learning_rate": 2.8347996089931578e-06, "loss": 0.2785325348377228, "step": 744 }, { "epoch": 0.7213749697409828, "grad_norm": 0.20845407247543335, "learning_rate": 2.8250244379276642e-06, "loss": 0.28710830211639404, "step": 745 }, { "epoch": 0.7223432582909707, "grad_norm": 0.20801788568496704, "learning_rate": 2.8152492668621706e-06, "loss": 0.2709939181804657, "step": 746 }, { "epoch": 0.7233115468409586, "grad_norm": 0.17509667575359344, "learning_rate": 2.8054740957966762e-06, "loss": 0.24158413708209991, "step": 747 }, { "epoch": 0.7242798353909465, "grad_norm": 0.2237170934677124, "learning_rate": 2.7956989247311827e-06, "loss": 0.26651033759117126, "step": 748 }, { "epoch": 0.7252481239409344, "grad_norm": 0.1964648962020874, "learning_rate": 2.785923753665689e-06, "loss": 0.26544153690338135, "step": 749 }, { "epoch": 0.7262164124909223, "grad_norm": 0.1828320175409317, "learning_rate": 2.7761485826001955e-06, "loss": 0.24963009357452393, "step": 750 }, { "epoch": 0.7271847010409102, "grad_norm": 0.17765893042087555, "learning_rate": 2.766373411534702e-06, "loss": 0.2530496418476105, "step": 751 }, { "epoch": 0.7281529895908981, "grad_norm": 0.17918957769870758, "learning_rate": 2.7565982404692084e-06, "loss": 0.2385520339012146, "step": 752 }, { "epoch": 0.729121278140886, "grad_norm": 0.1830013394355774, "learning_rate": 2.7468230694037144e-06, "loss": 0.26376885175704956, "step": 753 }, { "epoch": 0.7300895666908739, "grad_norm": 0.20502547919750214, "learning_rate": 2.737047898338221e-06, "loss": 0.2629661560058594, "step": 754 }, { "epoch": 0.7310578552408618, "grad_norm": 0.19126304984092712, "learning_rate": 2.7272727272727272e-06, "loss": 0.27432548999786377, "step": 755 }, { "epoch": 0.7320261437908496, "grad_norm": 0.1837206333875656, "learning_rate": 2.7174975562072337e-06, "loss": 0.2646147906780243, "step": 756 }, { "epoch": 0.7329944323408376, "grad_norm": 0.22238245606422424, "learning_rate": 2.70772238514174e-06, "loss": 0.29708367586135864, "step": 757 }, { "epoch": 0.7339627208908255, "grad_norm": 0.19030597805976868, "learning_rate": 2.6979472140762465e-06, "loss": 0.3007453680038452, "step": 758 }, { "epoch": 0.7349310094408134, "grad_norm": 0.18150079250335693, "learning_rate": 2.688172043010753e-06, "loss": 0.28624916076660156, "step": 759 }, { "epoch": 0.7358992979908012, "grad_norm": 0.21237732470035553, "learning_rate": 2.678396871945259e-06, "loss": 0.31297317147254944, "step": 760 }, { "epoch": 0.7368675865407892, "grad_norm": 0.2071557343006134, "learning_rate": 2.6686217008797654e-06, "loss": 0.25083449482917786, "step": 761 }, { "epoch": 0.7378358750907771, "grad_norm": 0.18313196301460266, "learning_rate": 2.658846529814272e-06, "loss": 0.28581753373146057, "step": 762 }, { "epoch": 0.738804163640765, "grad_norm": 0.20016784965991974, "learning_rate": 2.6490713587487782e-06, "loss": 0.28767916560173035, "step": 763 }, { "epoch": 0.7397724521907528, "grad_norm": 0.1874615103006363, "learning_rate": 2.6392961876832847e-06, "loss": 0.28244420886039734, "step": 764 }, { "epoch": 0.7407407407407407, "grad_norm": 0.21257996559143066, "learning_rate": 2.629521016617791e-06, "loss": 0.2639189064502716, "step": 765 }, { "epoch": 0.7417090292907287, "grad_norm": 0.21034327149391174, "learning_rate": 2.619745845552297e-06, "loss": 0.2773539125919342, "step": 766 }, { "epoch": 0.7426773178407166, "grad_norm": 0.21635524928569794, "learning_rate": 2.6099706744868035e-06, "loss": 0.283179372549057, "step": 767 }, { "epoch": 0.7436456063907044, "grad_norm": 0.19200022518634796, "learning_rate": 2.60019550342131e-06, "loss": 0.2603984475135803, "step": 768 }, { "epoch": 0.7446138949406923, "grad_norm": 0.20428141951560974, "learning_rate": 2.5904203323558164e-06, "loss": 0.3322230577468872, "step": 769 }, { "epoch": 0.7455821834906802, "grad_norm": 0.17995081841945648, "learning_rate": 2.580645161290323e-06, "loss": 0.26364511251449585, "step": 770 }, { "epoch": 0.7465504720406682, "grad_norm": 0.19678199291229248, "learning_rate": 2.5708699902248292e-06, "loss": 0.3625681698322296, "step": 771 }, { "epoch": 0.747518760590656, "grad_norm": 0.183084636926651, "learning_rate": 2.5610948191593352e-06, "loss": 0.2772168517112732, "step": 772 }, { "epoch": 0.7484870491406439, "grad_norm": 0.2048066258430481, "learning_rate": 2.5513196480938417e-06, "loss": 0.30713188648223877, "step": 773 }, { "epoch": 0.7494553376906318, "grad_norm": 0.21669703722000122, "learning_rate": 2.541544477028348e-06, "loss": 0.3376876413822174, "step": 774 }, { "epoch": 0.7504236262406198, "grad_norm": 0.16890452802181244, "learning_rate": 2.5317693059628545e-06, "loss": 0.28936320543289185, "step": 775 }, { "epoch": 0.7513919147906076, "grad_norm": 0.2113950401544571, "learning_rate": 2.521994134897361e-06, "loss": 0.3068625330924988, "step": 776 }, { "epoch": 0.7523602033405955, "grad_norm": 0.19548510015010834, "learning_rate": 2.5122189638318674e-06, "loss": 0.2764047384262085, "step": 777 }, { "epoch": 0.7533284918905834, "grad_norm": 0.19676341116428375, "learning_rate": 2.502443792766374e-06, "loss": 0.32238852977752686, "step": 778 }, { "epoch": 0.7542967804405712, "grad_norm": 0.20870518684387207, "learning_rate": 2.49266862170088e-06, "loss": 0.2966168224811554, "step": 779 }, { "epoch": 0.7552650689905592, "grad_norm": 0.19091863930225372, "learning_rate": 2.4828934506353862e-06, "loss": 0.260260671377182, "step": 780 }, { "epoch": 0.7562333575405471, "grad_norm": 0.18716365098953247, "learning_rate": 2.4731182795698927e-06, "loss": 0.2716587781906128, "step": 781 }, { "epoch": 0.757201646090535, "grad_norm": 0.19777894020080566, "learning_rate": 2.463343108504399e-06, "loss": 0.2737089693546295, "step": 782 }, { "epoch": 0.7581699346405228, "grad_norm": 0.1986621618270874, "learning_rate": 2.4535679374389055e-06, "loss": 0.27934715151786804, "step": 783 }, { "epoch": 0.7591382231905107, "grad_norm": 0.2001214176416397, "learning_rate": 2.443792766373412e-06, "loss": 0.29675161838531494, "step": 784 }, { "epoch": 0.7601065117404987, "grad_norm": 0.17941324412822723, "learning_rate": 2.434017595307918e-06, "loss": 0.2796166241168976, "step": 785 }, { "epoch": 0.7610748002904866, "grad_norm": 0.18563294410705566, "learning_rate": 2.4242424242424244e-06, "loss": 0.2594640851020813, "step": 786 }, { "epoch": 0.7620430888404744, "grad_norm": 0.1819997876882553, "learning_rate": 2.414467253176931e-06, "loss": 0.28631582856178284, "step": 787 }, { "epoch": 0.7630113773904623, "grad_norm": 0.2092135101556778, "learning_rate": 2.4046920821114372e-06, "loss": 0.29993587732315063, "step": 788 }, { "epoch": 0.7639796659404503, "grad_norm": 0.20817267894744873, "learning_rate": 2.3949169110459437e-06, "loss": 0.2964945435523987, "step": 789 }, { "epoch": 0.7649479544904382, "grad_norm": 0.18305228650569916, "learning_rate": 2.38514173998045e-06, "loss": 0.2470388114452362, "step": 790 }, { "epoch": 0.765916243040426, "grad_norm": 0.18974260985851288, "learning_rate": 2.375366568914956e-06, "loss": 0.26321178674697876, "step": 791 }, { "epoch": 0.7668845315904139, "grad_norm": 0.22661836445331573, "learning_rate": 2.3655913978494625e-06, "loss": 0.28920090198516846, "step": 792 }, { "epoch": 0.7678528201404018, "grad_norm": 0.21956227719783783, "learning_rate": 2.355816226783969e-06, "loss": 0.2883264422416687, "step": 793 }, { "epoch": 0.7688211086903898, "grad_norm": 0.21458660066127777, "learning_rate": 2.3460410557184754e-06, "loss": 0.3575912117958069, "step": 794 }, { "epoch": 0.7697893972403776, "grad_norm": 0.19066624343395233, "learning_rate": 2.3362658846529814e-06, "loss": 0.25565528869628906, "step": 795 }, { "epoch": 0.7707576857903655, "grad_norm": 0.19037111103534698, "learning_rate": 2.326490713587488e-06, "loss": 0.26588374376296997, "step": 796 }, { "epoch": 0.7717259743403534, "grad_norm": 0.1706329733133316, "learning_rate": 2.3167155425219943e-06, "loss": 0.2640436887741089, "step": 797 }, { "epoch": 0.7726942628903414, "grad_norm": 0.203688383102417, "learning_rate": 2.3069403714565007e-06, "loss": 0.272479772567749, "step": 798 }, { "epoch": 0.7736625514403292, "grad_norm": 0.21687336266040802, "learning_rate": 2.297165200391007e-06, "loss": 0.2606707811355591, "step": 799 }, { "epoch": 0.7746308399903171, "grad_norm": 0.18459083139896393, "learning_rate": 2.287390029325513e-06, "loss": 0.2953495979309082, "step": 800 }, { "epoch": 0.775599128540305, "grad_norm": 0.2097976803779602, "learning_rate": 2.2776148582600195e-06, "loss": 0.29703575372695923, "step": 801 }, { "epoch": 0.7765674170902929, "grad_norm": 0.20715487003326416, "learning_rate": 2.267839687194526e-06, "loss": 0.2804234027862549, "step": 802 }, { "epoch": 0.7775357056402808, "grad_norm": 0.21985439956188202, "learning_rate": 2.2580645161290324e-06, "loss": 0.29094335436820984, "step": 803 }, { "epoch": 0.7785039941902687, "grad_norm": 0.17857959866523743, "learning_rate": 2.248289345063539e-06, "loss": 0.2993057668209076, "step": 804 }, { "epoch": 0.7794722827402566, "grad_norm": 0.20267243683338165, "learning_rate": 2.2385141739980453e-06, "loss": 0.28471803665161133, "step": 805 }, { "epoch": 0.7804405712902445, "grad_norm": 0.18737877905368805, "learning_rate": 2.2287390029325513e-06, "loss": 0.27943700551986694, "step": 806 }, { "epoch": 0.7814088598402323, "grad_norm": 0.17687441408634186, "learning_rate": 2.2189638318670577e-06, "loss": 0.2751350402832031, "step": 807 }, { "epoch": 0.7823771483902203, "grad_norm": 0.20583491027355194, "learning_rate": 2.209188660801564e-06, "loss": 0.28236058354377747, "step": 808 }, { "epoch": 0.7833454369402082, "grad_norm": 0.22925525903701782, "learning_rate": 2.1994134897360705e-06, "loss": 0.2999430000782013, "step": 809 }, { "epoch": 0.7843137254901961, "grad_norm": 0.1996539980173111, "learning_rate": 2.189638318670577e-06, "loss": 0.29116010665893555, "step": 810 }, { "epoch": 0.7852820140401839, "grad_norm": 0.19890666007995605, "learning_rate": 2.1798631476050834e-06, "loss": 0.2903507947921753, "step": 811 }, { "epoch": 0.7862503025901719, "grad_norm": 0.1992999017238617, "learning_rate": 2.17008797653959e-06, "loss": 0.2690543532371521, "step": 812 }, { "epoch": 0.7872185911401598, "grad_norm": 0.1835276484489441, "learning_rate": 2.160312805474096e-06, "loss": 0.28388747572898865, "step": 813 }, { "epoch": 0.7881868796901477, "grad_norm": 0.236952006816864, "learning_rate": 2.1505376344086023e-06, "loss": 0.2714405953884125, "step": 814 }, { "epoch": 0.7891551682401355, "grad_norm": 0.19345760345458984, "learning_rate": 2.1407624633431087e-06, "loss": 0.2626250982284546, "step": 815 }, { "epoch": 0.7901234567901234, "grad_norm": 0.20259200036525726, "learning_rate": 2.130987292277615e-06, "loss": 0.29853078722953796, "step": 816 }, { "epoch": 0.7910917453401114, "grad_norm": 0.1846383810043335, "learning_rate": 2.1212121212121216e-06, "loss": 0.27077630162239075, "step": 817 }, { "epoch": 0.7920600338900993, "grad_norm": 0.21752354502677917, "learning_rate": 2.111436950146628e-06, "loss": 0.28987622261047363, "step": 818 }, { "epoch": 0.7930283224400871, "grad_norm": 0.18915565311908722, "learning_rate": 2.101661779081134e-06, "loss": 0.2888622581958771, "step": 819 }, { "epoch": 0.793996610990075, "grad_norm": 0.2110828459262848, "learning_rate": 2.0918866080156404e-06, "loss": 0.24480582773685455, "step": 820 }, { "epoch": 0.7949648995400629, "grad_norm": 0.19739995896816254, "learning_rate": 2.082111436950147e-06, "loss": 0.26558613777160645, "step": 821 }, { "epoch": 0.7959331880900509, "grad_norm": 0.17837020754814148, "learning_rate": 2.0723362658846533e-06, "loss": 0.2380271553993225, "step": 822 }, { "epoch": 0.7969014766400387, "grad_norm": 0.2132730782032013, "learning_rate": 2.0625610948191597e-06, "loss": 0.2731876075267792, "step": 823 }, { "epoch": 0.7978697651900266, "grad_norm": 0.18625319004058838, "learning_rate": 2.052785923753666e-06, "loss": 0.2940404415130615, "step": 824 }, { "epoch": 0.7988380537400145, "grad_norm": 0.18981625139713287, "learning_rate": 2.043010752688172e-06, "loss": 0.25833550095558167, "step": 825 }, { "epoch": 0.7998063422900025, "grad_norm": 0.19009682536125183, "learning_rate": 2.0332355816226786e-06, "loss": 0.26862984895706177, "step": 826 }, { "epoch": 0.8007746308399903, "grad_norm": 0.17396694421768188, "learning_rate": 2.023460410557185e-06, "loss": 0.2869129180908203, "step": 827 }, { "epoch": 0.8017429193899782, "grad_norm": 0.19141492247581482, "learning_rate": 2.013685239491691e-06, "loss": 0.32933974266052246, "step": 828 }, { "epoch": 0.8027112079399661, "grad_norm": 0.22585217654705048, "learning_rate": 2.0039100684261974e-06, "loss": 0.25727906823158264, "step": 829 }, { "epoch": 0.803679496489954, "grad_norm": 0.20204074680805206, "learning_rate": 1.994134897360704e-06, "loss": 0.28683584928512573, "step": 830 }, { "epoch": 0.8046477850399419, "grad_norm": 0.1816793978214264, "learning_rate": 1.9843597262952103e-06, "loss": 0.2783251702785492, "step": 831 }, { "epoch": 0.8056160735899298, "grad_norm": 0.19098123908042908, "learning_rate": 1.9745845552297167e-06, "loss": 0.28205838799476624, "step": 832 }, { "epoch": 0.8065843621399177, "grad_norm": 0.2102154642343521, "learning_rate": 1.964809384164223e-06, "loss": 0.32708585262298584, "step": 833 }, { "epoch": 0.8075526506899056, "grad_norm": 0.2377101480960846, "learning_rate": 1.955034213098729e-06, "loss": 0.3074392080307007, "step": 834 }, { "epoch": 0.8085209392398935, "grad_norm": 0.21340312063694, "learning_rate": 1.9452590420332356e-06, "loss": 0.28936126828193665, "step": 835 }, { "epoch": 0.8094892277898814, "grad_norm": 0.19761207699775696, "learning_rate": 1.935483870967742e-06, "loss": 0.30385932326316833, "step": 836 }, { "epoch": 0.8104575163398693, "grad_norm": 0.17896802723407745, "learning_rate": 1.9257086999022484e-06, "loss": 0.2657051682472229, "step": 837 }, { "epoch": 0.8114258048898572, "grad_norm": 0.19170638918876648, "learning_rate": 1.915933528836755e-06, "loss": 0.3132804036140442, "step": 838 }, { "epoch": 0.812394093439845, "grad_norm": 0.18938247859477997, "learning_rate": 1.906158357771261e-06, "loss": 0.260288804769516, "step": 839 }, { "epoch": 0.813362381989833, "grad_norm": 0.18173451721668243, "learning_rate": 1.8963831867057675e-06, "loss": 0.2886829078197479, "step": 840 }, { "epoch": 0.8143306705398209, "grad_norm": 0.1915765106678009, "learning_rate": 1.886608015640274e-06, "loss": 0.30934807658195496, "step": 841 }, { "epoch": 0.8152989590898088, "grad_norm": 0.2193581461906433, "learning_rate": 1.8768328445747801e-06, "loss": 0.29573243856430054, "step": 842 }, { "epoch": 0.8162672476397966, "grad_norm": 0.1817786544561386, "learning_rate": 1.8670576735092866e-06, "loss": 0.2668893337249756, "step": 843 }, { "epoch": 0.8172355361897845, "grad_norm": 0.19725021719932556, "learning_rate": 1.857282502443793e-06, "loss": 0.3286668062210083, "step": 844 }, { "epoch": 0.8182038247397725, "grad_norm": 0.20280499756336212, "learning_rate": 1.8475073313782992e-06, "loss": 0.26897329092025757, "step": 845 }, { "epoch": 0.8191721132897604, "grad_norm": 0.19977053999900818, "learning_rate": 1.8377321603128056e-06, "loss": 0.27279871702194214, "step": 846 }, { "epoch": 0.8201404018397482, "grad_norm": 0.19068841636180878, "learning_rate": 1.827956989247312e-06, "loss": 0.254513144493103, "step": 847 }, { "epoch": 0.8211086903897361, "grad_norm": 0.2015547901391983, "learning_rate": 1.8181818181818183e-06, "loss": 0.29649272561073303, "step": 848 }, { "epoch": 0.8220769789397241, "grad_norm": 0.18814009428024292, "learning_rate": 1.8084066471163247e-06, "loss": 0.2868715524673462, "step": 849 }, { "epoch": 0.823045267489712, "grad_norm": 0.19368094205856323, "learning_rate": 1.7986314760508311e-06, "loss": 0.2806050777435303, "step": 850 }, { "epoch": 0.8240135560396998, "grad_norm": 0.20298543572425842, "learning_rate": 1.7888563049853374e-06, "loss": 0.26234903931617737, "step": 851 }, { "epoch": 0.8249818445896877, "grad_norm": 0.1959095001220703, "learning_rate": 1.7790811339198438e-06, "loss": 0.28573155403137207, "step": 852 }, { "epoch": 0.8259501331396756, "grad_norm": 0.20691703259944916, "learning_rate": 1.7693059628543502e-06, "loss": 0.2719816565513611, "step": 853 }, { "epoch": 0.8269184216896636, "grad_norm": 0.21501125395298004, "learning_rate": 1.7595307917888567e-06, "loss": 0.29406917095184326, "step": 854 }, { "epoch": 0.8278867102396514, "grad_norm": 0.17245161533355713, "learning_rate": 1.7497556207233629e-06, "loss": 0.2694648206233978, "step": 855 }, { "epoch": 0.8288549987896393, "grad_norm": 0.18521907925605774, "learning_rate": 1.7399804496578693e-06, "loss": 0.2755904793739319, "step": 856 }, { "epoch": 0.8298232873396272, "grad_norm": 0.20708146691322327, "learning_rate": 1.7302052785923757e-06, "loss": 0.2739972472190857, "step": 857 }, { "epoch": 0.830791575889615, "grad_norm": 0.2165932059288025, "learning_rate": 1.720430107526882e-06, "loss": 0.30347809195518494, "step": 858 }, { "epoch": 0.831759864439603, "grad_norm": 0.2044944018125534, "learning_rate": 1.7106549364613882e-06, "loss": 0.30577352643013, "step": 859 }, { "epoch": 0.8327281529895909, "grad_norm": 0.23014850914478302, "learning_rate": 1.7008797653958944e-06, "loss": 0.2837938070297241, "step": 860 }, { "epoch": 0.8336964415395788, "grad_norm": 0.170841246843338, "learning_rate": 1.6911045943304008e-06, "loss": 0.27039510011672974, "step": 861 }, { "epoch": 0.8346647300895667, "grad_norm": 0.2066902071237564, "learning_rate": 1.6813294232649072e-06, "loss": 0.3122199773788452, "step": 862 }, { "epoch": 0.8356330186395546, "grad_norm": 0.21400435268878937, "learning_rate": 1.6715542521994134e-06, "loss": 0.2904992997646332, "step": 863 }, { "epoch": 0.8366013071895425, "grad_norm": 0.23855531215667725, "learning_rate": 1.6617790811339199e-06, "loss": 0.2858680486679077, "step": 864 }, { "epoch": 0.8375695957395304, "grad_norm": 0.20174764096736908, "learning_rate": 1.6520039100684263e-06, "loss": 0.2764103412628174, "step": 865 }, { "epoch": 0.8385378842895183, "grad_norm": 0.1859450787305832, "learning_rate": 1.6422287390029325e-06, "loss": 0.2620023488998413, "step": 866 }, { "epoch": 0.8395061728395061, "grad_norm": 0.18559077382087708, "learning_rate": 1.632453567937439e-06, "loss": 0.2956124544143677, "step": 867 }, { "epoch": 0.8404744613894941, "grad_norm": 0.1958460807800293, "learning_rate": 1.6226783968719454e-06, "loss": 0.24393334984779358, "step": 868 }, { "epoch": 0.841442749939482, "grad_norm": 0.20028391480445862, "learning_rate": 1.6129032258064516e-06, "loss": 0.2675096392631531, "step": 869 }, { "epoch": 0.8424110384894699, "grad_norm": 0.18042640388011932, "learning_rate": 1.603128054740958e-06, "loss": 0.2402784675359726, "step": 870 }, { "epoch": 0.8433793270394577, "grad_norm": 0.21275922656059265, "learning_rate": 1.5933528836754645e-06, "loss": 0.2840040922164917, "step": 871 }, { "epoch": 0.8443476155894457, "grad_norm": 0.19365417957305908, "learning_rate": 1.5835777126099709e-06, "loss": 0.28499388694763184, "step": 872 }, { "epoch": 0.8453159041394336, "grad_norm": 0.1794516146183014, "learning_rate": 1.573802541544477e-06, "loss": 0.24146252870559692, "step": 873 }, { "epoch": 0.8462841926894215, "grad_norm": 0.2163521647453308, "learning_rate": 1.5640273704789835e-06, "loss": 0.3129892349243164, "step": 874 }, { "epoch": 0.8472524812394093, "grad_norm": 0.1975439339876175, "learning_rate": 1.55425219941349e-06, "loss": 0.2796524167060852, "step": 875 }, { "epoch": 0.8482207697893972, "grad_norm": 0.2034914195537567, "learning_rate": 1.5444770283479962e-06, "loss": 0.279870867729187, "step": 876 }, { "epoch": 0.8491890583393852, "grad_norm": 0.19650639593601227, "learning_rate": 1.5347018572825026e-06, "loss": 0.2665901184082031, "step": 877 }, { "epoch": 0.8501573468893731, "grad_norm": 0.2097690999507904, "learning_rate": 1.524926686217009e-06, "loss": 0.27686014771461487, "step": 878 }, { "epoch": 0.8511256354393609, "grad_norm": 0.2037818878889084, "learning_rate": 1.5151515151515152e-06, "loss": 0.3026971220970154, "step": 879 }, { "epoch": 0.8520939239893488, "grad_norm": 0.20769764482975006, "learning_rate": 1.5053763440860217e-06, "loss": 0.27736592292785645, "step": 880 }, { "epoch": 0.8530622125393367, "grad_norm": 0.1871424913406372, "learning_rate": 1.495601173020528e-06, "loss": 0.2646699547767639, "step": 881 }, { "epoch": 0.8540305010893247, "grad_norm": 0.17274564504623413, "learning_rate": 1.4858260019550343e-06, "loss": 0.2835018038749695, "step": 882 }, { "epoch": 0.8549987896393125, "grad_norm": 0.19041228294372559, "learning_rate": 1.4760508308895407e-06, "loss": 0.2666222155094147, "step": 883 }, { "epoch": 0.8559670781893004, "grad_norm": 0.2032071202993393, "learning_rate": 1.4662756598240472e-06, "loss": 0.2845078706741333, "step": 884 }, { "epoch": 0.8569353667392883, "grad_norm": 0.19567905366420746, "learning_rate": 1.4565004887585534e-06, "loss": 0.2767939567565918, "step": 885 }, { "epoch": 0.8579036552892763, "grad_norm": 0.21539276838302612, "learning_rate": 1.4467253176930598e-06, "loss": 0.28917932510375977, "step": 886 }, { "epoch": 0.8588719438392641, "grad_norm": 0.19841663539409637, "learning_rate": 1.4369501466275662e-06, "loss": 0.2754652202129364, "step": 887 }, { "epoch": 0.859840232389252, "grad_norm": 0.1980779618024826, "learning_rate": 1.4271749755620727e-06, "loss": 0.24697673320770264, "step": 888 }, { "epoch": 0.8608085209392399, "grad_norm": 0.21110616624355316, "learning_rate": 1.4173998044965789e-06, "loss": 0.2851879894733429, "step": 889 }, { "epoch": 0.8617768094892277, "grad_norm": 0.19414329528808594, "learning_rate": 1.4076246334310853e-06, "loss": 0.25482916831970215, "step": 890 }, { "epoch": 0.8627450980392157, "grad_norm": 0.21367470920085907, "learning_rate": 1.3978494623655913e-06, "loss": 0.2666151821613312, "step": 891 }, { "epoch": 0.8637133865892036, "grad_norm": 0.1971525102853775, "learning_rate": 1.3880742913000978e-06, "loss": 0.28804174065589905, "step": 892 }, { "epoch": 0.8646816751391915, "grad_norm": 0.196051225066185, "learning_rate": 1.3782991202346042e-06, "loss": 0.2798953354358673, "step": 893 }, { "epoch": 0.8656499636891793, "grad_norm": 0.19818323850631714, "learning_rate": 1.3685239491691104e-06, "loss": 0.251752108335495, "step": 894 }, { "epoch": 0.8666182522391672, "grad_norm": 0.19199031591415405, "learning_rate": 1.3587487781036168e-06, "loss": 0.27647408843040466, "step": 895 }, { "epoch": 0.8675865407891552, "grad_norm": 0.22557084262371063, "learning_rate": 1.3489736070381233e-06, "loss": 0.3452335000038147, "step": 896 }, { "epoch": 0.8685548293391431, "grad_norm": 0.21375709772109985, "learning_rate": 1.3391984359726295e-06, "loss": 0.31028902530670166, "step": 897 }, { "epoch": 0.869523117889131, "grad_norm": 0.2083037942647934, "learning_rate": 1.329423264907136e-06, "loss": 0.27808475494384766, "step": 898 }, { "epoch": 0.8704914064391188, "grad_norm": 0.19114944338798523, "learning_rate": 1.3196480938416423e-06, "loss": 0.2660242021083832, "step": 899 }, { "epoch": 0.8714596949891068, "grad_norm": 0.2077726423740387, "learning_rate": 1.3098729227761485e-06, "loss": 0.3196616470813751, "step": 900 }, { "epoch": 0.8724279835390947, "grad_norm": 0.19043967127799988, "learning_rate": 1.300097751710655e-06, "loss": 0.2752097547054291, "step": 901 }, { "epoch": 0.8733962720890825, "grad_norm": 0.1956516057252884, "learning_rate": 1.2903225806451614e-06, "loss": 0.2782442271709442, "step": 902 }, { "epoch": 0.8743645606390704, "grad_norm": 0.21374346315860748, "learning_rate": 1.2805474095796676e-06, "loss": 0.3336328864097595, "step": 903 }, { "epoch": 0.8753328491890583, "grad_norm": 0.17390403151512146, "learning_rate": 1.270772238514174e-06, "loss": 0.28889116644859314, "step": 904 }, { "epoch": 0.8763011377390463, "grad_norm": 0.1946377009153366, "learning_rate": 1.2609970674486805e-06, "loss": 0.26131391525268555, "step": 905 }, { "epoch": 0.8772694262890341, "grad_norm": 0.19059988856315613, "learning_rate": 1.251221896383187e-06, "loss": 0.27641037106513977, "step": 906 }, { "epoch": 0.878237714839022, "grad_norm": 0.21638603508472443, "learning_rate": 1.2414467253176931e-06, "loss": 0.2549016773700714, "step": 907 }, { "epoch": 0.8792060033890099, "grad_norm": 0.18561683595180511, "learning_rate": 1.2316715542521995e-06, "loss": 0.2516704797744751, "step": 908 }, { "epoch": 0.8801742919389978, "grad_norm": 0.18754595518112183, "learning_rate": 1.221896383186706e-06, "loss": 0.24743372201919556, "step": 909 }, { "epoch": 0.8811425804889857, "grad_norm": 0.17627929151058197, "learning_rate": 1.2121212121212122e-06, "loss": 0.2443106323480606, "step": 910 }, { "epoch": 0.8821108690389736, "grad_norm": 0.21671797335147858, "learning_rate": 1.2023460410557186e-06, "loss": 0.2789687514305115, "step": 911 }, { "epoch": 0.8830791575889615, "grad_norm": 0.18491996824741364, "learning_rate": 1.192570869990225e-06, "loss": 0.280285507440567, "step": 912 }, { "epoch": 0.8840474461389494, "grad_norm": 0.19200359284877777, "learning_rate": 1.1827956989247313e-06, "loss": 0.3697912096977234, "step": 913 }, { "epoch": 0.8850157346889373, "grad_norm": 0.19149360060691833, "learning_rate": 1.1730205278592377e-06, "loss": 0.2533896565437317, "step": 914 }, { "epoch": 0.8859840232389252, "grad_norm": 0.1858339011669159, "learning_rate": 1.163245356793744e-06, "loss": 0.2724184989929199, "step": 915 }, { "epoch": 0.8869523117889131, "grad_norm": 0.18043696880340576, "learning_rate": 1.1534701857282503e-06, "loss": 0.3179680109024048, "step": 916 }, { "epoch": 0.887920600338901, "grad_norm": 0.2031916230916977, "learning_rate": 1.1436950146627566e-06, "loss": 0.2644922733306885, "step": 917 }, { "epoch": 0.8888888888888888, "grad_norm": 0.2100847363471985, "learning_rate": 1.133919843597263e-06, "loss": 0.2834533154964447, "step": 918 }, { "epoch": 0.8898571774388768, "grad_norm": 0.20932041108608246, "learning_rate": 1.1241446725317694e-06, "loss": 0.2602953314781189, "step": 919 }, { "epoch": 0.8908254659888647, "grad_norm": 0.1940714567899704, "learning_rate": 1.1143695014662756e-06, "loss": 0.29519274830818176, "step": 920 }, { "epoch": 0.8917937545388526, "grad_norm": 0.20699529349803925, "learning_rate": 1.104594330400782e-06, "loss": 0.2826448678970337, "step": 921 }, { "epoch": 0.8927620430888404, "grad_norm": 0.18003135919570923, "learning_rate": 1.0948191593352885e-06, "loss": 0.3036431670188904, "step": 922 }, { "epoch": 0.8937303316388284, "grad_norm": 0.2626630961894989, "learning_rate": 1.085043988269795e-06, "loss": 0.2694006860256195, "step": 923 }, { "epoch": 0.8946986201888163, "grad_norm": 0.21386921405792236, "learning_rate": 1.0752688172043011e-06, "loss": 0.2830575704574585, "step": 924 }, { "epoch": 0.8956669087388042, "grad_norm": 0.20465651154518127, "learning_rate": 1.0654936461388076e-06, "loss": 0.2928478419780731, "step": 925 }, { "epoch": 0.896635197288792, "grad_norm": 0.218974307179451, "learning_rate": 1.055718475073314e-06, "loss": 0.265733003616333, "step": 926 }, { "epoch": 0.8976034858387799, "grad_norm": 0.18097904324531555, "learning_rate": 1.0459433040078202e-06, "loss": 0.297993540763855, "step": 927 }, { "epoch": 0.8985717743887679, "grad_norm": 0.18121756613254547, "learning_rate": 1.0361681329423266e-06, "loss": 0.3206055760383606, "step": 928 }, { "epoch": 0.8995400629387558, "grad_norm": 0.18943090736865997, "learning_rate": 1.026392961876833e-06, "loss": 0.3015185594558716, "step": 929 }, { "epoch": 0.9005083514887436, "grad_norm": 0.19779494404792786, "learning_rate": 1.0166177908113393e-06, "loss": 0.27546051144599915, "step": 930 }, { "epoch": 0.9014766400387315, "grad_norm": 0.1858789324760437, "learning_rate": 1.0068426197458455e-06, "loss": 0.2784835994243622, "step": 931 }, { "epoch": 0.9024449285887194, "grad_norm": 0.19459734857082367, "learning_rate": 9.97067448680352e-07, "loss": 0.28571465611457825, "step": 932 }, { "epoch": 0.9034132171387074, "grad_norm": 0.18275073170661926, "learning_rate": 9.872922776148584e-07, "loss": 0.2614639401435852, "step": 933 }, { "epoch": 0.9043815056886952, "grad_norm": 0.19755122065544128, "learning_rate": 9.775171065493646e-07, "loss": 0.3016014099121094, "step": 934 }, { "epoch": 0.9053497942386831, "grad_norm": 0.21569618582725525, "learning_rate": 9.67741935483871e-07, "loss": 0.29818177223205566, "step": 935 }, { "epoch": 0.906318082788671, "grad_norm": 0.18675316870212555, "learning_rate": 9.579667644183774e-07, "loss": 0.3368891477584839, "step": 936 }, { "epoch": 0.907286371338659, "grad_norm": 0.19871239364147186, "learning_rate": 9.481915933528838e-07, "loss": 0.3153863549232483, "step": 937 }, { "epoch": 0.9082546598886468, "grad_norm": 0.22014066576957703, "learning_rate": 9.384164222873901e-07, "loss": 0.2810421884059906, "step": 938 }, { "epoch": 0.9092229484386347, "grad_norm": 0.19278523325920105, "learning_rate": 9.286412512218965e-07, "loss": 0.2553982138633728, "step": 939 }, { "epoch": 0.9101912369886226, "grad_norm": 0.20471501350402832, "learning_rate": 9.188660801564028e-07, "loss": 0.3324427902698517, "step": 940 }, { "epoch": 0.9111595255386105, "grad_norm": 0.19074149429798126, "learning_rate": 9.090909090909091e-07, "loss": 0.2935166656970978, "step": 941 }, { "epoch": 0.9121278140885984, "grad_norm": 0.19555461406707764, "learning_rate": 8.993157380254156e-07, "loss": 0.27848702669143677, "step": 942 }, { "epoch": 0.9130961026385863, "grad_norm": 0.1958128958940506, "learning_rate": 8.895405669599219e-07, "loss": 0.3214573860168457, "step": 943 }, { "epoch": 0.9140643911885742, "grad_norm": 0.20188724994659424, "learning_rate": 8.797653958944283e-07, "loss": 0.29266253113746643, "step": 944 }, { "epoch": 0.9150326797385621, "grad_norm": 0.2061896175146103, "learning_rate": 8.699902248289346e-07, "loss": 0.26876091957092285, "step": 945 }, { "epoch": 0.9160009682885499, "grad_norm": 0.18365229666233063, "learning_rate": 8.60215053763441e-07, "loss": 0.24429546296596527, "step": 946 }, { "epoch": 0.9169692568385379, "grad_norm": 0.2009628713130951, "learning_rate": 8.504398826979472e-07, "loss": 0.2813577651977539, "step": 947 }, { "epoch": 0.9179375453885258, "grad_norm": 0.21519120037555695, "learning_rate": 8.406647116324536e-07, "loss": 0.29421091079711914, "step": 948 }, { "epoch": 0.9189058339385137, "grad_norm": 0.19519393146038055, "learning_rate": 8.308895405669599e-07, "loss": 0.27097785472869873, "step": 949 }, { "epoch": 0.9198741224885015, "grad_norm": 0.18344323337078094, "learning_rate": 8.211143695014663e-07, "loss": 0.27933016419410706, "step": 950 }, { "epoch": 0.9208424110384895, "grad_norm": 0.18683570623397827, "learning_rate": 8.113391984359727e-07, "loss": 0.28024059534072876, "step": 951 }, { "epoch": 0.9218106995884774, "grad_norm": 0.2764555513858795, "learning_rate": 8.01564027370479e-07, "loss": 0.2519608438014984, "step": 952 }, { "epoch": 0.9227789881384653, "grad_norm": 0.20227362215518951, "learning_rate": 7.917888563049854e-07, "loss": 0.2634407877922058, "step": 953 }, { "epoch": 0.9237472766884531, "grad_norm": 0.20687641203403473, "learning_rate": 7.820136852394918e-07, "loss": 0.2730368375778198, "step": 954 }, { "epoch": 0.924715565238441, "grad_norm": 0.18547162413597107, "learning_rate": 7.722385141739981e-07, "loss": 0.26113927364349365, "step": 955 }, { "epoch": 0.925683853788429, "grad_norm": 0.1972709447145462, "learning_rate": 7.624633431085045e-07, "loss": 0.3210276663303375, "step": 956 }, { "epoch": 0.9266521423384169, "grad_norm": 0.22296936810016632, "learning_rate": 7.526881720430108e-07, "loss": 0.2896474301815033, "step": 957 }, { "epoch": 0.9276204308884047, "grad_norm": 0.1758430004119873, "learning_rate": 7.429130009775172e-07, "loss": 0.25095510482788086, "step": 958 }, { "epoch": 0.9285887194383926, "grad_norm": 0.20484335720539093, "learning_rate": 7.331378299120236e-07, "loss": 0.27182087302207947, "step": 959 }, { "epoch": 0.9295570079883806, "grad_norm": 0.18908201158046722, "learning_rate": 7.233626588465299e-07, "loss": 0.2869470417499542, "step": 960 }, { "epoch": 0.9305252965383685, "grad_norm": 0.20601920783519745, "learning_rate": 7.135874877810363e-07, "loss": 0.31839150190353394, "step": 961 }, { "epoch": 0.9314935850883563, "grad_norm": 0.2003796547651291, "learning_rate": 7.038123167155427e-07, "loss": 0.28072643280029297, "step": 962 }, { "epoch": 0.9324618736383442, "grad_norm": 0.21452200412750244, "learning_rate": 6.940371456500489e-07, "loss": 0.3070773780345917, "step": 963 }, { "epoch": 0.9334301621883321, "grad_norm": 0.20407654345035553, "learning_rate": 6.842619745845552e-07, "loss": 0.28470179438591003, "step": 964 }, { "epoch": 0.9343984507383201, "grad_norm": 0.21125538647174835, "learning_rate": 6.744868035190616e-07, "loss": 0.29014891386032104, "step": 965 }, { "epoch": 0.9353667392883079, "grad_norm": 0.18405841290950775, "learning_rate": 6.64711632453568e-07, "loss": 0.2623524069786072, "step": 966 }, { "epoch": 0.9363350278382958, "grad_norm": 0.2125682681798935, "learning_rate": 6.549364613880743e-07, "loss": 0.3087378144264221, "step": 967 }, { "epoch": 0.9373033163882837, "grad_norm": 0.20235757529735565, "learning_rate": 6.451612903225807e-07, "loss": 0.2936643660068512, "step": 968 }, { "epoch": 0.9382716049382716, "grad_norm": 0.1939656287431717, "learning_rate": 6.35386119257087e-07, "loss": 0.2780473828315735, "step": 969 }, { "epoch": 0.9392398934882595, "grad_norm": 0.20643159747123718, "learning_rate": 6.256109481915935e-07, "loss": 0.2650626003742218, "step": 970 }, { "epoch": 0.9402081820382474, "grad_norm": 0.1930253654718399, "learning_rate": 6.158357771260998e-07, "loss": 0.305324912071228, "step": 971 }, { "epoch": 0.9411764705882353, "grad_norm": 0.19949081540107727, "learning_rate": 6.060606060606061e-07, "loss": 0.27924615144729614, "step": 972 }, { "epoch": 0.9421447591382232, "grad_norm": 0.1923617720603943, "learning_rate": 5.962854349951125e-07, "loss": 0.33369550108909607, "step": 973 }, { "epoch": 0.9431130476882111, "grad_norm": 0.1924324929714203, "learning_rate": 5.865102639296188e-07, "loss": 0.2702648937702179, "step": 974 }, { "epoch": 0.944081336238199, "grad_norm": 0.189810648560524, "learning_rate": 5.767350928641252e-07, "loss": 0.2990330457687378, "step": 975 }, { "epoch": 0.9450496247881869, "grad_norm": 0.2015506476163864, "learning_rate": 5.669599217986315e-07, "loss": 0.30142831802368164, "step": 976 }, { "epoch": 0.9460179133381748, "grad_norm": 0.21465028822422028, "learning_rate": 5.571847507331378e-07, "loss": 0.2767145037651062, "step": 977 }, { "epoch": 0.9469862018881626, "grad_norm": 0.19279153645038605, "learning_rate": 5.474095796676442e-07, "loss": 0.24644437432289124, "step": 978 }, { "epoch": 0.9479544904381506, "grad_norm": 0.20867611467838287, "learning_rate": 5.376344086021506e-07, "loss": 0.27333688735961914, "step": 979 }, { "epoch": 0.9489227789881385, "grad_norm": 0.19024871289730072, "learning_rate": 5.27859237536657e-07, "loss": 0.2586132884025574, "step": 980 }, { "epoch": 0.9498910675381264, "grad_norm": 0.18523293733596802, "learning_rate": 5.180840664711633e-07, "loss": 0.2814341187477112, "step": 981 }, { "epoch": 0.9508593560881142, "grad_norm": 0.19874310493469238, "learning_rate": 5.083088954056696e-07, "loss": 0.27490949630737305, "step": 982 }, { "epoch": 0.9518276446381021, "grad_norm": 0.21202170848846436, "learning_rate": 4.98533724340176e-07, "loss": 0.2904297411441803, "step": 983 }, { "epoch": 0.9527959331880901, "grad_norm": 0.2094363272190094, "learning_rate": 4.887585532746823e-07, "loss": 0.27371150255203247, "step": 984 }, { "epoch": 0.953764221738078, "grad_norm": 0.18295787274837494, "learning_rate": 4.789833822091887e-07, "loss": 0.2708626985549927, "step": 985 }, { "epoch": 0.9547325102880658, "grad_norm": 0.2100997418165207, "learning_rate": 4.6920821114369504e-07, "loss": 0.26008886098861694, "step": 986 }, { "epoch": 0.9557007988380537, "grad_norm": 0.20343877375125885, "learning_rate": 4.594330400782014e-07, "loss": 0.2885707914829254, "step": 987 }, { "epoch": 0.9566690873880417, "grad_norm": 0.2062508761882782, "learning_rate": 4.496578690127078e-07, "loss": 0.2915845215320587, "step": 988 }, { "epoch": 0.9576373759380296, "grad_norm": 0.21393194794654846, "learning_rate": 4.3988269794721416e-07, "loss": 0.3045470714569092, "step": 989 }, { "epoch": 0.9586056644880174, "grad_norm": 0.20916247367858887, "learning_rate": 4.301075268817205e-07, "loss": 0.3206391930580139, "step": 990 }, { "epoch": 0.9595739530380053, "grad_norm": 0.20829743146896362, "learning_rate": 4.203323558162268e-07, "loss": 0.281288743019104, "step": 991 }, { "epoch": 0.9605422415879932, "grad_norm": 0.21254244446754456, "learning_rate": 4.1055718475073313e-07, "loss": 0.29028719663619995, "step": 992 }, { "epoch": 0.9615105301379812, "grad_norm": 0.20817913115024567, "learning_rate": 4.007820136852395e-07, "loss": 0.3223232626914978, "step": 993 }, { "epoch": 0.962478818687969, "grad_norm": 0.19397568702697754, "learning_rate": 3.910068426197459e-07, "loss": 0.2968447208404541, "step": 994 }, { "epoch": 0.9634471072379569, "grad_norm": 0.1994376927614212, "learning_rate": 3.8123167155425226e-07, "loss": 0.2874579131603241, "step": 995 }, { "epoch": 0.9644153957879448, "grad_norm": 0.20042456686496735, "learning_rate": 3.714565004887586e-07, "loss": 0.25470271706581116, "step": 996 }, { "epoch": 0.9653836843379328, "grad_norm": 0.21064911782741547, "learning_rate": 3.6168132942326495e-07, "loss": 0.25948402285575867, "step": 997 }, { "epoch": 0.9663519728879206, "grad_norm": 0.19920513033866882, "learning_rate": 3.5190615835777133e-07, "loss": 0.2682594358921051, "step": 998 }, { "epoch": 0.9673202614379085, "grad_norm": 0.1974617838859558, "learning_rate": 3.421309872922776e-07, "loss": 0.2706855833530426, "step": 999 }, { "epoch": 0.9682885499878964, "grad_norm": 0.19910918176174164, "learning_rate": 3.32355816226784e-07, "loss": 0.2881166338920593, "step": 1000 }, { "epoch": 0.9692568385378842, "grad_norm": 0.19066068530082703, "learning_rate": 3.2258064516129035e-07, "loss": 0.2593529224395752, "step": 1001 }, { "epoch": 0.9702251270878722, "grad_norm": 0.21316994726657867, "learning_rate": 3.128054740957967e-07, "loss": 0.2673231363296509, "step": 1002 }, { "epoch": 0.9711934156378601, "grad_norm": 0.1907181590795517, "learning_rate": 3.0303030303030305e-07, "loss": 0.2969304323196411, "step": 1003 }, { "epoch": 0.972161704187848, "grad_norm": 0.2059427797794342, "learning_rate": 2.932551319648094e-07, "loss": 0.2977202832698822, "step": 1004 }, { "epoch": 0.9731299927378358, "grad_norm": 0.19578853249549866, "learning_rate": 2.8347996089931575e-07, "loss": 0.2898738980293274, "step": 1005 }, { "epoch": 0.9740982812878237, "grad_norm": 0.19571205973625183, "learning_rate": 2.737047898338221e-07, "loss": 0.2661632299423218, "step": 1006 }, { "epoch": 0.9750665698378117, "grad_norm": 0.21246828138828278, "learning_rate": 2.639296187683285e-07, "loss": 0.26930439472198486, "step": 1007 }, { "epoch": 0.9760348583877996, "grad_norm": 0.20583873987197876, "learning_rate": 2.541544477028348e-07, "loss": 0.2842969298362732, "step": 1008 }, { "epoch": 0.9770031469377874, "grad_norm": 0.18972201645374298, "learning_rate": 2.4437927663734114e-07, "loss": 0.28072866797447205, "step": 1009 }, { "epoch": 0.9779714354877753, "grad_norm": 0.2141742706298828, "learning_rate": 2.3460410557184752e-07, "loss": 0.28579071164131165, "step": 1010 }, { "epoch": 0.9789397240377633, "grad_norm": 0.2145223468542099, "learning_rate": 2.248289345063539e-07, "loss": 0.29237863421440125, "step": 1011 }, { "epoch": 0.9799080125877512, "grad_norm": 0.18137916922569275, "learning_rate": 2.1505376344086024e-07, "loss": 0.27613335847854614, "step": 1012 }, { "epoch": 0.980876301137739, "grad_norm": 0.18069401383399963, "learning_rate": 2.0527859237536657e-07, "loss": 0.255997896194458, "step": 1013 }, { "epoch": 0.9818445896877269, "grad_norm": 0.1869657039642334, "learning_rate": 1.9550342130987294e-07, "loss": 0.25478553771972656, "step": 1014 }, { "epoch": 0.9828128782377148, "grad_norm": 0.22846192121505737, "learning_rate": 1.857282502443793e-07, "loss": 0.2954884171485901, "step": 1015 }, { "epoch": 0.9837811667877028, "grad_norm": 0.20486541092395782, "learning_rate": 1.7595307917888567e-07, "loss": 0.2752358317375183, "step": 1016 }, { "epoch": 0.9847494553376906, "grad_norm": 0.20248091220855713, "learning_rate": 1.66177908113392e-07, "loss": 0.27697792649269104, "step": 1017 }, { "epoch": 0.9857177438876785, "grad_norm": 0.2098854035139084, "learning_rate": 1.5640273704789836e-07, "loss": 0.30580762028694153, "step": 1018 }, { "epoch": 0.9866860324376664, "grad_norm": 0.19671432673931122, "learning_rate": 1.466275659824047e-07, "loss": 0.2934240698814392, "step": 1019 }, { "epoch": 0.9876543209876543, "grad_norm": 0.2045270800590515, "learning_rate": 1.3685239491691106e-07, "loss": 0.26624009013175964, "step": 1020 }, { "epoch": 0.9886226095376422, "grad_norm": 0.18321022391319275, "learning_rate": 1.270772238514174e-07, "loss": 0.22881919145584106, "step": 1021 }, { "epoch": 0.9895908980876301, "grad_norm": 0.1937808096408844, "learning_rate": 1.1730205278592376e-07, "loss": 0.27172714471817017, "step": 1022 }, { "epoch": 0.990559186637618, "grad_norm": 0.22623102366924286, "learning_rate": 1.0752688172043012e-07, "loss": 0.3045693039894104, "step": 1023 }, { "epoch": 0.9915274751876059, "grad_norm": 0.2050536870956421, "learning_rate": 9.775171065493647e-08, "loss": 0.26483532786369324, "step": 1024 }, { "epoch": 0.9924957637375939, "grad_norm": 0.19890232384204865, "learning_rate": 8.797653958944283e-08, "loss": 0.25764352083206177, "step": 1025 }, { "epoch": 0.9934640522875817, "grad_norm": 0.198257714509964, "learning_rate": 7.820136852394918e-08, "loss": 0.27279871702194214, "step": 1026 }, { "epoch": 0.9944323408375696, "grad_norm": 0.19575795531272888, "learning_rate": 6.842619745845553e-08, "loss": 0.2848638594150543, "step": 1027 }, { "epoch": 0.9954006293875575, "grad_norm": 0.19270561635494232, "learning_rate": 5.865102639296188e-08, "loss": 0.26724010705947876, "step": 1028 }, { "epoch": 0.9963689179375453, "grad_norm": 0.2059524953365326, "learning_rate": 4.8875855327468235e-08, "loss": 0.28259921073913574, "step": 1029 }, { "epoch": 0.9973372064875333, "grad_norm": 0.22036604583263397, "learning_rate": 3.910068426197459e-08, "loss": 0.2710026502609253, "step": 1030 }, { "epoch": 0.9983054950375212, "grad_norm": 0.19827115535736084, "learning_rate": 2.932551319648094e-08, "loss": 0.2782309949398041, "step": 1031 }, { "epoch": 0.9992737835875091, "grad_norm": 0.19505129754543304, "learning_rate": 1.9550342130987295e-08, "loss": 0.2857624292373657, "step": 1032 }, { "epoch": 1.0, "grad_norm": 0.22344279289245605, "learning_rate": 9.775171065493648e-09, "loss": 0.25447842478752136, "step": 1033 } ], "logging_steps": 1, "max_steps": 1033, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.907215148242811e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }