{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4853779881082393, "eval_steps": 500, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012134449702705982, "grad_norm": 0.8723246455192566, "learning_rate": 0.0, "loss": 0.8186817169189453, "step": 1 }, { "epoch": 0.00024268899405411964, "grad_norm": 0.3477829396724701, "learning_rate": 2.0000000000000002e-07, "loss": 0.16060441732406616, "step": 2 }, { "epoch": 0.00036403349108117945, "grad_norm": 0.7758898735046387, "learning_rate": 4.0000000000000003e-07, "loss": 0.2928085923194885, "step": 3 }, { "epoch": 0.00048537798810823927, "grad_norm": 0.7930212616920471, "learning_rate": 6.000000000000001e-07, "loss": 0.4934349060058594, "step": 4 }, { "epoch": 0.0006067224851352991, "grad_norm": 0.49778178334236145, "learning_rate": 8.000000000000001e-07, "loss": 0.1982222944498062, "step": 5 }, { "epoch": 0.0007280669821623589, "grad_norm": 0.9954462051391602, "learning_rate": 1.0000000000000002e-06, "loss": 0.779515266418457, "step": 6 }, { "epoch": 0.0008494114791894187, "grad_norm": 0.7579694986343384, "learning_rate": 1.2000000000000002e-06, "loss": 0.8047744035720825, "step": 7 }, { "epoch": 0.0009707559762164785, "grad_norm": 1.1304748058319092, "learning_rate": 1.4000000000000001e-06, "loss": 0.5158179998397827, "step": 8 }, { "epoch": 0.0010921004732435385, "grad_norm": 0.6553785800933838, "learning_rate": 1.6000000000000001e-06, "loss": 0.5927454233169556, "step": 9 }, { "epoch": 0.0012134449702705982, "grad_norm": 0.6902636885643005, "learning_rate": 1.8000000000000001e-06, "loss": 0.5002865791320801, "step": 10 }, { "epoch": 0.001334789467297658, "grad_norm": 0.8518681526184082, "learning_rate": 2.0000000000000003e-06, "loss": 0.6353203654289246, "step": 11 }, { "epoch": 0.0014561339643247178, "grad_norm": 0.7608593702316284, "learning_rate": 2.2e-06, "loss": 0.5984488129615784, "step": 12 }, { "epoch": 0.0015774784613517777, "grad_norm": 0.523412823677063, "learning_rate": 2.4000000000000003e-06, "loss": 0.2241007536649704, "step": 13 }, { "epoch": 0.0016988229583788375, "grad_norm": 0.5840254426002502, "learning_rate": 2.6e-06, "loss": 0.1746365875005722, "step": 14 }, { "epoch": 0.0018201674554058974, "grad_norm": 1.0090986490249634, "learning_rate": 2.8000000000000003e-06, "loss": 0.5960265398025513, "step": 15 }, { "epoch": 0.001941511952432957, "grad_norm": 0.5519396066665649, "learning_rate": 3e-06, "loss": 0.11730103194713593, "step": 16 }, { "epoch": 0.002062856449460017, "grad_norm": 0.6301160454750061, "learning_rate": 3.2000000000000003e-06, "loss": 0.3237765431404114, "step": 17 }, { "epoch": 0.002184200946487077, "grad_norm": 0.6448126435279846, "learning_rate": 3.4000000000000005e-06, "loss": 0.36336758732795715, "step": 18 }, { "epoch": 0.0023055454435141364, "grad_norm": 1.1270724534988403, "learning_rate": 3.6000000000000003e-06, "loss": 0.9434967041015625, "step": 19 }, { "epoch": 0.0024268899405411964, "grad_norm": 0.8342574238777161, "learning_rate": 3.8000000000000005e-06, "loss": 0.3132971525192261, "step": 20 }, { "epoch": 0.0025482344375682563, "grad_norm": 0.6736285090446472, "learning_rate": 4.000000000000001e-06, "loss": 0.2724347412586212, "step": 21 }, { "epoch": 0.002669578934595316, "grad_norm": 0.831219494342804, "learning_rate": 4.2000000000000004e-06, "loss": 0.5868111848831177, "step": 22 }, { "epoch": 0.002790923431622376, "grad_norm": 0.7777939438819885, "learning_rate": 4.4e-06, "loss": 0.1734337955713272, "step": 23 }, { "epoch": 0.0029122679286494356, "grad_norm": 0.37845781445503235, "learning_rate": 4.600000000000001e-06, "loss": 0.7677285075187683, "step": 24 }, { "epoch": 0.0030336124256764956, "grad_norm": 0.9251959323883057, "learning_rate": 4.800000000000001e-06, "loss": 0.6172207593917847, "step": 25 }, { "epoch": 0.0031549569227035555, "grad_norm": 0.9809829592704773, "learning_rate": 5e-06, "loss": 0.906048059463501, "step": 26 }, { "epoch": 0.0032763014197306154, "grad_norm": 0.6690176725387573, "learning_rate": 5.2e-06, "loss": 0.4358732998371124, "step": 27 }, { "epoch": 0.003397645916757675, "grad_norm": 0.7016831636428833, "learning_rate": 5.400000000000001e-06, "loss": 0.7462010979652405, "step": 28 }, { "epoch": 0.003518990413784735, "grad_norm": 1.2273560762405396, "learning_rate": 5.600000000000001e-06, "loss": 0.4869852662086487, "step": 29 }, { "epoch": 0.0036403349108117948, "grad_norm": 0.6704553365707397, "learning_rate": 5.8e-06, "loss": 0.65860915184021, "step": 30 }, { "epoch": 0.0037616794078388547, "grad_norm": 0.6678578853607178, "learning_rate": 6e-06, "loss": 0.22405284643173218, "step": 31 }, { "epoch": 0.003883023904865914, "grad_norm": 0.875632107257843, "learning_rate": 6.200000000000001e-06, "loss": 0.2586999237537384, "step": 32 }, { "epoch": 0.004004368401892974, "grad_norm": 0.8029634356498718, "learning_rate": 6.4000000000000006e-06, "loss": 0.37517064809799194, "step": 33 }, { "epoch": 0.004125712898920034, "grad_norm": 0.9273082613945007, "learning_rate": 6.600000000000001e-06, "loss": 0.5981779098510742, "step": 34 }, { "epoch": 0.004247057395947094, "grad_norm": 0.8178271055221558, "learning_rate": 6.800000000000001e-06, "loss": 0.6380455493927002, "step": 35 }, { "epoch": 0.004368401892974154, "grad_norm": 0.7530136108398438, "learning_rate": 7e-06, "loss": 0.559657096862793, "step": 36 }, { "epoch": 0.004489746390001214, "grad_norm": 0.8515595197677612, "learning_rate": 7.2000000000000005e-06, "loss": 0.5521618127822876, "step": 37 }, { "epoch": 0.004611090887028273, "grad_norm": 0.937356173992157, "learning_rate": 7.4e-06, "loss": 0.764209508895874, "step": 38 }, { "epoch": 0.004732435384055333, "grad_norm": 0.8871638178825378, "learning_rate": 7.600000000000001e-06, "loss": 0.3783448040485382, "step": 39 }, { "epoch": 0.004853779881082393, "grad_norm": 0.5699480772018433, "learning_rate": 7.800000000000002e-06, "loss": 0.45090657472610474, "step": 40 }, { "epoch": 0.004975124378109453, "grad_norm": 0.7399379014968872, "learning_rate": 8.000000000000001e-06, "loss": 0.22258038818836212, "step": 41 }, { "epoch": 0.005096468875136513, "grad_norm": 0.8235787749290466, "learning_rate": 8.2e-06, "loss": 0.4367460608482361, "step": 42 }, { "epoch": 0.0052178133721635725, "grad_norm": 0.5692397952079773, "learning_rate": 8.400000000000001e-06, "loss": 0.497313916683197, "step": 43 }, { "epoch": 0.005339157869190632, "grad_norm": 0.8195589780807495, "learning_rate": 8.6e-06, "loss": 0.7818059325218201, "step": 44 }, { "epoch": 0.005460502366217692, "grad_norm": 1.0632492303848267, "learning_rate": 8.8e-06, "loss": 0.5494105219841003, "step": 45 }, { "epoch": 0.005581846863244752, "grad_norm": 0.6694285869598389, "learning_rate": 9e-06, "loss": 0.35489311814308167, "step": 46 }, { "epoch": 0.005703191360271811, "grad_norm": 0.9679849147796631, "learning_rate": 9.200000000000002e-06, "loss": 0.9104418754577637, "step": 47 }, { "epoch": 0.005824535857298871, "grad_norm": 0.8836600184440613, "learning_rate": 9.4e-06, "loss": 0.33178386092185974, "step": 48 }, { "epoch": 0.005945880354325931, "grad_norm": 0.9069057106971741, "learning_rate": 9.600000000000001e-06, "loss": 0.47104474902153015, "step": 49 }, { "epoch": 0.006067224851352991, "grad_norm": 0.9322941899299622, "learning_rate": 9.800000000000001e-06, "loss": 0.8761706948280334, "step": 50 }, { "epoch": 0.006188569348380051, "grad_norm": 0.96770840883255, "learning_rate": 1e-05, "loss": 0.7080501317977905, "step": 51 }, { "epoch": 0.006309913845407111, "grad_norm": 0.8596216440200806, "learning_rate": 1.02e-05, "loss": 0.6395684480667114, "step": 52 }, { "epoch": 0.006431258342434171, "grad_norm": 0.9308010339736938, "learning_rate": 1.04e-05, "loss": 0.776948869228363, "step": 53 }, { "epoch": 0.006552602839461231, "grad_norm": 0.9452096223831177, "learning_rate": 1.0600000000000002e-05, "loss": 0.5925815105438232, "step": 54 }, { "epoch": 0.00667394733648829, "grad_norm": 0.7136799693107605, "learning_rate": 1.0800000000000002e-05, "loss": 0.4017346203327179, "step": 55 }, { "epoch": 0.00679529183351535, "grad_norm": 0.8862737417221069, "learning_rate": 1.1000000000000001e-05, "loss": 0.7030301094055176, "step": 56 }, { "epoch": 0.00691663633054241, "grad_norm": 0.8768438696861267, "learning_rate": 1.1200000000000001e-05, "loss": 0.1365959346294403, "step": 57 }, { "epoch": 0.00703798082756947, "grad_norm": 0.6508520245552063, "learning_rate": 1.14e-05, "loss": 0.14912734925746918, "step": 58 }, { "epoch": 0.00715932532459653, "grad_norm": 0.942901074886322, "learning_rate": 1.16e-05, "loss": 0.5415380001068115, "step": 59 }, { "epoch": 0.0072806698216235895, "grad_norm": 1.0844477415084839, "learning_rate": 1.18e-05, "loss": 0.20876353979110718, "step": 60 }, { "epoch": 0.007402014318650649, "grad_norm": 0.9388653039932251, "learning_rate": 1.2e-05, "loss": 0.4371647238731384, "step": 61 }, { "epoch": 0.007523358815677709, "grad_norm": 0.8884531855583191, "learning_rate": 1.22e-05, "loss": 0.2126459777355194, "step": 62 }, { "epoch": 0.007644703312704768, "grad_norm": 0.9780679941177368, "learning_rate": 1.2400000000000002e-05, "loss": 0.5204879641532898, "step": 63 }, { "epoch": 0.007766047809731828, "grad_norm": 0.9438526630401611, "learning_rate": 1.2600000000000001e-05, "loss": 0.6052622199058533, "step": 64 }, { "epoch": 0.00788739230675889, "grad_norm": 0.6636860370635986, "learning_rate": 1.2800000000000001e-05, "loss": 0.4711700975894928, "step": 65 }, { "epoch": 0.008008736803785948, "grad_norm": 0.9902396202087402, "learning_rate": 1.3000000000000001e-05, "loss": 0.21103627979755402, "step": 66 }, { "epoch": 0.008130081300813009, "grad_norm": 0.6545515060424805, "learning_rate": 1.3200000000000002e-05, "loss": 0.22432354092597961, "step": 67 }, { "epoch": 0.008251425797840068, "grad_norm": 0.7995378971099854, "learning_rate": 1.3400000000000002e-05, "loss": 0.44884270429611206, "step": 68 }, { "epoch": 0.008372770294867127, "grad_norm": 0.9875938296318054, "learning_rate": 1.3600000000000002e-05, "loss": 0.5456703305244446, "step": 69 }, { "epoch": 0.008494114791894188, "grad_norm": 1.2119238376617432, "learning_rate": 1.38e-05, "loss": 0.5117138624191284, "step": 70 }, { "epoch": 0.008615459288921247, "grad_norm": 0.7160502672195435, "learning_rate": 1.4e-05, "loss": 0.4587688446044922, "step": 71 }, { "epoch": 0.008736803785948308, "grad_norm": 0.8775944113731384, "learning_rate": 1.4200000000000001e-05, "loss": 0.181992307305336, "step": 72 }, { "epoch": 0.008858148282975367, "grad_norm": 1.1906511783599854, "learning_rate": 1.4400000000000001e-05, "loss": 0.8971315622329712, "step": 73 }, { "epoch": 0.008979492780002428, "grad_norm": 1.5473064184188843, "learning_rate": 1.46e-05, "loss": 0.5879963636398315, "step": 74 }, { "epoch": 0.009100837277029487, "grad_norm": 1.413874864578247, "learning_rate": 1.48e-05, "loss": 0.4934973120689392, "step": 75 }, { "epoch": 0.009222181774056546, "grad_norm": 0.9657158851623535, "learning_rate": 1.5000000000000002e-05, "loss": 0.3402983248233795, "step": 76 }, { "epoch": 0.009343526271083607, "grad_norm": 0.8786820769309998, "learning_rate": 1.5200000000000002e-05, "loss": 0.31571996212005615, "step": 77 }, { "epoch": 0.009464870768110666, "grad_norm": 0.981968343257904, "learning_rate": 1.54e-05, "loss": 0.6843433380126953, "step": 78 }, { "epoch": 0.009586215265137726, "grad_norm": 1.0725715160369873, "learning_rate": 1.5600000000000003e-05, "loss": 0.34444135427474976, "step": 79 }, { "epoch": 0.009707559762164785, "grad_norm": 1.2156447172164917, "learning_rate": 1.58e-05, "loss": 0.5834903120994568, "step": 80 }, { "epoch": 0.009828904259191846, "grad_norm": 1.1125974655151367, "learning_rate": 1.6000000000000003e-05, "loss": 0.34024637937545776, "step": 81 }, { "epoch": 0.009950248756218905, "grad_norm": 0.6233447790145874, "learning_rate": 1.62e-05, "loss": 0.46968546509742737, "step": 82 }, { "epoch": 0.010071593253245966, "grad_norm": 1.2955931425094604, "learning_rate": 1.64e-05, "loss": 0.3877994418144226, "step": 83 }, { "epoch": 0.010192937750273025, "grad_norm": 1.098732829093933, "learning_rate": 1.66e-05, "loss": 0.5838992595672607, "step": 84 }, { "epoch": 0.010314282247300084, "grad_norm": 0.6143050789833069, "learning_rate": 1.6800000000000002e-05, "loss": 0.16620726883411407, "step": 85 }, { "epoch": 0.010435626744327145, "grad_norm": 0.8536654710769653, "learning_rate": 1.7e-05, "loss": 0.46331706643104553, "step": 86 }, { "epoch": 0.010556971241354204, "grad_norm": 0.9140154719352722, "learning_rate": 1.72e-05, "loss": 0.46669042110443115, "step": 87 }, { "epoch": 0.010678315738381265, "grad_norm": 0.8299479484558105, "learning_rate": 1.7400000000000003e-05, "loss": 0.39312833547592163, "step": 88 }, { "epoch": 0.010799660235408324, "grad_norm": 1.2406340837478638, "learning_rate": 1.76e-05, "loss": 0.670952320098877, "step": 89 }, { "epoch": 0.010921004732435385, "grad_norm": 0.7150036692619324, "learning_rate": 1.7800000000000002e-05, "loss": 0.49945852160453796, "step": 90 }, { "epoch": 0.011042349229462444, "grad_norm": 1.02498197555542, "learning_rate": 1.8e-05, "loss": 0.4261772036552429, "step": 91 }, { "epoch": 0.011163693726489505, "grad_norm": 0.849663496017456, "learning_rate": 1.8200000000000002e-05, "loss": 0.5062468647956848, "step": 92 }, { "epoch": 0.011285038223516564, "grad_norm": 0.9148527979850769, "learning_rate": 1.8400000000000003e-05, "loss": 0.7943180799484253, "step": 93 }, { "epoch": 0.011406382720543623, "grad_norm": 0.6996797323226929, "learning_rate": 1.86e-05, "loss": 0.453007310628891, "step": 94 }, { "epoch": 0.011527727217570683, "grad_norm": 1.9976884126663208, "learning_rate": 1.88e-05, "loss": 0.3840827941894531, "step": 95 }, { "epoch": 0.011649071714597743, "grad_norm": 0.7963491082191467, "learning_rate": 1.9e-05, "loss": 0.3547995686531067, "step": 96 }, { "epoch": 0.011770416211624803, "grad_norm": 0.6255000233650208, "learning_rate": 1.9200000000000003e-05, "loss": 0.1267634928226471, "step": 97 }, { "epoch": 0.011891760708651862, "grad_norm": 1.1216952800750732, "learning_rate": 1.94e-05, "loss": 0.5351840853691101, "step": 98 }, { "epoch": 0.012013105205678923, "grad_norm": 0.933724045753479, "learning_rate": 1.9600000000000002e-05, "loss": 0.28443726897239685, "step": 99 }, { "epoch": 0.012134449702705982, "grad_norm": 1.23550283908844, "learning_rate": 1.98e-05, "loss": 0.9422104954719543, "step": 100 }, { "epoch": 0.012255794199733041, "grad_norm": 0.6336556077003479, "learning_rate": 2e-05, "loss": 0.12934568524360657, "step": 101 }, { "epoch": 0.012377138696760102, "grad_norm": 1.0921475887298584, "learning_rate": 1.9997543299348976e-05, "loss": 0.5496861338615417, "step": 102 }, { "epoch": 0.012498483193787161, "grad_norm": 1.4995360374450684, "learning_rate": 1.999508659869795e-05, "loss": 0.450344443321228, "step": 103 }, { "epoch": 0.012619827690814222, "grad_norm": 1.3966925144195557, "learning_rate": 1.9992629898046924e-05, "loss": 0.4021853804588318, "step": 104 }, { "epoch": 0.012741172187841281, "grad_norm": 0.6302288770675659, "learning_rate": 1.99901731973959e-05, "loss": 0.16195248067378998, "step": 105 }, { "epoch": 0.012862516684868342, "grad_norm": 0.8286706209182739, "learning_rate": 1.9987716496744873e-05, "loss": 0.5685713291168213, "step": 106 }, { "epoch": 0.0129838611818954, "grad_norm": 1.2312484979629517, "learning_rate": 1.9985259796093847e-05, "loss": 0.4577338993549347, "step": 107 }, { "epoch": 0.013105205678922462, "grad_norm": 0.7556170225143433, "learning_rate": 1.998280309544282e-05, "loss": 0.47135791182518005, "step": 108 }, { "epoch": 0.01322655017594952, "grad_norm": 0.9953917860984802, "learning_rate": 1.9980346394791796e-05, "loss": 0.8566328883171082, "step": 109 }, { "epoch": 0.01334789467297658, "grad_norm": 0.43752583861351013, "learning_rate": 1.997788969414077e-05, "loss": 0.03474525734782219, "step": 110 }, { "epoch": 0.01346923917000364, "grad_norm": 0.8211821913719177, "learning_rate": 1.9975432993489744e-05, "loss": 0.5059197545051575, "step": 111 }, { "epoch": 0.0135905836670307, "grad_norm": 1.6312147378921509, "learning_rate": 1.997297629283872e-05, "loss": 0.44580140709877014, "step": 112 }, { "epoch": 0.01371192816405776, "grad_norm": 1.2254769802093506, "learning_rate": 1.9970519592187693e-05, "loss": 0.5274494886398315, "step": 113 }, { "epoch": 0.01383327266108482, "grad_norm": 0.9368826150894165, "learning_rate": 1.9968062891536667e-05, "loss": 0.4496898651123047, "step": 114 }, { "epoch": 0.01395461715811188, "grad_norm": 0.7070733308792114, "learning_rate": 1.996560619088564e-05, "loss": 0.3537105619907379, "step": 115 }, { "epoch": 0.01407596165513894, "grad_norm": 1.085019826889038, "learning_rate": 1.9963149490234616e-05, "loss": 0.5918015837669373, "step": 116 }, { "epoch": 0.014197306152166, "grad_norm": 0.8331693410873413, "learning_rate": 1.996069278958359e-05, "loss": 0.7372145652770996, "step": 117 }, { "epoch": 0.01431865064919306, "grad_norm": 1.0224517583847046, "learning_rate": 1.9958236088932564e-05, "loss": 0.6304808855056763, "step": 118 }, { "epoch": 0.014439995146220118, "grad_norm": 1.0307385921478271, "learning_rate": 1.9955779388281538e-05, "loss": 0.4330332279205322, "step": 119 }, { "epoch": 0.014561339643247179, "grad_norm": 1.1663846969604492, "learning_rate": 1.9953322687630513e-05, "loss": 1.0179262161254883, "step": 120 }, { "epoch": 0.014682684140274238, "grad_norm": 1.2464661598205566, "learning_rate": 1.995086598697949e-05, "loss": 0.4001690149307251, "step": 121 }, { "epoch": 0.014804028637301299, "grad_norm": 0.6119124889373779, "learning_rate": 1.9948409286328464e-05, "loss": 0.5289167761802673, "step": 122 }, { "epoch": 0.014925373134328358, "grad_norm": 0.7121961712837219, "learning_rate": 1.994595258567744e-05, "loss": 0.39589789509773254, "step": 123 }, { "epoch": 0.015046717631355419, "grad_norm": 0.8262454867362976, "learning_rate": 1.9943495885026413e-05, "loss": 0.3565620481967926, "step": 124 }, { "epoch": 0.015168062128382478, "grad_norm": 0.9075021743774414, "learning_rate": 1.9941039184375387e-05, "loss": 0.2919665575027466, "step": 125 }, { "epoch": 0.015289406625409537, "grad_norm": 1.4302308559417725, "learning_rate": 1.993858248372436e-05, "loss": 0.5871132612228394, "step": 126 }, { "epoch": 0.015410751122436598, "grad_norm": 1.2167500257492065, "learning_rate": 1.9936125783073336e-05, "loss": 0.2407664954662323, "step": 127 }, { "epoch": 0.015532095619463657, "grad_norm": 1.038041353225708, "learning_rate": 1.993366908242231e-05, "loss": 0.8497026562690735, "step": 128 }, { "epoch": 0.015653440116490717, "grad_norm": 1.1356406211853027, "learning_rate": 1.9931212381771284e-05, "loss": 0.4753378927707672, "step": 129 }, { "epoch": 0.01577478461351778, "grad_norm": 1.1320428848266602, "learning_rate": 1.992875568112026e-05, "loss": 0.4532396197319031, "step": 130 }, { "epoch": 0.015896129110544836, "grad_norm": 0.9516323804855347, "learning_rate": 1.9926298980469233e-05, "loss": 0.38531002402305603, "step": 131 }, { "epoch": 0.016017473607571896, "grad_norm": 1.3494288921356201, "learning_rate": 1.9923842279818207e-05, "loss": 0.7311023473739624, "step": 132 }, { "epoch": 0.016138818104598957, "grad_norm": 1.1651663780212402, "learning_rate": 1.992138557916718e-05, "loss": 0.4712047278881073, "step": 133 }, { "epoch": 0.016260162601626018, "grad_norm": 1.558090090751648, "learning_rate": 1.9918928878516156e-05, "loss": 0.5418444871902466, "step": 134 }, { "epoch": 0.016381507098653075, "grad_norm": 0.792945384979248, "learning_rate": 1.991647217786513e-05, "loss": 0.2568298876285553, "step": 135 }, { "epoch": 0.016502851595680136, "grad_norm": 0.7723349928855896, "learning_rate": 1.9914015477214104e-05, "loss": 0.6914761066436768, "step": 136 }, { "epoch": 0.016624196092707197, "grad_norm": 0.8072736859321594, "learning_rate": 1.991155877656308e-05, "loss": 0.5070819854736328, "step": 137 }, { "epoch": 0.016745540589734254, "grad_norm": 0.8866456747055054, "learning_rate": 1.9909102075912053e-05, "loss": 0.48820972442626953, "step": 138 }, { "epoch": 0.016866885086761315, "grad_norm": 0.6745986342430115, "learning_rate": 1.9906645375261027e-05, "loss": 0.4791775643825531, "step": 139 }, { "epoch": 0.016988229583788376, "grad_norm": 0.7663500905036926, "learning_rate": 1.990418867461e-05, "loss": 0.27651312947273254, "step": 140 }, { "epoch": 0.017109574080815437, "grad_norm": 0.7036702632904053, "learning_rate": 1.9901731973958975e-05, "loss": 0.34517037868499756, "step": 141 }, { "epoch": 0.017230918577842494, "grad_norm": 1.1952064037322998, "learning_rate": 1.989927527330795e-05, "loss": 0.7066012024879456, "step": 142 }, { "epoch": 0.017352263074869555, "grad_norm": 1.4770616292953491, "learning_rate": 1.9896818572656924e-05, "loss": 0.5686489343643188, "step": 143 }, { "epoch": 0.017473607571896616, "grad_norm": 0.9231040477752686, "learning_rate": 1.9894361872005898e-05, "loss": 0.5055479407310486, "step": 144 }, { "epoch": 0.017594952068923673, "grad_norm": 2.1276888847351074, "learning_rate": 1.9891905171354872e-05, "loss": 0.676661491394043, "step": 145 }, { "epoch": 0.017716296565950734, "grad_norm": 1.249248743057251, "learning_rate": 1.9889448470703847e-05, "loss": 0.7343191504478455, "step": 146 }, { "epoch": 0.017837641062977794, "grad_norm": 0.7005143761634827, "learning_rate": 1.988699177005282e-05, "loss": 0.1592637598514557, "step": 147 }, { "epoch": 0.017958985560004855, "grad_norm": 1.0925079584121704, "learning_rate": 1.9884535069401795e-05, "loss": 0.46038728952407837, "step": 148 }, { "epoch": 0.018080330057031913, "grad_norm": 1.1379766464233398, "learning_rate": 1.988207836875077e-05, "loss": 0.21679271757602692, "step": 149 }, { "epoch": 0.018201674554058973, "grad_norm": 1.6844356060028076, "learning_rate": 1.9879621668099744e-05, "loss": 0.460208535194397, "step": 150 }, { "epoch": 0.018323019051086034, "grad_norm": 0.9418531060218811, "learning_rate": 1.9877164967448718e-05, "loss": 0.2926967442035675, "step": 151 }, { "epoch": 0.01844436354811309, "grad_norm": 1.0456334352493286, "learning_rate": 1.9874708266797692e-05, "loss": 0.6844074726104736, "step": 152 }, { "epoch": 0.018565708045140152, "grad_norm": 0.8679888248443604, "learning_rate": 1.9872251566146666e-05, "loss": 0.5056055188179016, "step": 153 }, { "epoch": 0.018687052542167213, "grad_norm": 0.8799395561218262, "learning_rate": 1.986979486549564e-05, "loss": 0.3345467448234558, "step": 154 }, { "epoch": 0.018808397039194274, "grad_norm": 1.5138970613479614, "learning_rate": 1.9867338164844615e-05, "loss": 0.46227532625198364, "step": 155 }, { "epoch": 0.01892974153622133, "grad_norm": 0.959060788154602, "learning_rate": 1.986488146419359e-05, "loss": 0.598260223865509, "step": 156 }, { "epoch": 0.019051086033248392, "grad_norm": 0.9521774649620056, "learning_rate": 1.9862424763542563e-05, "loss": 0.4968634843826294, "step": 157 }, { "epoch": 0.019172430530275453, "grad_norm": 1.074791669845581, "learning_rate": 1.9859968062891538e-05, "loss": 0.3660491704940796, "step": 158 }, { "epoch": 0.019293775027302514, "grad_norm": 0.7658405303955078, "learning_rate": 1.9857511362240512e-05, "loss": 0.31504422426223755, "step": 159 }, { "epoch": 0.01941511952432957, "grad_norm": 1.2017768621444702, "learning_rate": 1.9855054661589486e-05, "loss": 0.2452397644519806, "step": 160 }, { "epoch": 0.01953646402135663, "grad_norm": 0.8670822978019714, "learning_rate": 1.9852597960938464e-05, "loss": 0.2822229862213135, "step": 161 }, { "epoch": 0.019657808518383692, "grad_norm": 1.3473888635635376, "learning_rate": 1.9850141260287438e-05, "loss": 0.349812388420105, "step": 162 }, { "epoch": 0.01977915301541075, "grad_norm": 0.9460965394973755, "learning_rate": 1.9847684559636412e-05, "loss": 0.3580651879310608, "step": 163 }, { "epoch": 0.01990049751243781, "grad_norm": 1.454413652420044, "learning_rate": 1.9845227858985387e-05, "loss": 0.3063688278198242, "step": 164 }, { "epoch": 0.02002184200946487, "grad_norm": 1.1844851970672607, "learning_rate": 1.984277115833436e-05, "loss": 0.3315426707267761, "step": 165 }, { "epoch": 0.020143186506491932, "grad_norm": 1.5113213062286377, "learning_rate": 1.9840314457683335e-05, "loss": 0.2392674684524536, "step": 166 }, { "epoch": 0.02026453100351899, "grad_norm": 0.9833793640136719, "learning_rate": 1.983785775703231e-05, "loss": 0.5257890820503235, "step": 167 }, { "epoch": 0.02038587550054605, "grad_norm": 1.514121651649475, "learning_rate": 1.9835401056381284e-05, "loss": 0.6917684078216553, "step": 168 }, { "epoch": 0.02050721999757311, "grad_norm": 0.9638065099716187, "learning_rate": 1.9832944355730255e-05, "loss": 0.41345733404159546, "step": 169 }, { "epoch": 0.02062856449460017, "grad_norm": 1.2704676389694214, "learning_rate": 1.983048765507923e-05, "loss": 0.6704689264297485, "step": 170 }, { "epoch": 0.02074990899162723, "grad_norm": 0.9627780914306641, "learning_rate": 1.9828030954428203e-05, "loss": 0.3117220997810364, "step": 171 }, { "epoch": 0.02087125348865429, "grad_norm": 0.8880823850631714, "learning_rate": 1.9825574253777177e-05, "loss": 0.5640937089920044, "step": 172 }, { "epoch": 0.02099259798568135, "grad_norm": 1.2231365442276, "learning_rate": 1.982311755312615e-05, "loss": 0.3630719780921936, "step": 173 }, { "epoch": 0.021113942482708408, "grad_norm": 1.5474482774734497, "learning_rate": 1.9820660852475126e-05, "loss": 0.7506128549575806, "step": 174 }, { "epoch": 0.02123528697973547, "grad_norm": 1.6173280477523804, "learning_rate": 1.98182041518241e-05, "loss": 0.4840516746044159, "step": 175 }, { "epoch": 0.02135663147676253, "grad_norm": 0.610375702381134, "learning_rate": 1.9815747451173074e-05, "loss": 0.12637916207313538, "step": 176 }, { "epoch": 0.021477975973789587, "grad_norm": 0.8489571809768677, "learning_rate": 1.981329075052205e-05, "loss": 0.19583982229232788, "step": 177 }, { "epoch": 0.021599320470816648, "grad_norm": 1.4588679075241089, "learning_rate": 1.9810834049871023e-05, "loss": 0.6651678085327148, "step": 178 }, { "epoch": 0.02172066496784371, "grad_norm": 0.778512716293335, "learning_rate": 1.9808377349219997e-05, "loss": 0.37274667620658875, "step": 179 }, { "epoch": 0.02184200946487077, "grad_norm": 1.3927290439605713, "learning_rate": 1.980592064856897e-05, "loss": 0.4347939193248749, "step": 180 }, { "epoch": 0.021963353961897827, "grad_norm": 1.2548933029174805, "learning_rate": 1.9803463947917946e-05, "loss": 0.36539730429649353, "step": 181 }, { "epoch": 0.022084698458924888, "grad_norm": 11.010635375976562, "learning_rate": 1.980100724726692e-05, "loss": 0.44999217987060547, "step": 182 }, { "epoch": 0.02220604295595195, "grad_norm": 1.3671001195907593, "learning_rate": 1.9798550546615894e-05, "loss": 0.18597112596035004, "step": 183 }, { "epoch": 0.02232738745297901, "grad_norm": 1.1180782318115234, "learning_rate": 1.979609384596487e-05, "loss": 0.2359408438205719, "step": 184 }, { "epoch": 0.022448731950006066, "grad_norm": 0.8790715932846069, "learning_rate": 1.9793637145313843e-05, "loss": 0.22656098008155823, "step": 185 }, { "epoch": 0.022570076447033127, "grad_norm": 1.5535168647766113, "learning_rate": 1.9791180444662817e-05, "loss": 0.2677188813686371, "step": 186 }, { "epoch": 0.022691420944060188, "grad_norm": 0.7048156261444092, "learning_rate": 1.9788723744011795e-05, "loss": 0.15088553726673126, "step": 187 }, { "epoch": 0.022812765441087245, "grad_norm": 0.8741711378097534, "learning_rate": 1.978626704336077e-05, "loss": 0.2621510922908783, "step": 188 }, { "epoch": 0.022934109938114306, "grad_norm": 1.5229653120040894, "learning_rate": 1.9783810342709743e-05, "loss": 0.6171689033508301, "step": 189 }, { "epoch": 0.023055454435141367, "grad_norm": 1.302876353263855, "learning_rate": 1.9781353642058717e-05, "loss": 0.7471094131469727, "step": 190 }, { "epoch": 0.023176798932168428, "grad_norm": 1.594721794128418, "learning_rate": 1.977889694140769e-05, "loss": 0.40545159578323364, "step": 191 }, { "epoch": 0.023298143429195485, "grad_norm": 1.2041152715682983, "learning_rate": 1.9776440240756666e-05, "loss": 0.538151204586029, "step": 192 }, { "epoch": 0.023419487926222546, "grad_norm": 1.349916934967041, "learning_rate": 1.977398354010564e-05, "loss": 0.5056011080741882, "step": 193 }, { "epoch": 0.023540832423249607, "grad_norm": 1.6573007106781006, "learning_rate": 1.9771526839454614e-05, "loss": 0.679570734500885, "step": 194 }, { "epoch": 0.023662176920276664, "grad_norm": 1.1572545766830444, "learning_rate": 1.976907013880359e-05, "loss": 0.18730156123638153, "step": 195 }, { "epoch": 0.023783521417303725, "grad_norm": 0.9210532903671265, "learning_rate": 1.9766613438152563e-05, "loss": 0.18758049607276917, "step": 196 }, { "epoch": 0.023904865914330786, "grad_norm": 1.0380648374557495, "learning_rate": 1.9764156737501537e-05, "loss": 0.4383860230445862, "step": 197 }, { "epoch": 0.024026210411357846, "grad_norm": 1.1509276628494263, "learning_rate": 1.976170003685051e-05, "loss": 0.5359134078025818, "step": 198 }, { "epoch": 0.024147554908384904, "grad_norm": 1.693115234375, "learning_rate": 1.9759243336199486e-05, "loss": 0.13348811864852905, "step": 199 }, { "epoch": 0.024268899405411964, "grad_norm": 1.350980281829834, "learning_rate": 1.975678663554846e-05, "loss": 0.32777535915374756, "step": 200 }, { "epoch": 0.024390243902439025, "grad_norm": 1.4759892225265503, "learning_rate": 1.9754329934897434e-05, "loss": 0.810895562171936, "step": 201 }, { "epoch": 0.024511588399466083, "grad_norm": 1.1393275260925293, "learning_rate": 1.975187323424641e-05, "loss": 0.1503686010837555, "step": 202 }, { "epoch": 0.024632932896493143, "grad_norm": 0.7875057458877563, "learning_rate": 1.9749416533595383e-05, "loss": 0.07484026253223419, "step": 203 }, { "epoch": 0.024754277393520204, "grad_norm": 0.8852109909057617, "learning_rate": 1.9746959832944357e-05, "loss": 0.2887484133243561, "step": 204 }, { "epoch": 0.024875621890547265, "grad_norm": 1.01993727684021, "learning_rate": 1.974450313229333e-05, "loss": 0.43222254514694214, "step": 205 }, { "epoch": 0.024996966387574322, "grad_norm": 0.8183336853981018, "learning_rate": 1.9742046431642306e-05, "loss": 0.19282624125480652, "step": 206 }, { "epoch": 0.025118310884601383, "grad_norm": 1.5417038202285767, "learning_rate": 1.973958973099128e-05, "loss": 0.46405136585235596, "step": 207 }, { "epoch": 0.025239655381628444, "grad_norm": 1.2917892932891846, "learning_rate": 1.9737133030340254e-05, "loss": 0.23757943511009216, "step": 208 }, { "epoch": 0.025360999878655505, "grad_norm": 1.2024424076080322, "learning_rate": 1.9734676329689228e-05, "loss": 0.06261929869651794, "step": 209 }, { "epoch": 0.025482344375682562, "grad_norm": 1.0761046409606934, "learning_rate": 1.9732219629038203e-05, "loss": 0.2930389940738678, "step": 210 }, { "epoch": 0.025603688872709623, "grad_norm": 1.4010992050170898, "learning_rate": 1.9729762928387177e-05, "loss": 0.42875391244888306, "step": 211 }, { "epoch": 0.025725033369736684, "grad_norm": 1.1493103504180908, "learning_rate": 1.972730622773615e-05, "loss": 0.665216326713562, "step": 212 }, { "epoch": 0.02584637786676374, "grad_norm": 1.3462070226669312, "learning_rate": 1.9724849527085125e-05, "loss": 0.547272264957428, "step": 213 }, { "epoch": 0.0259677223637908, "grad_norm": 0.8287025690078735, "learning_rate": 1.97223928264341e-05, "loss": 0.1717478632926941, "step": 214 }, { "epoch": 0.026089066860817862, "grad_norm": 1.4044225215911865, "learning_rate": 1.9719936125783074e-05, "loss": 0.5439479351043701, "step": 215 }, { "epoch": 0.026210411357844923, "grad_norm": 1.362339973449707, "learning_rate": 1.9717479425132048e-05, "loss": 0.38482236862182617, "step": 216 }, { "epoch": 0.02633175585487198, "grad_norm": 1.0870776176452637, "learning_rate": 1.9715022724481022e-05, "loss": 0.2305067777633667, "step": 217 }, { "epoch": 0.02645310035189904, "grad_norm": 1.8639475107192993, "learning_rate": 1.9712566023829997e-05, "loss": 0.7351633906364441, "step": 218 }, { "epoch": 0.026574444848926102, "grad_norm": 1.2605034112930298, "learning_rate": 1.971010932317897e-05, "loss": 0.3830586373806, "step": 219 }, { "epoch": 0.02669578934595316, "grad_norm": 1.146794080734253, "learning_rate": 1.9707652622527945e-05, "loss": 0.4568835198879242, "step": 220 }, { "epoch": 0.02681713384298022, "grad_norm": 1.3597760200500488, "learning_rate": 1.970519592187692e-05, "loss": 0.3955010175704956, "step": 221 }, { "epoch": 0.02693847834000728, "grad_norm": 1.0351749658584595, "learning_rate": 1.9702739221225894e-05, "loss": 0.24032047390937805, "step": 222 }, { "epoch": 0.027059822837034342, "grad_norm": 1.7738534212112427, "learning_rate": 1.9700282520574868e-05, "loss": 0.4054880142211914, "step": 223 }, { "epoch": 0.0271811673340614, "grad_norm": 1.0380606651306152, "learning_rate": 1.9697825819923842e-05, "loss": 0.3499588668346405, "step": 224 }, { "epoch": 0.02730251183108846, "grad_norm": 0.9284490942955017, "learning_rate": 1.9695369119272816e-05, "loss": 0.1194787323474884, "step": 225 }, { "epoch": 0.02742385632811552, "grad_norm": 1.1300830841064453, "learning_rate": 1.969291241862179e-05, "loss": 0.2504207193851471, "step": 226 }, { "epoch": 0.027545200825142578, "grad_norm": 1.2859063148498535, "learning_rate": 1.969045571797077e-05, "loss": 0.7922459840774536, "step": 227 }, { "epoch": 0.02766654532216964, "grad_norm": 0.8715733289718628, "learning_rate": 1.9687999017319743e-05, "loss": 0.157515287399292, "step": 228 }, { "epoch": 0.0277878898191967, "grad_norm": 0.7158194184303284, "learning_rate": 1.9685542316668717e-05, "loss": 0.0951254814863205, "step": 229 }, { "epoch": 0.02790923431622376, "grad_norm": 1.2676968574523926, "learning_rate": 1.968308561601769e-05, "loss": 0.3762540817260742, "step": 230 }, { "epoch": 0.028030578813250818, "grad_norm": 1.142686367034912, "learning_rate": 1.9680628915366665e-05, "loss": 0.4209720194339752, "step": 231 }, { "epoch": 0.02815192331027788, "grad_norm": 0.9411739706993103, "learning_rate": 1.967817221471564e-05, "loss": 0.19837401807308197, "step": 232 }, { "epoch": 0.02827326780730494, "grad_norm": 1.1360442638397217, "learning_rate": 1.9675715514064614e-05, "loss": 0.2968640923500061, "step": 233 }, { "epoch": 0.028394612304332, "grad_norm": 1.573232650756836, "learning_rate": 1.9673258813413588e-05, "loss": 0.45530423521995544, "step": 234 }, { "epoch": 0.028515956801359058, "grad_norm": 1.6560251712799072, "learning_rate": 1.9670802112762562e-05, "loss": 0.3799901604652405, "step": 235 }, { "epoch": 0.02863730129838612, "grad_norm": 0.7339390516281128, "learning_rate": 1.9668345412111537e-05, "loss": 0.3144574463367462, "step": 236 }, { "epoch": 0.02875864579541318, "grad_norm": 1.104168176651001, "learning_rate": 1.966588871146051e-05, "loss": 0.3688885569572449, "step": 237 }, { "epoch": 0.028879990292440236, "grad_norm": 0.6204283237457275, "learning_rate": 1.9663432010809485e-05, "loss": 0.07376033812761307, "step": 238 }, { "epoch": 0.029001334789467297, "grad_norm": 1.2197209596633911, "learning_rate": 1.966097531015846e-05, "loss": 0.6477288603782654, "step": 239 }, { "epoch": 0.029122679286494358, "grad_norm": 1.0721914768218994, "learning_rate": 1.9658518609507434e-05, "loss": 0.2640606164932251, "step": 240 }, { "epoch": 0.02924402378352142, "grad_norm": 0.9805510640144348, "learning_rate": 1.9656061908856408e-05, "loss": 0.4014909267425537, "step": 241 }, { "epoch": 0.029365368280548476, "grad_norm": 0.8440432548522949, "learning_rate": 1.9653605208205382e-05, "loss": 0.0517299547791481, "step": 242 }, { "epoch": 0.029486712777575537, "grad_norm": 1.2538248300552368, "learning_rate": 1.9651148507554356e-05, "loss": 0.46744751930236816, "step": 243 }, { "epoch": 0.029608057274602598, "grad_norm": 1.3388352394104004, "learning_rate": 1.964869180690333e-05, "loss": 0.6701927185058594, "step": 244 }, { "epoch": 0.029729401771629655, "grad_norm": 1.1601953506469727, "learning_rate": 1.9646235106252305e-05, "loss": 0.5082125067710876, "step": 245 }, { "epoch": 0.029850746268656716, "grad_norm": 1.445131778717041, "learning_rate": 1.964377840560128e-05, "loss": 0.5758916139602661, "step": 246 }, { "epoch": 0.029972090765683777, "grad_norm": 0.9608274698257446, "learning_rate": 1.9641321704950253e-05, "loss": 0.47571951150894165, "step": 247 }, { "epoch": 0.030093435262710837, "grad_norm": 1.0206998586654663, "learning_rate": 1.9638865004299228e-05, "loss": 0.16756445169448853, "step": 248 }, { "epoch": 0.030214779759737895, "grad_norm": 1.00753653049469, "learning_rate": 1.9636408303648202e-05, "loss": 0.2088935673236847, "step": 249 }, { "epoch": 0.030336124256764956, "grad_norm": 1.2297927141189575, "learning_rate": 1.9633951602997176e-05, "loss": 0.904977023601532, "step": 250 }, { "epoch": 0.030457468753792016, "grad_norm": 0.9755082726478577, "learning_rate": 1.963149490234615e-05, "loss": 0.15337421000003815, "step": 251 }, { "epoch": 0.030578813250819074, "grad_norm": 1.074012279510498, "learning_rate": 1.9629038201695125e-05, "loss": 0.6006253957748413, "step": 252 }, { "epoch": 0.030700157747846134, "grad_norm": 0.9824368953704834, "learning_rate": 1.96265815010441e-05, "loss": 0.379146933555603, "step": 253 }, { "epoch": 0.030821502244873195, "grad_norm": 1.5277190208435059, "learning_rate": 1.9624124800393073e-05, "loss": 0.6424251198768616, "step": 254 }, { "epoch": 0.030942846741900256, "grad_norm": 0.8885117769241333, "learning_rate": 1.9621668099742048e-05, "loss": 0.28905048966407776, "step": 255 }, { "epoch": 0.031064191238927313, "grad_norm": 0.633774995803833, "learning_rate": 1.9619211399091022e-05, "loss": 0.045177094638347626, "step": 256 }, { "epoch": 0.031185535735954374, "grad_norm": 1.156493902206421, "learning_rate": 1.9616754698439996e-05, "loss": 0.2632194757461548, "step": 257 }, { "epoch": 0.031306880232981435, "grad_norm": 1.1284003257751465, "learning_rate": 1.961429799778897e-05, "loss": 0.3057049512863159, "step": 258 }, { "epoch": 0.031428224730008496, "grad_norm": 0.7986247539520264, "learning_rate": 1.9611841297137945e-05, "loss": 0.20919805765151978, "step": 259 }, { "epoch": 0.03154956922703556, "grad_norm": 0.9055368304252625, "learning_rate": 1.960938459648692e-05, "loss": 0.39165881276130676, "step": 260 }, { "epoch": 0.03167091372406261, "grad_norm": 1.911102056503296, "learning_rate": 1.9606927895835893e-05, "loss": 0.45982038974761963, "step": 261 }, { "epoch": 0.03179225822108967, "grad_norm": 0.8879384398460388, "learning_rate": 1.9604471195184867e-05, "loss": 0.29178065061569214, "step": 262 }, { "epoch": 0.03191360271811673, "grad_norm": 1.4164674282073975, "learning_rate": 1.960201449453384e-05, "loss": 0.16232866048812866, "step": 263 }, { "epoch": 0.03203494721514379, "grad_norm": 1.9253220558166504, "learning_rate": 1.9599557793882816e-05, "loss": 0.42316365242004395, "step": 264 }, { "epoch": 0.032156291712170854, "grad_norm": 0.7398253083229065, "learning_rate": 1.959710109323179e-05, "loss": 0.23177435994148254, "step": 265 }, { "epoch": 0.032277636209197914, "grad_norm": 1.0191659927368164, "learning_rate": 1.9594644392580768e-05, "loss": 0.26261794567108154, "step": 266 }, { "epoch": 0.032398980706224975, "grad_norm": 1.2973541021347046, "learning_rate": 1.9592187691929742e-05, "loss": 0.3316439986228943, "step": 267 }, { "epoch": 0.032520325203252036, "grad_norm": 1.374732255935669, "learning_rate": 1.9589730991278716e-05, "loss": 0.4202871322631836, "step": 268 }, { "epoch": 0.03264166970027909, "grad_norm": 1.5805230140686035, "learning_rate": 1.958727429062769e-05, "loss": 0.5804644823074341, "step": 269 }, { "epoch": 0.03276301419730615, "grad_norm": 0.008758433163166046, "learning_rate": 1.9584817589976665e-05, "loss": 0.00015381905541289598, "step": 270 }, { "epoch": 0.03288435869433321, "grad_norm": 0.9753175377845764, "learning_rate": 1.958236088932564e-05, "loss": 0.2967366576194763, "step": 271 }, { "epoch": 0.03300570319136027, "grad_norm": 1.6118595600128174, "learning_rate": 1.9579904188674613e-05, "loss": 0.33840176463127136, "step": 272 }, { "epoch": 0.03312704768838733, "grad_norm": 1.6024789810180664, "learning_rate": 1.9577447488023588e-05, "loss": 0.6967238187789917, "step": 273 }, { "epoch": 0.033248392185414394, "grad_norm": 1.214158535003662, "learning_rate": 1.9574990787372562e-05, "loss": 0.5965083837509155, "step": 274 }, { "epoch": 0.033369736682441455, "grad_norm": 1.519809603691101, "learning_rate": 1.9572534086721536e-05, "loss": 0.4016053080558777, "step": 275 }, { "epoch": 0.03349108117946851, "grad_norm": 1.6006823778152466, "learning_rate": 1.957007738607051e-05, "loss": 0.39769408106803894, "step": 276 }, { "epoch": 0.03361242567649557, "grad_norm": 0.9429724216461182, "learning_rate": 1.9567620685419485e-05, "loss": 0.20001475512981415, "step": 277 }, { "epoch": 0.03373377017352263, "grad_norm": 1.666814923286438, "learning_rate": 1.956516398476846e-05, "loss": 0.34259510040283203, "step": 278 }, { "epoch": 0.03385511467054969, "grad_norm": 0.3574555814266205, "learning_rate": 1.9562707284117433e-05, "loss": 0.017914820462465286, "step": 279 }, { "epoch": 0.03397645916757675, "grad_norm": 1.5245598554611206, "learning_rate": 1.9560250583466407e-05, "loss": 0.5376090407371521, "step": 280 }, { "epoch": 0.03409780366460381, "grad_norm": 1.0760517120361328, "learning_rate": 1.955779388281538e-05, "loss": 0.2445524036884308, "step": 281 }, { "epoch": 0.03421914816163087, "grad_norm": 1.4953891038894653, "learning_rate": 1.9555337182164356e-05, "loss": 0.5643976330757141, "step": 282 }, { "epoch": 0.03434049265865793, "grad_norm": 1.616189956665039, "learning_rate": 1.955288048151333e-05, "loss": 0.2937336564064026, "step": 283 }, { "epoch": 0.03446183715568499, "grad_norm": 0.9644538164138794, "learning_rate": 1.9550423780862304e-05, "loss": 0.125191330909729, "step": 284 }, { "epoch": 0.03458318165271205, "grad_norm": 1.649443507194519, "learning_rate": 1.954796708021128e-05, "loss": 0.5455986857414246, "step": 285 }, { "epoch": 0.03470452614973911, "grad_norm": 1.6259334087371826, "learning_rate": 1.9545510379560253e-05, "loss": 0.42649298906326294, "step": 286 }, { "epoch": 0.03482587064676617, "grad_norm": 1.2836439609527588, "learning_rate": 1.9543053678909227e-05, "loss": 0.5799545645713806, "step": 287 }, { "epoch": 0.03494721514379323, "grad_norm": 0.9867308735847473, "learning_rate": 1.95405969782582e-05, "loss": 0.39529839158058167, "step": 288 }, { "epoch": 0.03506855964082029, "grad_norm": 1.5423862934112549, "learning_rate": 1.9538140277607176e-05, "loss": 0.35383814573287964, "step": 289 }, { "epoch": 0.035189904137847346, "grad_norm": 1.2744529247283936, "learning_rate": 1.953568357695615e-05, "loss": 0.22410523891448975, "step": 290 }, { "epoch": 0.035311248634874406, "grad_norm": 1.787878394126892, "learning_rate": 1.9533226876305124e-05, "loss": 0.6131466031074524, "step": 291 }, { "epoch": 0.03543259313190147, "grad_norm": 1.1000661849975586, "learning_rate": 1.95307701756541e-05, "loss": 0.3855549097061157, "step": 292 }, { "epoch": 0.03555393762892853, "grad_norm": 1.2115724086761475, "learning_rate": 1.9528313475003073e-05, "loss": 0.4426603317260742, "step": 293 }, { "epoch": 0.03567528212595559, "grad_norm": 1.5347511768341064, "learning_rate": 1.9525856774352047e-05, "loss": 0.4702164828777313, "step": 294 }, { "epoch": 0.03579662662298265, "grad_norm": 4.0347466468811035, "learning_rate": 1.952340007370102e-05, "loss": 0.2603279948234558, "step": 295 }, { "epoch": 0.03591797112000971, "grad_norm": 1.5106102228164673, "learning_rate": 1.9520943373049996e-05, "loss": 0.5465511083602905, "step": 296 }, { "epoch": 0.036039315617036764, "grad_norm": 1.0082039833068848, "learning_rate": 1.951848667239897e-05, "loss": 0.05971769616007805, "step": 297 }, { "epoch": 0.036160660114063825, "grad_norm": 1.33002769947052, "learning_rate": 1.9516029971747944e-05, "loss": 0.44476959109306335, "step": 298 }, { "epoch": 0.036282004611090886, "grad_norm": 1.1616014242172241, "learning_rate": 1.951357327109692e-05, "loss": 0.29045650362968445, "step": 299 }, { "epoch": 0.03640334910811795, "grad_norm": 1.1139581203460693, "learning_rate": 1.9511116570445893e-05, "loss": 0.1310575008392334, "step": 300 }, { "epoch": 0.03652469360514501, "grad_norm": 1.2292546033859253, "learning_rate": 1.9508659869794867e-05, "loss": 0.2641526162624359, "step": 301 }, { "epoch": 0.03664603810217207, "grad_norm": 0.9400305151939392, "learning_rate": 1.950620316914384e-05, "loss": 0.19729545712471008, "step": 302 }, { "epoch": 0.03676738259919913, "grad_norm": 1.266480565071106, "learning_rate": 1.9503746468492815e-05, "loss": 0.44374752044677734, "step": 303 }, { "epoch": 0.03688872709622618, "grad_norm": 0.6891433596611023, "learning_rate": 1.950128976784179e-05, "loss": 0.055492326617240906, "step": 304 }, { "epoch": 0.037010071593253244, "grad_norm": 1.0153636932373047, "learning_rate": 1.9498833067190764e-05, "loss": 0.16541236639022827, "step": 305 }, { "epoch": 0.037131416090280304, "grad_norm": 1.5183488130569458, "learning_rate": 1.9496376366539738e-05, "loss": 0.1984182596206665, "step": 306 }, { "epoch": 0.037252760587307365, "grad_norm": 0.5728133320808411, "learning_rate": 1.9493919665888712e-05, "loss": 0.04827238991856575, "step": 307 }, { "epoch": 0.037374105084334426, "grad_norm": 1.2158844470977783, "learning_rate": 1.9491462965237687e-05, "loss": 0.20424340665340424, "step": 308 }, { "epoch": 0.03749544958136149, "grad_norm": 1.7594895362854004, "learning_rate": 1.948900626458666e-05, "loss": 0.3363400995731354, "step": 309 }, { "epoch": 0.03761679407838855, "grad_norm": 1.7387893199920654, "learning_rate": 1.9486549563935635e-05, "loss": 0.5342078804969788, "step": 310 }, { "epoch": 0.0377381385754156, "grad_norm": 0.7087361812591553, "learning_rate": 1.948409286328461e-05, "loss": 0.054663654416799545, "step": 311 }, { "epoch": 0.03785948307244266, "grad_norm": 1.1876829862594604, "learning_rate": 1.9481636162633584e-05, "loss": 0.2173326164484024, "step": 312 }, { "epoch": 0.03798082756946972, "grad_norm": 1.8285133838653564, "learning_rate": 1.9479179461982558e-05, "loss": 0.2650899589061737, "step": 313 }, { "epoch": 0.038102172066496784, "grad_norm": 1.2233879566192627, "learning_rate": 1.9476722761331532e-05, "loss": 0.36404329538345337, "step": 314 }, { "epoch": 0.038223516563523845, "grad_norm": 1.1021192073822021, "learning_rate": 1.9474266060680506e-05, "loss": 0.5485677123069763, "step": 315 }, { "epoch": 0.038344861060550905, "grad_norm": 1.15021812915802, "learning_rate": 1.947180936002948e-05, "loss": 0.39669936895370483, "step": 316 }, { "epoch": 0.038466205557577966, "grad_norm": 1.1393232345581055, "learning_rate": 1.9469352659378455e-05, "loss": 0.2991783618927002, "step": 317 }, { "epoch": 0.03858755005460503, "grad_norm": 0.939648449420929, "learning_rate": 1.946689595872743e-05, "loss": 0.15160952508449554, "step": 318 }, { "epoch": 0.03870889455163208, "grad_norm": 1.3451627492904663, "learning_rate": 1.9464439258076403e-05, "loss": 0.5237815380096436, "step": 319 }, { "epoch": 0.03883023904865914, "grad_norm": 1.3838374614715576, "learning_rate": 1.9461982557425378e-05, "loss": 0.19870781898498535, "step": 320 }, { "epoch": 0.0389515835456862, "grad_norm": 1.3146677017211914, "learning_rate": 1.9459525856774352e-05, "loss": 0.33396434783935547, "step": 321 }, { "epoch": 0.03907292804271326, "grad_norm": 0.8067727088928223, "learning_rate": 1.9457069156123326e-05, "loss": 0.49019211530685425, "step": 322 }, { "epoch": 0.039194272539740324, "grad_norm": 1.2528276443481445, "learning_rate": 1.94546124554723e-05, "loss": 0.2535683810710907, "step": 323 }, { "epoch": 0.039315617036767385, "grad_norm": 1.3783961534500122, "learning_rate": 1.9452155754821275e-05, "loss": 0.5087883472442627, "step": 324 }, { "epoch": 0.039436961533794446, "grad_norm": 1.6668391227722168, "learning_rate": 1.944969905417025e-05, "loss": 0.5472003817558289, "step": 325 }, { "epoch": 0.0395583060308215, "grad_norm": 1.0295048952102661, "learning_rate": 1.9447242353519223e-05, "loss": 0.40367716550827026, "step": 326 }, { "epoch": 0.03967965052784856, "grad_norm": 1.3721429109573364, "learning_rate": 1.9444785652868198e-05, "loss": 0.6061164736747742, "step": 327 }, { "epoch": 0.03980099502487562, "grad_norm": 1.2872403860092163, "learning_rate": 1.9442328952217172e-05, "loss": 0.23954078555107117, "step": 328 }, { "epoch": 0.03992233952190268, "grad_norm": 1.9958724975585938, "learning_rate": 1.9439872251566146e-05, "loss": 0.6404703855514526, "step": 329 }, { "epoch": 0.04004368401892974, "grad_norm": 1.2831591367721558, "learning_rate": 1.943741555091512e-05, "loss": 0.5384237766265869, "step": 330 }, { "epoch": 0.040165028515956804, "grad_norm": 0.716844379901886, "learning_rate": 1.9434958850264095e-05, "loss": 0.0433930829167366, "step": 331 }, { "epoch": 0.040286373012983864, "grad_norm": 1.186511754989624, "learning_rate": 1.9432502149613072e-05, "loss": 0.5770034193992615, "step": 332 }, { "epoch": 0.04040771751001092, "grad_norm": 1.5067001581192017, "learning_rate": 1.9430045448962046e-05, "loss": 0.4589945673942566, "step": 333 }, { "epoch": 0.04052906200703798, "grad_norm": 0.7389973402023315, "learning_rate": 1.942758874831102e-05, "loss": 0.17588192224502563, "step": 334 }, { "epoch": 0.04065040650406504, "grad_norm": 1.0256229639053345, "learning_rate": 1.9425132047659995e-05, "loss": 0.27561086416244507, "step": 335 }, { "epoch": 0.0407717510010921, "grad_norm": 1.3332164287567139, "learning_rate": 1.942267534700897e-05, "loss": 0.681479275226593, "step": 336 }, { "epoch": 0.04089309549811916, "grad_norm": 1.355659008026123, "learning_rate": 1.9420218646357944e-05, "loss": 0.2441406548023224, "step": 337 }, { "epoch": 0.04101443999514622, "grad_norm": 0.6369838714599609, "learning_rate": 1.9417761945706918e-05, "loss": 0.13781212270259857, "step": 338 }, { "epoch": 0.04113578449217328, "grad_norm": 0.9141871333122253, "learning_rate": 1.9415305245055892e-05, "loss": 0.41506925225257874, "step": 339 }, { "epoch": 0.04125712898920034, "grad_norm": 0.7715675830841064, "learning_rate": 1.9412848544404866e-05, "loss": 0.2426312416791916, "step": 340 }, { "epoch": 0.0413784734862274, "grad_norm": 1.2251585721969604, "learning_rate": 1.941039184375384e-05, "loss": 0.5912874937057495, "step": 341 }, { "epoch": 0.04149981798325446, "grad_norm": 1.3260804414749146, "learning_rate": 1.9407935143102815e-05, "loss": 0.40357863903045654, "step": 342 }, { "epoch": 0.04162116248028152, "grad_norm": 1.37641179561615, "learning_rate": 1.940547844245179e-05, "loss": 0.4066475033760071, "step": 343 }, { "epoch": 0.04174250697730858, "grad_norm": 1.1889725923538208, "learning_rate": 1.9403021741800763e-05, "loss": 0.5459408760070801, "step": 344 }, { "epoch": 0.04186385147433564, "grad_norm": 1.1155644655227661, "learning_rate": 1.9400565041149738e-05, "loss": 0.41826558113098145, "step": 345 }, { "epoch": 0.0419851959713627, "grad_norm": 1.9572199583053589, "learning_rate": 1.9398108340498712e-05, "loss": 0.505934476852417, "step": 346 }, { "epoch": 0.042106540468389755, "grad_norm": 1.1355222463607788, "learning_rate": 1.9395651639847686e-05, "loss": 0.3404514193534851, "step": 347 }, { "epoch": 0.042227884965416816, "grad_norm": 0.9914278388023376, "learning_rate": 1.939319493919666e-05, "loss": 0.3612911105155945, "step": 348 }, { "epoch": 0.04234922946244388, "grad_norm": 1.6539616584777832, "learning_rate": 1.9390738238545635e-05, "loss": 0.31920090317726135, "step": 349 }, { "epoch": 0.04247057395947094, "grad_norm": 1.2495417594909668, "learning_rate": 1.938828153789461e-05, "loss": 0.2993244230747223, "step": 350 }, { "epoch": 0.042591918456498, "grad_norm": 1.154098629951477, "learning_rate": 1.9385824837243583e-05, "loss": 0.27929073572158813, "step": 351 }, { "epoch": 0.04271326295352506, "grad_norm": 1.6769806146621704, "learning_rate": 1.9383368136592557e-05, "loss": 0.31991660594940186, "step": 352 }, { "epoch": 0.04283460745055212, "grad_norm": 1.692519187927246, "learning_rate": 1.938091143594153e-05, "loss": 0.6428519487380981, "step": 353 }, { "epoch": 0.042955951947579174, "grad_norm": 1.7793716192245483, "learning_rate": 1.9378454735290506e-05, "loss": 0.2733398973941803, "step": 354 }, { "epoch": 0.043077296444606235, "grad_norm": 1.1902222633361816, "learning_rate": 1.937599803463948e-05, "loss": 0.20086917281150818, "step": 355 }, { "epoch": 0.043198640941633296, "grad_norm": 0.530981183052063, "learning_rate": 1.9373541333988454e-05, "loss": 0.03699813410639763, "step": 356 }, { "epoch": 0.043319985438660356, "grad_norm": 1.1668962240219116, "learning_rate": 1.937108463333743e-05, "loss": 0.6546061635017395, "step": 357 }, { "epoch": 0.04344132993568742, "grad_norm": 1.5194002389907837, "learning_rate": 1.9368627932686403e-05, "loss": 0.3529921770095825, "step": 358 }, { "epoch": 0.04356267443271448, "grad_norm": 1.5513792037963867, "learning_rate": 1.9366171232035377e-05, "loss": 0.3908025324344635, "step": 359 }, { "epoch": 0.04368401892974154, "grad_norm": 1.0645341873168945, "learning_rate": 1.936371453138435e-05, "loss": 0.25461000204086304, "step": 360 }, { "epoch": 0.04380536342676859, "grad_norm": 1.077723503112793, "learning_rate": 1.9361257830733326e-05, "loss": 0.5458009243011475, "step": 361 }, { "epoch": 0.04392670792379565, "grad_norm": 1.0532881021499634, "learning_rate": 1.93588011300823e-05, "loss": 0.31512653827667236, "step": 362 }, { "epoch": 0.044048052420822714, "grad_norm": 1.0603703260421753, "learning_rate": 1.9356344429431274e-05, "loss": 0.3389180302619934, "step": 363 }, { "epoch": 0.044169396917849775, "grad_norm": 1.0187443494796753, "learning_rate": 1.935388772878025e-05, "loss": 0.5617249011993408, "step": 364 }, { "epoch": 0.044290741414876836, "grad_norm": 1.0340298414230347, "learning_rate": 1.9351431028129223e-05, "loss": 0.22403107583522797, "step": 365 }, { "epoch": 0.0444120859119039, "grad_norm": 1.4951913356781006, "learning_rate": 1.9348974327478197e-05, "loss": 0.31615158915519714, "step": 366 }, { "epoch": 0.04453343040893096, "grad_norm": 1.050989031791687, "learning_rate": 1.934651762682717e-05, "loss": 0.46508675813674927, "step": 367 }, { "epoch": 0.04465477490595802, "grad_norm": 1.4088681936264038, "learning_rate": 1.9344060926176146e-05, "loss": 0.6532362103462219, "step": 368 }, { "epoch": 0.04477611940298507, "grad_norm": 0.6320997476577759, "learning_rate": 1.934160422552512e-05, "loss": 0.2274395227432251, "step": 369 }, { "epoch": 0.04489746390001213, "grad_norm": 1.3754754066467285, "learning_rate": 1.9339147524874094e-05, "loss": 0.7371792793273926, "step": 370 }, { "epoch": 0.045018808397039194, "grad_norm": 0.7076073884963989, "learning_rate": 1.9336690824223068e-05, "loss": 0.06037794053554535, "step": 371 }, { "epoch": 0.045140152894066254, "grad_norm": 0.9627287983894348, "learning_rate": 1.9334234123572046e-05, "loss": 0.33090823888778687, "step": 372 }, { "epoch": 0.045261497391093315, "grad_norm": 1.2749892473220825, "learning_rate": 1.933177742292102e-05, "loss": 0.17351853847503662, "step": 373 }, { "epoch": 0.045382841888120376, "grad_norm": 1.4868316650390625, "learning_rate": 1.9329320722269994e-05, "loss": 0.4319830536842346, "step": 374 }, { "epoch": 0.04550418638514744, "grad_norm": 1.2494465112686157, "learning_rate": 1.932686402161897e-05, "loss": 0.26532214879989624, "step": 375 }, { "epoch": 0.04562553088217449, "grad_norm": 1.0503991842269897, "learning_rate": 1.9324407320967943e-05, "loss": 0.2953014671802521, "step": 376 }, { "epoch": 0.04574687537920155, "grad_norm": 1.8432469367980957, "learning_rate": 1.9321950620316917e-05, "loss": 0.4219951629638672, "step": 377 }, { "epoch": 0.04586821987622861, "grad_norm": 1.6262578964233398, "learning_rate": 1.931949391966589e-05, "loss": 0.2898225784301758, "step": 378 }, { "epoch": 0.04598956437325567, "grad_norm": 0.6830425262451172, "learning_rate": 1.9317037219014866e-05, "loss": 0.07195363193750381, "step": 379 }, { "epoch": 0.046110908870282734, "grad_norm": 2.3727402687072754, "learning_rate": 1.931458051836384e-05, "loss": 0.5569823980331421, "step": 380 }, { "epoch": 0.046232253367309795, "grad_norm": 1.520284652709961, "learning_rate": 1.9312123817712814e-05, "loss": 0.5095877647399902, "step": 381 }, { "epoch": 0.046353597864336855, "grad_norm": 1.143632411956787, "learning_rate": 1.930966711706179e-05, "loss": 0.2604987621307373, "step": 382 }, { "epoch": 0.04647494236136391, "grad_norm": 1.01264226436615, "learning_rate": 1.9307210416410763e-05, "loss": 0.3068877160549164, "step": 383 }, { "epoch": 0.04659628685839097, "grad_norm": 1.8846100568771362, "learning_rate": 1.9304753715759737e-05, "loss": 0.3760281503200531, "step": 384 }, { "epoch": 0.04671763135541803, "grad_norm": 1.457566261291504, "learning_rate": 1.930229701510871e-05, "loss": 0.36060136556625366, "step": 385 }, { "epoch": 0.04683897585244509, "grad_norm": 1.2625572681427002, "learning_rate": 1.9299840314457686e-05, "loss": 0.35702618956565857, "step": 386 }, { "epoch": 0.04696032034947215, "grad_norm": 1.4238474369049072, "learning_rate": 1.929738361380666e-05, "loss": 0.4716323912143707, "step": 387 }, { "epoch": 0.04708166484649921, "grad_norm": 1.07540762424469, "learning_rate": 1.9294926913155634e-05, "loss": 0.3101731538772583, "step": 388 }, { "epoch": 0.047203009343526274, "grad_norm": 0.9744298458099365, "learning_rate": 1.929247021250461e-05, "loss": 0.12369873374700546, "step": 389 }, { "epoch": 0.04732435384055333, "grad_norm": 0.677290678024292, "learning_rate": 1.9290013511853583e-05, "loss": 0.13228675723075867, "step": 390 }, { "epoch": 0.04744569833758039, "grad_norm": 1.0706666707992554, "learning_rate": 1.9287556811202557e-05, "loss": 0.171414315700531, "step": 391 }, { "epoch": 0.04756704283460745, "grad_norm": 1.5353080034255981, "learning_rate": 1.928510011055153e-05, "loss": 0.24053412675857544, "step": 392 }, { "epoch": 0.04768838733163451, "grad_norm": 2.8229808807373047, "learning_rate": 1.9282643409900505e-05, "loss": 0.4611489176750183, "step": 393 }, { "epoch": 0.04780973182866157, "grad_norm": 1.421183466911316, "learning_rate": 1.928018670924948e-05, "loss": 0.26332008838653564, "step": 394 }, { "epoch": 0.04793107632568863, "grad_norm": 1.5224473476409912, "learning_rate": 1.9277730008598454e-05, "loss": 0.6890032291412354, "step": 395 }, { "epoch": 0.04805242082271569, "grad_norm": 1.2183220386505127, "learning_rate": 1.9275273307947428e-05, "loss": 0.24116888642311096, "step": 396 }, { "epoch": 0.048173765319742747, "grad_norm": 1.4274941682815552, "learning_rate": 1.9272816607296402e-05, "loss": 0.686172604560852, "step": 397 }, { "epoch": 0.04829510981676981, "grad_norm": 1.118133306503296, "learning_rate": 1.9270359906645377e-05, "loss": 0.2652670443058014, "step": 398 }, { "epoch": 0.04841645431379687, "grad_norm": 1.4742660522460938, "learning_rate": 1.926790320599435e-05, "loss": 0.4737281799316406, "step": 399 }, { "epoch": 0.04853779881082393, "grad_norm": 1.1788684129714966, "learning_rate": 1.9265446505343325e-05, "loss": 0.21296805143356323, "step": 400 }, { "epoch": 0.04865914330785099, "grad_norm": 1.2983324527740479, "learning_rate": 1.92629898046923e-05, "loss": 0.16385263204574585, "step": 401 }, { "epoch": 0.04878048780487805, "grad_norm": 0.004901287145912647, "learning_rate": 1.9260533104041274e-05, "loss": 9.251898882212117e-05, "step": 402 }, { "epoch": 0.04890183230190511, "grad_norm": 1.0808979272842407, "learning_rate": 1.9258076403390248e-05, "loss": 0.28296127915382385, "step": 403 }, { "epoch": 0.049023176798932165, "grad_norm": 1.846316933631897, "learning_rate": 1.9255619702739222e-05, "loss": 0.3567678928375244, "step": 404 }, { "epoch": 0.049144521295959226, "grad_norm": 1.4769459962844849, "learning_rate": 1.9253163002088196e-05, "loss": 0.6017166972160339, "step": 405 }, { "epoch": 0.04926586579298629, "grad_norm": 1.3384523391723633, "learning_rate": 1.925070630143717e-05, "loss": 0.3126193881034851, "step": 406 }, { "epoch": 0.04938721029001335, "grad_norm": 1.2163547277450562, "learning_rate": 1.9248249600786145e-05, "loss": 0.25492674112319946, "step": 407 }, { "epoch": 0.04950855478704041, "grad_norm": 1.340456247329712, "learning_rate": 1.924579290013512e-05, "loss": 0.6409405469894409, "step": 408 }, { "epoch": 0.04962989928406747, "grad_norm": 1.3869006633758545, "learning_rate": 1.9243336199484093e-05, "loss": 0.3362444043159485, "step": 409 }, { "epoch": 0.04975124378109453, "grad_norm": 1.314749002456665, "learning_rate": 1.9240879498833068e-05, "loss": 0.5342424511909485, "step": 410 }, { "epoch": 0.049872588278121584, "grad_norm": 2.0399301052093506, "learning_rate": 1.9238422798182045e-05, "loss": 0.19150543212890625, "step": 411 }, { "epoch": 0.049993932775148645, "grad_norm": 1.3778923749923706, "learning_rate": 1.923596609753102e-05, "loss": 0.40402331948280334, "step": 412 }, { "epoch": 0.050115277272175705, "grad_norm": 1.1802595853805542, "learning_rate": 1.9233509396879994e-05, "loss": 0.45945459604263306, "step": 413 }, { "epoch": 0.050236621769202766, "grad_norm": 1.1052002906799316, "learning_rate": 1.9231052696228968e-05, "loss": 0.08993005007505417, "step": 414 }, { "epoch": 0.05035796626622983, "grad_norm": 1.3626540899276733, "learning_rate": 1.9228595995577942e-05, "loss": 0.6543693542480469, "step": 415 }, { "epoch": 0.05047931076325689, "grad_norm": 1.4075640439987183, "learning_rate": 1.9226139294926917e-05, "loss": 0.24491381645202637, "step": 416 }, { "epoch": 0.05060065526028395, "grad_norm": 1.5025800466537476, "learning_rate": 1.922368259427589e-05, "loss": 0.5007286071777344, "step": 417 }, { "epoch": 0.05072199975731101, "grad_norm": 2.165350914001465, "learning_rate": 1.9221225893624865e-05, "loss": 0.5292732119560242, "step": 418 }, { "epoch": 0.05084334425433806, "grad_norm": 1.0646313428878784, "learning_rate": 1.921876919297384e-05, "loss": 0.31505468487739563, "step": 419 }, { "epoch": 0.050964688751365124, "grad_norm": 1.194830298423767, "learning_rate": 1.9216312492322814e-05, "loss": 0.2772429883480072, "step": 420 }, { "epoch": 0.051086033248392185, "grad_norm": 1.535117745399475, "learning_rate": 1.9213855791671788e-05, "loss": 0.4633476734161377, "step": 421 }, { "epoch": 0.051207377745419246, "grad_norm": 1.5908533334732056, "learning_rate": 1.9211399091020762e-05, "loss": 0.6686984300613403, "step": 422 }, { "epoch": 0.051328722242446306, "grad_norm": 1.5840030908584595, "learning_rate": 1.9208942390369736e-05, "loss": 0.34995588660240173, "step": 423 }, { "epoch": 0.05145006673947337, "grad_norm": 1.0218498706817627, "learning_rate": 1.920648568971871e-05, "loss": 0.16546282172203064, "step": 424 }, { "epoch": 0.05157141123650043, "grad_norm": 1.6098021268844604, "learning_rate": 1.9204028989067685e-05, "loss": 0.5376899242401123, "step": 425 }, { "epoch": 0.05169275573352748, "grad_norm": 1.1683999300003052, "learning_rate": 1.920157228841666e-05, "loss": 0.31663447618484497, "step": 426 }, { "epoch": 0.05181410023055454, "grad_norm": 0.8926824927330017, "learning_rate": 1.9199115587765634e-05, "loss": 0.08985061198472977, "step": 427 }, { "epoch": 0.0519354447275816, "grad_norm": 0.5471736192703247, "learning_rate": 1.9196658887114608e-05, "loss": 0.05214562639594078, "step": 428 }, { "epoch": 0.052056789224608664, "grad_norm": 1.3294402360916138, "learning_rate": 1.9194202186463582e-05, "loss": 0.2623544931411743, "step": 429 }, { "epoch": 0.052178133721635725, "grad_norm": 1.1779325008392334, "learning_rate": 1.9191745485812556e-05, "loss": 0.2422659695148468, "step": 430 }, { "epoch": 0.052299478218662786, "grad_norm": 1.0145118236541748, "learning_rate": 1.918928878516153e-05, "loss": 0.490875780582428, "step": 431 }, { "epoch": 0.052420822715689847, "grad_norm": 1.2716591358184814, "learning_rate": 1.9186832084510505e-05, "loss": 0.39811912178993225, "step": 432 }, { "epoch": 0.0525421672127169, "grad_norm": 0.8617557287216187, "learning_rate": 1.918437538385948e-05, "loss": 0.1596330851316452, "step": 433 }, { "epoch": 0.05266351170974396, "grad_norm": 1.282023310661316, "learning_rate": 1.9181918683208453e-05, "loss": 0.1750062257051468, "step": 434 }, { "epoch": 0.05278485620677102, "grad_norm": 1.3805968761444092, "learning_rate": 1.9179461982557428e-05, "loss": 0.6116123795509338, "step": 435 }, { "epoch": 0.05290620070379808, "grad_norm": 1.3073794841766357, "learning_rate": 1.9177005281906402e-05, "loss": 0.40776562690734863, "step": 436 }, { "epoch": 0.053027545200825144, "grad_norm": 1.3929219245910645, "learning_rate": 1.9174548581255376e-05, "loss": 0.2514992952346802, "step": 437 }, { "epoch": 0.053148889697852204, "grad_norm": 1.6806739568710327, "learning_rate": 1.917209188060435e-05, "loss": 0.4892512559890747, "step": 438 }, { "epoch": 0.053270234194879265, "grad_norm": 1.1705715656280518, "learning_rate": 1.9169635179953325e-05, "loss": 0.44782862067222595, "step": 439 }, { "epoch": 0.05339157869190632, "grad_norm": 1.4459282159805298, "learning_rate": 1.91671784793023e-05, "loss": 0.2908768951892853, "step": 440 }, { "epoch": 0.05351292318893338, "grad_norm": 1.9081748723983765, "learning_rate": 1.9164721778651273e-05, "loss": 0.5691676139831543, "step": 441 }, { "epoch": 0.05363426768596044, "grad_norm": 1.0076907873153687, "learning_rate": 1.9162265078000247e-05, "loss": 0.2442394495010376, "step": 442 }, { "epoch": 0.0537556121829875, "grad_norm": 1.482526421546936, "learning_rate": 1.915980837734922e-05, "loss": 0.07226604223251343, "step": 443 }, { "epoch": 0.05387695668001456, "grad_norm": 1.7005579471588135, "learning_rate": 1.9157351676698196e-05, "loss": 0.6055192351341248, "step": 444 }, { "epoch": 0.05399830117704162, "grad_norm": 1.4745745658874512, "learning_rate": 1.915489497604717e-05, "loss": 0.7092527151107788, "step": 445 }, { "epoch": 0.054119645674068684, "grad_norm": 1.3547440767288208, "learning_rate": 1.9152438275396144e-05, "loss": 0.596627414226532, "step": 446 }, { "epoch": 0.05424099017109574, "grad_norm": 1.1186338663101196, "learning_rate": 1.914998157474512e-05, "loss": 0.1992674469947815, "step": 447 }, { "epoch": 0.0543623346681228, "grad_norm": 0.933089017868042, "learning_rate": 1.9147524874094093e-05, "loss": 0.11702962219715118, "step": 448 }, { "epoch": 0.05448367916514986, "grad_norm": 1.5945713520050049, "learning_rate": 1.9145068173443067e-05, "loss": 0.5891073942184448, "step": 449 }, { "epoch": 0.05460502366217692, "grad_norm": 1.8687334060668945, "learning_rate": 1.914261147279204e-05, "loss": 0.5784303545951843, "step": 450 }, { "epoch": 0.05472636815920398, "grad_norm": 1.6527996063232422, "learning_rate": 1.9140154772141016e-05, "loss": 0.5086016654968262, "step": 451 }, { "epoch": 0.05484771265623104, "grad_norm": 1.261507272720337, "learning_rate": 1.913769807148999e-05, "loss": 0.5271745324134827, "step": 452 }, { "epoch": 0.0549690571532581, "grad_norm": 1.0135947465896606, "learning_rate": 1.9135241370838964e-05, "loss": 0.5260915160179138, "step": 453 }, { "epoch": 0.055090401650285156, "grad_norm": 1.6660957336425781, "learning_rate": 1.913278467018794e-05, "loss": 0.3362421989440918, "step": 454 }, { "epoch": 0.05521174614731222, "grad_norm": 1.1020255088806152, "learning_rate": 1.9130327969536913e-05, "loss": 0.09704497456550598, "step": 455 }, { "epoch": 0.05533309064433928, "grad_norm": 2.270763874053955, "learning_rate": 1.9127871268885887e-05, "loss": 0.6879133582115173, "step": 456 }, { "epoch": 0.05545443514136634, "grad_norm": 1.3732212781906128, "learning_rate": 1.912541456823486e-05, "loss": 0.3480108976364136, "step": 457 }, { "epoch": 0.0555757796383934, "grad_norm": 1.5962785482406616, "learning_rate": 1.9122957867583836e-05, "loss": 0.31269481778144836, "step": 458 }, { "epoch": 0.05569712413542046, "grad_norm": 1.1583328247070312, "learning_rate": 1.912050116693281e-05, "loss": 0.22761228680610657, "step": 459 }, { "epoch": 0.05581846863244752, "grad_norm": 0.7482461333274841, "learning_rate": 1.9118044466281784e-05, "loss": 0.062126971781253815, "step": 460 }, { "epoch": 0.05593981312947458, "grad_norm": 1.0668491125106812, "learning_rate": 1.9115587765630758e-05, "loss": 0.7171897888183594, "step": 461 }, { "epoch": 0.056061157626501636, "grad_norm": 1.107728362083435, "learning_rate": 1.9113131064979733e-05, "loss": 0.24725040793418884, "step": 462 }, { "epoch": 0.056182502123528696, "grad_norm": 1.5959255695343018, "learning_rate": 1.9110674364328707e-05, "loss": 0.6176046133041382, "step": 463 }, { "epoch": 0.05630384662055576, "grad_norm": 0.48865923285484314, "learning_rate": 1.910821766367768e-05, "loss": 0.10571181029081345, "step": 464 }, { "epoch": 0.05642519111758282, "grad_norm": 0.04387793317437172, "learning_rate": 1.9105760963026655e-05, "loss": 0.001076234970241785, "step": 465 }, { "epoch": 0.05654653561460988, "grad_norm": 1.5911579132080078, "learning_rate": 1.910330426237563e-05, "loss": 0.4412878751754761, "step": 466 }, { "epoch": 0.05666788011163694, "grad_norm": 1.2079088687896729, "learning_rate": 1.9100847561724604e-05, "loss": 0.1528819501399994, "step": 467 }, { "epoch": 0.056789224608664, "grad_norm": 1.6561167240142822, "learning_rate": 1.9098390861073578e-05, "loss": 0.39373156428337097, "step": 468 }, { "epoch": 0.056910569105691054, "grad_norm": 1.4517277479171753, "learning_rate": 1.9095934160422552e-05, "loss": 0.45171377062797546, "step": 469 }, { "epoch": 0.057031913602718115, "grad_norm": 1.3444980382919312, "learning_rate": 1.9093477459771527e-05, "loss": 0.3764626681804657, "step": 470 }, { "epoch": 0.057153258099745176, "grad_norm": 1.5290117263793945, "learning_rate": 1.90910207591205e-05, "loss": 0.41870003938674927, "step": 471 }, { "epoch": 0.05727460259677224, "grad_norm": 0.6672114133834839, "learning_rate": 1.9088564058469475e-05, "loss": 0.10688811540603638, "step": 472 }, { "epoch": 0.0573959470937993, "grad_norm": 1.296738624572754, "learning_rate": 1.908610735781845e-05, "loss": 0.4453229606151581, "step": 473 }, { "epoch": 0.05751729159082636, "grad_norm": 1.166654109954834, "learning_rate": 1.9083650657167424e-05, "loss": 0.3140110373497009, "step": 474 }, { "epoch": 0.05763863608785342, "grad_norm": 1.5547395944595337, "learning_rate": 1.9081193956516398e-05, "loss": 0.38892969489097595, "step": 475 }, { "epoch": 0.05775998058488047, "grad_norm": 1.580997109413147, "learning_rate": 1.9078737255865372e-05, "loss": 0.5940026640892029, "step": 476 }, { "epoch": 0.057881325081907534, "grad_norm": 1.5121760368347168, "learning_rate": 1.907628055521435e-05, "loss": 0.6734681725502014, "step": 477 }, { "epoch": 0.058002669578934594, "grad_norm": 1.4761507511138916, "learning_rate": 1.9073823854563324e-05, "loss": 0.3817492127418518, "step": 478 }, { "epoch": 0.058124014075961655, "grad_norm": 0.8833889961242676, "learning_rate": 1.90713671539123e-05, "loss": 0.11671547591686249, "step": 479 }, { "epoch": 0.058245358572988716, "grad_norm": 1.3200422525405884, "learning_rate": 1.9068910453261273e-05, "loss": 0.4326450228691101, "step": 480 }, { "epoch": 0.05836670307001578, "grad_norm": 1.7077932357788086, "learning_rate": 1.9066453752610247e-05, "loss": 0.2545737326145172, "step": 481 }, { "epoch": 0.05848804756704284, "grad_norm": 0.9082516431808472, "learning_rate": 1.906399705195922e-05, "loss": 0.3537253141403198, "step": 482 }, { "epoch": 0.05860939206406989, "grad_norm": 2.068510055541992, "learning_rate": 1.9061540351308195e-05, "loss": 0.6128970384597778, "step": 483 }, { "epoch": 0.05873073656109695, "grad_norm": 1.1531740427017212, "learning_rate": 1.905908365065717e-05, "loss": 0.1987893432378769, "step": 484 }, { "epoch": 0.05885208105812401, "grad_norm": 1.568968653678894, "learning_rate": 1.9056626950006144e-05, "loss": 0.21139925718307495, "step": 485 }, { "epoch": 0.058973425555151074, "grad_norm": 1.657418131828308, "learning_rate": 1.9054170249355118e-05, "loss": 0.5464147329330444, "step": 486 }, { "epoch": 0.059094770052178135, "grad_norm": 1.0412527322769165, "learning_rate": 1.9051713548704092e-05, "loss": 0.21365782618522644, "step": 487 }, { "epoch": 0.059216114549205195, "grad_norm": 0.047710780054330826, "learning_rate": 1.9049256848053067e-05, "loss": 0.0005216295248828828, "step": 488 }, { "epoch": 0.059337459046232256, "grad_norm": 1.0616328716278076, "learning_rate": 1.904680014740204e-05, "loss": 0.2274220585823059, "step": 489 }, { "epoch": 0.05945880354325931, "grad_norm": 0.7739994525909424, "learning_rate": 1.9044343446751015e-05, "loss": 0.13946875929832458, "step": 490 }, { "epoch": 0.05958014804028637, "grad_norm": 1.4279123544692993, "learning_rate": 1.904188674609999e-05, "loss": 0.20513373613357544, "step": 491 }, { "epoch": 0.05970149253731343, "grad_norm": 2.026980400085449, "learning_rate": 1.9039430045448964e-05, "loss": 0.31660470366477966, "step": 492 }, { "epoch": 0.05982283703434049, "grad_norm": 1.2501922845840454, "learning_rate": 1.9036973344797938e-05, "loss": 0.13497872650623322, "step": 493 }, { "epoch": 0.05994418153136755, "grad_norm": 2.2237708568573, "learning_rate": 1.9034516644146912e-05, "loss": 0.6203042268753052, "step": 494 }, { "epoch": 0.060065526028394614, "grad_norm": 1.9448051452636719, "learning_rate": 1.9032059943495886e-05, "loss": 0.23030954599380493, "step": 495 }, { "epoch": 0.060186870525421675, "grad_norm": 1.8851776123046875, "learning_rate": 1.902960324284486e-05, "loss": 0.3695673942565918, "step": 496 }, { "epoch": 0.06030821502244873, "grad_norm": 1.1489198207855225, "learning_rate": 1.9027146542193835e-05, "loss": 0.407935231924057, "step": 497 }, { "epoch": 0.06042955951947579, "grad_norm": 1.5059272050857544, "learning_rate": 1.902468984154281e-05, "loss": 0.46162354946136475, "step": 498 }, { "epoch": 0.06055090401650285, "grad_norm": 2.244837760925293, "learning_rate": 1.9022233140891784e-05, "loss": 0.5662449598312378, "step": 499 }, { "epoch": 0.06067224851352991, "grad_norm": 1.3596556186676025, "learning_rate": 1.9019776440240758e-05, "loss": 0.3611029386520386, "step": 500 }, { "epoch": 0.06079359301055697, "grad_norm": 1.878268837928772, "learning_rate": 1.9017319739589732e-05, "loss": 0.48691171407699585, "step": 501 }, { "epoch": 0.06091493750758403, "grad_norm": 1.615587830543518, "learning_rate": 1.9014863038938706e-05, "loss": 0.1986699253320694, "step": 502 }, { "epoch": 0.061036282004611093, "grad_norm": 1.384652018547058, "learning_rate": 1.901240633828768e-05, "loss": 0.4047502875328064, "step": 503 }, { "epoch": 0.06115762650163815, "grad_norm": 0.8918592929840088, "learning_rate": 1.9009949637636655e-05, "loss": 0.08136077225208282, "step": 504 }, { "epoch": 0.06127897099866521, "grad_norm": 1.2311476469039917, "learning_rate": 1.900749293698563e-05, "loss": 0.1510145664215088, "step": 505 }, { "epoch": 0.06140031549569227, "grad_norm": 1.1738781929016113, "learning_rate": 1.9005036236334603e-05, "loss": 0.14017607271671295, "step": 506 }, { "epoch": 0.06152165999271933, "grad_norm": 1.477210283279419, "learning_rate": 1.9002579535683578e-05, "loss": 0.38299548625946045, "step": 507 }, { "epoch": 0.06164300448974639, "grad_norm": 1.4116266965866089, "learning_rate": 1.9000122835032552e-05, "loss": 0.4138404130935669, "step": 508 }, { "epoch": 0.06176434898677345, "grad_norm": 1.5390279293060303, "learning_rate": 1.8997666134381526e-05, "loss": 0.13937437534332275, "step": 509 }, { "epoch": 0.06188569348380051, "grad_norm": 2.1501710414886475, "learning_rate": 1.89952094337305e-05, "loss": 0.30845504999160767, "step": 510 }, { "epoch": 0.06200703798082757, "grad_norm": 1.35932457447052, "learning_rate": 1.8992752733079475e-05, "loss": 0.2177773267030716, "step": 511 }, { "epoch": 0.06212838247785463, "grad_norm": 1.361790418624878, "learning_rate": 1.899029603242845e-05, "loss": 0.2292359620332718, "step": 512 }, { "epoch": 0.06224972697488169, "grad_norm": 1.9305003881454468, "learning_rate": 1.8987839331777423e-05, "loss": 0.37796324491500854, "step": 513 }, { "epoch": 0.06237107147190875, "grad_norm": 1.1280158758163452, "learning_rate": 1.8985382631126397e-05, "loss": 0.150230273604393, "step": 514 }, { "epoch": 0.06249241596893581, "grad_norm": 2.7915799617767334, "learning_rate": 1.898292593047537e-05, "loss": 0.4558163285255432, "step": 515 }, { "epoch": 0.06261376046596287, "grad_norm": 1.4745959043502808, "learning_rate": 1.898046922982435e-05, "loss": 0.4183858633041382, "step": 516 }, { "epoch": 0.06273510496298992, "grad_norm": 1.6241668462753296, "learning_rate": 1.8978012529173324e-05, "loss": 0.5954026579856873, "step": 517 }, { "epoch": 0.06285644946001699, "grad_norm": 2.7014975547790527, "learning_rate": 1.8975555828522298e-05, "loss": 0.3073278069496155, "step": 518 }, { "epoch": 0.06297779395704405, "grad_norm": 1.5124610662460327, "learning_rate": 1.8973099127871272e-05, "loss": 0.4629092812538147, "step": 519 }, { "epoch": 0.06309913845407111, "grad_norm": 1.3076839447021484, "learning_rate": 1.8970642427220246e-05, "loss": 0.359159380197525, "step": 520 }, { "epoch": 0.06322048295109817, "grad_norm": 2.850494146347046, "learning_rate": 1.896818572656922e-05, "loss": 0.5875416398048401, "step": 521 }, { "epoch": 0.06334182744812522, "grad_norm": 1.2282150983810425, "learning_rate": 1.8965729025918195e-05, "loss": 0.6359026432037354, "step": 522 }, { "epoch": 0.06346317194515229, "grad_norm": 2.336590528488159, "learning_rate": 1.896327232526717e-05, "loss": 0.29532766342163086, "step": 523 }, { "epoch": 0.06358451644217934, "grad_norm": 1.3243014812469482, "learning_rate": 1.8960815624616143e-05, "loss": 0.2546826899051666, "step": 524 }, { "epoch": 0.06370586093920641, "grad_norm": 0.884636402130127, "learning_rate": 1.8958358923965118e-05, "loss": 0.07391630858182907, "step": 525 }, { "epoch": 0.06382720543623346, "grad_norm": 1.5499517917633057, "learning_rate": 1.8955902223314092e-05, "loss": 0.38295266032218933, "step": 526 }, { "epoch": 0.06394854993326053, "grad_norm": 1.8321927785873413, "learning_rate": 1.8953445522663066e-05, "loss": 0.2729780673980713, "step": 527 }, { "epoch": 0.06406989443028759, "grad_norm": 1.900599718093872, "learning_rate": 1.895098882201204e-05, "loss": 0.5558912754058838, "step": 528 }, { "epoch": 0.06419123892731464, "grad_norm": 1.706315279006958, "learning_rate": 1.8948532121361015e-05, "loss": 0.7064175605773926, "step": 529 }, { "epoch": 0.06431258342434171, "grad_norm": 1.0284161567687988, "learning_rate": 1.894607542070999e-05, "loss": 0.36064383387565613, "step": 530 }, { "epoch": 0.06443392792136876, "grad_norm": 1.0576647520065308, "learning_rate": 1.8943618720058963e-05, "loss": 0.14544187486171722, "step": 531 }, { "epoch": 0.06455527241839583, "grad_norm": 1.206278681755066, "learning_rate": 1.8941162019407937e-05, "loss": 0.743944525718689, "step": 532 }, { "epoch": 0.06467661691542288, "grad_norm": 1.364727258682251, "learning_rate": 1.893870531875691e-05, "loss": 0.4518415927886963, "step": 533 }, { "epoch": 0.06479796141244995, "grad_norm": 1.4188811779022217, "learning_rate": 1.8936248618105886e-05, "loss": 0.37862730026245117, "step": 534 }, { "epoch": 0.064919305909477, "grad_norm": 1.7733440399169922, "learning_rate": 1.893379191745486e-05, "loss": 0.3728104829788208, "step": 535 }, { "epoch": 0.06504065040650407, "grad_norm": 1.6150974035263062, "learning_rate": 1.8931335216803834e-05, "loss": 0.4845805764198303, "step": 536 }, { "epoch": 0.06516199490353113, "grad_norm": 1.8303987979888916, "learning_rate": 1.892887851615281e-05, "loss": 0.1986956149339676, "step": 537 }, { "epoch": 0.06528333940055818, "grad_norm": 1.9609991312026978, "learning_rate": 1.8926421815501783e-05, "loss": 0.3362962603569031, "step": 538 }, { "epoch": 0.06540468389758525, "grad_norm": 1.5091760158538818, "learning_rate": 1.8923965114850757e-05, "loss": 0.43894076347351074, "step": 539 }, { "epoch": 0.0655260283946123, "grad_norm": 1.83540678024292, "learning_rate": 1.892150841419973e-05, "loss": 0.5007857084274292, "step": 540 }, { "epoch": 0.06564737289163937, "grad_norm": 1.6493889093399048, "learning_rate": 1.8919051713548706e-05, "loss": 0.31600168347358704, "step": 541 }, { "epoch": 0.06576871738866642, "grad_norm": 1.0712192058563232, "learning_rate": 1.891659501289768e-05, "loss": 0.23161625862121582, "step": 542 }, { "epoch": 0.06589006188569349, "grad_norm": 2.0619916915893555, "learning_rate": 1.8914138312246654e-05, "loss": 0.6339962482452393, "step": 543 }, { "epoch": 0.06601140638272054, "grad_norm": 1.6993929147720337, "learning_rate": 1.891168161159563e-05, "loss": 0.5062494874000549, "step": 544 }, { "epoch": 0.0661327508797476, "grad_norm": 1.5165525674819946, "learning_rate": 1.8909224910944603e-05, "loss": 0.29236191511154175, "step": 545 }, { "epoch": 0.06625409537677467, "grad_norm": 0.7951221466064453, "learning_rate": 1.8906768210293577e-05, "loss": 0.11672580987215042, "step": 546 }, { "epoch": 0.06637543987380172, "grad_norm": 1.2992923259735107, "learning_rate": 1.890431150964255e-05, "loss": 0.3086448907852173, "step": 547 }, { "epoch": 0.06649678437082879, "grad_norm": 1.2831530570983887, "learning_rate": 1.8901854808991526e-05, "loss": 0.29797589778900146, "step": 548 }, { "epoch": 0.06661812886785584, "grad_norm": 1.8516546487808228, "learning_rate": 1.88993981083405e-05, "loss": 0.42842912673950195, "step": 549 }, { "epoch": 0.06673947336488291, "grad_norm": 1.7039152383804321, "learning_rate": 1.8896941407689474e-05, "loss": 0.383547842502594, "step": 550 }, { "epoch": 0.06686081786190996, "grad_norm": 1.2621033191680908, "learning_rate": 1.889448470703845e-05, "loss": 0.19925151765346527, "step": 551 }, { "epoch": 0.06698216235893702, "grad_norm": 1.5435742139816284, "learning_rate": 1.8892028006387423e-05, "loss": 0.37353450059890747, "step": 552 }, { "epoch": 0.06710350685596408, "grad_norm": 2.185764789581299, "learning_rate": 1.8889571305736397e-05, "loss": 0.5865182876586914, "step": 553 }, { "epoch": 0.06722485135299114, "grad_norm": 1.4764949083328247, "learning_rate": 1.888711460508537e-05, "loss": 0.3307309150695801, "step": 554 }, { "epoch": 0.0673461958500182, "grad_norm": 1.880235195159912, "learning_rate": 1.8884657904434345e-05, "loss": 0.7467969059944153, "step": 555 }, { "epoch": 0.06746754034704526, "grad_norm": 2.191826581954956, "learning_rate": 1.8882201203783323e-05, "loss": 0.4978898763656616, "step": 556 }, { "epoch": 0.06758888484407233, "grad_norm": 0.9400010108947754, "learning_rate": 1.8879744503132297e-05, "loss": 0.29659247398376465, "step": 557 }, { "epoch": 0.06771022934109938, "grad_norm": 1.570605754852295, "learning_rate": 1.887728780248127e-05, "loss": 0.28735125064849854, "step": 558 }, { "epoch": 0.06783157383812644, "grad_norm": 1.8483541011810303, "learning_rate": 1.8874831101830246e-05, "loss": 0.3445549011230469, "step": 559 }, { "epoch": 0.0679529183351535, "grad_norm": 1.9162882566452026, "learning_rate": 1.887237440117922e-05, "loss": 0.5881408452987671, "step": 560 }, { "epoch": 0.06807426283218056, "grad_norm": 1.7977851629257202, "learning_rate": 1.8869917700528194e-05, "loss": 0.41886040568351746, "step": 561 }, { "epoch": 0.06819560732920762, "grad_norm": 1.3679685592651367, "learning_rate": 1.886746099987717e-05, "loss": 0.26222050189971924, "step": 562 }, { "epoch": 0.06831695182623468, "grad_norm": 1.60178804397583, "learning_rate": 1.8865004299226143e-05, "loss": 0.341381311416626, "step": 563 }, { "epoch": 0.06843829632326175, "grad_norm": 0.8967300653457642, "learning_rate": 1.8862547598575117e-05, "loss": 0.29784804582595825, "step": 564 }, { "epoch": 0.0685596408202888, "grad_norm": 2.0774638652801514, "learning_rate": 1.886009089792409e-05, "loss": 0.3782203793525696, "step": 565 }, { "epoch": 0.06868098531731585, "grad_norm": 1.6538910865783691, "learning_rate": 1.8857634197273066e-05, "loss": 0.7248138785362244, "step": 566 }, { "epoch": 0.06880232981434292, "grad_norm": 1.3195281028747559, "learning_rate": 1.885517749662204e-05, "loss": 0.44836243987083435, "step": 567 }, { "epoch": 0.06892367431136998, "grad_norm": 0.2957918345928192, "learning_rate": 1.8852720795971014e-05, "loss": 0.004815039690583944, "step": 568 }, { "epoch": 0.06904501880839704, "grad_norm": 1.6919680833816528, "learning_rate": 1.885026409531999e-05, "loss": 0.48703718185424805, "step": 569 }, { "epoch": 0.0691663633054241, "grad_norm": 2.1297340393066406, "learning_rate": 1.8847807394668963e-05, "loss": 0.4740438163280487, "step": 570 }, { "epoch": 0.06928770780245117, "grad_norm": 1.4757106304168701, "learning_rate": 1.8845350694017937e-05, "loss": 0.42466428875923157, "step": 571 }, { "epoch": 0.06940905229947822, "grad_norm": 0.9064832329750061, "learning_rate": 1.884289399336691e-05, "loss": 0.1427098661661148, "step": 572 }, { "epoch": 0.06953039679650527, "grad_norm": 1.0294671058654785, "learning_rate": 1.8840437292715885e-05, "loss": 0.2024490237236023, "step": 573 }, { "epoch": 0.06965174129353234, "grad_norm": 1.66569185256958, "learning_rate": 1.883798059206486e-05, "loss": 0.9233149290084839, "step": 574 }, { "epoch": 0.0697730857905594, "grad_norm": 1.395765781402588, "learning_rate": 1.8835523891413834e-05, "loss": 0.24765007197856903, "step": 575 }, { "epoch": 0.06989443028758646, "grad_norm": 1.773474097251892, "learning_rate": 1.8833067190762805e-05, "loss": 0.38082554936408997, "step": 576 }, { "epoch": 0.07001577478461352, "grad_norm": 1.2505320310592651, "learning_rate": 1.883061049011178e-05, "loss": 0.348379909992218, "step": 577 }, { "epoch": 0.07013711928164058, "grad_norm": 1.428821086883545, "learning_rate": 1.8828153789460753e-05, "loss": 0.33820322155952454, "step": 578 }, { "epoch": 0.07025846377866764, "grad_norm": 1.0174955129623413, "learning_rate": 1.8825697088809728e-05, "loss": 0.250951886177063, "step": 579 }, { "epoch": 0.07037980827569469, "grad_norm": 1.4117581844329834, "learning_rate": 1.8823240388158702e-05, "loss": 0.36001020669937134, "step": 580 }, { "epoch": 0.07050115277272176, "grad_norm": 0.23253233730793, "learning_rate": 1.8820783687507676e-05, "loss": 0.010078919120132923, "step": 581 }, { "epoch": 0.07062249726974881, "grad_norm": 1.21126127243042, "learning_rate": 1.8818326986856654e-05, "loss": 0.402252733707428, "step": 582 }, { "epoch": 0.07074384176677588, "grad_norm": 1.4796817302703857, "learning_rate": 1.8815870286205628e-05, "loss": 0.3913116753101349, "step": 583 }, { "epoch": 0.07086518626380293, "grad_norm": 1.842236042022705, "learning_rate": 1.8813413585554602e-05, "loss": 0.7778733968734741, "step": 584 }, { "epoch": 0.07098653076083, "grad_norm": 1.782017707824707, "learning_rate": 1.8810956884903576e-05, "loss": 0.3612639904022217, "step": 585 }, { "epoch": 0.07110787525785706, "grad_norm": 1.6680972576141357, "learning_rate": 1.880850018425255e-05, "loss": 0.762682318687439, "step": 586 }, { "epoch": 0.07122921975488411, "grad_norm": 1.2227903604507446, "learning_rate": 1.8806043483601525e-05, "loss": 0.5214573740959167, "step": 587 }, { "epoch": 0.07135056425191118, "grad_norm": 1.5926989316940308, "learning_rate": 1.88035867829505e-05, "loss": 0.5479337573051453, "step": 588 }, { "epoch": 0.07147190874893823, "grad_norm": 1.989459753036499, "learning_rate": 1.8801130082299474e-05, "loss": 0.3255896270275116, "step": 589 }, { "epoch": 0.0715932532459653, "grad_norm": 1.7789111137390137, "learning_rate": 1.8798673381648448e-05, "loss": 0.5009081363677979, "step": 590 }, { "epoch": 0.07171459774299235, "grad_norm": 1.1982905864715576, "learning_rate": 1.8796216680997422e-05, "loss": 0.6694433093070984, "step": 591 }, { "epoch": 0.07183594224001942, "grad_norm": 1.6150325536727905, "learning_rate": 1.8793759980346396e-05, "loss": 0.4211908280849457, "step": 592 }, { "epoch": 0.07195728673704647, "grad_norm": 1.140662431716919, "learning_rate": 1.879130327969537e-05, "loss": 0.28169333934783936, "step": 593 }, { "epoch": 0.07207863123407353, "grad_norm": 2.1310620307922363, "learning_rate": 1.8788846579044345e-05, "loss": 0.5164771676063538, "step": 594 }, { "epoch": 0.0721999757311006, "grad_norm": 1.233970046043396, "learning_rate": 1.878638987839332e-05, "loss": 0.3783186376094818, "step": 595 }, { "epoch": 0.07232132022812765, "grad_norm": 1.430479645729065, "learning_rate": 1.8783933177742293e-05, "loss": 0.5629549622535706, "step": 596 }, { "epoch": 0.07244266472515472, "grad_norm": 1.5569998025894165, "learning_rate": 1.8781476477091268e-05, "loss": 0.27930858731269836, "step": 597 }, { "epoch": 0.07256400922218177, "grad_norm": 1.2450876235961914, "learning_rate": 1.8779019776440242e-05, "loss": 0.7581778764724731, "step": 598 }, { "epoch": 0.07268535371920884, "grad_norm": 2.200653076171875, "learning_rate": 1.8776563075789216e-05, "loss": 0.24638301134109497, "step": 599 }, { "epoch": 0.0728066982162359, "grad_norm": 0.1584484577178955, "learning_rate": 1.877410637513819e-05, "loss": 0.0029435211326926947, "step": 600 }, { "epoch": 0.07292804271326295, "grad_norm": 1.201370358467102, "learning_rate": 1.8771649674487165e-05, "loss": 0.17360961437225342, "step": 601 }, { "epoch": 0.07304938721029001, "grad_norm": 1.5583597421646118, "learning_rate": 1.876919297383614e-05, "loss": 0.19166655838489532, "step": 602 }, { "epoch": 0.07317073170731707, "grad_norm": 1.9324877262115479, "learning_rate": 1.8766736273185113e-05, "loss": 0.5034867525100708, "step": 603 }, { "epoch": 0.07329207620434414, "grad_norm": 1.4319108724594116, "learning_rate": 1.8764279572534087e-05, "loss": 0.16094264388084412, "step": 604 }, { "epoch": 0.07341342070137119, "grad_norm": 2.0198020935058594, "learning_rate": 1.876182287188306e-05, "loss": 0.8958789706230164, "step": 605 }, { "epoch": 0.07353476519839826, "grad_norm": 1.1970410346984863, "learning_rate": 1.8759366171232036e-05, "loss": 0.456455796957016, "step": 606 }, { "epoch": 0.07365610969542531, "grad_norm": 1.285672903060913, "learning_rate": 1.875690947058101e-05, "loss": 0.6159220933914185, "step": 607 }, { "epoch": 0.07377745419245237, "grad_norm": 1.3117607831954956, "learning_rate": 1.8754452769929984e-05, "loss": 0.40198272466659546, "step": 608 }, { "epoch": 0.07389879868947943, "grad_norm": 1.767972469329834, "learning_rate": 1.875199606927896e-05, "loss": 0.7003971934318542, "step": 609 }, { "epoch": 0.07402014318650649, "grad_norm": 1.3937331438064575, "learning_rate": 1.8749539368627933e-05, "loss": 0.11055199801921844, "step": 610 }, { "epoch": 0.07414148768353356, "grad_norm": 1.1846213340759277, "learning_rate": 1.8747082667976907e-05, "loss": 0.819743812084198, "step": 611 }, { "epoch": 0.07426283218056061, "grad_norm": 1.818548560142517, "learning_rate": 1.874462596732588e-05, "loss": 0.6363766193389893, "step": 612 }, { "epoch": 0.07438417667758768, "grad_norm": 2.3553943634033203, "learning_rate": 1.8742169266674856e-05, "loss": 0.8139269351959229, "step": 613 }, { "epoch": 0.07450552117461473, "grad_norm": 0.9787309169769287, "learning_rate": 1.873971256602383e-05, "loss": 0.17067642509937286, "step": 614 }, { "epoch": 0.07462686567164178, "grad_norm": 2.027261257171631, "learning_rate": 1.8737255865372804e-05, "loss": 0.17268799245357513, "step": 615 }, { "epoch": 0.07474821016866885, "grad_norm": 1.1048139333724976, "learning_rate": 1.873479916472178e-05, "loss": 0.33995521068573, "step": 616 }, { "epoch": 0.0748695546656959, "grad_norm": 1.7998343706130981, "learning_rate": 1.8732342464070753e-05, "loss": 0.15454964339733124, "step": 617 }, { "epoch": 0.07499089916272297, "grad_norm": 1.2843570709228516, "learning_rate": 1.8729885763419727e-05, "loss": 0.14955607056617737, "step": 618 }, { "epoch": 0.07511224365975003, "grad_norm": 1.1684657335281372, "learning_rate": 1.87274290627687e-05, "loss": 0.5342341661453247, "step": 619 }, { "epoch": 0.0752335881567771, "grad_norm": 1.2346407175064087, "learning_rate": 1.8724972362117676e-05, "loss": 0.18371909856796265, "step": 620 }, { "epoch": 0.07535493265380415, "grad_norm": 1.3623450994491577, "learning_rate": 1.872251566146665e-05, "loss": 0.30086177587509155, "step": 621 }, { "epoch": 0.0754762771508312, "grad_norm": 1.979787826538086, "learning_rate": 1.8720058960815627e-05, "loss": 0.7226313352584839, "step": 622 }, { "epoch": 0.07559762164785827, "grad_norm": 1.101216435432434, "learning_rate": 1.87176022601646e-05, "loss": 0.1416693776845932, "step": 623 }, { "epoch": 0.07571896614488532, "grad_norm": 1.8791908025741577, "learning_rate": 1.8715145559513576e-05, "loss": 0.5394376516342163, "step": 624 }, { "epoch": 0.07584031064191239, "grad_norm": 1.2553093433380127, "learning_rate": 1.871268885886255e-05, "loss": 0.19903355836868286, "step": 625 }, { "epoch": 0.07596165513893945, "grad_norm": 1.5131609439849854, "learning_rate": 1.8710232158211524e-05, "loss": 0.2551720142364502, "step": 626 }, { "epoch": 0.07608299963596651, "grad_norm": 0.9879768490791321, "learning_rate": 1.87077754575605e-05, "loss": 0.09607790410518646, "step": 627 }, { "epoch": 0.07620434413299357, "grad_norm": 1.5272071361541748, "learning_rate": 1.8705318756909473e-05, "loss": 0.16435827314853668, "step": 628 }, { "epoch": 0.07632568863002062, "grad_norm": 1.7799582481384277, "learning_rate": 1.8702862056258447e-05, "loss": 0.3401604890823364, "step": 629 }, { "epoch": 0.07644703312704769, "grad_norm": 1.4967886209487915, "learning_rate": 1.870040535560742e-05, "loss": 0.41344839334487915, "step": 630 }, { "epoch": 0.07656837762407474, "grad_norm": 1.9690594673156738, "learning_rate": 1.8697948654956396e-05, "loss": 0.8188158273696899, "step": 631 }, { "epoch": 0.07668972212110181, "grad_norm": 1.2859214544296265, "learning_rate": 1.869549195430537e-05, "loss": 0.21575604379177094, "step": 632 }, { "epoch": 0.07681106661812886, "grad_norm": 1.5824931859970093, "learning_rate": 1.8693035253654344e-05, "loss": 0.6719478368759155, "step": 633 }, { "epoch": 0.07693241111515593, "grad_norm": 2.319385290145874, "learning_rate": 1.869057855300332e-05, "loss": 0.3536515235900879, "step": 634 }, { "epoch": 0.07705375561218299, "grad_norm": 1.7407270669937134, "learning_rate": 1.8688121852352293e-05, "loss": 0.4539559781551361, "step": 635 }, { "epoch": 0.07717510010921005, "grad_norm": 1.8825663328170776, "learning_rate": 1.8685665151701267e-05, "loss": 0.37786608934402466, "step": 636 }, { "epoch": 0.07729644460623711, "grad_norm": 1.678596019744873, "learning_rate": 1.868320845105024e-05, "loss": 0.42622077465057373, "step": 637 }, { "epoch": 0.07741778910326416, "grad_norm": 1.5039141178131104, "learning_rate": 1.8680751750399216e-05, "loss": 0.22307798266410828, "step": 638 }, { "epoch": 0.07753913360029123, "grad_norm": 1.364247441291809, "learning_rate": 1.867829504974819e-05, "loss": 0.15877319872379303, "step": 639 }, { "epoch": 0.07766047809731828, "grad_norm": 1.4648646116256714, "learning_rate": 1.8675838349097164e-05, "loss": 0.3406936228275299, "step": 640 }, { "epoch": 0.07778182259434535, "grad_norm": 1.3261477947235107, "learning_rate": 1.867338164844614e-05, "loss": 0.12792298197746277, "step": 641 }, { "epoch": 0.0779031670913724, "grad_norm": 1.1395256519317627, "learning_rate": 1.8670924947795113e-05, "loss": 0.3013390600681305, "step": 642 }, { "epoch": 0.07802451158839947, "grad_norm": 1.2113020420074463, "learning_rate": 1.8668468247144087e-05, "loss": 0.13082636892795563, "step": 643 }, { "epoch": 0.07814585608542653, "grad_norm": 1.6144537925720215, "learning_rate": 1.866601154649306e-05, "loss": 0.16165593266487122, "step": 644 }, { "epoch": 0.07826720058245358, "grad_norm": 1.3433409929275513, "learning_rate": 1.8663554845842035e-05, "loss": 0.1441866159439087, "step": 645 }, { "epoch": 0.07838854507948065, "grad_norm": 1.0517584085464478, "learning_rate": 1.866109814519101e-05, "loss": 0.2606707215309143, "step": 646 }, { "epoch": 0.0785098895765077, "grad_norm": 1.180669903755188, "learning_rate": 1.8658641444539984e-05, "loss": 0.21883589029312134, "step": 647 }, { "epoch": 0.07863123407353477, "grad_norm": 0.658696174621582, "learning_rate": 1.8656184743888958e-05, "loss": 0.03140142932534218, "step": 648 }, { "epoch": 0.07875257857056182, "grad_norm": 0.051501210778951645, "learning_rate": 1.8653728043237932e-05, "loss": 0.0010449824621900916, "step": 649 }, { "epoch": 0.07887392306758889, "grad_norm": 1.6505728960037231, "learning_rate": 1.8651271342586907e-05, "loss": 0.6298208236694336, "step": 650 }, { "epoch": 0.07899526756461595, "grad_norm": 1.3531675338745117, "learning_rate": 1.864881464193588e-05, "loss": 0.26334941387176514, "step": 651 }, { "epoch": 0.079116612061643, "grad_norm": 1.7941267490386963, "learning_rate": 1.8646357941284855e-05, "loss": 0.3610610067844391, "step": 652 }, { "epoch": 0.07923795655867007, "grad_norm": 2.6597185134887695, "learning_rate": 1.864390124063383e-05, "loss": 0.5727459788322449, "step": 653 }, { "epoch": 0.07935930105569712, "grad_norm": 1.6264383792877197, "learning_rate": 1.8641444539982804e-05, "loss": 0.23900170624256134, "step": 654 }, { "epoch": 0.07948064555272419, "grad_norm": 3.0904436111450195, "learning_rate": 1.8638987839331778e-05, "loss": 0.5079998970031738, "step": 655 }, { "epoch": 0.07960199004975124, "grad_norm": 1.3061164617538452, "learning_rate": 1.8636531138680752e-05, "loss": 0.19908404350280762, "step": 656 }, { "epoch": 0.07972333454677831, "grad_norm": 1.316514492034912, "learning_rate": 1.8634074438029726e-05, "loss": 0.3161211609840393, "step": 657 }, { "epoch": 0.07984467904380536, "grad_norm": 1.1303502321243286, "learning_rate": 1.86316177373787e-05, "loss": 0.29433509707450867, "step": 658 }, { "epoch": 0.07996602354083242, "grad_norm": 0.0029176683165133, "learning_rate": 1.8629161036727675e-05, "loss": 3.159634070470929e-05, "step": 659 }, { "epoch": 0.08008736803785949, "grad_norm": 1.445120096206665, "learning_rate": 1.862670433607665e-05, "loss": 0.31950703263282776, "step": 660 }, { "epoch": 0.08020871253488654, "grad_norm": 1.4325833320617676, "learning_rate": 1.8624247635425627e-05, "loss": 0.3625190556049347, "step": 661 }, { "epoch": 0.08033005703191361, "grad_norm": 2.9001710414886475, "learning_rate": 1.86217909347746e-05, "loss": 0.17414988577365875, "step": 662 }, { "epoch": 0.08045140152894066, "grad_norm": 1.1585091352462769, "learning_rate": 1.8619334234123575e-05, "loss": 0.2450495958328247, "step": 663 }, { "epoch": 0.08057274602596773, "grad_norm": 1.7768579721450806, "learning_rate": 1.861687753347255e-05, "loss": 0.2746257781982422, "step": 664 }, { "epoch": 0.08069409052299478, "grad_norm": 1.5646682977676392, "learning_rate": 1.8614420832821524e-05, "loss": 0.3015173375606537, "step": 665 }, { "epoch": 0.08081543502002184, "grad_norm": 1.926649570465088, "learning_rate": 1.8611964132170498e-05, "loss": 0.38973182439804077, "step": 666 }, { "epoch": 0.0809367795170489, "grad_norm": 1.0687040090560913, "learning_rate": 1.8609507431519472e-05, "loss": 0.21271148324012756, "step": 667 }, { "epoch": 0.08105812401407596, "grad_norm": 2.043708086013794, "learning_rate": 1.8607050730868447e-05, "loss": 0.1781885027885437, "step": 668 }, { "epoch": 0.08117946851110303, "grad_norm": 1.3621066808700562, "learning_rate": 1.860459403021742e-05, "loss": 0.39263036847114563, "step": 669 }, { "epoch": 0.08130081300813008, "grad_norm": 1.9370335340499878, "learning_rate": 1.8602137329566395e-05, "loss": 0.454712450504303, "step": 670 }, { "epoch": 0.08142215750515715, "grad_norm": 0.988834023475647, "learning_rate": 1.859968062891537e-05, "loss": 0.0948018729686737, "step": 671 }, { "epoch": 0.0815435020021842, "grad_norm": 1.7850199937820435, "learning_rate": 1.8597223928264344e-05, "loss": 0.43955737352371216, "step": 672 }, { "epoch": 0.08166484649921125, "grad_norm": 1.6654014587402344, "learning_rate": 1.8594767227613318e-05, "loss": 0.3758952021598816, "step": 673 }, { "epoch": 0.08178619099623832, "grad_norm": 2.2570245265960693, "learning_rate": 1.8592310526962292e-05, "loss": 0.46082913875579834, "step": 674 }, { "epoch": 0.08190753549326538, "grad_norm": 1.4857622385025024, "learning_rate": 1.8589853826311266e-05, "loss": 0.6366814374923706, "step": 675 }, { "epoch": 0.08202887999029244, "grad_norm": 0.6030565500259399, "learning_rate": 1.858739712566024e-05, "loss": 0.0605991892516613, "step": 676 }, { "epoch": 0.0821502244873195, "grad_norm": 2.023913621902466, "learning_rate": 1.8584940425009215e-05, "loss": 0.2816411554813385, "step": 677 }, { "epoch": 0.08227156898434657, "grad_norm": 1.27947199344635, "learning_rate": 1.858248372435819e-05, "loss": 0.5554491877555847, "step": 678 }, { "epoch": 0.08239291348137362, "grad_norm": 1.3732556104660034, "learning_rate": 1.8580027023707164e-05, "loss": 0.26525643467903137, "step": 679 }, { "epoch": 0.08251425797840067, "grad_norm": 0.8106752038002014, "learning_rate": 1.8577570323056138e-05, "loss": 0.05604014918208122, "step": 680 }, { "epoch": 0.08263560247542774, "grad_norm": 1.0731443166732788, "learning_rate": 1.8575113622405112e-05, "loss": 0.2855074107646942, "step": 681 }, { "epoch": 0.0827569469724548, "grad_norm": 1.1271634101867676, "learning_rate": 1.8572656921754086e-05, "loss": 0.42854225635528564, "step": 682 }, { "epoch": 0.08287829146948186, "grad_norm": 1.4456641674041748, "learning_rate": 1.857020022110306e-05, "loss": 0.24991053342819214, "step": 683 }, { "epoch": 0.08299963596650892, "grad_norm": 2.6240429878234863, "learning_rate": 1.8567743520452035e-05, "loss": 0.6288108229637146, "step": 684 }, { "epoch": 0.08312098046353598, "grad_norm": 1.276291012763977, "learning_rate": 1.856528681980101e-05, "loss": 0.08958227187395096, "step": 685 }, { "epoch": 0.08324232496056304, "grad_norm": 1.562274694442749, "learning_rate": 1.8562830119149983e-05, "loss": 0.2525687515735626, "step": 686 }, { "epoch": 0.08336366945759009, "grad_norm": 1.218352198600769, "learning_rate": 1.8560373418498958e-05, "loss": 0.24451139569282532, "step": 687 }, { "epoch": 0.08348501395461716, "grad_norm": 1.408584713935852, "learning_rate": 1.8557916717847932e-05, "loss": 0.4832595884799957, "step": 688 }, { "epoch": 0.08360635845164421, "grad_norm": 2.0641961097717285, "learning_rate": 1.8555460017196906e-05, "loss": 0.3223777413368225, "step": 689 }, { "epoch": 0.08372770294867128, "grad_norm": 1.7355111837387085, "learning_rate": 1.855300331654588e-05, "loss": 0.4831840991973877, "step": 690 }, { "epoch": 0.08384904744569834, "grad_norm": 2.660064935684204, "learning_rate": 1.8550546615894855e-05, "loss": 0.48859894275665283, "step": 691 }, { "epoch": 0.0839703919427254, "grad_norm": 17.630550384521484, "learning_rate": 1.854808991524383e-05, "loss": 0.24826133251190186, "step": 692 }, { "epoch": 0.08409173643975246, "grad_norm": 1.8000401258468628, "learning_rate": 1.8545633214592803e-05, "loss": 0.4145761728286743, "step": 693 }, { "epoch": 0.08421308093677951, "grad_norm": 1.4511135816574097, "learning_rate": 1.8543176513941777e-05, "loss": 0.417000949382782, "step": 694 }, { "epoch": 0.08433442543380658, "grad_norm": 2.309809446334839, "learning_rate": 1.854071981329075e-05, "loss": 0.38240236043930054, "step": 695 }, { "epoch": 0.08445576993083363, "grad_norm": 1.9691797494888306, "learning_rate": 1.8538263112639726e-05, "loss": 0.5611581206321716, "step": 696 }, { "epoch": 0.0845771144278607, "grad_norm": 0.88741135597229, "learning_rate": 1.85358064119887e-05, "loss": 0.2746793031692505, "step": 697 }, { "epoch": 0.08469845892488775, "grad_norm": 1.727298378944397, "learning_rate": 1.8533349711337674e-05, "loss": 0.42077144980430603, "step": 698 }, { "epoch": 0.08481980342191482, "grad_norm": 1.5723243951797485, "learning_rate": 1.853089301068665e-05, "loss": 0.30194780230522156, "step": 699 }, { "epoch": 0.08494114791894188, "grad_norm": 0.6778169274330139, "learning_rate": 1.8528436310035623e-05, "loss": 0.06341211497783661, "step": 700 }, { "epoch": 0.08506249241596893, "grad_norm": 1.1526869535446167, "learning_rate": 1.85259796093846e-05, "loss": 0.14544203877449036, "step": 701 }, { "epoch": 0.085183836912996, "grad_norm": 1.8126407861709595, "learning_rate": 1.8523522908733575e-05, "loss": 0.5493510961532593, "step": 702 }, { "epoch": 0.08530518141002305, "grad_norm": 1.4785159826278687, "learning_rate": 1.852106620808255e-05, "loss": 0.28663206100463867, "step": 703 }, { "epoch": 0.08542652590705012, "grad_norm": 1.6556025743484497, "learning_rate": 1.8518609507431523e-05, "loss": 0.2000901848077774, "step": 704 }, { "epoch": 0.08554787040407717, "grad_norm": 0.6264957785606384, "learning_rate": 1.8516152806780498e-05, "loss": 0.0764586552977562, "step": 705 }, { "epoch": 0.08566921490110424, "grad_norm": 1.0878223180770874, "learning_rate": 1.8513696106129472e-05, "loss": 0.21222439408302307, "step": 706 }, { "epoch": 0.0857905593981313, "grad_norm": 2.753580331802368, "learning_rate": 1.8511239405478446e-05, "loss": 0.5297750234603882, "step": 707 }, { "epoch": 0.08591190389515835, "grad_norm": 1.854968786239624, "learning_rate": 1.850878270482742e-05, "loss": 0.3501977324485779, "step": 708 }, { "epoch": 0.08603324839218542, "grad_norm": 0.9170989990234375, "learning_rate": 1.8506326004176395e-05, "loss": 0.45474666357040405, "step": 709 }, { "epoch": 0.08615459288921247, "grad_norm": 1.7949663400650024, "learning_rate": 1.850386930352537e-05, "loss": 0.26724478602409363, "step": 710 }, { "epoch": 0.08627593738623954, "grad_norm": 1.7690072059631348, "learning_rate": 1.8501412602874343e-05, "loss": 0.18915711343288422, "step": 711 }, { "epoch": 0.08639728188326659, "grad_norm": 1.5598441362380981, "learning_rate": 1.8498955902223314e-05, "loss": 0.22758185863494873, "step": 712 }, { "epoch": 0.08651862638029366, "grad_norm": 1.995851993560791, "learning_rate": 1.849649920157229e-05, "loss": 0.38368889689445496, "step": 713 }, { "epoch": 0.08663997087732071, "grad_norm": 2.4046647548675537, "learning_rate": 1.8494042500921263e-05, "loss": 0.37635087966918945, "step": 714 }, { "epoch": 0.08676131537434777, "grad_norm": 1.9158389568328857, "learning_rate": 1.8491585800270237e-05, "loss": 0.4280529320240021, "step": 715 }, { "epoch": 0.08688265987137483, "grad_norm": 1.2061840295791626, "learning_rate": 1.848912909961921e-05, "loss": 0.21197004616260529, "step": 716 }, { "epoch": 0.08700400436840189, "grad_norm": 2.140751600265503, "learning_rate": 1.8486672398968185e-05, "loss": 0.18690326809883118, "step": 717 }, { "epoch": 0.08712534886542896, "grad_norm": 0.8397735357284546, "learning_rate": 1.848421569831716e-05, "loss": 0.10207987576723099, "step": 718 }, { "epoch": 0.08724669336245601, "grad_norm": 1.7140696048736572, "learning_rate": 1.8481758997666134e-05, "loss": 0.41563165187835693, "step": 719 }, { "epoch": 0.08736803785948308, "grad_norm": 1.6332520246505737, "learning_rate": 1.8479302297015108e-05, "loss": 0.272539883852005, "step": 720 }, { "epoch": 0.08748938235651013, "grad_norm": 1.8010307550430298, "learning_rate": 1.8476845596364082e-05, "loss": 0.6407032608985901, "step": 721 }, { "epoch": 0.08761072685353719, "grad_norm": 1.55129873752594, "learning_rate": 1.8474388895713057e-05, "loss": 0.23682965338230133, "step": 722 }, { "epoch": 0.08773207135056425, "grad_norm": 2.135488986968994, "learning_rate": 1.847193219506203e-05, "loss": 0.7710912227630615, "step": 723 }, { "epoch": 0.0878534158475913, "grad_norm": 2.4005861282348633, "learning_rate": 1.8469475494411005e-05, "loss": 0.564693808555603, "step": 724 }, { "epoch": 0.08797476034461837, "grad_norm": 1.3816473484039307, "learning_rate": 1.846701879375998e-05, "loss": 0.4674697518348694, "step": 725 }, { "epoch": 0.08809610484164543, "grad_norm": 1.5146907567977905, "learning_rate": 1.8464562093108954e-05, "loss": 0.5922137498855591, "step": 726 }, { "epoch": 0.0882174493386725, "grad_norm": 2.8482632637023926, "learning_rate": 1.846210539245793e-05, "loss": 0.8533211350440979, "step": 727 }, { "epoch": 0.08833879383569955, "grad_norm": 1.5686261653900146, "learning_rate": 1.8459648691806906e-05, "loss": 0.31927329301834106, "step": 728 }, { "epoch": 0.08846013833272662, "grad_norm": 1.3741048574447632, "learning_rate": 1.845719199115588e-05, "loss": 0.30205488204956055, "step": 729 }, { "epoch": 0.08858148282975367, "grad_norm": 1.9760974645614624, "learning_rate": 1.8454735290504854e-05, "loss": 0.27656230330467224, "step": 730 }, { "epoch": 0.08870282732678073, "grad_norm": 1.5433790683746338, "learning_rate": 1.845227858985383e-05, "loss": 0.5273905992507935, "step": 731 }, { "epoch": 0.0888241718238078, "grad_norm": 1.6777325868606567, "learning_rate": 1.8449821889202803e-05, "loss": 0.17607736587524414, "step": 732 }, { "epoch": 0.08894551632083485, "grad_norm": 1.780245065689087, "learning_rate": 1.8447365188551777e-05, "loss": 0.31574001908302307, "step": 733 }, { "epoch": 0.08906686081786191, "grad_norm": 1.0304222106933594, "learning_rate": 1.844490848790075e-05, "loss": 0.12235292792320251, "step": 734 }, { "epoch": 0.08918820531488897, "grad_norm": 2.0634799003601074, "learning_rate": 1.8442451787249725e-05, "loss": 0.4336833953857422, "step": 735 }, { "epoch": 0.08930954981191604, "grad_norm": 1.5077424049377441, "learning_rate": 1.84399950865987e-05, "loss": 0.3145570755004883, "step": 736 }, { "epoch": 0.08943089430894309, "grad_norm": 1.5537257194519043, "learning_rate": 1.8437538385947674e-05, "loss": 0.32689836621284485, "step": 737 }, { "epoch": 0.08955223880597014, "grad_norm": 1.7519437074661255, "learning_rate": 1.8435081685296648e-05, "loss": 0.8935326933860779, "step": 738 }, { "epoch": 0.08967358330299721, "grad_norm": 1.947508454322815, "learning_rate": 1.8432624984645622e-05, "loss": 0.3590155839920044, "step": 739 }, { "epoch": 0.08979492780002427, "grad_norm": 1.9229471683502197, "learning_rate": 1.8430168283994597e-05, "loss": 0.3991833031177521, "step": 740 }, { "epoch": 0.08991627229705133, "grad_norm": 1.3160052299499512, "learning_rate": 1.842771158334357e-05, "loss": 0.2500799894332886, "step": 741 }, { "epoch": 0.09003761679407839, "grad_norm": 1.176512598991394, "learning_rate": 1.8425254882692545e-05, "loss": 0.09526380151510239, "step": 742 }, { "epoch": 0.09015896129110545, "grad_norm": 1.6869382858276367, "learning_rate": 1.842279818204152e-05, "loss": 0.26154953241348267, "step": 743 }, { "epoch": 0.09028030578813251, "grad_norm": 2.0682740211486816, "learning_rate": 1.8420341481390494e-05, "loss": 0.3846953511238098, "step": 744 }, { "epoch": 0.09040165028515956, "grad_norm": 1.7865413427352905, "learning_rate": 1.8417884780739468e-05, "loss": 0.32650160789489746, "step": 745 }, { "epoch": 0.09052299478218663, "grad_norm": 2.0560121536254883, "learning_rate": 1.8415428080088442e-05, "loss": 0.2540816068649292, "step": 746 }, { "epoch": 0.09064433927921368, "grad_norm": 1.3295836448669434, "learning_rate": 1.8412971379437416e-05, "loss": 0.29928016662597656, "step": 747 }, { "epoch": 0.09076568377624075, "grad_norm": 1.530879020690918, "learning_rate": 1.841051467878639e-05, "loss": 0.40534618496894836, "step": 748 }, { "epoch": 0.0908870282732678, "grad_norm": 1.5177699327468872, "learning_rate": 1.8408057978135365e-05, "loss": 0.39622828364372253, "step": 749 }, { "epoch": 0.09100837277029487, "grad_norm": 1.8274325132369995, "learning_rate": 1.840560127748434e-05, "loss": 0.24163588881492615, "step": 750 }, { "epoch": 0.09112971726732193, "grad_norm": 1.2737239599227905, "learning_rate": 1.8403144576833314e-05, "loss": 0.174016535282135, "step": 751 }, { "epoch": 0.09125106176434898, "grad_norm": 2.5806877613067627, "learning_rate": 1.8400687876182288e-05, "loss": 0.5132497549057007, "step": 752 }, { "epoch": 0.09137240626137605, "grad_norm": 2.5546791553497314, "learning_rate": 1.8398231175531262e-05, "loss": 0.43177586793899536, "step": 753 }, { "epoch": 0.0914937507584031, "grad_norm": 1.690147042274475, "learning_rate": 1.8395774474880236e-05, "loss": 0.24541637301445007, "step": 754 }, { "epoch": 0.09161509525543017, "grad_norm": 1.4954241514205933, "learning_rate": 1.839331777422921e-05, "loss": 0.16959670186042786, "step": 755 }, { "epoch": 0.09173643975245722, "grad_norm": 0.9505935907363892, "learning_rate": 1.8390861073578185e-05, "loss": 0.12421949207782745, "step": 756 }, { "epoch": 0.09185778424948429, "grad_norm": 1.4919354915618896, "learning_rate": 1.838840437292716e-05, "loss": 0.3456411361694336, "step": 757 }, { "epoch": 0.09197912874651135, "grad_norm": 1.6589082479476929, "learning_rate": 1.8385947672276133e-05, "loss": 0.24717597663402557, "step": 758 }, { "epoch": 0.0921004732435384, "grad_norm": 1.4445219039916992, "learning_rate": 1.8383490971625108e-05, "loss": 0.3239576816558838, "step": 759 }, { "epoch": 0.09222181774056547, "grad_norm": 1.030391812324524, "learning_rate": 1.8381034270974082e-05, "loss": 0.09199359267950058, "step": 760 }, { "epoch": 0.09234316223759252, "grad_norm": 1.5500389337539673, "learning_rate": 1.8378577570323056e-05, "loss": 0.5370050668716431, "step": 761 }, { "epoch": 0.09246450673461959, "grad_norm": 1.5590590238571167, "learning_rate": 1.837612086967203e-05, "loss": 0.31459516286849976, "step": 762 }, { "epoch": 0.09258585123164664, "grad_norm": 2.197721481323242, "learning_rate": 1.8373664169021005e-05, "loss": 0.674180269241333, "step": 763 }, { "epoch": 0.09270719572867371, "grad_norm": 1.6125683784484863, "learning_rate": 1.837120746836998e-05, "loss": 0.3430609405040741, "step": 764 }, { "epoch": 0.09282854022570076, "grad_norm": 1.7196494340896606, "learning_rate": 1.8368750767718953e-05, "loss": 0.22308464348316193, "step": 765 }, { "epoch": 0.09294988472272782, "grad_norm": 2.1390604972839355, "learning_rate": 1.8366294067067927e-05, "loss": 0.5569056272506714, "step": 766 }, { "epoch": 0.09307122921975489, "grad_norm": 1.9109262228012085, "learning_rate": 1.8363837366416905e-05, "loss": 0.5384218692779541, "step": 767 }, { "epoch": 0.09319257371678194, "grad_norm": 2.4339253902435303, "learning_rate": 1.836138066576588e-05, "loss": 0.4181733727455139, "step": 768 }, { "epoch": 0.09331391821380901, "grad_norm": 1.388688325881958, "learning_rate": 1.8358923965114854e-05, "loss": 0.052905499935150146, "step": 769 }, { "epoch": 0.09343526271083606, "grad_norm": 1.6281681060791016, "learning_rate": 1.8356467264463828e-05, "loss": 0.6276299357414246, "step": 770 }, { "epoch": 0.09355660720786313, "grad_norm": 1.908575177192688, "learning_rate": 1.8354010563812802e-05, "loss": 0.376204252243042, "step": 771 }, { "epoch": 0.09367795170489018, "grad_norm": 1.5560134649276733, "learning_rate": 1.8351553863161776e-05, "loss": 0.2944219410419464, "step": 772 }, { "epoch": 0.09379929620191724, "grad_norm": 1.2017306089401245, "learning_rate": 1.834909716251075e-05, "loss": 0.15335910022258759, "step": 773 }, { "epoch": 0.0939206406989443, "grad_norm": 1.7475948333740234, "learning_rate": 1.8346640461859725e-05, "loss": 0.3523990511894226, "step": 774 }, { "epoch": 0.09404198519597136, "grad_norm": 2.778636932373047, "learning_rate": 1.83441837612087e-05, "loss": 0.14939619600772858, "step": 775 }, { "epoch": 0.09416332969299843, "grad_norm": 1.7566347122192383, "learning_rate": 1.8341727060557673e-05, "loss": 0.534525454044342, "step": 776 }, { "epoch": 0.09428467419002548, "grad_norm": 1.0987921953201294, "learning_rate": 1.8339270359906648e-05, "loss": 0.45171016454696655, "step": 777 }, { "epoch": 0.09440601868705255, "grad_norm": 1.7017377614974976, "learning_rate": 1.8336813659255622e-05, "loss": 0.3392978310585022, "step": 778 }, { "epoch": 0.0945273631840796, "grad_norm": 1.220464825630188, "learning_rate": 1.8334356958604596e-05, "loss": 0.1805679053068161, "step": 779 }, { "epoch": 0.09464870768110666, "grad_norm": 1.404594898223877, "learning_rate": 1.833190025795357e-05, "loss": 0.3082903027534485, "step": 780 }, { "epoch": 0.09477005217813372, "grad_norm": 1.8245242834091187, "learning_rate": 1.8329443557302545e-05, "loss": 0.41371673345565796, "step": 781 }, { "epoch": 0.09489139667516078, "grad_norm": 2.1793110370635986, "learning_rate": 1.832698685665152e-05, "loss": 0.548283040523529, "step": 782 }, { "epoch": 0.09501274117218785, "grad_norm": 2.5097601413726807, "learning_rate": 1.8324530156000493e-05, "loss": 0.8257855176925659, "step": 783 }, { "epoch": 0.0951340856692149, "grad_norm": 1.4109855890274048, "learning_rate": 1.8322073455349467e-05, "loss": 0.3920208811759949, "step": 784 }, { "epoch": 0.09525543016624197, "grad_norm": 1.6185084581375122, "learning_rate": 1.831961675469844e-05, "loss": 0.5085690021514893, "step": 785 }, { "epoch": 0.09537677466326902, "grad_norm": 1.548444151878357, "learning_rate": 1.8317160054047416e-05, "loss": 0.16307635605335236, "step": 786 }, { "epoch": 0.09549811916029607, "grad_norm": 1.5586305856704712, "learning_rate": 1.831470335339639e-05, "loss": 0.3123549520969391, "step": 787 }, { "epoch": 0.09561946365732314, "grad_norm": 1.7862480878829956, "learning_rate": 1.8312246652745364e-05, "loss": 0.5024250149726868, "step": 788 }, { "epoch": 0.0957408081543502, "grad_norm": 1.647065281867981, "learning_rate": 1.830978995209434e-05, "loss": 0.26289641857147217, "step": 789 }, { "epoch": 0.09586215265137726, "grad_norm": 1.5449492931365967, "learning_rate": 1.8307333251443313e-05, "loss": 0.20891058444976807, "step": 790 }, { "epoch": 0.09598349714840432, "grad_norm": 2.2280101776123047, "learning_rate": 1.8304876550792287e-05, "loss": 0.1946217268705368, "step": 791 }, { "epoch": 0.09610484164543139, "grad_norm": 1.7998099327087402, "learning_rate": 1.830241985014126e-05, "loss": 0.5536755323410034, "step": 792 }, { "epoch": 0.09622618614245844, "grad_norm": 1.2248331308364868, "learning_rate": 1.8299963149490236e-05, "loss": 0.1123380959033966, "step": 793 }, { "epoch": 0.09634753063948549, "grad_norm": 1.9093798398971558, "learning_rate": 1.829750644883921e-05, "loss": 0.2815214991569519, "step": 794 }, { "epoch": 0.09646887513651256, "grad_norm": 1.4137870073318481, "learning_rate": 1.8295049748188184e-05, "loss": 0.1948762834072113, "step": 795 }, { "epoch": 0.09659021963353961, "grad_norm": 1.9202245473861694, "learning_rate": 1.829259304753716e-05, "loss": 0.22015176713466644, "step": 796 }, { "epoch": 0.09671156413056668, "grad_norm": 1.1859745979309082, "learning_rate": 1.8290136346886133e-05, "loss": 0.3085130751132965, "step": 797 }, { "epoch": 0.09683290862759374, "grad_norm": 1.4333667755126953, "learning_rate": 1.8287679646235107e-05, "loss": 0.18858560919761658, "step": 798 }, { "epoch": 0.0969542531246208, "grad_norm": 1.2532932758331299, "learning_rate": 1.828522294558408e-05, "loss": 0.15305699408054352, "step": 799 }, { "epoch": 0.09707559762164786, "grad_norm": 1.6238371133804321, "learning_rate": 1.8282766244933056e-05, "loss": 0.10041648149490356, "step": 800 }, { "epoch": 0.09719694211867491, "grad_norm": 1.7450344562530518, "learning_rate": 1.828030954428203e-05, "loss": 0.12939795851707458, "step": 801 }, { "epoch": 0.09731828661570198, "grad_norm": 1.793636441230774, "learning_rate": 1.8277852843631004e-05, "loss": 0.6190224885940552, "step": 802 }, { "epoch": 0.09743963111272903, "grad_norm": 1.11686110496521, "learning_rate": 1.827539614297998e-05, "loss": 0.21365734934806824, "step": 803 }, { "epoch": 0.0975609756097561, "grad_norm": 1.3621420860290527, "learning_rate": 1.8272939442328953e-05, "loss": 0.15825393795967102, "step": 804 }, { "epoch": 0.09768232010678315, "grad_norm": 1.776959776878357, "learning_rate": 1.8270482741677927e-05, "loss": 0.3757792115211487, "step": 805 }, { "epoch": 0.09780366460381022, "grad_norm": 1.7959625720977783, "learning_rate": 1.8268026041026904e-05, "loss": 0.3489462435245514, "step": 806 }, { "epoch": 0.09792500910083728, "grad_norm": 2.1753323078155518, "learning_rate": 1.826556934037588e-05, "loss": 0.3603140413761139, "step": 807 }, { "epoch": 0.09804635359786433, "grad_norm": 1.625357747077942, "learning_rate": 1.8263112639724853e-05, "loss": 0.3582020699977875, "step": 808 }, { "epoch": 0.0981676980948914, "grad_norm": 2.369624376296997, "learning_rate": 1.8260655939073827e-05, "loss": 0.60666424036026, "step": 809 }, { "epoch": 0.09828904259191845, "grad_norm": 1.7844915390014648, "learning_rate": 1.82581992384228e-05, "loss": 0.15997028350830078, "step": 810 }, { "epoch": 0.09841038708894552, "grad_norm": 1.6896699666976929, "learning_rate": 1.8255742537771776e-05, "loss": 0.329281210899353, "step": 811 }, { "epoch": 0.09853173158597257, "grad_norm": 1.1069085597991943, "learning_rate": 1.825328583712075e-05, "loss": 0.49478620290756226, "step": 812 }, { "epoch": 0.09865307608299964, "grad_norm": 1.5067551136016846, "learning_rate": 1.8250829136469724e-05, "loss": 0.42493683099746704, "step": 813 }, { "epoch": 0.0987744205800267, "grad_norm": 1.9894484281539917, "learning_rate": 1.82483724358187e-05, "loss": 0.33686619997024536, "step": 814 }, { "epoch": 0.09889576507705375, "grad_norm": 1.79850172996521, "learning_rate": 1.8245915735167673e-05, "loss": 0.4596613645553589, "step": 815 }, { "epoch": 0.09901710957408082, "grad_norm": 1.3043336868286133, "learning_rate": 1.8243459034516647e-05, "loss": 0.39217016100883484, "step": 816 }, { "epoch": 0.09913845407110787, "grad_norm": 1.2223256826400757, "learning_rate": 1.824100233386562e-05, "loss": 0.17283861339092255, "step": 817 }, { "epoch": 0.09925979856813494, "grad_norm": 1.9951136112213135, "learning_rate": 1.8238545633214596e-05, "loss": 0.37487325072288513, "step": 818 }, { "epoch": 0.09938114306516199, "grad_norm": 1.6043702363967896, "learning_rate": 1.823608893256357e-05, "loss": 0.3740643858909607, "step": 819 }, { "epoch": 0.09950248756218906, "grad_norm": 1.5967137813568115, "learning_rate": 1.8233632231912544e-05, "loss": 0.4586668610572815, "step": 820 }, { "epoch": 0.09962383205921611, "grad_norm": 2.1203699111938477, "learning_rate": 1.823117553126152e-05, "loss": 0.43344107270240784, "step": 821 }, { "epoch": 0.09974517655624317, "grad_norm": 2.03326678276062, "learning_rate": 1.8228718830610493e-05, "loss": 0.2968045473098755, "step": 822 }, { "epoch": 0.09986652105327024, "grad_norm": 2.2074062824249268, "learning_rate": 1.8226262129959467e-05, "loss": 0.4776389002799988, "step": 823 }, { "epoch": 0.09998786555029729, "grad_norm": 1.5093281269073486, "learning_rate": 1.822380542930844e-05, "loss": 0.30169808864593506, "step": 824 }, { "epoch": 0.10010921004732436, "grad_norm": 1.4926306009292603, "learning_rate": 1.8221348728657415e-05, "loss": 0.36792951822280884, "step": 825 }, { "epoch": 0.10023055454435141, "grad_norm": 1.9243760108947754, "learning_rate": 1.821889202800639e-05, "loss": 0.3194350600242615, "step": 826 }, { "epoch": 0.10035189904137848, "grad_norm": 2.0720691680908203, "learning_rate": 1.8216435327355364e-05, "loss": 0.5761525630950928, "step": 827 }, { "epoch": 0.10047324353840553, "grad_norm": 1.1331757307052612, "learning_rate": 1.8213978626704338e-05, "loss": 0.12145671993494034, "step": 828 }, { "epoch": 0.1005945880354326, "grad_norm": 2.4218826293945312, "learning_rate": 1.8211521926053312e-05, "loss": 0.30834150314331055, "step": 829 }, { "epoch": 0.10071593253245965, "grad_norm": 1.6021833419799805, "learning_rate": 1.8209065225402287e-05, "loss": 0.24986329674720764, "step": 830 }, { "epoch": 0.10083727702948671, "grad_norm": 1.436381220817566, "learning_rate": 1.820660852475126e-05, "loss": 0.3261716961860657, "step": 831 }, { "epoch": 0.10095862152651378, "grad_norm": 1.4951610565185547, "learning_rate": 1.8204151824100235e-05, "loss": 0.2016591727733612, "step": 832 }, { "epoch": 0.10107996602354083, "grad_norm": 1.6854465007781982, "learning_rate": 1.820169512344921e-05, "loss": 0.18829268217086792, "step": 833 }, { "epoch": 0.1012013105205679, "grad_norm": 1.5968453884124756, "learning_rate": 1.8199238422798184e-05, "loss": 0.27957627177238464, "step": 834 }, { "epoch": 0.10132265501759495, "grad_norm": 1.384221076965332, "learning_rate": 1.8196781722147158e-05, "loss": 0.11764024198055267, "step": 835 }, { "epoch": 0.10144399951462202, "grad_norm": 1.5695202350616455, "learning_rate": 1.8194325021496132e-05, "loss": 0.5740670561790466, "step": 836 }, { "epoch": 0.10156534401164907, "grad_norm": 1.2498831748962402, "learning_rate": 1.8191868320845106e-05, "loss": 0.4904257655143738, "step": 837 }, { "epoch": 0.10168668850867613, "grad_norm": 1.4824366569519043, "learning_rate": 1.818941162019408e-05, "loss": 0.19962450861930847, "step": 838 }, { "epoch": 0.1018080330057032, "grad_norm": 0.6617300510406494, "learning_rate": 1.8186954919543055e-05, "loss": 0.036515962332487106, "step": 839 }, { "epoch": 0.10192937750273025, "grad_norm": 1.7322537899017334, "learning_rate": 1.818449821889203e-05, "loss": 0.3243387043476105, "step": 840 }, { "epoch": 0.10205072199975732, "grad_norm": 2.164475917816162, "learning_rate": 1.8182041518241004e-05, "loss": 0.4636027216911316, "step": 841 }, { "epoch": 0.10217206649678437, "grad_norm": 1.594534993171692, "learning_rate": 1.8179584817589978e-05, "loss": 0.08477664738893509, "step": 842 }, { "epoch": 0.10229341099381144, "grad_norm": 2.2461769580841064, "learning_rate": 1.8177128116938952e-05, "loss": 0.4827083647251129, "step": 843 }, { "epoch": 0.10241475549083849, "grad_norm": 1.0204533338546753, "learning_rate": 1.8174671416287926e-05, "loss": 0.017575763165950775, "step": 844 }, { "epoch": 0.10253609998786554, "grad_norm": 2.0975234508514404, "learning_rate": 1.81722147156369e-05, "loss": 0.1643562614917755, "step": 845 }, { "epoch": 0.10265744448489261, "grad_norm": 2.3310747146606445, "learning_rate": 1.8169758014985878e-05, "loss": 0.5198527574539185, "step": 846 }, { "epoch": 0.10277878898191967, "grad_norm": 1.4509798288345337, "learning_rate": 1.8167301314334852e-05, "loss": 0.38531947135925293, "step": 847 }, { "epoch": 0.10290013347894673, "grad_norm": 1.9895381927490234, "learning_rate": 1.8164844613683823e-05, "loss": 0.3467338979244232, "step": 848 }, { "epoch": 0.10302147797597379, "grad_norm": 2.63995361328125, "learning_rate": 1.8162387913032798e-05, "loss": 0.6168738603591919, "step": 849 }, { "epoch": 0.10314282247300086, "grad_norm": 1.148128628730774, "learning_rate": 1.8159931212381772e-05, "loss": 0.20436763763427734, "step": 850 }, { "epoch": 0.10326416697002791, "grad_norm": 1.5788880586624146, "learning_rate": 1.8157474511730746e-05, "loss": 0.6665417551994324, "step": 851 }, { "epoch": 0.10338551146705496, "grad_norm": 1.841687560081482, "learning_rate": 1.815501781107972e-05, "loss": 0.917460560798645, "step": 852 }, { "epoch": 0.10350685596408203, "grad_norm": 1.3689830303192139, "learning_rate": 1.8152561110428695e-05, "loss": 0.0624203160405159, "step": 853 }, { "epoch": 0.10362820046110909, "grad_norm": 0.030568469315767288, "learning_rate": 1.815010440977767e-05, "loss": 0.000579987361561507, "step": 854 }, { "epoch": 0.10374954495813615, "grad_norm": 1.6430957317352295, "learning_rate": 1.8147647709126643e-05, "loss": 0.34169209003448486, "step": 855 }, { "epoch": 0.1038708894551632, "grad_norm": 0.8496302962303162, "learning_rate": 1.8145191008475617e-05, "loss": 0.1893075406551361, "step": 856 }, { "epoch": 0.10399223395219027, "grad_norm": 1.5164453983306885, "learning_rate": 1.814273430782459e-05, "loss": 0.14207033812999725, "step": 857 }, { "epoch": 0.10411357844921733, "grad_norm": 1.661000370979309, "learning_rate": 1.8140277607173566e-05, "loss": 0.302262544631958, "step": 858 }, { "epoch": 0.10423492294624438, "grad_norm": 1.6371593475341797, "learning_rate": 1.813782090652254e-05, "loss": 0.3569563627243042, "step": 859 }, { "epoch": 0.10435626744327145, "grad_norm": 3.8244004249572754, "learning_rate": 1.8135364205871514e-05, "loss": 0.4229733645915985, "step": 860 }, { "epoch": 0.1044776119402985, "grad_norm": 2.6182684898376465, "learning_rate": 1.813290750522049e-05, "loss": 0.28717556595802307, "step": 861 }, { "epoch": 0.10459895643732557, "grad_norm": 1.6072068214416504, "learning_rate": 1.8130450804569463e-05, "loss": 0.27383649349212646, "step": 862 }, { "epoch": 0.10472030093435263, "grad_norm": 1.6131926774978638, "learning_rate": 1.8127994103918437e-05, "loss": 0.36529356241226196, "step": 863 }, { "epoch": 0.10484164543137969, "grad_norm": 1.0998895168304443, "learning_rate": 1.812553740326741e-05, "loss": 0.08332019299268723, "step": 864 }, { "epoch": 0.10496298992840675, "grad_norm": 1.5871561765670776, "learning_rate": 1.8123080702616386e-05, "loss": 0.4199519753456116, "step": 865 }, { "epoch": 0.1050843344254338, "grad_norm": 1.2270817756652832, "learning_rate": 1.812062400196536e-05, "loss": 0.22014616429805756, "step": 866 }, { "epoch": 0.10520567892246087, "grad_norm": 1.2801979780197144, "learning_rate": 1.8118167301314334e-05, "loss": 0.23065850138664246, "step": 867 }, { "epoch": 0.10532702341948792, "grad_norm": 1.5992664098739624, "learning_rate": 1.811571060066331e-05, "loss": 0.34659886360168457, "step": 868 }, { "epoch": 0.10544836791651499, "grad_norm": 1.710518479347229, "learning_rate": 1.8113253900012283e-05, "loss": 0.1520070731639862, "step": 869 }, { "epoch": 0.10556971241354204, "grad_norm": 1.231435775756836, "learning_rate": 1.8110797199361257e-05, "loss": 0.4307411313056946, "step": 870 }, { "epoch": 0.10569105691056911, "grad_norm": 1.7171838283538818, "learning_rate": 1.810834049871023e-05, "loss": 0.6533911228179932, "step": 871 }, { "epoch": 0.10581240140759617, "grad_norm": 2.1740949153900146, "learning_rate": 1.810588379805921e-05, "loss": 0.36171165108680725, "step": 872 }, { "epoch": 0.10593374590462322, "grad_norm": 0.6744754910469055, "learning_rate": 1.8103427097408183e-05, "loss": 0.025561649352312088, "step": 873 }, { "epoch": 0.10605509040165029, "grad_norm": 2.0431101322174072, "learning_rate": 1.8100970396757157e-05, "loss": 0.190989151597023, "step": 874 }, { "epoch": 0.10617643489867734, "grad_norm": 1.504240870475769, "learning_rate": 1.809851369610613e-05, "loss": 0.22664177417755127, "step": 875 }, { "epoch": 0.10629777939570441, "grad_norm": 1.855709433555603, "learning_rate": 1.8096056995455106e-05, "loss": 0.48425769805908203, "step": 876 }, { "epoch": 0.10641912389273146, "grad_norm": 1.0872446298599243, "learning_rate": 1.809360029480408e-05, "loss": 0.36054062843322754, "step": 877 }, { "epoch": 0.10654046838975853, "grad_norm": 1.4773401021957397, "learning_rate": 1.8091143594153054e-05, "loss": 0.16998085379600525, "step": 878 }, { "epoch": 0.10666181288678558, "grad_norm": 1.5159122943878174, "learning_rate": 1.808868689350203e-05, "loss": 0.350002646446228, "step": 879 }, { "epoch": 0.10678315738381264, "grad_norm": 1.9150190353393555, "learning_rate": 1.8086230192851003e-05, "loss": 0.3344387710094452, "step": 880 }, { "epoch": 0.1069045018808397, "grad_norm": 1.570078730583191, "learning_rate": 1.8083773492199977e-05, "loss": 0.13790318369865417, "step": 881 }, { "epoch": 0.10702584637786676, "grad_norm": 1.8442866802215576, "learning_rate": 1.808131679154895e-05, "loss": 0.3512876033782959, "step": 882 }, { "epoch": 0.10714719087489383, "grad_norm": 1.1014479398727417, "learning_rate": 1.8078860090897926e-05, "loss": 0.28830617666244507, "step": 883 }, { "epoch": 0.10726853537192088, "grad_norm": 1.95488440990448, "learning_rate": 1.80764033902469e-05, "loss": 0.5615525245666504, "step": 884 }, { "epoch": 0.10738987986894795, "grad_norm": 1.3948452472686768, "learning_rate": 1.8073946689595874e-05, "loss": 0.20246197283267975, "step": 885 }, { "epoch": 0.107511224365975, "grad_norm": 1.8468222618103027, "learning_rate": 1.807148998894485e-05, "loss": 0.5539135932922363, "step": 886 }, { "epoch": 0.10763256886300206, "grad_norm": 1.8323395252227783, "learning_rate": 1.8069033288293823e-05, "loss": 0.6240039467811584, "step": 887 }, { "epoch": 0.10775391336002912, "grad_norm": 1.7520233392715454, "learning_rate": 1.8066576587642797e-05, "loss": 0.3012508153915405, "step": 888 }, { "epoch": 0.10787525785705618, "grad_norm": 3.212730884552002, "learning_rate": 1.806411988699177e-05, "loss": 0.4331943392753601, "step": 889 }, { "epoch": 0.10799660235408325, "grad_norm": 3.032711982727051, "learning_rate": 1.8061663186340746e-05, "loss": 0.391502320766449, "step": 890 }, { "epoch": 0.1081179468511103, "grad_norm": 1.2783416509628296, "learning_rate": 1.805920648568972e-05, "loss": 0.2393539398908615, "step": 891 }, { "epoch": 0.10823929134813737, "grad_norm": 1.7031818628311157, "learning_rate": 1.8056749785038694e-05, "loss": 0.2811068892478943, "step": 892 }, { "epoch": 0.10836063584516442, "grad_norm": 3.197322368621826, "learning_rate": 1.805429308438767e-05, "loss": 0.4427284300327301, "step": 893 }, { "epoch": 0.10848198034219148, "grad_norm": 1.8068060874938965, "learning_rate": 1.8051836383736643e-05, "loss": 0.5168130397796631, "step": 894 }, { "epoch": 0.10860332483921854, "grad_norm": 2.21925950050354, "learning_rate": 1.8049379683085617e-05, "loss": 1.1132900714874268, "step": 895 }, { "epoch": 0.1087246693362456, "grad_norm": 1.7533857822418213, "learning_rate": 1.804692298243459e-05, "loss": 0.5228627324104309, "step": 896 }, { "epoch": 0.10884601383327266, "grad_norm": 2.0163865089416504, "learning_rate": 1.8044466281783565e-05, "loss": 0.2808172106742859, "step": 897 }, { "epoch": 0.10896735833029972, "grad_norm": 2.2207977771759033, "learning_rate": 1.804200958113254e-05, "loss": 0.4984392821788788, "step": 898 }, { "epoch": 0.10908870282732679, "grad_norm": 1.315753698348999, "learning_rate": 1.8039552880481514e-05, "loss": 0.05970459431409836, "step": 899 }, { "epoch": 0.10921004732435384, "grad_norm": 1.4315860271453857, "learning_rate": 1.8037096179830488e-05, "loss": 0.1566053330898285, "step": 900 }, { "epoch": 0.1093313918213809, "grad_norm": 1.2822933197021484, "learning_rate": 1.8034639479179462e-05, "loss": 0.09656253457069397, "step": 901 }, { "epoch": 0.10945273631840796, "grad_norm": 1.3101221323013306, "learning_rate": 1.8032182778528437e-05, "loss": 0.16047993302345276, "step": 902 }, { "epoch": 0.10957408081543502, "grad_norm": 1.6088323593139648, "learning_rate": 1.802972607787741e-05, "loss": 0.3874964118003845, "step": 903 }, { "epoch": 0.10969542531246208, "grad_norm": 2.3265113830566406, "learning_rate": 1.8027269377226385e-05, "loss": 0.32157737016677856, "step": 904 }, { "epoch": 0.10981676980948914, "grad_norm": 1.6004971265792847, "learning_rate": 1.802481267657536e-05, "loss": 0.3022279441356659, "step": 905 }, { "epoch": 0.1099381143065162, "grad_norm": 2.5602383613586426, "learning_rate": 1.8022355975924334e-05, "loss": 0.21964332461357117, "step": 906 }, { "epoch": 0.11005945880354326, "grad_norm": 1.7562570571899414, "learning_rate": 1.8019899275273308e-05, "loss": 0.4960334002971649, "step": 907 }, { "epoch": 0.11018080330057031, "grad_norm": 2.157813549041748, "learning_rate": 1.8017442574622282e-05, "loss": 0.4423237144947052, "step": 908 }, { "epoch": 0.11030214779759738, "grad_norm": 1.567229986190796, "learning_rate": 1.8014985873971256e-05, "loss": 0.37426209449768066, "step": 909 }, { "epoch": 0.11042349229462443, "grad_norm": 0.5419113636016846, "learning_rate": 1.801252917332023e-05, "loss": 0.024737078696489334, "step": 910 }, { "epoch": 0.1105448367916515, "grad_norm": 1.8386318683624268, "learning_rate": 1.8010072472669205e-05, "loss": 0.38826245069503784, "step": 911 }, { "epoch": 0.11066618128867856, "grad_norm": 2.213700532913208, "learning_rate": 1.8007615772018183e-05, "loss": 0.384283185005188, "step": 912 }, { "epoch": 0.11078752578570562, "grad_norm": 1.9804258346557617, "learning_rate": 1.8005159071367157e-05, "loss": 0.3135190010070801, "step": 913 }, { "epoch": 0.11090887028273268, "grad_norm": 1.7234359979629517, "learning_rate": 1.800270237071613e-05, "loss": 0.3256887197494507, "step": 914 }, { "epoch": 0.11103021477975973, "grad_norm": 1.073075532913208, "learning_rate": 1.8000245670065105e-05, "loss": 0.3075997829437256, "step": 915 }, { "epoch": 0.1111515592767868, "grad_norm": 1.4961111545562744, "learning_rate": 1.799778896941408e-05, "loss": 0.6745727062225342, "step": 916 }, { "epoch": 0.11127290377381385, "grad_norm": 1.4811803102493286, "learning_rate": 1.7995332268763054e-05, "loss": 0.18449625372886658, "step": 917 }, { "epoch": 0.11139424827084092, "grad_norm": 2.448620319366455, "learning_rate": 1.7992875568112028e-05, "loss": 0.4953969120979309, "step": 918 }, { "epoch": 0.11151559276786797, "grad_norm": 2.133965253829956, "learning_rate": 1.7990418867461002e-05, "loss": 0.21984288096427917, "step": 919 }, { "epoch": 0.11163693726489504, "grad_norm": 2.0249030590057373, "learning_rate": 1.7987962166809977e-05, "loss": 0.5486893057823181, "step": 920 }, { "epoch": 0.1117582817619221, "grad_norm": 1.5349080562591553, "learning_rate": 1.798550546615895e-05, "loss": 0.08272730559110641, "step": 921 }, { "epoch": 0.11187962625894916, "grad_norm": 1.4977649450302124, "learning_rate": 1.7983048765507925e-05, "loss": 0.202124685049057, "step": 922 }, { "epoch": 0.11200097075597622, "grad_norm": 1.1629031896591187, "learning_rate": 1.79805920648569e-05, "loss": 0.2228085845708847, "step": 923 }, { "epoch": 0.11212231525300327, "grad_norm": 1.4893561601638794, "learning_rate": 1.7978135364205874e-05, "loss": 0.3735879957675934, "step": 924 }, { "epoch": 0.11224365975003034, "grad_norm": 1.5193450450897217, "learning_rate": 1.7975678663554848e-05, "loss": 0.23259831964969635, "step": 925 }, { "epoch": 0.11236500424705739, "grad_norm": 1.7775638103485107, "learning_rate": 1.7973221962903822e-05, "loss": 0.4484153985977173, "step": 926 }, { "epoch": 0.11248634874408446, "grad_norm": 2.208301305770874, "learning_rate": 1.7970765262252797e-05, "loss": 0.9797415733337402, "step": 927 }, { "epoch": 0.11260769324111151, "grad_norm": 2.22806453704834, "learning_rate": 1.796830856160177e-05, "loss": 0.2920798063278198, "step": 928 }, { "epoch": 0.11272903773813858, "grad_norm": 1.8676373958587646, "learning_rate": 1.7965851860950745e-05, "loss": 0.6016289591789246, "step": 929 }, { "epoch": 0.11285038223516564, "grad_norm": 1.7921056747436523, "learning_rate": 1.796339516029972e-05, "loss": 0.3958570957183838, "step": 930 }, { "epoch": 0.11297172673219269, "grad_norm": 1.4786041975021362, "learning_rate": 1.7960938459648694e-05, "loss": 0.42081084847450256, "step": 931 }, { "epoch": 0.11309307122921976, "grad_norm": 2.0670037269592285, "learning_rate": 1.7958481758997668e-05, "loss": 0.5663632750511169, "step": 932 }, { "epoch": 0.11321441572624681, "grad_norm": 3.5581676959991455, "learning_rate": 1.7956025058346642e-05, "loss": 0.42889904975891113, "step": 933 }, { "epoch": 0.11333576022327388, "grad_norm": 1.748302698135376, "learning_rate": 1.7953568357695616e-05, "loss": 0.22019340097904205, "step": 934 }, { "epoch": 0.11345710472030093, "grad_norm": 1.2991547584533691, "learning_rate": 1.795111165704459e-05, "loss": 0.14706727862358093, "step": 935 }, { "epoch": 0.113578449217328, "grad_norm": 1.810490369796753, "learning_rate": 1.7948654956393565e-05, "loss": 0.39529550075531006, "step": 936 }, { "epoch": 0.11369979371435505, "grad_norm": 1.8873316049575806, "learning_rate": 1.794619825574254e-05, "loss": 0.17087635397911072, "step": 937 }, { "epoch": 0.11382113821138211, "grad_norm": 2.495636463165283, "learning_rate": 1.7943741555091513e-05, "loss": 0.21028999984264374, "step": 938 }, { "epoch": 0.11394248270840918, "grad_norm": 1.333327054977417, "learning_rate": 1.7941284854440488e-05, "loss": 0.14832647144794464, "step": 939 }, { "epoch": 0.11406382720543623, "grad_norm": 1.412825107574463, "learning_rate": 1.7938828153789462e-05, "loss": 0.08243104070425034, "step": 940 }, { "epoch": 0.1141851717024633, "grad_norm": 2.038456439971924, "learning_rate": 1.7936371453138436e-05, "loss": 0.26321762800216675, "step": 941 }, { "epoch": 0.11430651619949035, "grad_norm": 1.5243250131607056, "learning_rate": 1.793391475248741e-05, "loss": 0.24915413558483124, "step": 942 }, { "epoch": 0.11442786069651742, "grad_norm": 2.2460672855377197, "learning_rate": 1.7931458051836385e-05, "loss": 0.25913840532302856, "step": 943 }, { "epoch": 0.11454920519354447, "grad_norm": 1.7053711414337158, "learning_rate": 1.792900135118536e-05, "loss": 0.43879109621047974, "step": 944 }, { "epoch": 0.11467054969057153, "grad_norm": 1.959026575088501, "learning_rate": 1.7926544650534333e-05, "loss": 0.3923461437225342, "step": 945 }, { "epoch": 0.1147918941875986, "grad_norm": 1.6823830604553223, "learning_rate": 1.7924087949883307e-05, "loss": 0.2293175607919693, "step": 946 }, { "epoch": 0.11491323868462565, "grad_norm": 2.255762815475464, "learning_rate": 1.792163124923228e-05, "loss": 0.3342325687408447, "step": 947 }, { "epoch": 0.11503458318165272, "grad_norm": 0.44634830951690674, "learning_rate": 1.7919174548581256e-05, "loss": 0.012504082173109055, "step": 948 }, { "epoch": 0.11515592767867977, "grad_norm": 1.1759107112884521, "learning_rate": 1.791671784793023e-05, "loss": 0.18447083234786987, "step": 949 }, { "epoch": 0.11527727217570684, "grad_norm": 2.7203738689422607, "learning_rate": 1.7914261147279204e-05, "loss": 0.2561550736427307, "step": 950 }, { "epoch": 0.11539861667273389, "grad_norm": 1.4413387775421143, "learning_rate": 1.7911804446628182e-05, "loss": 0.40774011611938477, "step": 951 }, { "epoch": 0.11551996116976095, "grad_norm": 2.1084396839141846, "learning_rate": 1.7909347745977156e-05, "loss": 0.19283291697502136, "step": 952 }, { "epoch": 0.11564130566678801, "grad_norm": 2.9261622428894043, "learning_rate": 1.790689104532613e-05, "loss": 0.6466803550720215, "step": 953 }, { "epoch": 0.11576265016381507, "grad_norm": 3.529693841934204, "learning_rate": 1.7904434344675105e-05, "loss": 0.2920832335948944, "step": 954 }, { "epoch": 0.11588399466084214, "grad_norm": 2.1412620544433594, "learning_rate": 1.790197764402408e-05, "loss": 0.33081355690956116, "step": 955 }, { "epoch": 0.11600533915786919, "grad_norm": 1.9772106409072876, "learning_rate": 1.7899520943373053e-05, "loss": 0.49043765664100647, "step": 956 }, { "epoch": 0.11612668365489626, "grad_norm": 1.2700328826904297, "learning_rate": 1.7897064242722028e-05, "loss": 0.3909747898578644, "step": 957 }, { "epoch": 0.11624802815192331, "grad_norm": 1.4992220401763916, "learning_rate": 1.7894607542071002e-05, "loss": 0.11110074073076248, "step": 958 }, { "epoch": 0.11636937264895036, "grad_norm": 1.2691748142242432, "learning_rate": 1.7892150841419976e-05, "loss": 0.1457030028104782, "step": 959 }, { "epoch": 0.11649071714597743, "grad_norm": 2.2520852088928223, "learning_rate": 1.788969414076895e-05, "loss": 0.4143160581588745, "step": 960 }, { "epoch": 0.11661206164300449, "grad_norm": 2.852043390274048, "learning_rate": 1.7887237440117925e-05, "loss": 0.45678117871284485, "step": 961 }, { "epoch": 0.11673340614003155, "grad_norm": 1.3313668966293335, "learning_rate": 1.78847807394669e-05, "loss": 0.16556109488010406, "step": 962 }, { "epoch": 0.11685475063705861, "grad_norm": 1.356170654296875, "learning_rate": 1.7882324038815873e-05, "loss": 0.2957010567188263, "step": 963 }, { "epoch": 0.11697609513408568, "grad_norm": 2.6344823837280273, "learning_rate": 1.7879867338164847e-05, "loss": 0.6716679334640503, "step": 964 }, { "epoch": 0.11709743963111273, "grad_norm": 1.3099849224090576, "learning_rate": 1.7877410637513822e-05, "loss": 0.18413875997066498, "step": 965 }, { "epoch": 0.11721878412813978, "grad_norm": 1.4843673706054688, "learning_rate": 1.7874953936862796e-05, "loss": 0.12193383276462555, "step": 966 }, { "epoch": 0.11734012862516685, "grad_norm": 1.3463475704193115, "learning_rate": 1.787249723621177e-05, "loss": 0.49620726704597473, "step": 967 }, { "epoch": 0.1174614731221939, "grad_norm": 1.257489800453186, "learning_rate": 1.7870040535560744e-05, "loss": 0.0774109736084938, "step": 968 }, { "epoch": 0.11758281761922097, "grad_norm": 1.8335658311843872, "learning_rate": 1.786758383490972e-05, "loss": 0.5049456357955933, "step": 969 }, { "epoch": 0.11770416211624803, "grad_norm": 1.2735215425491333, "learning_rate": 1.7865127134258693e-05, "loss": 0.5225581526756287, "step": 970 }, { "epoch": 0.1178255066132751, "grad_norm": 2.409795045852661, "learning_rate": 1.7862670433607667e-05, "loss": 0.32925283908843994, "step": 971 }, { "epoch": 0.11794685111030215, "grad_norm": 1.4397212266921997, "learning_rate": 1.786021373295664e-05, "loss": 0.3886275291442871, "step": 972 }, { "epoch": 0.1180681956073292, "grad_norm": 2.468719720840454, "learning_rate": 1.7857757032305616e-05, "loss": 0.40028074383735657, "step": 973 }, { "epoch": 0.11818954010435627, "grad_norm": 2.085083246231079, "learning_rate": 1.785530033165459e-05, "loss": 0.5391139984130859, "step": 974 }, { "epoch": 0.11831088460138332, "grad_norm": 1.8129987716674805, "learning_rate": 1.7852843631003564e-05, "loss": 0.12351949512958527, "step": 975 }, { "epoch": 0.11843222909841039, "grad_norm": 2.5870203971862793, "learning_rate": 1.785038693035254e-05, "loss": 0.5224878191947937, "step": 976 }, { "epoch": 0.11855357359543744, "grad_norm": 1.4589635133743286, "learning_rate": 1.7847930229701513e-05, "loss": 0.21820476651191711, "step": 977 }, { "epoch": 0.11867491809246451, "grad_norm": 1.6059958934783936, "learning_rate": 1.7845473529050487e-05, "loss": 0.21393930912017822, "step": 978 }, { "epoch": 0.11879626258949157, "grad_norm": 1.6004008054733276, "learning_rate": 1.784301682839946e-05, "loss": 0.26858049631118774, "step": 979 }, { "epoch": 0.11891760708651862, "grad_norm": 2.0756053924560547, "learning_rate": 1.7840560127748436e-05, "loss": 0.30814725160598755, "step": 980 }, { "epoch": 0.11903895158354569, "grad_norm": 1.4374176263809204, "learning_rate": 1.783810342709741e-05, "loss": 0.3138670325279236, "step": 981 }, { "epoch": 0.11916029608057274, "grad_norm": 1.748745083808899, "learning_rate": 1.7835646726446384e-05, "loss": 0.5385026931762695, "step": 982 }, { "epoch": 0.11928164057759981, "grad_norm": 1.986444115638733, "learning_rate": 1.783319002579536e-05, "loss": 0.3967476785182953, "step": 983 }, { "epoch": 0.11940298507462686, "grad_norm": 2.7463040351867676, "learning_rate": 1.7830733325144333e-05, "loss": 0.19557945430278778, "step": 984 }, { "epoch": 0.11952432957165393, "grad_norm": 2.5255661010742188, "learning_rate": 1.7828276624493307e-05, "loss": 0.22208239138126373, "step": 985 }, { "epoch": 0.11964567406868098, "grad_norm": 2.8263087272644043, "learning_rate": 1.782581992384228e-05, "loss": 0.5413483381271362, "step": 986 }, { "epoch": 0.11976701856570804, "grad_norm": 2.419537305831909, "learning_rate": 1.7823363223191255e-05, "loss": 0.4241293668746948, "step": 987 }, { "epoch": 0.1198883630627351, "grad_norm": 1.8854936361312866, "learning_rate": 1.782090652254023e-05, "loss": 0.33829793334007263, "step": 988 }, { "epoch": 0.12000970755976216, "grad_norm": 1.954064130783081, "learning_rate": 1.7818449821889204e-05, "loss": 0.2632986307144165, "step": 989 }, { "epoch": 0.12013105205678923, "grad_norm": 2.0794637203216553, "learning_rate": 1.7815993121238178e-05, "loss": 0.4402806758880615, "step": 990 }, { "epoch": 0.12025239655381628, "grad_norm": 1.2783046960830688, "learning_rate": 1.7813536420587152e-05, "loss": 0.45365461707115173, "step": 991 }, { "epoch": 0.12037374105084335, "grad_norm": 2.022775411605835, "learning_rate": 1.7811079719936127e-05, "loss": 0.17468230426311493, "step": 992 }, { "epoch": 0.1204950855478704, "grad_norm": 1.8082324266433716, "learning_rate": 1.78086230192851e-05, "loss": 0.2507702112197876, "step": 993 }, { "epoch": 0.12061643004489746, "grad_norm": 2.072026252746582, "learning_rate": 1.7806166318634075e-05, "loss": 0.39948770403862, "step": 994 }, { "epoch": 0.12073777454192453, "grad_norm": 0.9502598643302917, "learning_rate": 1.780370961798305e-05, "loss": 0.4071555733680725, "step": 995 }, { "epoch": 0.12085911903895158, "grad_norm": 1.7930656671524048, "learning_rate": 1.7801252917332024e-05, "loss": 0.3525886535644531, "step": 996 }, { "epoch": 0.12098046353597865, "grad_norm": 2.9511780738830566, "learning_rate": 1.7798796216680998e-05, "loss": 0.4926954507827759, "step": 997 }, { "epoch": 0.1211018080330057, "grad_norm": 2.4663655757904053, "learning_rate": 1.7796339516029972e-05, "loss": 0.4453226923942566, "step": 998 }, { "epoch": 0.12122315253003277, "grad_norm": 1.8839552402496338, "learning_rate": 1.7793882815378946e-05, "loss": 0.4104011058807373, "step": 999 }, { "epoch": 0.12134449702705982, "grad_norm": 1.7265866994857788, "learning_rate": 1.779142611472792e-05, "loss": 0.3227764964103699, "step": 1000 }, { "epoch": 0.12146584152408688, "grad_norm": 2.007580518722534, "learning_rate": 1.7788969414076895e-05, "loss": 0.5954450368881226, "step": 1001 }, { "epoch": 0.12158718602111394, "grad_norm": 1.7668275833129883, "learning_rate": 1.778651271342587e-05, "loss": 0.2731704115867615, "step": 1002 }, { "epoch": 0.121708530518141, "grad_norm": 2.556995153427124, "learning_rate": 1.7784056012774844e-05, "loss": 0.4359130859375, "step": 1003 }, { "epoch": 0.12182987501516807, "grad_norm": 2.3546860218048096, "learning_rate": 1.7781599312123818e-05, "loss": 0.7123581171035767, "step": 1004 }, { "epoch": 0.12195121951219512, "grad_norm": 1.7314367294311523, "learning_rate": 1.7779142611472792e-05, "loss": 0.3100089132785797, "step": 1005 }, { "epoch": 0.12207256400922219, "grad_norm": 1.5493261814117432, "learning_rate": 1.7776685910821766e-05, "loss": 0.49244967103004456, "step": 1006 }, { "epoch": 0.12219390850624924, "grad_norm": 1.078848123550415, "learning_rate": 1.777422921017074e-05, "loss": 0.2843085825443268, "step": 1007 }, { "epoch": 0.1223152530032763, "grad_norm": 0.9978542327880859, "learning_rate": 1.7771772509519715e-05, "loss": 0.24063430726528168, "step": 1008 }, { "epoch": 0.12243659750030336, "grad_norm": 1.8010896444320679, "learning_rate": 1.776931580886869e-05, "loss": 0.4071890115737915, "step": 1009 }, { "epoch": 0.12255794199733042, "grad_norm": 1.3634896278381348, "learning_rate": 1.7766859108217663e-05, "loss": 0.38111934065818787, "step": 1010 }, { "epoch": 0.12267928649435748, "grad_norm": 1.713880181312561, "learning_rate": 1.7764402407566638e-05, "loss": 0.47287237644195557, "step": 1011 }, { "epoch": 0.12280063099138454, "grad_norm": 0.9091008901596069, "learning_rate": 1.7761945706915612e-05, "loss": 0.29161202907562256, "step": 1012 }, { "epoch": 0.1229219754884116, "grad_norm": 1.7493350505828857, "learning_rate": 1.7759489006264586e-05, "loss": 0.1336471140384674, "step": 1013 }, { "epoch": 0.12304331998543866, "grad_norm": 2.463874578475952, "learning_rate": 1.775703230561356e-05, "loss": 0.32139432430267334, "step": 1014 }, { "epoch": 0.12316466448246571, "grad_norm": 1.8679782152175903, "learning_rate": 1.7754575604962535e-05, "loss": 0.22383840382099152, "step": 1015 }, { "epoch": 0.12328600897949278, "grad_norm": 1.7886244058609009, "learning_rate": 1.775211890431151e-05, "loss": 0.2850145995616913, "step": 1016 }, { "epoch": 0.12340735347651983, "grad_norm": 1.5212403535842896, "learning_rate": 1.7749662203660487e-05, "loss": 0.19067168235778809, "step": 1017 }, { "epoch": 0.1235286979735469, "grad_norm": 1.0102014541625977, "learning_rate": 1.774720550300946e-05, "loss": 0.20473000407218933, "step": 1018 }, { "epoch": 0.12365004247057396, "grad_norm": 3.234743595123291, "learning_rate": 1.7744748802358435e-05, "loss": 0.4390396475791931, "step": 1019 }, { "epoch": 0.12377138696760102, "grad_norm": 1.182918906211853, "learning_rate": 1.774229210170741e-05, "loss": 0.379639208316803, "step": 1020 }, { "epoch": 0.12389273146462808, "grad_norm": 2.2215142250061035, "learning_rate": 1.7739835401056384e-05, "loss": 0.3299146890640259, "step": 1021 }, { "epoch": 0.12401407596165515, "grad_norm": 1.7570428848266602, "learning_rate": 1.7737378700405358e-05, "loss": 0.5761438608169556, "step": 1022 }, { "epoch": 0.1241354204586822, "grad_norm": 2.0322439670562744, "learning_rate": 1.7734921999754332e-05, "loss": 0.4319761395454407, "step": 1023 }, { "epoch": 0.12425676495570925, "grad_norm": 1.6962997913360596, "learning_rate": 1.7732465299103306e-05, "loss": 0.19291162490844727, "step": 1024 }, { "epoch": 0.12437810945273632, "grad_norm": 2.4372918605804443, "learning_rate": 1.773000859845228e-05, "loss": 0.2651346027851105, "step": 1025 }, { "epoch": 0.12449945394976338, "grad_norm": 2.495164632797241, "learning_rate": 1.7727551897801255e-05, "loss": 0.26666179299354553, "step": 1026 }, { "epoch": 0.12462079844679044, "grad_norm": 2.1924889087677, "learning_rate": 1.772509519715023e-05, "loss": 0.4076659083366394, "step": 1027 }, { "epoch": 0.1247421429438175, "grad_norm": 1.9566283226013184, "learning_rate": 1.7722638496499203e-05, "loss": 0.41398754715919495, "step": 1028 }, { "epoch": 0.12486348744084456, "grad_norm": 0.9982297420501709, "learning_rate": 1.7720181795848178e-05, "loss": 0.07757793366909027, "step": 1029 }, { "epoch": 0.12498483193787162, "grad_norm": 1.4906138181686401, "learning_rate": 1.7717725095197152e-05, "loss": 0.22355546057224274, "step": 1030 }, { "epoch": 0.12510617643489869, "grad_norm": 1.2983722686767578, "learning_rate": 1.7715268394546126e-05, "loss": 0.18219910562038422, "step": 1031 }, { "epoch": 0.12522752093192574, "grad_norm": 2.4365146160125732, "learning_rate": 1.77128116938951e-05, "loss": 0.5142960548400879, "step": 1032 }, { "epoch": 0.1253488654289528, "grad_norm": 1.5570324659347534, "learning_rate": 1.7710354993244075e-05, "loss": 0.7369379997253418, "step": 1033 }, { "epoch": 0.12547020992597985, "grad_norm": 1.2866076231002808, "learning_rate": 1.770789829259305e-05, "loss": 0.35260820388793945, "step": 1034 }, { "epoch": 0.12559155442300693, "grad_norm": 1.987706184387207, "learning_rate": 1.7705441591942023e-05, "loss": 0.22281603515148163, "step": 1035 }, { "epoch": 0.12571289892003398, "grad_norm": 1.536669373512268, "learning_rate": 1.7702984891290997e-05, "loss": 0.44109636545181274, "step": 1036 }, { "epoch": 0.12583424341706104, "grad_norm": 2.0726189613342285, "learning_rate": 1.770052819063997e-05, "loss": 0.4802488088607788, "step": 1037 }, { "epoch": 0.1259555879140881, "grad_norm": 1.770462155342102, "learning_rate": 1.7698071489988946e-05, "loss": 0.21857525408267975, "step": 1038 }, { "epoch": 0.12607693241111514, "grad_norm": 1.78280508518219, "learning_rate": 1.769561478933792e-05, "loss": 0.29267618060112, "step": 1039 }, { "epoch": 0.12619827690814223, "grad_norm": 2.35788631439209, "learning_rate": 1.7693158088686894e-05, "loss": 0.3705042898654938, "step": 1040 }, { "epoch": 0.12631962140516928, "grad_norm": 1.8159054517745972, "learning_rate": 1.769070138803587e-05, "loss": 0.8092231154441833, "step": 1041 }, { "epoch": 0.12644096590219633, "grad_norm": 1.4524773359298706, "learning_rate": 1.7688244687384843e-05, "loss": 0.2000163197517395, "step": 1042 }, { "epoch": 0.1265623103992234, "grad_norm": 1.3470537662506104, "learning_rate": 1.7685787986733817e-05, "loss": 0.11520944535732269, "step": 1043 }, { "epoch": 0.12668365489625044, "grad_norm": 1.9596749544143677, "learning_rate": 1.768333128608279e-05, "loss": 0.22145286202430725, "step": 1044 }, { "epoch": 0.12680499939327752, "grad_norm": 1.0872288942337036, "learning_rate": 1.7680874585431766e-05, "loss": 0.24465565383434296, "step": 1045 }, { "epoch": 0.12692634389030458, "grad_norm": 1.756108283996582, "learning_rate": 1.767841788478074e-05, "loss": 0.2880557179450989, "step": 1046 }, { "epoch": 0.12704768838733163, "grad_norm": 1.3416721820831299, "learning_rate": 1.7675961184129714e-05, "loss": 0.22841346263885498, "step": 1047 }, { "epoch": 0.12716903288435868, "grad_norm": 1.3795925378799438, "learning_rate": 1.767350448347869e-05, "loss": 0.18589796125888824, "step": 1048 }, { "epoch": 0.12729037738138577, "grad_norm": 1.842610239982605, "learning_rate": 1.7671047782827663e-05, "loss": 0.25392380356788635, "step": 1049 }, { "epoch": 0.12741172187841282, "grad_norm": 0.906227171421051, "learning_rate": 1.7668591082176637e-05, "loss": 0.019568203017115593, "step": 1050 }, { "epoch": 0.12753306637543987, "grad_norm": 3.2031447887420654, "learning_rate": 1.766613438152561e-05, "loss": 0.4728550314903259, "step": 1051 }, { "epoch": 0.12765441087246693, "grad_norm": 1.2640734910964966, "learning_rate": 1.7663677680874586e-05, "loss": 0.0975283533334732, "step": 1052 }, { "epoch": 0.12777575536949398, "grad_norm": 2.3426437377929688, "learning_rate": 1.766122098022356e-05, "loss": 0.4550413191318512, "step": 1053 }, { "epoch": 0.12789709986652106, "grad_norm": 1.7087280750274658, "learning_rate": 1.7658764279572534e-05, "loss": 0.48773813247680664, "step": 1054 }, { "epoch": 0.12801844436354812, "grad_norm": 2.2121965885162354, "learning_rate": 1.765630757892151e-05, "loss": 0.27385658025741577, "step": 1055 }, { "epoch": 0.12813978886057517, "grad_norm": 2.641083002090454, "learning_rate": 1.7653850878270483e-05, "loss": 0.33635473251342773, "step": 1056 }, { "epoch": 0.12826113335760223, "grad_norm": 1.84109628200531, "learning_rate": 1.765139417761946e-05, "loss": 0.09408122301101685, "step": 1057 }, { "epoch": 0.12838247785462928, "grad_norm": 1.1630797386169434, "learning_rate": 1.7648937476968434e-05, "loss": 0.17066405713558197, "step": 1058 }, { "epoch": 0.12850382235165636, "grad_norm": 0.3085211217403412, "learning_rate": 1.764648077631741e-05, "loss": 0.004453294910490513, "step": 1059 }, { "epoch": 0.12862516684868341, "grad_norm": 3.949612617492676, "learning_rate": 1.7644024075666383e-05, "loss": 0.30024462938308716, "step": 1060 }, { "epoch": 0.12874651134571047, "grad_norm": 2.1858408451080322, "learning_rate": 1.7641567375015357e-05, "loss": 0.26950281858444214, "step": 1061 }, { "epoch": 0.12886785584273752, "grad_norm": 2.139246702194214, "learning_rate": 1.763911067436433e-05, "loss": 0.15661706030368805, "step": 1062 }, { "epoch": 0.1289892003397646, "grad_norm": 2.098543167114258, "learning_rate": 1.7636653973713306e-05, "loss": 0.2845885455608368, "step": 1063 }, { "epoch": 0.12911054483679166, "grad_norm": 2.557609796524048, "learning_rate": 1.763419727306228e-05, "loss": 0.37870073318481445, "step": 1064 }, { "epoch": 0.1292318893338187, "grad_norm": 2.1333935260772705, "learning_rate": 1.7631740572411254e-05, "loss": 0.1683444082736969, "step": 1065 }, { "epoch": 0.12935323383084577, "grad_norm": 2.926968574523926, "learning_rate": 1.762928387176023e-05, "loss": 0.6188098192214966, "step": 1066 }, { "epoch": 0.12947457832787282, "grad_norm": 2.2622830867767334, "learning_rate": 1.7626827171109203e-05, "loss": 0.30848801136016846, "step": 1067 }, { "epoch": 0.1295959228248999, "grad_norm": 2.319904327392578, "learning_rate": 1.7624370470458177e-05, "loss": 0.5221332311630249, "step": 1068 }, { "epoch": 0.12971726732192695, "grad_norm": 1.7250056266784668, "learning_rate": 1.762191376980715e-05, "loss": 0.23689205944538116, "step": 1069 }, { "epoch": 0.129838611818954, "grad_norm": 1.9344574213027954, "learning_rate": 1.7619457069156126e-05, "loss": 0.30666399002075195, "step": 1070 }, { "epoch": 0.12995995631598106, "grad_norm": 1.857367992401123, "learning_rate": 1.76170003685051e-05, "loss": 0.6061972975730896, "step": 1071 }, { "epoch": 0.13008130081300814, "grad_norm": 2.382561445236206, "learning_rate": 1.7614543667854074e-05, "loss": 0.7764930725097656, "step": 1072 }, { "epoch": 0.1302026453100352, "grad_norm": 1.8192449808120728, "learning_rate": 1.761208696720305e-05, "loss": 0.1905331015586853, "step": 1073 }, { "epoch": 0.13032398980706225, "grad_norm": 0.7959886789321899, "learning_rate": 1.7609630266552023e-05, "loss": 0.036777134984731674, "step": 1074 }, { "epoch": 0.1304453343040893, "grad_norm": 2.9312210083007812, "learning_rate": 1.7607173565900997e-05, "loss": 0.5323508977890015, "step": 1075 }, { "epoch": 0.13056667880111636, "grad_norm": 1.7050628662109375, "learning_rate": 1.760471686524997e-05, "loss": 0.3697940707206726, "step": 1076 }, { "epoch": 0.13068802329814344, "grad_norm": 2.098642587661743, "learning_rate": 1.7602260164598945e-05, "loss": 0.45317330956459045, "step": 1077 }, { "epoch": 0.1308093677951705, "grad_norm": 1.8114123344421387, "learning_rate": 1.759980346394792e-05, "loss": 0.46396714448928833, "step": 1078 }, { "epoch": 0.13093071229219755, "grad_norm": 2.046638011932373, "learning_rate": 1.7597346763296894e-05, "loss": 0.31400540471076965, "step": 1079 }, { "epoch": 0.1310520567892246, "grad_norm": 2.056553602218628, "learning_rate": 1.7594890062645868e-05, "loss": 0.21949997544288635, "step": 1080 }, { "epoch": 0.13117340128625166, "grad_norm": 1.797045111656189, "learning_rate": 1.7592433361994842e-05, "loss": 0.6279269456863403, "step": 1081 }, { "epoch": 0.13129474578327874, "grad_norm": 2.286411762237549, "learning_rate": 1.7589976661343817e-05, "loss": 0.3866305649280548, "step": 1082 }, { "epoch": 0.1314160902803058, "grad_norm": 1.5553948879241943, "learning_rate": 1.758751996069279e-05, "loss": 0.13357111811637878, "step": 1083 }, { "epoch": 0.13153743477733285, "grad_norm": 1.6972168684005737, "learning_rate": 1.7585063260041765e-05, "loss": 0.1964944750070572, "step": 1084 }, { "epoch": 0.1316587792743599, "grad_norm": 1.53740394115448, "learning_rate": 1.758260655939074e-05, "loss": 0.20683187246322632, "step": 1085 }, { "epoch": 0.13178012377138698, "grad_norm": 1.4312472343444824, "learning_rate": 1.7580149858739714e-05, "loss": 0.17711327970027924, "step": 1086 }, { "epoch": 0.13190146826841403, "grad_norm": 1.7033421993255615, "learning_rate": 1.7577693158088688e-05, "loss": 0.14665207266807556, "step": 1087 }, { "epoch": 0.1320228127654411, "grad_norm": 1.8516016006469727, "learning_rate": 1.7575236457437662e-05, "loss": 0.22185489535331726, "step": 1088 }, { "epoch": 0.13214415726246814, "grad_norm": 2.1618032455444336, "learning_rate": 1.7572779756786636e-05, "loss": 0.4268803298473358, "step": 1089 }, { "epoch": 0.1322655017594952, "grad_norm": 1.8190878629684448, "learning_rate": 1.757032305613561e-05, "loss": 0.31076788902282715, "step": 1090 }, { "epoch": 0.13238684625652228, "grad_norm": 1.728493571281433, "learning_rate": 1.7567866355484585e-05, "loss": 0.42295384407043457, "step": 1091 }, { "epoch": 0.13250819075354933, "grad_norm": 1.6901476383209229, "learning_rate": 1.756540965483356e-05, "loss": 0.2597338557243347, "step": 1092 }, { "epoch": 0.13262953525057639, "grad_norm": 1.8863033056259155, "learning_rate": 1.7562952954182534e-05, "loss": 0.5735151767730713, "step": 1093 }, { "epoch": 0.13275087974760344, "grad_norm": 1.6283295154571533, "learning_rate": 1.7560496253531508e-05, "loss": 0.22838622331619263, "step": 1094 }, { "epoch": 0.1328722242446305, "grad_norm": 2.0172297954559326, "learning_rate": 1.7558039552880482e-05, "loss": 0.45103025436401367, "step": 1095 }, { "epoch": 0.13299356874165758, "grad_norm": 0.24614541232585907, "learning_rate": 1.755558285222946e-05, "loss": 0.002931026741862297, "step": 1096 }, { "epoch": 0.13311491323868463, "grad_norm": 1.4380757808685303, "learning_rate": 1.7553126151578434e-05, "loss": 0.3003803491592407, "step": 1097 }, { "epoch": 0.13323625773571168, "grad_norm": 2.0845935344696045, "learning_rate": 1.7550669450927408e-05, "loss": 0.5532746315002441, "step": 1098 }, { "epoch": 0.13335760223273874, "grad_norm": 1.5353543758392334, "learning_rate": 1.7548212750276382e-05, "loss": 0.19028273224830627, "step": 1099 }, { "epoch": 0.13347894672976582, "grad_norm": 0.20537756383419037, "learning_rate": 1.7545756049625357e-05, "loss": 0.002893675584346056, "step": 1100 }, { "epoch": 0.13360029122679287, "grad_norm": 3.135423183441162, "learning_rate": 1.754329934897433e-05, "loss": 0.3149774670600891, "step": 1101 }, { "epoch": 0.13372163572381993, "grad_norm": 2.225597620010376, "learning_rate": 1.7540842648323305e-05, "loss": 0.28401559591293335, "step": 1102 }, { "epoch": 0.13384298022084698, "grad_norm": 1.264791488647461, "learning_rate": 1.753838594767228e-05, "loss": 0.32886746525764465, "step": 1103 }, { "epoch": 0.13396432471787403, "grad_norm": 0.3391306698322296, "learning_rate": 1.7535929247021254e-05, "loss": 0.071807861328125, "step": 1104 }, { "epoch": 0.13408566921490112, "grad_norm": 1.0494962930679321, "learning_rate": 1.7533472546370228e-05, "loss": 0.21048381924629211, "step": 1105 }, { "epoch": 0.13420701371192817, "grad_norm": 1.717574954032898, "learning_rate": 1.7531015845719202e-05, "loss": 0.4358229637145996, "step": 1106 }, { "epoch": 0.13432835820895522, "grad_norm": 1.9669817686080933, "learning_rate": 1.7528559145068177e-05, "loss": 0.6443597674369812, "step": 1107 }, { "epoch": 0.13444970270598228, "grad_norm": 2.6505227088928223, "learning_rate": 1.752610244441715e-05, "loss": 0.5362628102302551, "step": 1108 }, { "epoch": 0.13457104720300933, "grad_norm": 2.0963194370269775, "learning_rate": 1.7523645743766125e-05, "loss": 0.3740871250629425, "step": 1109 }, { "epoch": 0.1346923917000364, "grad_norm": 1.8487685918807983, "learning_rate": 1.75211890431151e-05, "loss": 0.41866418719291687, "step": 1110 }, { "epoch": 0.13481373619706347, "grad_norm": 2.146871328353882, "learning_rate": 1.7518732342464074e-05, "loss": 0.561809778213501, "step": 1111 }, { "epoch": 0.13493508069409052, "grad_norm": 1.8240047693252563, "learning_rate": 1.7516275641813048e-05, "loss": 0.5480957627296448, "step": 1112 }, { "epoch": 0.13505642519111757, "grad_norm": 1.8582134246826172, "learning_rate": 1.7513818941162022e-05, "loss": 0.3318016529083252, "step": 1113 }, { "epoch": 0.13517776968814466, "grad_norm": 2.6997921466827393, "learning_rate": 1.7511362240510996e-05, "loss": 0.2811315357685089, "step": 1114 }, { "epoch": 0.1352991141851717, "grad_norm": 2.0669479370117188, "learning_rate": 1.750890553985997e-05, "loss": 0.39928320050239563, "step": 1115 }, { "epoch": 0.13542045868219876, "grad_norm": 1.7311495542526245, "learning_rate": 1.7506448839208945e-05, "loss": 0.3384130895137787, "step": 1116 }, { "epoch": 0.13554180317922582, "grad_norm": 1.5945545434951782, "learning_rate": 1.750399213855792e-05, "loss": 0.23259413242340088, "step": 1117 }, { "epoch": 0.13566314767625287, "grad_norm": 1.2761726379394531, "learning_rate": 1.7501535437906893e-05, "loss": 0.2338969111442566, "step": 1118 }, { "epoch": 0.13578449217327995, "grad_norm": 1.9124693870544434, "learning_rate": 1.7499078737255864e-05, "loss": 0.34637853503227234, "step": 1119 }, { "epoch": 0.135905836670307, "grad_norm": 2.1211183071136475, "learning_rate": 1.749662203660484e-05, "loss": 0.3605934679508209, "step": 1120 }, { "epoch": 0.13602718116733406, "grad_norm": 1.2206485271453857, "learning_rate": 1.7494165335953813e-05, "loss": 0.35341042280197144, "step": 1121 }, { "epoch": 0.13614852566436111, "grad_norm": 1.5597529411315918, "learning_rate": 1.749170863530279e-05, "loss": 0.2815924286842346, "step": 1122 }, { "epoch": 0.13626987016138817, "grad_norm": 2.247127056121826, "learning_rate": 1.7489251934651765e-05, "loss": 0.3463256359100342, "step": 1123 }, { "epoch": 0.13639121465841525, "grad_norm": 2.1039061546325684, "learning_rate": 1.748679523400074e-05, "loss": 0.1709476262331009, "step": 1124 }, { "epoch": 0.1365125591554423, "grad_norm": 2.266066074371338, "learning_rate": 1.7484338533349713e-05, "loss": 0.46501120924949646, "step": 1125 }, { "epoch": 0.13663390365246936, "grad_norm": 1.4224648475646973, "learning_rate": 1.7481881832698687e-05, "loss": 0.11234971880912781, "step": 1126 }, { "epoch": 0.1367552481494964, "grad_norm": 2.30653977394104, "learning_rate": 1.747942513204766e-05, "loss": 0.36751458048820496, "step": 1127 }, { "epoch": 0.1368765926465235, "grad_norm": 1.368033766746521, "learning_rate": 1.7476968431396636e-05, "loss": 0.26016345620155334, "step": 1128 }, { "epoch": 0.13699793714355055, "grad_norm": 1.8139694929122925, "learning_rate": 1.747451173074561e-05, "loss": 0.12573404610157013, "step": 1129 }, { "epoch": 0.1371192816405776, "grad_norm": 1.769921898841858, "learning_rate": 1.7472055030094584e-05, "loss": 0.26439419388771057, "step": 1130 }, { "epoch": 0.13724062613760465, "grad_norm": 1.402004599571228, "learning_rate": 1.746959832944356e-05, "loss": 0.18855473399162292, "step": 1131 }, { "epoch": 0.1373619706346317, "grad_norm": 2.369947671890259, "learning_rate": 1.7467141628792533e-05, "loss": 0.5614909529685974, "step": 1132 }, { "epoch": 0.1374833151316588, "grad_norm": 1.4672306776046753, "learning_rate": 1.7464684928141507e-05, "loss": 0.23816195130348206, "step": 1133 }, { "epoch": 0.13760465962868584, "grad_norm": 1.4055780172348022, "learning_rate": 1.746222822749048e-05, "loss": 0.38255172967910767, "step": 1134 }, { "epoch": 0.1377260041257129, "grad_norm": 2.0331690311431885, "learning_rate": 1.7459771526839456e-05, "loss": 0.44152066111564636, "step": 1135 }, { "epoch": 0.13784734862273995, "grad_norm": 1.058262825012207, "learning_rate": 1.745731482618843e-05, "loss": 0.10875988751649857, "step": 1136 }, { "epoch": 0.137968693119767, "grad_norm": 1.6434763669967651, "learning_rate": 1.7454858125537404e-05, "loss": 0.2978910505771637, "step": 1137 }, { "epoch": 0.1380900376167941, "grad_norm": 2.271061897277832, "learning_rate": 1.745240142488638e-05, "loss": 0.516408383846283, "step": 1138 }, { "epoch": 0.13821138211382114, "grad_norm": 2.4035356044769287, "learning_rate": 1.7449944724235353e-05, "loss": 0.379251629114151, "step": 1139 }, { "epoch": 0.1383327266108482, "grad_norm": 1.6674612760543823, "learning_rate": 1.7447488023584327e-05, "loss": 0.44433078169822693, "step": 1140 }, { "epoch": 0.13845407110787525, "grad_norm": 1.2432188987731934, "learning_rate": 1.74450313229333e-05, "loss": 0.4149407744407654, "step": 1141 }, { "epoch": 0.13857541560490233, "grad_norm": 1.6661003828048706, "learning_rate": 1.7442574622282276e-05, "loss": 0.42093947529792786, "step": 1142 }, { "epoch": 0.13869676010192938, "grad_norm": 2.2240869998931885, "learning_rate": 1.744011792163125e-05, "loss": 0.2196013480424881, "step": 1143 }, { "epoch": 0.13881810459895644, "grad_norm": 1.9101980924606323, "learning_rate": 1.7437661220980224e-05, "loss": 0.3927350342273712, "step": 1144 }, { "epoch": 0.1389394490959835, "grad_norm": 2.2990522384643555, "learning_rate": 1.74352045203292e-05, "loss": 0.35650429129600525, "step": 1145 }, { "epoch": 0.13906079359301055, "grad_norm": 1.582953691482544, "learning_rate": 1.7432747819678173e-05, "loss": 0.2683204412460327, "step": 1146 }, { "epoch": 0.13918213809003763, "grad_norm": 1.6646018028259277, "learning_rate": 1.7430291119027147e-05, "loss": 0.2388467639684677, "step": 1147 }, { "epoch": 0.13930348258706468, "grad_norm": 1.9748457670211792, "learning_rate": 1.742783441837612e-05, "loss": 0.2437652200460434, "step": 1148 }, { "epoch": 0.13942482708409173, "grad_norm": 2.2980165481567383, "learning_rate": 1.7425377717725095e-05, "loss": 0.19980880618095398, "step": 1149 }, { "epoch": 0.1395461715811188, "grad_norm": 1.4055594205856323, "learning_rate": 1.742292101707407e-05, "loss": 0.5625031590461731, "step": 1150 }, { "epoch": 0.13966751607814584, "grad_norm": 1.8812947273254395, "learning_rate": 1.7420464316423044e-05, "loss": 0.4220775067806244, "step": 1151 }, { "epoch": 0.13978886057517292, "grad_norm": 1.6144556999206543, "learning_rate": 1.7418007615772018e-05, "loss": 0.3916918635368347, "step": 1152 }, { "epoch": 0.13991020507219998, "grad_norm": 1.558756947517395, "learning_rate": 1.7415550915120992e-05, "loss": 0.2850634455680847, "step": 1153 }, { "epoch": 0.14003154956922703, "grad_norm": 1.508731484413147, "learning_rate": 1.7413094214469967e-05, "loss": 0.23739512264728546, "step": 1154 }, { "epoch": 0.14015289406625409, "grad_norm": 1.8177855014801025, "learning_rate": 1.741063751381894e-05, "loss": 0.5787197947502136, "step": 1155 }, { "epoch": 0.14027423856328117, "grad_norm": 1.2913404703140259, "learning_rate": 1.7408180813167915e-05, "loss": 0.14389440417289734, "step": 1156 }, { "epoch": 0.14039558306030822, "grad_norm": 1.9017482995986938, "learning_rate": 1.740572411251689e-05, "loss": 0.592059850692749, "step": 1157 }, { "epoch": 0.14051692755733527, "grad_norm": 2.6217381954193115, "learning_rate": 1.7403267411865864e-05, "loss": 0.28019648790359497, "step": 1158 }, { "epoch": 0.14063827205436233, "grad_norm": 1.557847499847412, "learning_rate": 1.7400810711214838e-05, "loss": 0.41275694966316223, "step": 1159 }, { "epoch": 0.14075961655138938, "grad_norm": 1.5144991874694824, "learning_rate": 1.7398354010563812e-05, "loss": 0.6181284785270691, "step": 1160 }, { "epoch": 0.14088096104841646, "grad_norm": 1.5369459390640259, "learning_rate": 1.7395897309912786e-05, "loss": 0.3667132258415222, "step": 1161 }, { "epoch": 0.14100230554544352, "grad_norm": 3.4152791500091553, "learning_rate": 1.7393440609261764e-05, "loss": 0.354489803314209, "step": 1162 }, { "epoch": 0.14112365004247057, "grad_norm": 3.8756494522094727, "learning_rate": 1.739098390861074e-05, "loss": 0.6173999905586243, "step": 1163 }, { "epoch": 0.14124499453949763, "grad_norm": 2.1881439685821533, "learning_rate": 1.7388527207959713e-05, "loss": 0.24914789199829102, "step": 1164 }, { "epoch": 0.1413663390365247, "grad_norm": 2.234002113342285, "learning_rate": 1.7386070507308687e-05, "loss": 0.5625263452529907, "step": 1165 }, { "epoch": 0.14148768353355176, "grad_norm": 1.6645379066467285, "learning_rate": 1.738361380665766e-05, "loss": 0.24430695176124573, "step": 1166 }, { "epoch": 0.14160902803057882, "grad_norm": 1.9651813507080078, "learning_rate": 1.7381157106006635e-05, "loss": 0.5734595060348511, "step": 1167 }, { "epoch": 0.14173037252760587, "grad_norm": 1.7164967060089111, "learning_rate": 1.737870040535561e-05, "loss": 0.3440057635307312, "step": 1168 }, { "epoch": 0.14185171702463292, "grad_norm": 0.0067315357737243176, "learning_rate": 1.7376243704704584e-05, "loss": 7.191597978817299e-05, "step": 1169 }, { "epoch": 0.14197306152166, "grad_norm": 1.702622413635254, "learning_rate": 1.7373787004053558e-05, "loss": 0.32997044920921326, "step": 1170 }, { "epoch": 0.14209440601868706, "grad_norm": 1.761008620262146, "learning_rate": 1.7371330303402532e-05, "loss": 0.2606213390827179, "step": 1171 }, { "epoch": 0.1422157505157141, "grad_norm": 1.6407947540283203, "learning_rate": 1.7368873602751507e-05, "loss": 0.21850815415382385, "step": 1172 }, { "epoch": 0.14233709501274117, "grad_norm": 1.0973106622695923, "learning_rate": 1.736641690210048e-05, "loss": 0.11652334034442902, "step": 1173 }, { "epoch": 0.14245843950976822, "grad_norm": 1.4407575130462646, "learning_rate": 1.7363960201449455e-05, "loss": 0.2936984896659851, "step": 1174 }, { "epoch": 0.1425797840067953, "grad_norm": 2.2742743492126465, "learning_rate": 1.736150350079843e-05, "loss": 0.26105767488479614, "step": 1175 }, { "epoch": 0.14270112850382236, "grad_norm": 1.7479246854782104, "learning_rate": 1.7359046800147404e-05, "loss": 0.18206825852394104, "step": 1176 }, { "epoch": 0.1428224730008494, "grad_norm": 1.737959623336792, "learning_rate": 1.7356590099496378e-05, "loss": 0.1964130699634552, "step": 1177 }, { "epoch": 0.14294381749787646, "grad_norm": 1.4681860208511353, "learning_rate": 1.7354133398845352e-05, "loss": 0.2748938202857971, "step": 1178 }, { "epoch": 0.14306516199490354, "grad_norm": 1.577621579170227, "learning_rate": 1.7351676698194327e-05, "loss": 0.28804469108581543, "step": 1179 }, { "epoch": 0.1431865064919306, "grad_norm": 2.1407663822174072, "learning_rate": 1.73492199975433e-05, "loss": 0.15757066011428833, "step": 1180 }, { "epoch": 0.14330785098895765, "grad_norm": 2.3729565143585205, "learning_rate": 1.7346763296892275e-05, "loss": 0.7062378525733948, "step": 1181 }, { "epoch": 0.1434291954859847, "grad_norm": 1.1742050647735596, "learning_rate": 1.734430659624125e-05, "loss": 0.18161582946777344, "step": 1182 }, { "epoch": 0.14355053998301176, "grad_norm": 1.5152652263641357, "learning_rate": 1.7341849895590224e-05, "loss": 0.3839699923992157, "step": 1183 }, { "epoch": 0.14367188448003884, "grad_norm": 1.7843862771987915, "learning_rate": 1.7339393194939198e-05, "loss": 0.23148319125175476, "step": 1184 }, { "epoch": 0.1437932289770659, "grad_norm": 1.5558139085769653, "learning_rate": 1.7336936494288172e-05, "loss": 0.562111496925354, "step": 1185 }, { "epoch": 0.14391457347409295, "grad_norm": 2.2693772315979004, "learning_rate": 1.7334479793637146e-05, "loss": 0.5566685199737549, "step": 1186 }, { "epoch": 0.14403591797112, "grad_norm": 2.0409531593322754, "learning_rate": 1.733202309298612e-05, "loss": 0.22142720222473145, "step": 1187 }, { "epoch": 0.14415726246814706, "grad_norm": 1.4018200635910034, "learning_rate": 1.7329566392335095e-05, "loss": 0.3808784782886505, "step": 1188 }, { "epoch": 0.14427860696517414, "grad_norm": 1.65623939037323, "learning_rate": 1.732710969168407e-05, "loss": 0.5155322551727295, "step": 1189 }, { "epoch": 0.1443999514622012, "grad_norm": 2.4095706939697266, "learning_rate": 1.7324652991033043e-05, "loss": 0.8260776996612549, "step": 1190 }, { "epoch": 0.14452129595922825, "grad_norm": 2.0686557292938232, "learning_rate": 1.7322196290382018e-05, "loss": 0.3561612069606781, "step": 1191 }, { "epoch": 0.1446426404562553, "grad_norm": 2.2921102046966553, "learning_rate": 1.7319739589730992e-05, "loss": 0.5535135865211487, "step": 1192 }, { "epoch": 0.14476398495328238, "grad_norm": 1.5281484127044678, "learning_rate": 1.7317282889079966e-05, "loss": 0.17620521783828735, "step": 1193 }, { "epoch": 0.14488532945030944, "grad_norm": 1.8255594968795776, "learning_rate": 1.731482618842894e-05, "loss": 0.6243975162506104, "step": 1194 }, { "epoch": 0.1450066739473365, "grad_norm": 1.8629471063613892, "learning_rate": 1.7312369487777915e-05, "loss": 0.3417014479637146, "step": 1195 }, { "epoch": 0.14512801844436354, "grad_norm": 2.4901225566864014, "learning_rate": 1.730991278712689e-05, "loss": 0.3027805685997009, "step": 1196 }, { "epoch": 0.1452493629413906, "grad_norm": 2.5729613304138184, "learning_rate": 1.7307456086475863e-05, "loss": 0.297546923160553, "step": 1197 }, { "epoch": 0.14537070743841768, "grad_norm": 1.570296287536621, "learning_rate": 1.7304999385824837e-05, "loss": 0.0903053879737854, "step": 1198 }, { "epoch": 0.14549205193544473, "grad_norm": 1.8403199911117554, "learning_rate": 1.730254268517381e-05, "loss": 0.3443407118320465, "step": 1199 }, { "epoch": 0.1456133964324718, "grad_norm": 1.7548613548278809, "learning_rate": 1.7300085984522786e-05, "loss": 0.29256072640419006, "step": 1200 }, { "epoch": 0.14573474092949884, "grad_norm": 1.686950445175171, "learning_rate": 1.729762928387176e-05, "loss": 0.19930797815322876, "step": 1201 }, { "epoch": 0.1458560854265259, "grad_norm": 1.3716206550598145, "learning_rate": 1.7295172583220738e-05, "loss": 0.3277512192726135, "step": 1202 }, { "epoch": 0.14597742992355298, "grad_norm": 1.6324442625045776, "learning_rate": 1.7292715882569712e-05, "loss": 0.17892125248908997, "step": 1203 }, { "epoch": 0.14609877442058003, "grad_norm": 1.52149498462677, "learning_rate": 1.7290259181918686e-05, "loss": 0.1897353082895279, "step": 1204 }, { "epoch": 0.14622011891760708, "grad_norm": 2.012589931488037, "learning_rate": 1.728780248126766e-05, "loss": 0.2621530294418335, "step": 1205 }, { "epoch": 0.14634146341463414, "grad_norm": 1.8895965814590454, "learning_rate": 1.7285345780616635e-05, "loss": 0.29131007194519043, "step": 1206 }, { "epoch": 0.14646280791166122, "grad_norm": 1.6223446130752563, "learning_rate": 1.728288907996561e-05, "loss": 0.3021329641342163, "step": 1207 }, { "epoch": 0.14658415240868827, "grad_norm": 1.5214295387268066, "learning_rate": 1.7280432379314583e-05, "loss": 0.16306689381599426, "step": 1208 }, { "epoch": 0.14670549690571533, "grad_norm": 2.3011186122894287, "learning_rate": 1.7277975678663558e-05, "loss": 0.38378268480300903, "step": 1209 }, { "epoch": 0.14682684140274238, "grad_norm": 1.7881890535354614, "learning_rate": 1.7275518978012532e-05, "loss": 0.12493912875652313, "step": 1210 }, { "epoch": 0.14694818589976943, "grad_norm": 1.4234734773635864, "learning_rate": 1.7273062277361506e-05, "loss": 0.3630540668964386, "step": 1211 }, { "epoch": 0.14706953039679652, "grad_norm": 1.7614541053771973, "learning_rate": 1.727060557671048e-05, "loss": 0.05657818168401718, "step": 1212 }, { "epoch": 0.14719087489382357, "grad_norm": 2.1472115516662598, "learning_rate": 1.7268148876059455e-05, "loss": 0.2433232069015503, "step": 1213 }, { "epoch": 0.14731221939085062, "grad_norm": 0.8982884287834167, "learning_rate": 1.726569217540843e-05, "loss": 0.01902024820446968, "step": 1214 }, { "epoch": 0.14743356388787768, "grad_norm": 2.860464572906494, "learning_rate": 1.7263235474757403e-05, "loss": 0.370156466960907, "step": 1215 }, { "epoch": 0.14755490838490473, "grad_norm": 1.7890870571136475, "learning_rate": 1.7260778774106377e-05, "loss": 0.26941537857055664, "step": 1216 }, { "epoch": 0.1476762528819318, "grad_norm": 1.9163086414337158, "learning_rate": 1.7258322073455352e-05, "loss": 0.15845495462417603, "step": 1217 }, { "epoch": 0.14779759737895887, "grad_norm": 1.890729308128357, "learning_rate": 1.7255865372804326e-05, "loss": 0.47432729601860046, "step": 1218 }, { "epoch": 0.14791894187598592, "grad_norm": 1.7181979417800903, "learning_rate": 1.72534086721533e-05, "loss": 0.3108631372451782, "step": 1219 }, { "epoch": 0.14804028637301297, "grad_norm": 2.174555778503418, "learning_rate": 1.7250951971502274e-05, "loss": 0.5981994867324829, "step": 1220 }, { "epoch": 0.14816163087004006, "grad_norm": 1.4610273838043213, "learning_rate": 1.724849527085125e-05, "loss": 0.29758310317993164, "step": 1221 }, { "epoch": 0.1482829753670671, "grad_norm": 2.681323528289795, "learning_rate": 1.7246038570200223e-05, "loss": 0.9386166930198669, "step": 1222 }, { "epoch": 0.14840431986409416, "grad_norm": 2.1302742958068848, "learning_rate": 1.7243581869549197e-05, "loss": 0.17157283425331116, "step": 1223 }, { "epoch": 0.14852566436112122, "grad_norm": 1.9001370668411255, "learning_rate": 1.724112516889817e-05, "loss": 0.47526293992996216, "step": 1224 }, { "epoch": 0.14864700885814827, "grad_norm": 0.009420192800462246, "learning_rate": 1.7238668468247146e-05, "loss": 8.176633855327964e-05, "step": 1225 }, { "epoch": 0.14876835335517535, "grad_norm": 2.4234273433685303, "learning_rate": 1.723621176759612e-05, "loss": 0.25924772024154663, "step": 1226 }, { "epoch": 0.1488896978522024, "grad_norm": 2.9018149375915527, "learning_rate": 1.7233755066945094e-05, "loss": 0.6235582828521729, "step": 1227 }, { "epoch": 0.14901104234922946, "grad_norm": 1.1629736423492432, "learning_rate": 1.723129836629407e-05, "loss": 0.4921530485153198, "step": 1228 }, { "epoch": 0.14913238684625651, "grad_norm": 5.108669281005859, "learning_rate": 1.7228841665643043e-05, "loss": 0.7264307737350464, "step": 1229 }, { "epoch": 0.14925373134328357, "grad_norm": 1.8065292835235596, "learning_rate": 1.7226384964992017e-05, "loss": 0.7250082492828369, "step": 1230 }, { "epoch": 0.14937507584031065, "grad_norm": 1.2931733131408691, "learning_rate": 1.722392826434099e-05, "loss": 0.20766153931617737, "step": 1231 }, { "epoch": 0.1494964203373377, "grad_norm": 2.364239454269409, "learning_rate": 1.7221471563689966e-05, "loss": 0.45660078525543213, "step": 1232 }, { "epoch": 0.14961776483436476, "grad_norm": 2.4389283657073975, "learning_rate": 1.721901486303894e-05, "loss": 0.4532015323638916, "step": 1233 }, { "epoch": 0.1497391093313918, "grad_norm": 2.2398204803466797, "learning_rate": 1.7216558162387914e-05, "loss": 0.6295405626296997, "step": 1234 }, { "epoch": 0.1498604538284189, "grad_norm": 1.3575704097747803, "learning_rate": 1.721410146173689e-05, "loss": 0.40071597695350647, "step": 1235 }, { "epoch": 0.14998179832544595, "grad_norm": 1.4906830787658691, "learning_rate": 1.7211644761085863e-05, "loss": 0.11917506903409958, "step": 1236 }, { "epoch": 0.150103142822473, "grad_norm": 0.0011105046141892672, "learning_rate": 1.7209188060434837e-05, "loss": 3.206374458386563e-05, "step": 1237 }, { "epoch": 0.15022448731950006, "grad_norm": 1.8720066547393799, "learning_rate": 1.720673135978381e-05, "loss": 0.5851035714149475, "step": 1238 }, { "epoch": 0.1503458318165271, "grad_norm": 2.0771634578704834, "learning_rate": 1.7204274659132785e-05, "loss": 0.19338861107826233, "step": 1239 }, { "epoch": 0.1504671763135542, "grad_norm": 2.8861210346221924, "learning_rate": 1.720181795848176e-05, "loss": 0.2231196016073227, "step": 1240 }, { "epoch": 0.15058852081058124, "grad_norm": 1.8362433910369873, "learning_rate": 1.7199361257830737e-05, "loss": 0.15223735570907593, "step": 1241 }, { "epoch": 0.1507098653076083, "grad_norm": 1.6717724800109863, "learning_rate": 1.719690455717971e-05, "loss": 0.11967384070158005, "step": 1242 }, { "epoch": 0.15083120980463535, "grad_norm": 1.8411227464675903, "learning_rate": 1.7194447856528686e-05, "loss": 0.29932233691215515, "step": 1243 }, { "epoch": 0.1509525543016624, "grad_norm": 1.8385282754898071, "learning_rate": 1.719199115587766e-05, "loss": 0.29845061898231506, "step": 1244 }, { "epoch": 0.1510738987986895, "grad_norm": 1.8935575485229492, "learning_rate": 1.7189534455226634e-05, "loss": 0.3484433889389038, "step": 1245 }, { "epoch": 0.15119524329571654, "grad_norm": 2.388352870941162, "learning_rate": 1.718707775457561e-05, "loss": 0.7048746943473816, "step": 1246 }, { "epoch": 0.1513165877927436, "grad_norm": 1.184291124343872, "learning_rate": 1.7184621053924583e-05, "loss": 0.11675606667995453, "step": 1247 }, { "epoch": 0.15143793228977065, "grad_norm": 2.7689146995544434, "learning_rate": 1.7182164353273557e-05, "loss": 0.3825250267982483, "step": 1248 }, { "epoch": 0.15155927678679773, "grad_norm": 1.7167044878005981, "learning_rate": 1.717970765262253e-05, "loss": 0.38567736744880676, "step": 1249 }, { "epoch": 0.15168062128382478, "grad_norm": 1.61948561668396, "learning_rate": 1.7177250951971506e-05, "loss": 0.07746545970439911, "step": 1250 }, { "epoch": 0.15180196578085184, "grad_norm": 1.5445133447647095, "learning_rate": 1.717479425132048e-05, "loss": 0.22907724976539612, "step": 1251 }, { "epoch": 0.1519233102778789, "grad_norm": 2.118232250213623, "learning_rate": 1.7172337550669454e-05, "loss": 0.1413237750530243, "step": 1252 }, { "epoch": 0.15204465477490595, "grad_norm": 1.539642333984375, "learning_rate": 1.716988085001843e-05, "loss": 0.4614269435405731, "step": 1253 }, { "epoch": 0.15216599927193303, "grad_norm": 2.36910343170166, "learning_rate": 1.7167424149367403e-05, "loss": 0.3816324472427368, "step": 1254 }, { "epoch": 0.15228734376896008, "grad_norm": 2.8110387325286865, "learning_rate": 1.7164967448716374e-05, "loss": 0.6992867588996887, "step": 1255 }, { "epoch": 0.15240868826598714, "grad_norm": 2.969259023666382, "learning_rate": 1.7162510748065348e-05, "loss": 0.6973639726638794, "step": 1256 }, { "epoch": 0.1525300327630142, "grad_norm": 1.3217960596084595, "learning_rate": 1.7160054047414322e-05, "loss": 0.1124032661318779, "step": 1257 }, { "epoch": 0.15265137726004124, "grad_norm": 2.619513750076294, "learning_rate": 1.7157597346763296e-05, "loss": 0.37696775794029236, "step": 1258 }, { "epoch": 0.15277272175706832, "grad_norm": 2.0856645107269287, "learning_rate": 1.715514064611227e-05, "loss": 0.15504798293113708, "step": 1259 }, { "epoch": 0.15289406625409538, "grad_norm": 2.5539822578430176, "learning_rate": 1.7152683945461245e-05, "loss": 0.3083515763282776, "step": 1260 }, { "epoch": 0.15301541075112243, "grad_norm": 1.6461360454559326, "learning_rate": 1.715022724481022e-05, "loss": 0.6977939009666443, "step": 1261 }, { "epoch": 0.1531367552481495, "grad_norm": 2.4875383377075195, "learning_rate": 1.7147770544159193e-05, "loss": 0.47936803102493286, "step": 1262 }, { "epoch": 0.15325809974517657, "grad_norm": 3.297154426574707, "learning_rate": 1.7145313843508168e-05, "loss": 0.3359706699848175, "step": 1263 }, { "epoch": 0.15337944424220362, "grad_norm": 2.131556272506714, "learning_rate": 1.7142857142857142e-05, "loss": 0.3174992799758911, "step": 1264 }, { "epoch": 0.15350078873923068, "grad_norm": 1.9300246238708496, "learning_rate": 1.7140400442206116e-05, "loss": 0.4541569948196411, "step": 1265 }, { "epoch": 0.15362213323625773, "grad_norm": 3.374305009841919, "learning_rate": 1.713794374155509e-05, "loss": 0.4640466570854187, "step": 1266 }, { "epoch": 0.15374347773328478, "grad_norm": 2.009493112564087, "learning_rate": 1.7135487040904068e-05, "loss": 0.19491064548492432, "step": 1267 }, { "epoch": 0.15386482223031187, "grad_norm": 1.8354039192199707, "learning_rate": 1.7133030340253042e-05, "loss": 0.3190333843231201, "step": 1268 }, { "epoch": 0.15398616672733892, "grad_norm": 1.7007555961608887, "learning_rate": 1.7130573639602017e-05, "loss": 0.27993080019950867, "step": 1269 }, { "epoch": 0.15410751122436597, "grad_norm": 2.211060047149658, "learning_rate": 1.712811693895099e-05, "loss": 0.17426905035972595, "step": 1270 }, { "epoch": 0.15422885572139303, "grad_norm": 1.33144211769104, "learning_rate": 1.7125660238299965e-05, "loss": 0.24775874614715576, "step": 1271 }, { "epoch": 0.1543502002184201, "grad_norm": 2.5085713863372803, "learning_rate": 1.712320353764894e-05, "loss": 0.5349833965301514, "step": 1272 }, { "epoch": 0.15447154471544716, "grad_norm": 1.8141382932662964, "learning_rate": 1.7120746836997914e-05, "loss": 0.5107168555259705, "step": 1273 }, { "epoch": 0.15459288921247422, "grad_norm": 2.1935603618621826, "learning_rate": 1.7118290136346888e-05, "loss": 0.24863873422145844, "step": 1274 }, { "epoch": 0.15471423370950127, "grad_norm": 2.279149293899536, "learning_rate": 1.7115833435695862e-05, "loss": 0.4017123579978943, "step": 1275 }, { "epoch": 0.15483557820652832, "grad_norm": 1.9747353792190552, "learning_rate": 1.7113376735044836e-05, "loss": 0.4218288064002991, "step": 1276 }, { "epoch": 0.1549569227035554, "grad_norm": 2.6752212047576904, "learning_rate": 1.711092003439381e-05, "loss": 0.39060577750205994, "step": 1277 }, { "epoch": 0.15507826720058246, "grad_norm": 2.636204481124878, "learning_rate": 1.7108463333742785e-05, "loss": 0.4002552628517151, "step": 1278 }, { "epoch": 0.1551996116976095, "grad_norm": 0.532202959060669, "learning_rate": 1.710600663309176e-05, "loss": 0.03157857060432434, "step": 1279 }, { "epoch": 0.15532095619463657, "grad_norm": 1.8398561477661133, "learning_rate": 1.7103549932440733e-05, "loss": 0.27052685618400574, "step": 1280 }, { "epoch": 0.15544230069166362, "grad_norm": 1.626185417175293, "learning_rate": 1.7101093231789708e-05, "loss": 0.09882887452840805, "step": 1281 }, { "epoch": 0.1555636451886907, "grad_norm": 1.96735680103302, "learning_rate": 1.7098636531138682e-05, "loss": 0.4001654088497162, "step": 1282 }, { "epoch": 0.15568498968571776, "grad_norm": 2.2064571380615234, "learning_rate": 1.7096179830487656e-05, "loss": 0.26003214716911316, "step": 1283 }, { "epoch": 0.1558063341827448, "grad_norm": 1.3411259651184082, "learning_rate": 1.709372312983663e-05, "loss": 0.16639862954616547, "step": 1284 }, { "epoch": 0.15592767867977186, "grad_norm": 1.34841787815094, "learning_rate": 1.7091266429185605e-05, "loss": 0.06426498293876648, "step": 1285 }, { "epoch": 0.15604902317679895, "grad_norm": 1.5554633140563965, "learning_rate": 1.708880972853458e-05, "loss": 0.15110211074352264, "step": 1286 }, { "epoch": 0.156170367673826, "grad_norm": 1.845657229423523, "learning_rate": 1.7086353027883553e-05, "loss": 0.776990532875061, "step": 1287 }, { "epoch": 0.15629171217085305, "grad_norm": 1.739893913269043, "learning_rate": 1.7083896327232527e-05, "loss": 0.3289273679256439, "step": 1288 }, { "epoch": 0.1564130566678801, "grad_norm": 1.6401827335357666, "learning_rate": 1.70814396265815e-05, "loss": 0.07742001116275787, "step": 1289 }, { "epoch": 0.15653440116490716, "grad_norm": 2.0880889892578125, "learning_rate": 1.7078982925930476e-05, "loss": 0.43219226598739624, "step": 1290 }, { "epoch": 0.15665574566193424, "grad_norm": 2.825942277908325, "learning_rate": 1.707652622527945e-05, "loss": 0.3840549886226654, "step": 1291 }, { "epoch": 0.1567770901589613, "grad_norm": 2.3352129459381104, "learning_rate": 1.7074069524628424e-05, "loss": 0.44269004464149475, "step": 1292 }, { "epoch": 0.15689843465598835, "grad_norm": 3.0530810356140137, "learning_rate": 1.70716128239774e-05, "loss": 0.4579879641532898, "step": 1293 }, { "epoch": 0.1570197791530154, "grad_norm": 2.729116439819336, "learning_rate": 1.7069156123326373e-05, "loss": 0.29994532465934753, "step": 1294 }, { "epoch": 0.15714112365004246, "grad_norm": 1.3052394390106201, "learning_rate": 1.7066699422675347e-05, "loss": 0.07982388138771057, "step": 1295 }, { "epoch": 0.15726246814706954, "grad_norm": 2.357917547225952, "learning_rate": 1.706424272202432e-05, "loss": 0.2928987145423889, "step": 1296 }, { "epoch": 0.1573838126440966, "grad_norm": 1.4563827514648438, "learning_rate": 1.7061786021373296e-05, "loss": 0.2420179843902588, "step": 1297 }, { "epoch": 0.15750515714112365, "grad_norm": 3.0957210063934326, "learning_rate": 1.705932932072227e-05, "loss": 0.1874426305294037, "step": 1298 }, { "epoch": 0.1576265016381507, "grad_norm": 2.0317540168762207, "learning_rate": 1.7056872620071244e-05, "loss": 0.2113209068775177, "step": 1299 }, { "epoch": 0.15774784613517778, "grad_norm": 1.844716191291809, "learning_rate": 1.705441591942022e-05, "loss": 0.20830923318862915, "step": 1300 }, { "epoch": 0.15786919063220484, "grad_norm": 0.09604529291391373, "learning_rate": 1.7051959218769193e-05, "loss": 0.002145261038094759, "step": 1301 }, { "epoch": 0.1579905351292319, "grad_norm": 1.3927608728408813, "learning_rate": 1.7049502518118167e-05, "loss": 0.4583245515823364, "step": 1302 }, { "epoch": 0.15811187962625894, "grad_norm": 2.9382057189941406, "learning_rate": 1.704704581746714e-05, "loss": 0.6795557141304016, "step": 1303 }, { "epoch": 0.158233224123286, "grad_norm": 3.182617425918579, "learning_rate": 1.7044589116816116e-05, "loss": 0.7760910391807556, "step": 1304 }, { "epoch": 0.15835456862031308, "grad_norm": 2.0352823734283447, "learning_rate": 1.704213241616509e-05, "loss": 0.24128247797489166, "step": 1305 }, { "epoch": 0.15847591311734013, "grad_norm": 2.315619468688965, "learning_rate": 1.7039675715514064e-05, "loss": 0.18327398598194122, "step": 1306 }, { "epoch": 0.1585972576143672, "grad_norm": 2.19699764251709, "learning_rate": 1.7037219014863042e-05, "loss": 0.5556395053863525, "step": 1307 }, { "epoch": 0.15871860211139424, "grad_norm": 1.1444036960601807, "learning_rate": 1.7034762314212016e-05, "loss": 0.12133655697107315, "step": 1308 }, { "epoch": 0.1588399466084213, "grad_norm": 2.0349247455596924, "learning_rate": 1.703230561356099e-05, "loss": 0.3753592073917389, "step": 1309 }, { "epoch": 0.15896129110544838, "grad_norm": 1.413674235343933, "learning_rate": 1.7029848912909964e-05, "loss": 0.14800333976745605, "step": 1310 }, { "epoch": 0.15908263560247543, "grad_norm": 1.274200677871704, "learning_rate": 1.702739221225894e-05, "loss": 0.20592835545539856, "step": 1311 }, { "epoch": 0.15920398009950248, "grad_norm": 2.0405519008636475, "learning_rate": 1.7024935511607913e-05, "loss": 0.605412483215332, "step": 1312 }, { "epoch": 0.15932532459652954, "grad_norm": 2.199052333831787, "learning_rate": 1.7022478810956887e-05, "loss": 0.581245481967926, "step": 1313 }, { "epoch": 0.15944666909355662, "grad_norm": 3.273561954498291, "learning_rate": 1.702002211030586e-05, "loss": 0.19570647180080414, "step": 1314 }, { "epoch": 0.15956801359058367, "grad_norm": 2.3701846599578857, "learning_rate": 1.7017565409654836e-05, "loss": 0.32590925693511963, "step": 1315 }, { "epoch": 0.15968935808761073, "grad_norm": 0.4969005882740021, "learning_rate": 1.701510870900381e-05, "loss": 0.01700139045715332, "step": 1316 }, { "epoch": 0.15981070258463778, "grad_norm": 1.716216802597046, "learning_rate": 1.7012652008352784e-05, "loss": 0.34420692920684814, "step": 1317 }, { "epoch": 0.15993204708166484, "grad_norm": 2.2408816814422607, "learning_rate": 1.701019530770176e-05, "loss": 0.20139765739440918, "step": 1318 }, { "epoch": 0.16005339157869192, "grad_norm": 1.6093449592590332, "learning_rate": 1.7007738607050733e-05, "loss": 0.22592651844024658, "step": 1319 }, { "epoch": 0.16017473607571897, "grad_norm": 1.2304242849349976, "learning_rate": 1.7005281906399707e-05, "loss": 0.06464403867721558, "step": 1320 }, { "epoch": 0.16029608057274602, "grad_norm": 2.2083680629730225, "learning_rate": 1.700282520574868e-05, "loss": 0.5366463661193848, "step": 1321 }, { "epoch": 0.16041742506977308, "grad_norm": 1.9002628326416016, "learning_rate": 1.7000368505097656e-05, "loss": 0.21244825422763824, "step": 1322 }, { "epoch": 0.16053876956680013, "grad_norm": 1.1220264434814453, "learning_rate": 1.699791180444663e-05, "loss": 0.09071967005729675, "step": 1323 }, { "epoch": 0.16066011406382721, "grad_norm": 2.344264030456543, "learning_rate": 1.6995455103795604e-05, "loss": 0.4256601929664612, "step": 1324 }, { "epoch": 0.16078145856085427, "grad_norm": 1.312510371208191, "learning_rate": 1.699299840314458e-05, "loss": 0.07779626548290253, "step": 1325 }, { "epoch": 0.16090280305788132, "grad_norm": 2.834338665008545, "learning_rate": 1.6990541702493553e-05, "loss": 0.46628639101982117, "step": 1326 }, { "epoch": 0.16102414755490838, "grad_norm": 1.238459825515747, "learning_rate": 1.6988085001842527e-05, "loss": 0.11977294832468033, "step": 1327 }, { "epoch": 0.16114549205193546, "grad_norm": 1.8803859949111938, "learning_rate": 1.69856283011915e-05, "loss": 0.25932106375694275, "step": 1328 }, { "epoch": 0.1612668365489625, "grad_norm": 2.950925350189209, "learning_rate": 1.6983171600540475e-05, "loss": 0.32873010635375977, "step": 1329 }, { "epoch": 0.16138818104598956, "grad_norm": 1.580187439918518, "learning_rate": 1.698071489988945e-05, "loss": 0.32839733362197876, "step": 1330 }, { "epoch": 0.16150952554301662, "grad_norm": 2.0951292514801025, "learning_rate": 1.6978258199238424e-05, "loss": 0.2957846522331238, "step": 1331 }, { "epoch": 0.16163087004004367, "grad_norm": 2.181708812713623, "learning_rate": 1.6975801498587398e-05, "loss": 0.2901095151901245, "step": 1332 }, { "epoch": 0.16175221453707075, "grad_norm": 2.0835371017456055, "learning_rate": 1.6973344797936372e-05, "loss": 0.39948153495788574, "step": 1333 }, { "epoch": 0.1618735590340978, "grad_norm": 1.7480794191360474, "learning_rate": 1.6970888097285347e-05, "loss": 0.10842632502317429, "step": 1334 }, { "epoch": 0.16199490353112486, "grad_norm": 1.6553689241409302, "learning_rate": 1.696843139663432e-05, "loss": 0.5176296234130859, "step": 1335 }, { "epoch": 0.16211624802815192, "grad_norm": 2.356034994125366, "learning_rate": 1.6965974695983295e-05, "loss": 0.4009917378425598, "step": 1336 }, { "epoch": 0.16223759252517897, "grad_norm": 1.4407398700714111, "learning_rate": 1.696351799533227e-05, "loss": 0.28117045760154724, "step": 1337 }, { "epoch": 0.16235893702220605, "grad_norm": 1.3323804140090942, "learning_rate": 1.6961061294681244e-05, "loss": 0.24473021924495697, "step": 1338 }, { "epoch": 0.1624802815192331, "grad_norm": 1.888528823852539, "learning_rate": 1.6958604594030218e-05, "loss": 0.33315813541412354, "step": 1339 }, { "epoch": 0.16260162601626016, "grad_norm": 1.6776105165481567, "learning_rate": 1.6956147893379192e-05, "loss": 0.1390353888273239, "step": 1340 }, { "epoch": 0.1627229705132872, "grad_norm": 2.3369064331054688, "learning_rate": 1.6953691192728166e-05, "loss": 0.37260985374450684, "step": 1341 }, { "epoch": 0.1628443150103143, "grad_norm": 1.9706916809082031, "learning_rate": 1.695123449207714e-05, "loss": 0.14245374500751495, "step": 1342 }, { "epoch": 0.16296565950734135, "grad_norm": 2.193972110748291, "learning_rate": 1.6948777791426115e-05, "loss": 0.4535263776779175, "step": 1343 }, { "epoch": 0.1630870040043684, "grad_norm": 2.0166101455688477, "learning_rate": 1.694632109077509e-05, "loss": 0.18072649836540222, "step": 1344 }, { "epoch": 0.16320834850139546, "grad_norm": 0.9884637594223022, "learning_rate": 1.6943864390124064e-05, "loss": 0.14514899253845215, "step": 1345 }, { "epoch": 0.1633296929984225, "grad_norm": 2.3087267875671387, "learning_rate": 1.694140768947304e-05, "loss": 0.31159549951553345, "step": 1346 }, { "epoch": 0.1634510374954496, "grad_norm": 3.271432876586914, "learning_rate": 1.6938950988822015e-05, "loss": 0.27185720205307007, "step": 1347 }, { "epoch": 0.16357238199247665, "grad_norm": 2.102259635925293, "learning_rate": 1.693649428817099e-05, "loss": 0.5381771326065063, "step": 1348 }, { "epoch": 0.1636937264895037, "grad_norm": 1.1917797327041626, "learning_rate": 1.6934037587519964e-05, "loss": 0.10865627229213715, "step": 1349 }, { "epoch": 0.16381507098653075, "grad_norm": 1.9590660333633423, "learning_rate": 1.6931580886868938e-05, "loss": 0.3822226822376251, "step": 1350 }, { "epoch": 0.1639364154835578, "grad_norm": 1.6567803621292114, "learning_rate": 1.6929124186217912e-05, "loss": 0.12453743070363998, "step": 1351 }, { "epoch": 0.1640577599805849, "grad_norm": 1.4354405403137207, "learning_rate": 1.6926667485566887e-05, "loss": 0.2581062912940979, "step": 1352 }, { "epoch": 0.16417910447761194, "grad_norm": 1.3010510206222534, "learning_rate": 1.692421078491586e-05, "loss": 0.5185683965682983, "step": 1353 }, { "epoch": 0.164300448974639, "grad_norm": 1.7877172231674194, "learning_rate": 1.6921754084264835e-05, "loss": 0.21436668932437897, "step": 1354 }, { "epoch": 0.16442179347166605, "grad_norm": 1.7531827688217163, "learning_rate": 1.691929738361381e-05, "loss": 0.12685886025428772, "step": 1355 }, { "epoch": 0.16454313796869313, "grad_norm": 1.4889940023422241, "learning_rate": 1.6916840682962784e-05, "loss": 0.41197526454925537, "step": 1356 }, { "epoch": 0.16466448246572019, "grad_norm": 1.6222858428955078, "learning_rate": 1.6914383982311758e-05, "loss": 0.2630276083946228, "step": 1357 }, { "epoch": 0.16478582696274724, "grad_norm": 2.612492799758911, "learning_rate": 1.6911927281660732e-05, "loss": 0.2662280201911926, "step": 1358 }, { "epoch": 0.1649071714597743, "grad_norm": 1.8409432172775269, "learning_rate": 1.6909470581009707e-05, "loss": 0.3193710446357727, "step": 1359 }, { "epoch": 0.16502851595680135, "grad_norm": 1.8577767610549927, "learning_rate": 1.690701388035868e-05, "loss": 0.40329962968826294, "step": 1360 }, { "epoch": 0.16514986045382843, "grad_norm": 1.5214166641235352, "learning_rate": 1.6904557179707655e-05, "loss": 0.43802523612976074, "step": 1361 }, { "epoch": 0.16527120495085548, "grad_norm": 1.752384901046753, "learning_rate": 1.690210047905663e-05, "loss": 0.15972882509231567, "step": 1362 }, { "epoch": 0.16539254944788254, "grad_norm": 2.470399856567383, "learning_rate": 1.6899643778405604e-05, "loss": 0.5078000426292419, "step": 1363 }, { "epoch": 0.1655138939449096, "grad_norm": 1.7605093717575073, "learning_rate": 1.6897187077754578e-05, "loss": 0.24891872704029083, "step": 1364 }, { "epoch": 0.16563523844193667, "grad_norm": 1.6690492630004883, "learning_rate": 1.6894730377103552e-05, "loss": 0.2679961621761322, "step": 1365 }, { "epoch": 0.16575658293896373, "grad_norm": 2.569883108139038, "learning_rate": 1.6892273676452526e-05, "loss": 0.432903528213501, "step": 1366 }, { "epoch": 0.16587792743599078, "grad_norm": 1.740451693534851, "learning_rate": 1.68898169758015e-05, "loss": 0.12391990423202515, "step": 1367 }, { "epoch": 0.16599927193301783, "grad_norm": 2.0230066776275635, "learning_rate": 1.6887360275150475e-05, "loss": 0.2665267288684845, "step": 1368 }, { "epoch": 0.1661206164300449, "grad_norm": 1.4242761135101318, "learning_rate": 1.688490357449945e-05, "loss": 0.09299017488956451, "step": 1369 }, { "epoch": 0.16624196092707197, "grad_norm": 1.7172342538833618, "learning_rate": 1.6882446873848423e-05, "loss": 0.20614555478096008, "step": 1370 }, { "epoch": 0.16636330542409902, "grad_norm": 1.4239894151687622, "learning_rate": 1.6879990173197398e-05, "loss": 0.34842580556869507, "step": 1371 }, { "epoch": 0.16648464992112608, "grad_norm": 1.8273541927337646, "learning_rate": 1.6877533472546372e-05, "loss": 0.27392613887786865, "step": 1372 }, { "epoch": 0.16660599441815313, "grad_norm": 1.901670217514038, "learning_rate": 1.6875076771895346e-05, "loss": 0.21127557754516602, "step": 1373 }, { "epoch": 0.16672733891518018, "grad_norm": 8.480103492736816, "learning_rate": 1.687262007124432e-05, "loss": 0.15148305892944336, "step": 1374 }, { "epoch": 0.16684868341220727, "grad_norm": 1.8833881616592407, "learning_rate": 1.6870163370593295e-05, "loss": 0.2864915132522583, "step": 1375 }, { "epoch": 0.16697002790923432, "grad_norm": 1.975264549255371, "learning_rate": 1.686770666994227e-05, "loss": 0.36668485403060913, "step": 1376 }, { "epoch": 0.16709137240626137, "grad_norm": 3.24489164352417, "learning_rate": 1.6865249969291243e-05, "loss": 0.45897114276885986, "step": 1377 }, { "epoch": 0.16721271690328843, "grad_norm": 3.0113956928253174, "learning_rate": 1.6862793268640217e-05, "loss": 0.3996666967868805, "step": 1378 }, { "epoch": 0.1673340614003155, "grad_norm": 1.9160172939300537, "learning_rate": 1.6860336567989192e-05, "loss": 0.08457375317811966, "step": 1379 }, { "epoch": 0.16745540589734256, "grad_norm": 2.8236207962036133, "learning_rate": 1.6857879867338166e-05, "loss": 0.36632949113845825, "step": 1380 }, { "epoch": 0.16757675039436962, "grad_norm": 1.984706163406372, "learning_rate": 1.685542316668714e-05, "loss": 0.3010556995868683, "step": 1381 }, { "epoch": 0.16769809489139667, "grad_norm": 1.6785802841186523, "learning_rate": 1.6852966466036114e-05, "loss": 0.4251388907432556, "step": 1382 }, { "epoch": 0.16781943938842372, "grad_norm": 4.960895538330078, "learning_rate": 1.685050976538509e-05, "loss": 0.43394631147384644, "step": 1383 }, { "epoch": 0.1679407838854508, "grad_norm": 1.7378970384597778, "learning_rate": 1.6848053064734063e-05, "loss": 0.29635417461395264, "step": 1384 }, { "epoch": 0.16806212838247786, "grad_norm": 2.0712788105010986, "learning_rate": 1.6845596364083037e-05, "loss": 0.6476526856422424, "step": 1385 }, { "epoch": 0.1681834728795049, "grad_norm": 2.259003162384033, "learning_rate": 1.6843139663432015e-05, "loss": 0.5198768377304077, "step": 1386 }, { "epoch": 0.16830481737653197, "grad_norm": 1.3213210105895996, "learning_rate": 1.684068296278099e-05, "loss": 0.45285072922706604, "step": 1387 }, { "epoch": 0.16842616187355902, "grad_norm": 2.139267683029175, "learning_rate": 1.6838226262129963e-05, "loss": 0.5512986779212952, "step": 1388 }, { "epoch": 0.1685475063705861, "grad_norm": 1.4775429964065552, "learning_rate": 1.6835769561478938e-05, "loss": 0.25231534242630005, "step": 1389 }, { "epoch": 0.16866885086761316, "grad_norm": 1.5690864324569702, "learning_rate": 1.683331286082791e-05, "loss": 0.18181580305099487, "step": 1390 }, { "epoch": 0.1687901953646402, "grad_norm": 2.166705846786499, "learning_rate": 1.6830856160176883e-05, "loss": 0.5808331966400146, "step": 1391 }, { "epoch": 0.16891153986166726, "grad_norm": 2.2479324340820312, "learning_rate": 1.6828399459525857e-05, "loss": 0.3114401400089264, "step": 1392 }, { "epoch": 0.16903288435869435, "grad_norm": 2.7515830993652344, "learning_rate": 1.682594275887483e-05, "loss": 0.7299220561981201, "step": 1393 }, { "epoch": 0.1691542288557214, "grad_norm": 0.346658855676651, "learning_rate": 1.6823486058223806e-05, "loss": 0.007224785629659891, "step": 1394 }, { "epoch": 0.16927557335274845, "grad_norm": 2.2689716815948486, "learning_rate": 1.682102935757278e-05, "loss": 0.3097282946109772, "step": 1395 }, { "epoch": 0.1693969178497755, "grad_norm": 2.001441717147827, "learning_rate": 1.6818572656921754e-05, "loss": 0.1686830371618271, "step": 1396 }, { "epoch": 0.16951826234680256, "grad_norm": 1.140173316001892, "learning_rate": 1.681611595627073e-05, "loss": 0.06480616331100464, "step": 1397 }, { "epoch": 0.16963960684382964, "grad_norm": 2.302633762359619, "learning_rate": 1.6813659255619703e-05, "loss": 0.09449177980422974, "step": 1398 }, { "epoch": 0.1697609513408567, "grad_norm": 1.742457389831543, "learning_rate": 1.6811202554968677e-05, "loss": 0.6511344909667969, "step": 1399 }, { "epoch": 0.16988229583788375, "grad_norm": 2.5629754066467285, "learning_rate": 1.680874585431765e-05, "loss": 0.5311500430107117, "step": 1400 }, { "epoch": 0.1700036403349108, "grad_norm": 2.018834352493286, "learning_rate": 1.6806289153666625e-05, "loss": 0.34516584873199463, "step": 1401 }, { "epoch": 0.17012498483193786, "grad_norm": 2.187851905822754, "learning_rate": 1.68038324530156e-05, "loss": 0.3023394048213959, "step": 1402 }, { "epoch": 0.17024632932896494, "grad_norm": 2.0745787620544434, "learning_rate": 1.6801375752364574e-05, "loss": 0.56155925989151, "step": 1403 }, { "epoch": 0.170367673825992, "grad_norm": 1.7877262830734253, "learning_rate": 1.6798919051713548e-05, "loss": 0.5152330994606018, "step": 1404 }, { "epoch": 0.17048901832301905, "grad_norm": 2.3125364780426025, "learning_rate": 1.6796462351062522e-05, "loss": 0.2775477170944214, "step": 1405 }, { "epoch": 0.1706103628200461, "grad_norm": 2.6091928482055664, "learning_rate": 1.6794005650411497e-05, "loss": 0.2951275706291199, "step": 1406 }, { "epoch": 0.17073170731707318, "grad_norm": 2.670105218887329, "learning_rate": 1.679154894976047e-05, "loss": 0.39512842893600464, "step": 1407 }, { "epoch": 0.17085305181410024, "grad_norm": 2.1665756702423096, "learning_rate": 1.6789092249109445e-05, "loss": 0.7851711511611938, "step": 1408 }, { "epoch": 0.1709743963111273, "grad_norm": 1.8065381050109863, "learning_rate": 1.678663554845842e-05, "loss": 0.15969766676425934, "step": 1409 }, { "epoch": 0.17109574080815435, "grad_norm": 1.518323540687561, "learning_rate": 1.6784178847807394e-05, "loss": 0.20590633153915405, "step": 1410 }, { "epoch": 0.1712170853051814, "grad_norm": 2.523805618286133, "learning_rate": 1.6781722147156368e-05, "loss": 0.31899237632751465, "step": 1411 }, { "epoch": 0.17133842980220848, "grad_norm": 2.5726451873779297, "learning_rate": 1.6779265446505346e-05, "loss": 0.3131209909915924, "step": 1412 }, { "epoch": 0.17145977429923553, "grad_norm": 1.8876440525054932, "learning_rate": 1.677680874585432e-05, "loss": 0.4819314479827881, "step": 1413 }, { "epoch": 0.1715811187962626, "grad_norm": 1.4496047496795654, "learning_rate": 1.6774352045203294e-05, "loss": 0.21618512272834778, "step": 1414 }, { "epoch": 0.17170246329328964, "grad_norm": 1.612060785293579, "learning_rate": 1.677189534455227e-05, "loss": 0.1677655726671219, "step": 1415 }, { "epoch": 0.1718238077903167, "grad_norm": 1.2587897777557373, "learning_rate": 1.6769438643901243e-05, "loss": 0.09883075207471848, "step": 1416 }, { "epoch": 0.17194515228734378, "grad_norm": 2.5266168117523193, "learning_rate": 1.6766981943250217e-05, "loss": 0.5532509088516235, "step": 1417 }, { "epoch": 0.17206649678437083, "grad_norm": 3.3645172119140625, "learning_rate": 1.676452524259919e-05, "loss": 0.14025680720806122, "step": 1418 }, { "epoch": 0.17218784128139789, "grad_norm": 2.097749710083008, "learning_rate": 1.6762068541948165e-05, "loss": 0.37450772523880005, "step": 1419 }, { "epoch": 0.17230918577842494, "grad_norm": 2.6865334510803223, "learning_rate": 1.675961184129714e-05, "loss": 0.46862709522247314, "step": 1420 }, { "epoch": 0.17243053027545202, "grad_norm": 0.005616781767457724, "learning_rate": 1.6757155140646114e-05, "loss": 0.00010598314111120999, "step": 1421 }, { "epoch": 0.17255187477247907, "grad_norm": 1.725932240486145, "learning_rate": 1.6754698439995088e-05, "loss": 0.3152719736099243, "step": 1422 }, { "epoch": 0.17267321926950613, "grad_norm": 2.0537219047546387, "learning_rate": 1.6752241739344062e-05, "loss": 0.2770642042160034, "step": 1423 }, { "epoch": 0.17279456376653318, "grad_norm": 2.266733407974243, "learning_rate": 1.6749785038693037e-05, "loss": 0.20772089064121246, "step": 1424 }, { "epoch": 0.17291590826356024, "grad_norm": 1.9213402271270752, "learning_rate": 1.674732833804201e-05, "loss": 0.378388375043869, "step": 1425 }, { "epoch": 0.17303725276058732, "grad_norm": 0.855719268321991, "learning_rate": 1.6744871637390985e-05, "loss": 0.03948398679494858, "step": 1426 }, { "epoch": 0.17315859725761437, "grad_norm": 2.2846179008483887, "learning_rate": 1.674241493673996e-05, "loss": 0.29622775316238403, "step": 1427 }, { "epoch": 0.17327994175464143, "grad_norm": 1.4371376037597656, "learning_rate": 1.6739958236088934e-05, "loss": 0.29555070400238037, "step": 1428 }, { "epoch": 0.17340128625166848, "grad_norm": 1.5206128358840942, "learning_rate": 1.6737501535437908e-05, "loss": 0.1654072254896164, "step": 1429 }, { "epoch": 0.17352263074869553, "grad_norm": 1.9108872413635254, "learning_rate": 1.6735044834786882e-05, "loss": 0.3061807453632355, "step": 1430 }, { "epoch": 0.17364397524572261, "grad_norm": 2.351694345474243, "learning_rate": 1.6732588134135857e-05, "loss": 0.5521951913833618, "step": 1431 }, { "epoch": 0.17376531974274967, "grad_norm": 1.2257763147354126, "learning_rate": 1.673013143348483e-05, "loss": 0.050480540841817856, "step": 1432 }, { "epoch": 0.17388666423977672, "grad_norm": 1.6109448671340942, "learning_rate": 1.6727674732833805e-05, "loss": 0.7171104550361633, "step": 1433 }, { "epoch": 0.17400800873680378, "grad_norm": 2.370811700820923, "learning_rate": 1.672521803218278e-05, "loss": 0.19278818368911743, "step": 1434 }, { "epoch": 0.17412935323383086, "grad_norm": 1.9877387285232544, "learning_rate": 1.6722761331531754e-05, "loss": 0.6486696004867554, "step": 1435 }, { "epoch": 0.1742506977308579, "grad_norm": 1.7820128202438354, "learning_rate": 1.6720304630880728e-05, "loss": 0.2971023619174957, "step": 1436 }, { "epoch": 0.17437204222788497, "grad_norm": 2.684583902359009, "learning_rate": 1.6717847930229702e-05, "loss": 0.5994515419006348, "step": 1437 }, { "epoch": 0.17449338672491202, "grad_norm": 3.353449583053589, "learning_rate": 1.6715391229578676e-05, "loss": 0.36244866251945496, "step": 1438 }, { "epoch": 0.17461473122193907, "grad_norm": 2.1446571350097656, "learning_rate": 1.671293452892765e-05, "loss": 0.31015217304229736, "step": 1439 }, { "epoch": 0.17473607571896616, "grad_norm": 1.5882110595703125, "learning_rate": 1.6710477828276625e-05, "loss": 0.2135632038116455, "step": 1440 }, { "epoch": 0.1748574202159932, "grad_norm": 1.8885900974273682, "learning_rate": 1.67080211276256e-05, "loss": 0.6606277227401733, "step": 1441 }, { "epoch": 0.17497876471302026, "grad_norm": 1.9658722877502441, "learning_rate": 1.6705564426974573e-05, "loss": 0.6350474953651428, "step": 1442 }, { "epoch": 0.17510010921004732, "grad_norm": 1.3200682401657104, "learning_rate": 1.6703107726323548e-05, "loss": 0.07505234330892563, "step": 1443 }, { "epoch": 0.17522145370707437, "grad_norm": 1.8777532577514648, "learning_rate": 1.6700651025672522e-05, "loss": 0.20652814209461212, "step": 1444 }, { "epoch": 0.17534279820410145, "grad_norm": 2.564035177230835, "learning_rate": 1.6698194325021496e-05, "loss": 0.2737087309360504, "step": 1445 }, { "epoch": 0.1754641427011285, "grad_norm": 1.2406517267227173, "learning_rate": 1.669573762437047e-05, "loss": 0.15805581212043762, "step": 1446 }, { "epoch": 0.17558548719815556, "grad_norm": 1.9846456050872803, "learning_rate": 1.6693280923719445e-05, "loss": 0.2611340284347534, "step": 1447 }, { "epoch": 0.1757068316951826, "grad_norm": 2.8493361473083496, "learning_rate": 1.669082422306842e-05, "loss": 0.4207281470298767, "step": 1448 }, { "epoch": 0.1758281761922097, "grad_norm": 1.8156849145889282, "learning_rate": 1.6688367522417393e-05, "loss": 0.25690674781799316, "step": 1449 }, { "epoch": 0.17594952068923675, "grad_norm": 2.1182401180267334, "learning_rate": 1.6685910821766367e-05, "loss": 0.2869364023208618, "step": 1450 }, { "epoch": 0.1760708651862638, "grad_norm": 1.5937093496322632, "learning_rate": 1.668345412111534e-05, "loss": 0.12525154650211334, "step": 1451 }, { "epoch": 0.17619220968329086, "grad_norm": 1.9949283599853516, "learning_rate": 1.668099742046432e-05, "loss": 0.2950144112110138, "step": 1452 }, { "epoch": 0.1763135541803179, "grad_norm": 2.5047764778137207, "learning_rate": 1.6678540719813294e-05, "loss": 0.4218530058860779, "step": 1453 }, { "epoch": 0.176434898677345, "grad_norm": 1.803932547569275, "learning_rate": 1.6676084019162268e-05, "loss": 0.3246077299118042, "step": 1454 }, { "epoch": 0.17655624317437205, "grad_norm": 2.5857975482940674, "learning_rate": 1.6673627318511242e-05, "loss": 0.31732994318008423, "step": 1455 }, { "epoch": 0.1766775876713991, "grad_norm": 2.1230576038360596, "learning_rate": 1.6671170617860216e-05, "loss": 0.31983518600463867, "step": 1456 }, { "epoch": 0.17679893216842615, "grad_norm": 2.242692470550537, "learning_rate": 1.666871391720919e-05, "loss": 0.2791813910007477, "step": 1457 }, { "epoch": 0.17692027666545324, "grad_norm": 2.2482571601867676, "learning_rate": 1.6666257216558165e-05, "loss": 0.3358612060546875, "step": 1458 }, { "epoch": 0.1770416211624803, "grad_norm": 1.851077675819397, "learning_rate": 1.666380051590714e-05, "loss": 0.22470499575138092, "step": 1459 }, { "epoch": 0.17716296565950734, "grad_norm": 1.292380690574646, "learning_rate": 1.6661343815256113e-05, "loss": 0.07124683260917664, "step": 1460 }, { "epoch": 0.1772843101565344, "grad_norm": 2.1859281063079834, "learning_rate": 1.6658887114605088e-05, "loss": 0.25534307956695557, "step": 1461 }, { "epoch": 0.17740565465356145, "grad_norm": 1.6746493577957153, "learning_rate": 1.6656430413954062e-05, "loss": 0.5188888907432556, "step": 1462 }, { "epoch": 0.17752699915058853, "grad_norm": 1.1798118352890015, "learning_rate": 1.6653973713303036e-05, "loss": 0.13194964826107025, "step": 1463 }, { "epoch": 0.1776483436476156, "grad_norm": 1.6883291006088257, "learning_rate": 1.665151701265201e-05, "loss": 0.3973052501678467, "step": 1464 }, { "epoch": 0.17776968814464264, "grad_norm": 3.1461122035980225, "learning_rate": 1.6649060312000985e-05, "loss": 0.49332958459854126, "step": 1465 }, { "epoch": 0.1778910326416697, "grad_norm": 2.2000954151153564, "learning_rate": 1.664660361134996e-05, "loss": 0.41994062066078186, "step": 1466 }, { "epoch": 0.17801237713869675, "grad_norm": 4.530543804168701, "learning_rate": 1.6644146910698933e-05, "loss": 0.3007562458515167, "step": 1467 }, { "epoch": 0.17813372163572383, "grad_norm": 2.0147387981414795, "learning_rate": 1.6641690210047907e-05, "loss": 0.6290687918663025, "step": 1468 }, { "epoch": 0.17825506613275088, "grad_norm": 2.1812171936035156, "learning_rate": 1.6639233509396882e-05, "loss": 0.27655136585235596, "step": 1469 }, { "epoch": 0.17837641062977794, "grad_norm": 1.9961246252059937, "learning_rate": 1.6636776808745856e-05, "loss": 0.34929129481315613, "step": 1470 }, { "epoch": 0.178497755126805, "grad_norm": 1.2723180055618286, "learning_rate": 1.663432010809483e-05, "loss": 0.13786189258098602, "step": 1471 }, { "epoch": 0.17861909962383207, "grad_norm": 2.7387282848358154, "learning_rate": 1.6631863407443804e-05, "loss": 0.5443019866943359, "step": 1472 }, { "epoch": 0.17874044412085913, "grad_norm": 2.3264379501342773, "learning_rate": 1.662940670679278e-05, "loss": 0.40921467542648315, "step": 1473 }, { "epoch": 0.17886178861788618, "grad_norm": 3.4638664722442627, "learning_rate": 1.6626950006141753e-05, "loss": 0.8635279536247253, "step": 1474 }, { "epoch": 0.17898313311491323, "grad_norm": 2.1667139530181885, "learning_rate": 1.6624493305490727e-05, "loss": 0.27864134311676025, "step": 1475 }, { "epoch": 0.1791044776119403, "grad_norm": 3.282740592956543, "learning_rate": 1.66220366048397e-05, "loss": 0.3811657428741455, "step": 1476 }, { "epoch": 0.17922582210896737, "grad_norm": 1.52030348777771, "learning_rate": 1.6619579904188676e-05, "loss": 0.12177782505750656, "step": 1477 }, { "epoch": 0.17934716660599442, "grad_norm": 1.402393102645874, "learning_rate": 1.661712320353765e-05, "loss": 0.1254013478755951, "step": 1478 }, { "epoch": 0.17946851110302148, "grad_norm": 2.2220330238342285, "learning_rate": 1.6614666502886624e-05, "loss": 0.3898024559020996, "step": 1479 }, { "epoch": 0.17958985560004853, "grad_norm": 1.6085693836212158, "learning_rate": 1.66122098022356e-05, "loss": 0.2051776647567749, "step": 1480 }, { "epoch": 0.17971120009707559, "grad_norm": 3.699589252471924, "learning_rate": 1.6609753101584573e-05, "loss": 0.5865758657455444, "step": 1481 }, { "epoch": 0.17983254459410267, "grad_norm": 1.851828694343567, "learning_rate": 1.6607296400933547e-05, "loss": 0.14690032601356506, "step": 1482 }, { "epoch": 0.17995388909112972, "grad_norm": 2.143031358718872, "learning_rate": 1.660483970028252e-05, "loss": 0.3911144733428955, "step": 1483 }, { "epoch": 0.18007523358815677, "grad_norm": 2.165050745010376, "learning_rate": 1.6602382999631496e-05, "loss": 0.46151846647262573, "step": 1484 }, { "epoch": 0.18019657808518383, "grad_norm": 2.1484158039093018, "learning_rate": 1.659992629898047e-05, "loss": 0.185227170586586, "step": 1485 }, { "epoch": 0.1803179225822109, "grad_norm": 1.8021937608718872, "learning_rate": 1.6597469598329444e-05, "loss": 0.21386900544166565, "step": 1486 }, { "epoch": 0.18043926707923796, "grad_norm": 1.8592053651809692, "learning_rate": 1.659501289767842e-05, "loss": 0.19364283978939056, "step": 1487 }, { "epoch": 0.18056061157626502, "grad_norm": 2.145983934402466, "learning_rate": 1.6592556197027393e-05, "loss": 0.29494696855545044, "step": 1488 }, { "epoch": 0.18068195607329207, "grad_norm": 2.1097612380981445, "learning_rate": 1.6590099496376367e-05, "loss": 0.29082202911376953, "step": 1489 }, { "epoch": 0.18080330057031913, "grad_norm": 1.9546377658843994, "learning_rate": 1.658764279572534e-05, "loss": 0.21720995008945465, "step": 1490 }, { "epoch": 0.1809246450673462, "grad_norm": 1.3910820484161377, "learning_rate": 1.658518609507432e-05, "loss": 0.23794345557689667, "step": 1491 }, { "epoch": 0.18104598956437326, "grad_norm": 1.2930516004562378, "learning_rate": 1.6582729394423293e-05, "loss": 0.07657837867736816, "step": 1492 }, { "epoch": 0.18116733406140031, "grad_norm": 2.9213075637817383, "learning_rate": 1.6580272693772267e-05, "loss": 0.5059197545051575, "step": 1493 }, { "epoch": 0.18128867855842737, "grad_norm": 1.7775009870529175, "learning_rate": 1.657781599312124e-05, "loss": 0.18545974791049957, "step": 1494 }, { "epoch": 0.18141002305545442, "grad_norm": 2.2526183128356934, "learning_rate": 1.6575359292470216e-05, "loss": 0.5661383867263794, "step": 1495 }, { "epoch": 0.1815313675524815, "grad_norm": 2.2152130603790283, "learning_rate": 1.657290259181919e-05, "loss": 0.34863507747650146, "step": 1496 }, { "epoch": 0.18165271204950856, "grad_norm": 1.5461905002593994, "learning_rate": 1.6570445891168164e-05, "loss": 0.2251451462507248, "step": 1497 }, { "epoch": 0.1817740565465356, "grad_norm": 1.752030849456787, "learning_rate": 1.656798919051714e-05, "loss": 0.21238146722316742, "step": 1498 }, { "epoch": 0.18189540104356267, "grad_norm": 2.57700252532959, "learning_rate": 1.6565532489866113e-05, "loss": 0.41275328397750854, "step": 1499 }, { "epoch": 0.18201674554058975, "grad_norm": 1.9533751010894775, "learning_rate": 1.6563075789215087e-05, "loss": 0.6596766710281372, "step": 1500 }, { "epoch": 0.1821380900376168, "grad_norm": 3.1995153427124023, "learning_rate": 1.656061908856406e-05, "loss": 0.3358881175518036, "step": 1501 }, { "epoch": 0.18225943453464385, "grad_norm": 2.530433177947998, "learning_rate": 1.6558162387913036e-05, "loss": 0.468052476644516, "step": 1502 }, { "epoch": 0.1823807790316709, "grad_norm": 1.9043426513671875, "learning_rate": 1.655570568726201e-05, "loss": 0.46296077966690063, "step": 1503 }, { "epoch": 0.18250212352869796, "grad_norm": 1.7315770387649536, "learning_rate": 1.6553248986610984e-05, "loss": 0.35026147961616516, "step": 1504 }, { "epoch": 0.18262346802572504, "grad_norm": 2.0941426753997803, "learning_rate": 1.655079228595996e-05, "loss": 0.159994974732399, "step": 1505 }, { "epoch": 0.1827448125227521, "grad_norm": 2.1495745182037354, "learning_rate": 1.6548335585308933e-05, "loss": 0.19524291157722473, "step": 1506 }, { "epoch": 0.18286615701977915, "grad_norm": 2.2648754119873047, "learning_rate": 1.6545878884657907e-05, "loss": 0.48489266633987427, "step": 1507 }, { "epoch": 0.1829875015168062, "grad_norm": 1.5805639028549194, "learning_rate": 1.654342218400688e-05, "loss": 0.31412607431411743, "step": 1508 }, { "epoch": 0.18310884601383326, "grad_norm": 2.305727243423462, "learning_rate": 1.6540965483355855e-05, "loss": 0.2490062415599823, "step": 1509 }, { "epoch": 0.18323019051086034, "grad_norm": 1.5892924070358276, "learning_rate": 1.653850878270483e-05, "loss": 0.23507654666900635, "step": 1510 }, { "epoch": 0.1833515350078874, "grad_norm": 1.475388765335083, "learning_rate": 1.6536052082053804e-05, "loss": 0.4014114737510681, "step": 1511 }, { "epoch": 0.18347287950491445, "grad_norm": 2.111905336380005, "learning_rate": 1.6533595381402778e-05, "loss": 0.21480782330036163, "step": 1512 }, { "epoch": 0.1835942240019415, "grad_norm": 2.4657700061798096, "learning_rate": 1.6531138680751752e-05, "loss": 0.3230441212654114, "step": 1513 }, { "epoch": 0.18371556849896858, "grad_norm": 2.013392210006714, "learning_rate": 1.6528681980100727e-05, "loss": 0.12354740500450134, "step": 1514 }, { "epoch": 0.18383691299599564, "grad_norm": 1.8254839181900024, "learning_rate": 1.65262252794497e-05, "loss": 0.22884692251682281, "step": 1515 }, { "epoch": 0.1839582574930227, "grad_norm": 1.2895177602767944, "learning_rate": 1.6523768578798675e-05, "loss": 0.14382505416870117, "step": 1516 }, { "epoch": 0.18407960199004975, "grad_norm": 1.4898990392684937, "learning_rate": 1.652131187814765e-05, "loss": 0.07804793864488602, "step": 1517 }, { "epoch": 0.1842009464870768, "grad_norm": 1.7102720737457275, "learning_rate": 1.6518855177496624e-05, "loss": 0.15108470618724823, "step": 1518 }, { "epoch": 0.18432229098410388, "grad_norm": 1.8784161806106567, "learning_rate": 1.6516398476845598e-05, "loss": 0.5007836818695068, "step": 1519 }, { "epoch": 0.18444363548113094, "grad_norm": 2.171732187271118, "learning_rate": 1.6513941776194572e-05, "loss": 0.27269411087036133, "step": 1520 }, { "epoch": 0.184564979978158, "grad_norm": 2.179102897644043, "learning_rate": 1.6511485075543547e-05, "loss": 0.3284929394721985, "step": 1521 }, { "epoch": 0.18468632447518504, "grad_norm": 1.841059684753418, "learning_rate": 1.650902837489252e-05, "loss": 0.25381767749786377, "step": 1522 }, { "epoch": 0.1848076689722121, "grad_norm": 2.1668596267700195, "learning_rate": 1.6506571674241495e-05, "loss": 0.2602318525314331, "step": 1523 }, { "epoch": 0.18492901346923918, "grad_norm": 2.501694917678833, "learning_rate": 1.650411497359047e-05, "loss": 0.3330945372581482, "step": 1524 }, { "epoch": 0.18505035796626623, "grad_norm": 2.025729179382324, "learning_rate": 1.6501658272939444e-05, "loss": 0.28070488572120667, "step": 1525 }, { "epoch": 0.1851717024632933, "grad_norm": 2.1759018898010254, "learning_rate": 1.6499201572288418e-05, "loss": 0.2297292947769165, "step": 1526 }, { "epoch": 0.18529304696032034, "grad_norm": 2.583195447921753, "learning_rate": 1.6496744871637392e-05, "loss": 0.25529593229293823, "step": 1527 }, { "epoch": 0.18541439145734742, "grad_norm": 1.7422884702682495, "learning_rate": 1.6494288170986366e-05, "loss": 0.17456205189228058, "step": 1528 }, { "epoch": 0.18553573595437448, "grad_norm": 2.843953847885132, "learning_rate": 1.649183147033534e-05, "loss": 0.5891610383987427, "step": 1529 }, { "epoch": 0.18565708045140153, "grad_norm": 1.9662164449691772, "learning_rate": 1.6489374769684315e-05, "loss": 0.23069551587104797, "step": 1530 }, { "epoch": 0.18577842494842858, "grad_norm": 2.4861812591552734, "learning_rate": 1.648691806903329e-05, "loss": 0.8122949600219727, "step": 1531 }, { "epoch": 0.18589976944545564, "grad_norm": 1.1293015480041504, "learning_rate": 1.6484461368382263e-05, "loss": 0.018577666953206062, "step": 1532 }, { "epoch": 0.18602111394248272, "grad_norm": 2.1808528900146484, "learning_rate": 1.6482004667731238e-05, "loss": 0.20170214772224426, "step": 1533 }, { "epoch": 0.18614245843950977, "grad_norm": 1.7788127660751343, "learning_rate": 1.6479547967080212e-05, "loss": 0.1955050826072693, "step": 1534 }, { "epoch": 0.18626380293653683, "grad_norm": 1.782490611076355, "learning_rate": 1.6477091266429186e-05, "loss": 0.11952733993530273, "step": 1535 }, { "epoch": 0.18638514743356388, "grad_norm": 1.8649412393569946, "learning_rate": 1.647463456577816e-05, "loss": 0.09318369626998901, "step": 1536 }, { "epoch": 0.18650649193059093, "grad_norm": 2.223051071166992, "learning_rate": 1.6472177865127135e-05, "loss": 0.41792887449264526, "step": 1537 }, { "epoch": 0.18662783642761802, "grad_norm": 2.1368985176086426, "learning_rate": 1.646972116447611e-05, "loss": 0.2785337567329407, "step": 1538 }, { "epoch": 0.18674918092464507, "grad_norm": 2.03661847114563, "learning_rate": 1.6467264463825083e-05, "loss": 0.23641645908355713, "step": 1539 }, { "epoch": 0.18687052542167212, "grad_norm": 1.1259260177612305, "learning_rate": 1.6464807763174057e-05, "loss": 0.10324345529079437, "step": 1540 }, { "epoch": 0.18699186991869918, "grad_norm": 2.2833993434906006, "learning_rate": 1.646235106252303e-05, "loss": 0.2568722367286682, "step": 1541 }, { "epoch": 0.18711321441572626, "grad_norm": 2.33410906791687, "learning_rate": 1.6459894361872006e-05, "loss": 0.4274582266807556, "step": 1542 }, { "epoch": 0.1872345589127533, "grad_norm": 1.0029762983322144, "learning_rate": 1.645743766122098e-05, "loss": 0.15004250407218933, "step": 1543 }, { "epoch": 0.18735590340978037, "grad_norm": 1.938217043876648, "learning_rate": 1.6454980960569954e-05, "loss": 0.42935827374458313, "step": 1544 }, { "epoch": 0.18747724790680742, "grad_norm": 2.3287079334259033, "learning_rate": 1.645252425991893e-05, "loss": 0.5231063365936279, "step": 1545 }, { "epoch": 0.18759859240383447, "grad_norm": 1.9757171869277954, "learning_rate": 1.6450067559267903e-05, "loss": 0.132722407579422, "step": 1546 }, { "epoch": 0.18771993690086156, "grad_norm": 2.0718235969543457, "learning_rate": 1.6447610858616877e-05, "loss": 0.6672481894493103, "step": 1547 }, { "epoch": 0.1878412813978886, "grad_norm": 1.5003156661987305, "learning_rate": 1.644515415796585e-05, "loss": 0.2142794132232666, "step": 1548 }, { "epoch": 0.18796262589491566, "grad_norm": 0.9966719746589661, "learning_rate": 1.6442697457314826e-05, "loss": 0.11649642139673233, "step": 1549 }, { "epoch": 0.18808397039194272, "grad_norm": 1.5972671508789062, "learning_rate": 1.64402407566638e-05, "loss": 0.239975705742836, "step": 1550 }, { "epoch": 0.18820531488896977, "grad_norm": 2.4622395038604736, "learning_rate": 1.6437784056012774e-05, "loss": 0.4359244704246521, "step": 1551 }, { "epoch": 0.18832665938599685, "grad_norm": 1.6812524795532227, "learning_rate": 1.643532735536175e-05, "loss": 0.19567492604255676, "step": 1552 }, { "epoch": 0.1884480038830239, "grad_norm": 1.736446499824524, "learning_rate": 1.6432870654710723e-05, "loss": 0.37146762013435364, "step": 1553 }, { "epoch": 0.18856934838005096, "grad_norm": 2.5409858226776123, "learning_rate": 1.6430413954059697e-05, "loss": 0.544783353805542, "step": 1554 }, { "epoch": 0.18869069287707801, "grad_norm": 1.9650341272354126, "learning_rate": 1.642795725340867e-05, "loss": 0.805822491645813, "step": 1555 }, { "epoch": 0.1888120373741051, "grad_norm": 1.6074107885360718, "learning_rate": 1.6425500552757646e-05, "loss": 0.5202041864395142, "step": 1556 }, { "epoch": 0.18893338187113215, "grad_norm": 2.805347442626953, "learning_rate": 1.6423043852106623e-05, "loss": 0.37637004256248474, "step": 1557 }, { "epoch": 0.1890547263681592, "grad_norm": 0.6213400959968567, "learning_rate": 1.6420587151455597e-05, "loss": 0.0326407290995121, "step": 1558 }, { "epoch": 0.18917607086518626, "grad_norm": 2.420454978942871, "learning_rate": 1.6418130450804572e-05, "loss": 0.2922397255897522, "step": 1559 }, { "epoch": 0.1892974153622133, "grad_norm": 1.9075942039489746, "learning_rate": 1.6415673750153546e-05, "loss": 0.32393327355384827, "step": 1560 }, { "epoch": 0.1894187598592404, "grad_norm": 0.8147814273834229, "learning_rate": 1.641321704950252e-05, "loss": 0.10852165520191193, "step": 1561 }, { "epoch": 0.18954010435626745, "grad_norm": 0.00906144455075264, "learning_rate": 1.6410760348851494e-05, "loss": 0.00013929870328865945, "step": 1562 }, { "epoch": 0.1896614488532945, "grad_norm": 1.9047940969467163, "learning_rate": 1.640830364820047e-05, "loss": 0.13412030041217804, "step": 1563 }, { "epoch": 0.18978279335032155, "grad_norm": 2.203005313873291, "learning_rate": 1.6405846947549443e-05, "loss": 0.4438808560371399, "step": 1564 }, { "epoch": 0.18990413784734864, "grad_norm": 0.6975694298744202, "learning_rate": 1.6403390246898417e-05, "loss": 0.03993745520710945, "step": 1565 }, { "epoch": 0.1900254823443757, "grad_norm": 2.6445183753967285, "learning_rate": 1.640093354624739e-05, "loss": 0.23849809169769287, "step": 1566 }, { "epoch": 0.19014682684140274, "grad_norm": 2.594602346420288, "learning_rate": 1.6398476845596366e-05, "loss": 0.3863987326622009, "step": 1567 }, { "epoch": 0.1902681713384298, "grad_norm": 10.270295143127441, "learning_rate": 1.639602014494534e-05, "loss": 0.41021063923835754, "step": 1568 }, { "epoch": 0.19038951583545685, "grad_norm": 1.9823209047317505, "learning_rate": 1.6393563444294314e-05, "loss": 0.42316073179244995, "step": 1569 }, { "epoch": 0.19051086033248393, "grad_norm": 2.612694263458252, "learning_rate": 1.639110674364329e-05, "loss": 0.9028637409210205, "step": 1570 }, { "epoch": 0.190632204829511, "grad_norm": 2.160759687423706, "learning_rate": 1.6388650042992263e-05, "loss": 0.38729843497276306, "step": 1571 }, { "epoch": 0.19075354932653804, "grad_norm": 1.6438068151474, "learning_rate": 1.6386193342341237e-05, "loss": 0.13886958360671997, "step": 1572 }, { "epoch": 0.1908748938235651, "grad_norm": 2.996663808822632, "learning_rate": 1.638373664169021e-05, "loss": 0.33843570947647095, "step": 1573 }, { "epoch": 0.19099623832059215, "grad_norm": 3.0241034030914307, "learning_rate": 1.6381279941039186e-05, "loss": 0.2785295248031616, "step": 1574 }, { "epoch": 0.19111758281761923, "grad_norm": 2.1101088523864746, "learning_rate": 1.637882324038816e-05, "loss": 0.6817824244499207, "step": 1575 }, { "epoch": 0.19123892731464628, "grad_norm": 1.186375617980957, "learning_rate": 1.6376366539737134e-05, "loss": 0.05503537505865097, "step": 1576 }, { "epoch": 0.19136027181167334, "grad_norm": 1.5986549854278564, "learning_rate": 1.637390983908611e-05, "loss": 0.15630364418029785, "step": 1577 }, { "epoch": 0.1914816163087004, "grad_norm": 2.139268398284912, "learning_rate": 1.6371453138435083e-05, "loss": 0.4205041527748108, "step": 1578 }, { "epoch": 0.19160296080572747, "grad_norm": 1.2108969688415527, "learning_rate": 1.6368996437784057e-05, "loss": 0.10240291804075241, "step": 1579 }, { "epoch": 0.19172430530275453, "grad_norm": 2.612095594406128, "learning_rate": 1.636653973713303e-05, "loss": 0.5215551853179932, "step": 1580 }, { "epoch": 0.19184564979978158, "grad_norm": 1.5930920839309692, "learning_rate": 1.6364083036482005e-05, "loss": 0.25414586067199707, "step": 1581 }, { "epoch": 0.19196699429680864, "grad_norm": 1.0892622470855713, "learning_rate": 1.636162633583098e-05, "loss": 0.20532195270061493, "step": 1582 }, { "epoch": 0.1920883387938357, "grad_norm": 2.093474864959717, "learning_rate": 1.6359169635179954e-05, "loss": 0.7632452249526978, "step": 1583 }, { "epoch": 0.19220968329086277, "grad_norm": 1.594140648841858, "learning_rate": 1.6356712934528928e-05, "loss": 0.13568240404129028, "step": 1584 }, { "epoch": 0.19233102778788982, "grad_norm": 2.5025835037231445, "learning_rate": 1.6354256233877902e-05, "loss": 0.23390145599842072, "step": 1585 }, { "epoch": 0.19245237228491688, "grad_norm": 0.01225997507572174, "learning_rate": 1.6351799533226877e-05, "loss": 0.00020544853759929538, "step": 1586 }, { "epoch": 0.19257371678194393, "grad_norm": 1.9831618070602417, "learning_rate": 1.634934283257585e-05, "loss": 0.1494239866733551, "step": 1587 }, { "epoch": 0.19269506127897099, "grad_norm": 1.9081530570983887, "learning_rate": 1.6346886131924825e-05, "loss": 0.17009824514389038, "step": 1588 }, { "epoch": 0.19281640577599807, "grad_norm": 2.5004465579986572, "learning_rate": 1.63444294312738e-05, "loss": 0.4157010018825531, "step": 1589 }, { "epoch": 0.19293775027302512, "grad_norm": 2.543424367904663, "learning_rate": 1.6341972730622774e-05, "loss": 0.32920312881469727, "step": 1590 }, { "epoch": 0.19305909477005218, "grad_norm": 2.6315767765045166, "learning_rate": 1.6339516029971748e-05, "loss": 0.888789713382721, "step": 1591 }, { "epoch": 0.19318043926707923, "grad_norm": 2.5114357471466064, "learning_rate": 1.6337059329320722e-05, "loss": 0.3034912645816803, "step": 1592 }, { "epoch": 0.1933017837641063, "grad_norm": 1.3469399213790894, "learning_rate": 1.6334602628669697e-05, "loss": 0.2188844382762909, "step": 1593 }, { "epoch": 0.19342312826113336, "grad_norm": 2.7407827377319336, "learning_rate": 1.633214592801867e-05, "loss": 0.27693620324134827, "step": 1594 }, { "epoch": 0.19354447275816042, "grad_norm": 2.3420448303222656, "learning_rate": 1.6329689227367645e-05, "loss": 0.7372701168060303, "step": 1595 }, { "epoch": 0.19366581725518747, "grad_norm": 2.446305990219116, "learning_rate": 1.632723252671662e-05, "loss": 0.34233394265174866, "step": 1596 }, { "epoch": 0.19378716175221453, "grad_norm": 1.8076667785644531, "learning_rate": 1.6324775826065597e-05, "loss": 0.14907054603099823, "step": 1597 }, { "epoch": 0.1939085062492416, "grad_norm": 2.0409836769104004, "learning_rate": 1.632231912541457e-05, "loss": 0.27943429350852966, "step": 1598 }, { "epoch": 0.19402985074626866, "grad_norm": 2.7173757553100586, "learning_rate": 1.6319862424763545e-05, "loss": 0.46326854825019836, "step": 1599 }, { "epoch": 0.19415119524329572, "grad_norm": 2.4713032245635986, "learning_rate": 1.631740572411252e-05, "loss": 0.14638936519622803, "step": 1600 }, { "epoch": 0.19427253974032277, "grad_norm": 0.9708374738693237, "learning_rate": 1.6314949023461494e-05, "loss": 0.05027943104505539, "step": 1601 }, { "epoch": 0.19439388423734982, "grad_norm": 3.3602957725524902, "learning_rate": 1.6312492322810468e-05, "loss": 0.3926747441291809, "step": 1602 }, { "epoch": 0.1945152287343769, "grad_norm": 2.024567127227783, "learning_rate": 1.6310035622159442e-05, "loss": 0.2768881916999817, "step": 1603 }, { "epoch": 0.19463657323140396, "grad_norm": 2.508981227874756, "learning_rate": 1.6307578921508417e-05, "loss": 0.299752414226532, "step": 1604 }, { "epoch": 0.194757917728431, "grad_norm": 2.3269424438476562, "learning_rate": 1.630512222085739e-05, "loss": 0.44652560353279114, "step": 1605 }, { "epoch": 0.19487926222545807, "grad_norm": 2.056466579437256, "learning_rate": 1.6302665520206365e-05, "loss": 0.6093921065330505, "step": 1606 }, { "epoch": 0.19500060672248515, "grad_norm": 3.3587870597839355, "learning_rate": 1.630020881955534e-05, "loss": 0.3951265215873718, "step": 1607 }, { "epoch": 0.1951219512195122, "grad_norm": 2.3945207595825195, "learning_rate": 1.6297752118904314e-05, "loss": 0.19893671572208405, "step": 1608 }, { "epoch": 0.19524329571653926, "grad_norm": 1.694069743156433, "learning_rate": 1.6295295418253288e-05, "loss": 0.10053470730781555, "step": 1609 }, { "epoch": 0.1953646402135663, "grad_norm": 3.7009809017181396, "learning_rate": 1.6292838717602262e-05, "loss": 0.7382639050483704, "step": 1610 }, { "epoch": 0.19548598471059336, "grad_norm": 2.0691487789154053, "learning_rate": 1.6290382016951237e-05, "loss": 0.5720087289810181, "step": 1611 }, { "epoch": 0.19560732920762045, "grad_norm": 1.4953621625900269, "learning_rate": 1.628792531630021e-05, "loss": 0.22495737671852112, "step": 1612 }, { "epoch": 0.1957286737046475, "grad_norm": 2.3584330081939697, "learning_rate": 1.6285468615649185e-05, "loss": 0.4193107783794403, "step": 1613 }, { "epoch": 0.19585001820167455, "grad_norm": 2.2631139755249023, "learning_rate": 1.628301191499816e-05, "loss": 0.21283216774463654, "step": 1614 }, { "epoch": 0.1959713626987016, "grad_norm": 1.281013011932373, "learning_rate": 1.6280555214347134e-05, "loss": 0.11674115061759949, "step": 1615 }, { "epoch": 0.19609270719572866, "grad_norm": 1.323325753211975, "learning_rate": 1.6278098513696108e-05, "loss": 0.12784036993980408, "step": 1616 }, { "epoch": 0.19621405169275574, "grad_norm": 2.992032289505005, "learning_rate": 1.6275641813045082e-05, "loss": 0.6973645687103271, "step": 1617 }, { "epoch": 0.1963353961897828, "grad_norm": 1.3869400024414062, "learning_rate": 1.6273185112394056e-05, "loss": 0.21278777718544006, "step": 1618 }, { "epoch": 0.19645674068680985, "grad_norm": 2.2656288146972656, "learning_rate": 1.627072841174303e-05, "loss": 0.3129113018512726, "step": 1619 }, { "epoch": 0.1965780851838369, "grad_norm": 1.4828702211380005, "learning_rate": 1.6268271711092005e-05, "loss": 0.13820579648017883, "step": 1620 }, { "epoch": 0.19669942968086399, "grad_norm": 2.30009388923645, "learning_rate": 1.626581501044098e-05, "loss": 0.22714544832706451, "step": 1621 }, { "epoch": 0.19682077417789104, "grad_norm": 1.719403624534607, "learning_rate": 1.6263358309789953e-05, "loss": 0.4299517869949341, "step": 1622 }, { "epoch": 0.1969421186749181, "grad_norm": 1.3880891799926758, "learning_rate": 1.6260901609138928e-05, "loss": 0.12024924159049988, "step": 1623 }, { "epoch": 0.19706346317194515, "grad_norm": 0.38428664207458496, "learning_rate": 1.6258444908487902e-05, "loss": 0.014788172207772732, "step": 1624 }, { "epoch": 0.1971848076689722, "grad_norm": 3.133810520172119, "learning_rate": 1.6255988207836876e-05, "loss": 0.4186369776725769, "step": 1625 }, { "epoch": 0.19730615216599928, "grad_norm": 2.946176528930664, "learning_rate": 1.625353150718585e-05, "loss": 0.4143875539302826, "step": 1626 }, { "epoch": 0.19742749666302634, "grad_norm": 1.9746789932250977, "learning_rate": 1.6251074806534825e-05, "loss": 0.33198633790016174, "step": 1627 }, { "epoch": 0.1975488411600534, "grad_norm": 2.5999584197998047, "learning_rate": 1.62486181058838e-05, "loss": 0.19740217924118042, "step": 1628 }, { "epoch": 0.19767018565708044, "grad_norm": 1.9985610246658325, "learning_rate": 1.6246161405232773e-05, "loss": 0.31167468428611755, "step": 1629 }, { "epoch": 0.1977915301541075, "grad_norm": 4.352280616760254, "learning_rate": 1.6243704704581747e-05, "loss": 0.5498289465904236, "step": 1630 }, { "epoch": 0.19791287465113458, "grad_norm": 2.2715535163879395, "learning_rate": 1.6241248003930722e-05, "loss": 0.4732452929019928, "step": 1631 }, { "epoch": 0.19803421914816163, "grad_norm": 1.4650019407272339, "learning_rate": 1.6238791303279696e-05, "loss": 0.06958284229040146, "step": 1632 }, { "epoch": 0.1981555636451887, "grad_norm": 2.1653013229370117, "learning_rate": 1.623633460262867e-05, "loss": 0.20110665261745453, "step": 1633 }, { "epoch": 0.19827690814221574, "grad_norm": 2.947077989578247, "learning_rate": 1.6233877901977644e-05, "loss": 0.44844576716423035, "step": 1634 }, { "epoch": 0.19839825263924282, "grad_norm": 2.272237539291382, "learning_rate": 1.623142120132662e-05, "loss": 0.36181971430778503, "step": 1635 }, { "epoch": 0.19851959713626988, "grad_norm": 1.9349480867385864, "learning_rate": 1.6228964500675596e-05, "loss": 0.07440108060836792, "step": 1636 }, { "epoch": 0.19864094163329693, "grad_norm": 1.5741839408874512, "learning_rate": 1.622650780002457e-05, "loss": 0.3192439377307892, "step": 1637 }, { "epoch": 0.19876228613032398, "grad_norm": 2.3907079696655273, "learning_rate": 1.6224051099373545e-05, "loss": 0.2281782180070877, "step": 1638 }, { "epoch": 0.19888363062735104, "grad_norm": 3.0020861625671387, "learning_rate": 1.622159439872252e-05, "loss": 0.3084147274494171, "step": 1639 }, { "epoch": 0.19900497512437812, "grad_norm": 1.3268686532974243, "learning_rate": 1.6219137698071493e-05, "loss": 0.1354789137840271, "step": 1640 }, { "epoch": 0.19912631962140517, "grad_norm": 2.8789944648742676, "learning_rate": 1.6216680997420468e-05, "loss": 0.3687528967857361, "step": 1641 }, { "epoch": 0.19924766411843223, "grad_norm": 2.311739921569824, "learning_rate": 1.6214224296769442e-05, "loss": 1.1628113985061646, "step": 1642 }, { "epoch": 0.19936900861545928, "grad_norm": 1.3445240259170532, "learning_rate": 1.6211767596118416e-05, "loss": 0.04311354085803032, "step": 1643 }, { "epoch": 0.19949035311248633, "grad_norm": 1.4748131036758423, "learning_rate": 1.620931089546739e-05, "loss": 0.36780551075935364, "step": 1644 }, { "epoch": 0.19961169760951342, "grad_norm": 1.5068604946136475, "learning_rate": 1.6206854194816365e-05, "loss": 0.3677091896533966, "step": 1645 }, { "epoch": 0.19973304210654047, "grad_norm": 2.6158766746520996, "learning_rate": 1.620439749416534e-05, "loss": 0.4385922849178314, "step": 1646 }, { "epoch": 0.19985438660356752, "grad_norm": 1.6956822872161865, "learning_rate": 1.6201940793514313e-05, "loss": 0.11273642629384995, "step": 1647 }, { "epoch": 0.19997573110059458, "grad_norm": 1.862756609916687, "learning_rate": 1.6199484092863287e-05, "loss": 0.25619611144065857, "step": 1648 }, { "epoch": 0.20009707559762166, "grad_norm": 1.0184905529022217, "learning_rate": 1.6197027392212262e-05, "loss": 0.07765750586986542, "step": 1649 }, { "epoch": 0.2002184200946487, "grad_norm": 1.6538012027740479, "learning_rate": 1.6194570691561236e-05, "loss": 0.06520011276006699, "step": 1650 }, { "epoch": 0.20033976459167577, "grad_norm": 2.2978315353393555, "learning_rate": 1.619211399091021e-05, "loss": 0.313754141330719, "step": 1651 }, { "epoch": 0.20046110908870282, "grad_norm": 1.6874979734420776, "learning_rate": 1.6189657290259185e-05, "loss": 0.19735673069953918, "step": 1652 }, { "epoch": 0.20058245358572988, "grad_norm": 2.3955869674682617, "learning_rate": 1.618720058960816e-05, "loss": 0.6544023752212524, "step": 1653 }, { "epoch": 0.20070379808275696, "grad_norm": 2.770576238632202, "learning_rate": 1.6184743888957133e-05, "loss": 0.30553755164146423, "step": 1654 }, { "epoch": 0.200825142579784, "grad_norm": 2.0315816402435303, "learning_rate": 1.6182287188306107e-05, "loss": 0.35758891701698303, "step": 1655 }, { "epoch": 0.20094648707681106, "grad_norm": 1.5492041110992432, "learning_rate": 1.617983048765508e-05, "loss": 0.37275615334510803, "step": 1656 }, { "epoch": 0.20106783157383812, "grad_norm": 1.8539915084838867, "learning_rate": 1.6177373787004056e-05, "loss": 0.4826367199420929, "step": 1657 }, { "epoch": 0.2011891760708652, "grad_norm": 1.8999037742614746, "learning_rate": 1.617491708635303e-05, "loss": 0.4474004805088043, "step": 1658 }, { "epoch": 0.20131052056789225, "grad_norm": 1.7485865354537964, "learning_rate": 1.6172460385702004e-05, "loss": 0.6810899376869202, "step": 1659 }, { "epoch": 0.2014318650649193, "grad_norm": 2.3828468322753906, "learning_rate": 1.617000368505098e-05, "loss": 0.5326903462409973, "step": 1660 }, { "epoch": 0.20155320956194636, "grad_norm": 2.6605119705200195, "learning_rate": 1.6167546984399953e-05, "loss": 0.1593848317861557, "step": 1661 }, { "epoch": 0.20167455405897342, "grad_norm": 2.227161169052124, "learning_rate": 1.6165090283748927e-05, "loss": 0.38251793384552, "step": 1662 }, { "epoch": 0.2017958985560005, "grad_norm": 1.8754427433013916, "learning_rate": 1.61626335830979e-05, "loss": 0.19771966338157654, "step": 1663 }, { "epoch": 0.20191724305302755, "grad_norm": 2.2835958003997803, "learning_rate": 1.6160176882446876e-05, "loss": 0.17081639170646667, "step": 1664 }, { "epoch": 0.2020385875500546, "grad_norm": 1.891542911529541, "learning_rate": 1.615772018179585e-05, "loss": 0.725801944732666, "step": 1665 }, { "epoch": 0.20215993204708166, "grad_norm": 2.1305201053619385, "learning_rate": 1.6155263481144824e-05, "loss": 0.2174772024154663, "step": 1666 }, { "epoch": 0.2022812765441087, "grad_norm": 2.1975913047790527, "learning_rate": 1.61528067804938e-05, "loss": 0.36247697472572327, "step": 1667 }, { "epoch": 0.2024026210411358, "grad_norm": 2.2730934619903564, "learning_rate": 1.6150350079842773e-05, "loss": 0.35575994849205017, "step": 1668 }, { "epoch": 0.20252396553816285, "grad_norm": 2.4796621799468994, "learning_rate": 1.6147893379191747e-05, "loss": 0.6208682060241699, "step": 1669 }, { "epoch": 0.2026453100351899, "grad_norm": 2.4022440910339355, "learning_rate": 1.614543667854072e-05, "loss": 0.5125530362129211, "step": 1670 }, { "epoch": 0.20276665453221696, "grad_norm": 2.6297521591186523, "learning_rate": 1.6142979977889695e-05, "loss": 0.5200785398483276, "step": 1671 }, { "epoch": 0.20288799902924404, "grad_norm": 2.7327070236206055, "learning_rate": 1.614052327723867e-05, "loss": 0.757875919342041, "step": 1672 }, { "epoch": 0.2030093435262711, "grad_norm": 1.6281644105911255, "learning_rate": 1.6138066576587644e-05, "loss": 0.17772671580314636, "step": 1673 }, { "epoch": 0.20313068802329814, "grad_norm": 1.9709150791168213, "learning_rate": 1.6135609875936618e-05, "loss": 0.21530991792678833, "step": 1674 }, { "epoch": 0.2032520325203252, "grad_norm": 2.064605951309204, "learning_rate": 1.6133153175285592e-05, "loss": 0.24332857131958008, "step": 1675 }, { "epoch": 0.20337337701735225, "grad_norm": 2.1962037086486816, "learning_rate": 1.6130696474634567e-05, "loss": 0.34543243050575256, "step": 1676 }, { "epoch": 0.20349472151437933, "grad_norm": 2.6600778102874756, "learning_rate": 1.612823977398354e-05, "loss": 0.47594380378723145, "step": 1677 }, { "epoch": 0.2036160660114064, "grad_norm": 1.7026101350784302, "learning_rate": 1.6125783073332515e-05, "loss": 0.19039933383464813, "step": 1678 }, { "epoch": 0.20373741050843344, "grad_norm": 1.6388001441955566, "learning_rate": 1.612332637268149e-05, "loss": 0.1765032410621643, "step": 1679 }, { "epoch": 0.2038587550054605, "grad_norm": 1.057235598564148, "learning_rate": 1.6120869672030464e-05, "loss": 0.09241899847984314, "step": 1680 }, { "epoch": 0.20398009950248755, "grad_norm": 1.5978493690490723, "learning_rate": 1.6118412971379438e-05, "loss": 0.22115862369537354, "step": 1681 }, { "epoch": 0.20410144399951463, "grad_norm": 2.1803946495056152, "learning_rate": 1.6115956270728412e-05, "loss": 0.5432751178741455, "step": 1682 }, { "epoch": 0.20422278849654169, "grad_norm": 1.2694318294525146, "learning_rate": 1.6113499570077387e-05, "loss": 0.07295191287994385, "step": 1683 }, { "epoch": 0.20434413299356874, "grad_norm": 2.1707067489624023, "learning_rate": 1.611104286942636e-05, "loss": 0.3300400376319885, "step": 1684 }, { "epoch": 0.2044654774905958, "grad_norm": 1.5089091062545776, "learning_rate": 1.6108586168775335e-05, "loss": 0.20541003346443176, "step": 1685 }, { "epoch": 0.20458682198762287, "grad_norm": 2.2536306381225586, "learning_rate": 1.610612946812431e-05, "loss": 0.35043635964393616, "step": 1686 }, { "epoch": 0.20470816648464993, "grad_norm": 1.3641126155853271, "learning_rate": 1.6103672767473284e-05, "loss": 0.5306731462478638, "step": 1687 }, { "epoch": 0.20482951098167698, "grad_norm": 1.6115317344665527, "learning_rate": 1.6101216066822258e-05, "loss": 0.12020076811313629, "step": 1688 }, { "epoch": 0.20495085547870404, "grad_norm": 2.0570430755615234, "learning_rate": 1.6098759366171232e-05, "loss": 0.45422253012657166, "step": 1689 }, { "epoch": 0.2050721999757311, "grad_norm": 0.8935880661010742, "learning_rate": 1.6096302665520206e-05, "loss": 0.2039462774991989, "step": 1690 }, { "epoch": 0.20519354447275817, "grad_norm": 1.405432105064392, "learning_rate": 1.609384596486918e-05, "loss": 0.21527734398841858, "step": 1691 }, { "epoch": 0.20531488896978523, "grad_norm": 1.7508655786514282, "learning_rate": 1.6091389264218155e-05, "loss": 0.15633103251457214, "step": 1692 }, { "epoch": 0.20543623346681228, "grad_norm": 3.1882712841033936, "learning_rate": 1.608893256356713e-05, "loss": 0.12639623880386353, "step": 1693 }, { "epoch": 0.20555757796383933, "grad_norm": 2.8699142932891846, "learning_rate": 1.6086475862916103e-05, "loss": 0.6497660875320435, "step": 1694 }, { "epoch": 0.2056789224608664, "grad_norm": 2.3647992610931396, "learning_rate": 1.6084019162265078e-05, "loss": 0.49607181549072266, "step": 1695 }, { "epoch": 0.20580026695789347, "grad_norm": 2.0349488258361816, "learning_rate": 1.6081562461614052e-05, "loss": 0.4656597375869751, "step": 1696 }, { "epoch": 0.20592161145492052, "grad_norm": 2.6206047534942627, "learning_rate": 1.6079105760963026e-05, "loss": 0.7463130354881287, "step": 1697 }, { "epoch": 0.20604295595194758, "grad_norm": 2.1627695560455322, "learning_rate": 1.6076649060312e-05, "loss": 0.4141017198562622, "step": 1698 }, { "epoch": 0.20616430044897463, "grad_norm": 1.7568947076797485, "learning_rate": 1.6074192359660975e-05, "loss": 0.27343836426734924, "step": 1699 }, { "epoch": 0.2062856449460017, "grad_norm": 2.324117660522461, "learning_rate": 1.607173565900995e-05, "loss": 0.48490825295448303, "step": 1700 }, { "epoch": 0.20640698944302877, "grad_norm": 2.1941676139831543, "learning_rate": 1.6069278958358923e-05, "loss": 0.5859739184379578, "step": 1701 }, { "epoch": 0.20652833394005582, "grad_norm": 1.4805810451507568, "learning_rate": 1.60668222577079e-05, "loss": 0.09182153642177582, "step": 1702 }, { "epoch": 0.20664967843708287, "grad_norm": 2.0386710166931152, "learning_rate": 1.6064365557056875e-05, "loss": 0.45521387457847595, "step": 1703 }, { "epoch": 0.20677102293410993, "grad_norm": 1.5653104782104492, "learning_rate": 1.606190885640585e-05, "loss": 0.13128796219825745, "step": 1704 }, { "epoch": 0.206892367431137, "grad_norm": 0.8828320503234863, "learning_rate": 1.6059452155754824e-05, "loss": 0.3042456805706024, "step": 1705 }, { "epoch": 0.20701371192816406, "grad_norm": 1.8564342260360718, "learning_rate": 1.6056995455103798e-05, "loss": 0.679114580154419, "step": 1706 }, { "epoch": 0.20713505642519112, "grad_norm": 2.100465774536133, "learning_rate": 1.6054538754452772e-05, "loss": 0.4968925714492798, "step": 1707 }, { "epoch": 0.20725640092221817, "grad_norm": 1.758471131324768, "learning_rate": 1.6052082053801746e-05, "loss": 0.3478274643421173, "step": 1708 }, { "epoch": 0.20737774541924522, "grad_norm": 2.1922128200531006, "learning_rate": 1.604962535315072e-05, "loss": 0.23997293412685394, "step": 1709 }, { "epoch": 0.2074990899162723, "grad_norm": 2.635798692703247, "learning_rate": 1.6047168652499695e-05, "loss": 0.4193202257156372, "step": 1710 }, { "epoch": 0.20762043441329936, "grad_norm": 1.5727275609970093, "learning_rate": 1.604471195184867e-05, "loss": 0.286593496799469, "step": 1711 }, { "epoch": 0.2077417789103264, "grad_norm": 2.323625326156616, "learning_rate": 1.6042255251197643e-05, "loss": 0.7401584982872009, "step": 1712 }, { "epoch": 0.20786312340735347, "grad_norm": 1.8029251098632812, "learning_rate": 1.6039798550546618e-05, "loss": 0.07832077145576477, "step": 1713 }, { "epoch": 0.20798446790438055, "grad_norm": 2.1487743854522705, "learning_rate": 1.6037341849895592e-05, "loss": 0.3596492111682892, "step": 1714 }, { "epoch": 0.2081058124014076, "grad_norm": 2.0673341751098633, "learning_rate": 1.6034885149244566e-05, "loss": 0.21680273115634918, "step": 1715 }, { "epoch": 0.20822715689843466, "grad_norm": 1.9851670265197754, "learning_rate": 1.603242844859354e-05, "loss": 0.32054102420806885, "step": 1716 }, { "epoch": 0.2083485013954617, "grad_norm": 1.3611160516738892, "learning_rate": 1.6029971747942515e-05, "loss": 0.16891829669475555, "step": 1717 }, { "epoch": 0.20846984589248876, "grad_norm": 2.0195910930633545, "learning_rate": 1.602751504729149e-05, "loss": 0.1865098774433136, "step": 1718 }, { "epoch": 0.20859119038951585, "grad_norm": 1.8068580627441406, "learning_rate": 1.6025058346640463e-05, "loss": 0.1894182711839676, "step": 1719 }, { "epoch": 0.2087125348865429, "grad_norm": 1.6768163442611694, "learning_rate": 1.6022601645989437e-05, "loss": 0.11060914397239685, "step": 1720 }, { "epoch": 0.20883387938356995, "grad_norm": 1.8469024896621704, "learning_rate": 1.6020144945338412e-05, "loss": 0.34541481733322144, "step": 1721 }, { "epoch": 0.208955223880597, "grad_norm": 1.9112247228622437, "learning_rate": 1.6017688244687386e-05, "loss": 0.7080034017562866, "step": 1722 }, { "epoch": 0.20907656837762406, "grad_norm": 0.9716092944145203, "learning_rate": 1.601523154403636e-05, "loss": 0.15459425747394562, "step": 1723 }, { "epoch": 0.20919791287465114, "grad_norm": 1.692309856414795, "learning_rate": 1.6012774843385334e-05, "loss": 0.17918935418128967, "step": 1724 }, { "epoch": 0.2093192573716782, "grad_norm": 2.3839099407196045, "learning_rate": 1.601031814273431e-05, "loss": 0.6370502710342407, "step": 1725 }, { "epoch": 0.20944060186870525, "grad_norm": 2.299150228500366, "learning_rate": 1.6007861442083283e-05, "loss": 0.4926031231880188, "step": 1726 }, { "epoch": 0.2095619463657323, "grad_norm": 1.5755623579025269, "learning_rate": 1.6005404741432257e-05, "loss": 0.3240659236907959, "step": 1727 }, { "epoch": 0.20968329086275939, "grad_norm": 1.6985849142074585, "learning_rate": 1.600294804078123e-05, "loss": 0.17380595207214355, "step": 1728 }, { "epoch": 0.20980463535978644, "grad_norm": 1.0348140001296997, "learning_rate": 1.6000491340130206e-05, "loss": 0.11599317193031311, "step": 1729 }, { "epoch": 0.2099259798568135, "grad_norm": 2.2621145248413086, "learning_rate": 1.599803463947918e-05, "loss": 0.32561737298965454, "step": 1730 }, { "epoch": 0.21004732435384055, "grad_norm": 2.3321175575256348, "learning_rate": 1.5995577938828154e-05, "loss": 0.16530698537826538, "step": 1731 }, { "epoch": 0.2101686688508676, "grad_norm": 1.777334213256836, "learning_rate": 1.599312123817713e-05, "loss": 0.5165007710456848, "step": 1732 }, { "epoch": 0.21029001334789468, "grad_norm": 2.2404754161834717, "learning_rate": 1.5990664537526103e-05, "loss": 0.3408328592777252, "step": 1733 }, { "epoch": 0.21041135784492174, "grad_norm": 3.0858652591705322, "learning_rate": 1.5988207836875077e-05, "loss": 0.6301594972610474, "step": 1734 }, { "epoch": 0.2105327023419488, "grad_norm": 1.9224309921264648, "learning_rate": 1.598575113622405e-05, "loss": 0.14940844476222992, "step": 1735 }, { "epoch": 0.21065404683897584, "grad_norm": 2.7337467670440674, "learning_rate": 1.5983294435573026e-05, "loss": 0.4004538059234619, "step": 1736 }, { "epoch": 0.2107753913360029, "grad_norm": 1.8744240999221802, "learning_rate": 1.5980837734922e-05, "loss": 0.19295921921730042, "step": 1737 }, { "epoch": 0.21089673583302998, "grad_norm": 3.7155728340148926, "learning_rate": 1.5978381034270974e-05, "loss": 0.5623353719711304, "step": 1738 }, { "epoch": 0.21101808033005703, "grad_norm": 2.182851791381836, "learning_rate": 1.597592433361995e-05, "loss": 0.4261482357978821, "step": 1739 }, { "epoch": 0.2111394248270841, "grad_norm": 1.2220971584320068, "learning_rate": 1.5973467632968923e-05, "loss": 0.05080566555261612, "step": 1740 }, { "epoch": 0.21126076932411114, "grad_norm": 2.0607261657714844, "learning_rate": 1.5971010932317897e-05, "loss": 0.5656209588050842, "step": 1741 }, { "epoch": 0.21138211382113822, "grad_norm": 2.1184959411621094, "learning_rate": 1.5968554231666875e-05, "loss": 0.19552554190158844, "step": 1742 }, { "epoch": 0.21150345831816528, "grad_norm": 1.757657766342163, "learning_rate": 1.596609753101585e-05, "loss": 0.3370252251625061, "step": 1743 }, { "epoch": 0.21162480281519233, "grad_norm": 2.8892951011657715, "learning_rate": 1.5963640830364823e-05, "loss": 0.12219370156526566, "step": 1744 }, { "epoch": 0.21174614731221938, "grad_norm": 1.5726659297943115, "learning_rate": 1.5961184129713797e-05, "loss": 0.29163357615470886, "step": 1745 }, { "epoch": 0.21186749180924644, "grad_norm": 2.813798666000366, "learning_rate": 1.595872742906277e-05, "loss": 0.29331523180007935, "step": 1746 }, { "epoch": 0.21198883630627352, "grad_norm": 2.365394353866577, "learning_rate": 1.5956270728411746e-05, "loss": 0.4060492515563965, "step": 1747 }, { "epoch": 0.21211018080330057, "grad_norm": 3.7761080265045166, "learning_rate": 1.595381402776072e-05, "loss": 0.39178287982940674, "step": 1748 }, { "epoch": 0.21223152530032763, "grad_norm": 1.9571113586425781, "learning_rate": 1.5951357327109694e-05, "loss": 0.10142619907855988, "step": 1749 }, { "epoch": 0.21235286979735468, "grad_norm": 2.1784629821777344, "learning_rate": 1.594890062645867e-05, "loss": 0.6431654691696167, "step": 1750 }, { "epoch": 0.21247421429438176, "grad_norm": 0.5362848043441772, "learning_rate": 1.5946443925807643e-05, "loss": 0.017946042120456696, "step": 1751 }, { "epoch": 0.21259555879140882, "grad_norm": 1.8809864521026611, "learning_rate": 1.5943987225156617e-05, "loss": 0.1440238058567047, "step": 1752 }, { "epoch": 0.21271690328843587, "grad_norm": 2.783003807067871, "learning_rate": 1.594153052450559e-05, "loss": 0.41364654898643494, "step": 1753 }, { "epoch": 0.21283824778546293, "grad_norm": 1.64762282371521, "learning_rate": 1.5939073823854566e-05, "loss": 0.7017732858657837, "step": 1754 }, { "epoch": 0.21295959228248998, "grad_norm": 2.1315536499023438, "learning_rate": 1.593661712320354e-05, "loss": 0.453188419342041, "step": 1755 }, { "epoch": 0.21308093677951706, "grad_norm": 1.585434079170227, "learning_rate": 1.5934160422552514e-05, "loss": 0.08176189661026001, "step": 1756 }, { "epoch": 0.21320228127654411, "grad_norm": 2.495664358139038, "learning_rate": 1.593170372190149e-05, "loss": 0.3820353150367737, "step": 1757 }, { "epoch": 0.21332362577357117, "grad_norm": 1.1123815774917603, "learning_rate": 1.5929247021250463e-05, "loss": 0.033398568630218506, "step": 1758 }, { "epoch": 0.21344497027059822, "grad_norm": 2.053670883178711, "learning_rate": 1.5926790320599437e-05, "loss": 0.2600085437297821, "step": 1759 }, { "epoch": 0.21356631476762528, "grad_norm": 5.9268903732299805, "learning_rate": 1.592433361994841e-05, "loss": 0.4954833984375, "step": 1760 }, { "epoch": 0.21368765926465236, "grad_norm": 1.9209569692611694, "learning_rate": 1.5921876919297385e-05, "loss": 0.5450060963630676, "step": 1761 }, { "epoch": 0.2138090037616794, "grad_norm": 1.479677438735962, "learning_rate": 1.591942021864636e-05, "loss": 0.24173252284526825, "step": 1762 }, { "epoch": 0.21393034825870647, "grad_norm": 3.3846049308776855, "learning_rate": 1.5916963517995334e-05, "loss": 0.47177764773368835, "step": 1763 }, { "epoch": 0.21405169275573352, "grad_norm": 1.9015347957611084, "learning_rate": 1.5914506817344308e-05, "loss": 0.3082675039768219, "step": 1764 }, { "epoch": 0.2141730372527606, "grad_norm": 2.5283203125, "learning_rate": 1.5912050116693282e-05, "loss": 0.16115519404411316, "step": 1765 }, { "epoch": 0.21429438174978765, "grad_norm": 2.1408560276031494, "learning_rate": 1.5909593416042257e-05, "loss": 0.11530830711126328, "step": 1766 }, { "epoch": 0.2144157262468147, "grad_norm": 1.0230469703674316, "learning_rate": 1.590713671539123e-05, "loss": 0.03306128829717636, "step": 1767 }, { "epoch": 0.21453707074384176, "grad_norm": 2.4217331409454346, "learning_rate": 1.5904680014740205e-05, "loss": 0.3269408047199249, "step": 1768 }, { "epoch": 0.21465841524086882, "grad_norm": 0.4477101266384125, "learning_rate": 1.590222331408918e-05, "loss": 0.015322737395763397, "step": 1769 }, { "epoch": 0.2147797597378959, "grad_norm": 2.700812578201294, "learning_rate": 1.5899766613438154e-05, "loss": 0.38714274764060974, "step": 1770 }, { "epoch": 0.21490110423492295, "grad_norm": 0.9410980343818665, "learning_rate": 1.5897309912787128e-05, "loss": 0.017772674560546875, "step": 1771 }, { "epoch": 0.21502244873195, "grad_norm": 1.5146427154541016, "learning_rate": 1.5894853212136102e-05, "loss": 0.08214616775512695, "step": 1772 }, { "epoch": 0.21514379322897706, "grad_norm": 2.555539846420288, "learning_rate": 1.5892396511485077e-05, "loss": 0.41456887125968933, "step": 1773 }, { "epoch": 0.2152651377260041, "grad_norm": 2.356757164001465, "learning_rate": 1.588993981083405e-05, "loss": 0.0947510302066803, "step": 1774 }, { "epoch": 0.2153864822230312, "grad_norm": 2.3131558895111084, "learning_rate": 1.5887483110183025e-05, "loss": 0.2958258390426636, "step": 1775 }, { "epoch": 0.21550782672005825, "grad_norm": 2.141347646713257, "learning_rate": 1.5885026409532e-05, "loss": 0.25594037771224976, "step": 1776 }, { "epoch": 0.2156291712170853, "grad_norm": 0.9824745655059814, "learning_rate": 1.5882569708880974e-05, "loss": 0.04805383086204529, "step": 1777 }, { "epoch": 0.21575051571411236, "grad_norm": 2.266543388366699, "learning_rate": 1.5880113008229948e-05, "loss": 0.3093523681163788, "step": 1778 }, { "epoch": 0.21587186021113944, "grad_norm": 1.2219218015670776, "learning_rate": 1.5877656307578922e-05, "loss": 0.06167016550898552, "step": 1779 }, { "epoch": 0.2159932047081665, "grad_norm": 2.130420207977295, "learning_rate": 1.5875199606927896e-05, "loss": 0.0919126644730568, "step": 1780 }, { "epoch": 0.21611454920519355, "grad_norm": 3.2336394786834717, "learning_rate": 1.5872742906276874e-05, "loss": 0.597897469997406, "step": 1781 }, { "epoch": 0.2162358937022206, "grad_norm": 1.6644809246063232, "learning_rate": 1.5870286205625848e-05, "loss": 0.2237292230129242, "step": 1782 }, { "epoch": 0.21635723819924765, "grad_norm": 2.426450729370117, "learning_rate": 1.5867829504974822e-05, "loss": 0.513863742351532, "step": 1783 }, { "epoch": 0.21647858269627474, "grad_norm": 2.2129006385803223, "learning_rate": 1.5865372804323797e-05, "loss": 0.29766586422920227, "step": 1784 }, { "epoch": 0.2165999271933018, "grad_norm": 2.1841890811920166, "learning_rate": 1.586291610367277e-05, "loss": 0.597987174987793, "step": 1785 }, { "epoch": 0.21672127169032884, "grad_norm": 2.9279837608337402, "learning_rate": 1.5860459403021745e-05, "loss": 0.5400558710098267, "step": 1786 }, { "epoch": 0.2168426161873559, "grad_norm": 1.2487083673477173, "learning_rate": 1.585800270237072e-05, "loss": 0.09377280622720718, "step": 1787 }, { "epoch": 0.21696396068438295, "grad_norm": 3.491300106048584, "learning_rate": 1.5855546001719694e-05, "loss": 0.5781911015510559, "step": 1788 }, { "epoch": 0.21708530518141003, "grad_norm": 1.6507911682128906, "learning_rate": 1.5853089301068668e-05, "loss": 0.20534668862819672, "step": 1789 }, { "epoch": 0.21720664967843709, "grad_norm": 2.0704219341278076, "learning_rate": 1.5850632600417642e-05, "loss": 0.18910321593284607, "step": 1790 }, { "epoch": 0.21732799417546414, "grad_norm": 1.6216387748718262, "learning_rate": 1.5848175899766617e-05, "loss": 0.3603314459323883, "step": 1791 }, { "epoch": 0.2174493386724912, "grad_norm": 2.28153395652771, "learning_rate": 1.584571919911559e-05, "loss": 0.12273404747247696, "step": 1792 }, { "epoch": 0.21757068316951828, "grad_norm": 1.5948500633239746, "learning_rate": 1.5843262498464565e-05, "loss": 0.13908806443214417, "step": 1793 }, { "epoch": 0.21769202766654533, "grad_norm": 1.8439335823059082, "learning_rate": 1.584080579781354e-05, "loss": 0.24253743886947632, "step": 1794 }, { "epoch": 0.21781337216357238, "grad_norm": 2.1192080974578857, "learning_rate": 1.5838349097162514e-05, "loss": 0.27136489748954773, "step": 1795 }, { "epoch": 0.21793471666059944, "grad_norm": 2.2431752681732178, "learning_rate": 1.5835892396511488e-05, "loss": 0.33842700719833374, "step": 1796 }, { "epoch": 0.2180560611576265, "grad_norm": 2.34513783454895, "learning_rate": 1.5833435695860462e-05, "loss": 0.3854665458202362, "step": 1797 }, { "epoch": 0.21817740565465357, "grad_norm": 4.530099868774414, "learning_rate": 1.5830978995209433e-05, "loss": 0.6803076267242432, "step": 1798 }, { "epoch": 0.21829875015168063, "grad_norm": 3.580667495727539, "learning_rate": 1.5828522294558407e-05, "loss": 0.351321280002594, "step": 1799 }, { "epoch": 0.21842009464870768, "grad_norm": 1.670913815498352, "learning_rate": 1.582606559390738e-05, "loss": 0.11861065775156021, "step": 1800 }, { "epoch": 0.21854143914573473, "grad_norm": 1.8805264234542847, "learning_rate": 1.5823608893256356e-05, "loss": 0.19307005405426025, "step": 1801 }, { "epoch": 0.2186627836427618, "grad_norm": 2.305316209793091, "learning_rate": 1.582115219260533e-05, "loss": 0.20600402355194092, "step": 1802 }, { "epoch": 0.21878412813978887, "grad_norm": 1.742647647857666, "learning_rate": 1.5818695491954304e-05, "loss": 0.12964148819446564, "step": 1803 }, { "epoch": 0.21890547263681592, "grad_norm": 2.6212007999420166, "learning_rate": 1.581623879130328e-05, "loss": 0.5329718589782715, "step": 1804 }, { "epoch": 0.21902681713384298, "grad_norm": 2.0527093410491943, "learning_rate": 1.5813782090652253e-05, "loss": 0.27452802658081055, "step": 1805 }, { "epoch": 0.21914816163087003, "grad_norm": 1.9854778051376343, "learning_rate": 1.5811325390001227e-05, "loss": 0.25577837228775024, "step": 1806 }, { "epoch": 0.2192695061278971, "grad_norm": 1.33931565284729, "learning_rate": 1.5808868689350205e-05, "loss": 0.17164123058319092, "step": 1807 }, { "epoch": 0.21939085062492417, "grad_norm": 3.311396360397339, "learning_rate": 1.580641198869918e-05, "loss": 0.4014303684234619, "step": 1808 }, { "epoch": 0.21951219512195122, "grad_norm": 3.0539963245391846, "learning_rate": 1.5803955288048153e-05, "loss": 0.5720174908638, "step": 1809 }, { "epoch": 0.21963353961897827, "grad_norm": 2.067350387573242, "learning_rate": 1.5801498587397127e-05, "loss": 0.16131293773651123, "step": 1810 }, { "epoch": 0.21975488411600533, "grad_norm": 1.9542495012283325, "learning_rate": 1.5799041886746102e-05, "loss": 0.39418622851371765, "step": 1811 }, { "epoch": 0.2198762286130324, "grad_norm": 2.895956039428711, "learning_rate": 1.5796585186095076e-05, "loss": 0.2677726447582245, "step": 1812 }, { "epoch": 0.21999757311005946, "grad_norm": 2.118053674697876, "learning_rate": 1.579412848544405e-05, "loss": 0.4295734465122223, "step": 1813 }, { "epoch": 0.22011891760708652, "grad_norm": 2.0122063159942627, "learning_rate": 1.5791671784793025e-05, "loss": 0.44783979654312134, "step": 1814 }, { "epoch": 0.22024026210411357, "grad_norm": 2.1328415870666504, "learning_rate": 1.5789215084142e-05, "loss": 0.41867291927337646, "step": 1815 }, { "epoch": 0.22036160660114062, "grad_norm": 1.779789686203003, "learning_rate": 1.5786758383490973e-05, "loss": 0.32512974739074707, "step": 1816 }, { "epoch": 0.2204829510981677, "grad_norm": 2.9491400718688965, "learning_rate": 1.5784301682839947e-05, "loss": 0.4705115258693695, "step": 1817 }, { "epoch": 0.22060429559519476, "grad_norm": 1.8154832124710083, "learning_rate": 1.578184498218892e-05, "loss": 0.10341040045022964, "step": 1818 }, { "epoch": 0.22072564009222181, "grad_norm": 1.6541939973831177, "learning_rate": 1.5779388281537896e-05, "loss": 0.22720670700073242, "step": 1819 }, { "epoch": 0.22084698458924887, "grad_norm": 1.5187536478042603, "learning_rate": 1.577693158088687e-05, "loss": 0.44843828678131104, "step": 1820 }, { "epoch": 0.22096832908627595, "grad_norm": 2.146512269973755, "learning_rate": 1.5774474880235844e-05, "loss": 0.11195553839206696, "step": 1821 }, { "epoch": 0.221089673583303, "grad_norm": 1.5938338041305542, "learning_rate": 1.577201817958482e-05, "loss": 0.061287395656108856, "step": 1822 }, { "epoch": 0.22121101808033006, "grad_norm": 2.171705722808838, "learning_rate": 1.5769561478933793e-05, "loss": 0.29137569665908813, "step": 1823 }, { "epoch": 0.2213323625773571, "grad_norm": 2.4083402156829834, "learning_rate": 1.5767104778282767e-05, "loss": 0.28137272596359253, "step": 1824 }, { "epoch": 0.22145370707438417, "grad_norm": 2.548114776611328, "learning_rate": 1.576464807763174e-05, "loss": 0.5311647653579712, "step": 1825 }, { "epoch": 0.22157505157141125, "grad_norm": 2.3285162448883057, "learning_rate": 1.5762191376980716e-05, "loss": 0.6317347884178162, "step": 1826 }, { "epoch": 0.2216963960684383, "grad_norm": 2.0388638973236084, "learning_rate": 1.575973467632969e-05, "loss": 0.27131187915802, "step": 1827 }, { "epoch": 0.22181774056546535, "grad_norm": 2.087533712387085, "learning_rate": 1.5757277975678664e-05, "loss": 0.28966864943504333, "step": 1828 }, { "epoch": 0.2219390850624924, "grad_norm": 2.0379552841186523, "learning_rate": 1.575482127502764e-05, "loss": 0.15862126648426056, "step": 1829 }, { "epoch": 0.22206042955951946, "grad_norm": 2.1554763317108154, "learning_rate": 1.5752364574376613e-05, "loss": 0.4588105380535126, "step": 1830 }, { "epoch": 0.22218177405654654, "grad_norm": 1.7737832069396973, "learning_rate": 1.5749907873725587e-05, "loss": 0.08383680880069733, "step": 1831 }, { "epoch": 0.2223031185535736, "grad_norm": 0.10381510853767395, "learning_rate": 1.574745117307456e-05, "loss": 0.0010423610219731927, "step": 1832 }, { "epoch": 0.22242446305060065, "grad_norm": 1.4465632438659668, "learning_rate": 1.5744994472423535e-05, "loss": 0.2208431214094162, "step": 1833 }, { "epoch": 0.2225458075476277, "grad_norm": 2.1390795707702637, "learning_rate": 1.574253777177251e-05, "loss": 0.40946102142333984, "step": 1834 }, { "epoch": 0.2226671520446548, "grad_norm": 2.1270852088928223, "learning_rate": 1.5740081071121484e-05, "loss": 0.26926371455192566, "step": 1835 }, { "epoch": 0.22278849654168184, "grad_norm": 2.0879156589508057, "learning_rate": 1.5737624370470458e-05, "loss": 0.38591527938842773, "step": 1836 }, { "epoch": 0.2229098410387089, "grad_norm": 2.220813512802124, "learning_rate": 1.5735167669819432e-05, "loss": 0.35801708698272705, "step": 1837 }, { "epoch": 0.22303118553573595, "grad_norm": 2.3260087966918945, "learning_rate": 1.5732710969168407e-05, "loss": 0.2995595335960388, "step": 1838 }, { "epoch": 0.223152530032763, "grad_norm": 1.2784228324890137, "learning_rate": 1.573025426851738e-05, "loss": 0.28344660997390747, "step": 1839 }, { "epoch": 0.22327387452979008, "grad_norm": 2.584073305130005, "learning_rate": 1.5727797567866355e-05, "loss": 0.26841938495635986, "step": 1840 }, { "epoch": 0.22339521902681714, "grad_norm": 3.000821828842163, "learning_rate": 1.572534086721533e-05, "loss": 0.29360127449035645, "step": 1841 }, { "epoch": 0.2235165635238442, "grad_norm": 1.3792856931686401, "learning_rate": 1.5722884166564304e-05, "loss": 0.21720454096794128, "step": 1842 }, { "epoch": 0.22363790802087125, "grad_norm": 1.5156898498535156, "learning_rate": 1.5720427465913278e-05, "loss": 0.07712770253419876, "step": 1843 }, { "epoch": 0.22375925251789833, "grad_norm": 1.8725303411483765, "learning_rate": 1.5717970765262252e-05, "loss": 0.3022371232509613, "step": 1844 }, { "epoch": 0.22388059701492538, "grad_norm": 2.1809723377227783, "learning_rate": 1.5715514064611227e-05, "loss": 0.672953188419342, "step": 1845 }, { "epoch": 0.22400194151195243, "grad_norm": 2.7487642765045166, "learning_rate": 1.57130573639602e-05, "loss": 0.38799959421157837, "step": 1846 }, { "epoch": 0.2241232860089795, "grad_norm": 1.0793215036392212, "learning_rate": 1.571060066330918e-05, "loss": 0.14667223393917084, "step": 1847 }, { "epoch": 0.22424463050600654, "grad_norm": 2.1512701511383057, "learning_rate": 1.5708143962658153e-05, "loss": 0.3110212981700897, "step": 1848 }, { "epoch": 0.22436597500303362, "grad_norm": 0.9467477202415466, "learning_rate": 1.5705687262007127e-05, "loss": 0.03516809642314911, "step": 1849 }, { "epoch": 0.22448731950006068, "grad_norm": 2.599250078201294, "learning_rate": 1.57032305613561e-05, "loss": 0.11458954960107803, "step": 1850 }, { "epoch": 0.22460866399708773, "grad_norm": 1.5941566228866577, "learning_rate": 1.5700773860705075e-05, "loss": 0.18593533337116241, "step": 1851 }, { "epoch": 0.22473000849411479, "grad_norm": 1.8600636720657349, "learning_rate": 1.569831716005405e-05, "loss": 0.12170698493719101, "step": 1852 }, { "epoch": 0.22485135299114184, "grad_norm": 2.5060980319976807, "learning_rate": 1.5695860459403024e-05, "loss": 0.3910202085971832, "step": 1853 }, { "epoch": 0.22497269748816892, "grad_norm": 4.181054592132568, "learning_rate": 1.5693403758751998e-05, "loss": 0.35858675837516785, "step": 1854 }, { "epoch": 0.22509404198519598, "grad_norm": 2.437246084213257, "learning_rate": 1.5690947058100972e-05, "loss": 0.5846027731895447, "step": 1855 }, { "epoch": 0.22521538648222303, "grad_norm": 2.284369468688965, "learning_rate": 1.5688490357449947e-05, "loss": 0.30392757058143616, "step": 1856 }, { "epoch": 0.22533673097925008, "grad_norm": 1.6010268926620483, "learning_rate": 1.568603365679892e-05, "loss": 0.1779608577489853, "step": 1857 }, { "epoch": 0.22545807547627716, "grad_norm": 1.704008936882019, "learning_rate": 1.5683576956147895e-05, "loss": 0.37899062037467957, "step": 1858 }, { "epoch": 0.22557941997330422, "grad_norm": 2.50712251663208, "learning_rate": 1.568112025549687e-05, "loss": 0.5361481308937073, "step": 1859 }, { "epoch": 0.22570076447033127, "grad_norm": 1.7986700534820557, "learning_rate": 1.5678663554845844e-05, "loss": 0.14462506771087646, "step": 1860 }, { "epoch": 0.22582210896735833, "grad_norm": 2.1984047889709473, "learning_rate": 1.5676206854194818e-05, "loss": 0.1491418480873108, "step": 1861 }, { "epoch": 0.22594345346438538, "grad_norm": 2.797156572341919, "learning_rate": 1.5673750153543792e-05, "loss": 0.196271151304245, "step": 1862 }, { "epoch": 0.22606479796141246, "grad_norm": 2.319661855697632, "learning_rate": 1.5671293452892767e-05, "loss": 0.15388545393943787, "step": 1863 }, { "epoch": 0.22618614245843952, "grad_norm": 2.6694042682647705, "learning_rate": 1.566883675224174e-05, "loss": 0.29360467195510864, "step": 1864 }, { "epoch": 0.22630748695546657, "grad_norm": 1.1639213562011719, "learning_rate": 1.5666380051590715e-05, "loss": 0.05175938084721565, "step": 1865 }, { "epoch": 0.22642883145249362, "grad_norm": 2.2814810276031494, "learning_rate": 1.566392335093969e-05, "loss": 0.599335789680481, "step": 1866 }, { "epoch": 0.22655017594952068, "grad_norm": 2.186396837234497, "learning_rate": 1.5661466650288664e-05, "loss": 0.45210182666778564, "step": 1867 }, { "epoch": 0.22667152044654776, "grad_norm": 2.9524850845336914, "learning_rate": 1.5659009949637638e-05, "loss": 0.4272529184818268, "step": 1868 }, { "epoch": 0.2267928649435748, "grad_norm": 1.982879400253296, "learning_rate": 1.5656553248986612e-05, "loss": 0.42066940665245056, "step": 1869 }, { "epoch": 0.22691420944060187, "grad_norm": 1.641210913658142, "learning_rate": 1.5654096548335586e-05, "loss": 0.06164119392633438, "step": 1870 }, { "epoch": 0.22703555393762892, "grad_norm": 2.4468185901641846, "learning_rate": 1.565163984768456e-05, "loss": 0.23071660101413727, "step": 1871 }, { "epoch": 0.227156898434656, "grad_norm": 1.4559755325317383, "learning_rate": 1.5649183147033535e-05, "loss": 0.11746642738580704, "step": 1872 }, { "epoch": 0.22727824293168306, "grad_norm": 2.723864793777466, "learning_rate": 1.564672644638251e-05, "loss": 0.639769434928894, "step": 1873 }, { "epoch": 0.2273995874287101, "grad_norm": 3.133424997329712, "learning_rate": 1.5644269745731483e-05, "loss": 0.20287105441093445, "step": 1874 }, { "epoch": 0.22752093192573716, "grad_norm": 2.657029151916504, "learning_rate": 1.5641813045080458e-05, "loss": 0.3544206917285919, "step": 1875 }, { "epoch": 0.22764227642276422, "grad_norm": 1.662294864654541, "learning_rate": 1.5639356344429432e-05, "loss": 0.08332932740449905, "step": 1876 }, { "epoch": 0.2277636209197913, "grad_norm": 2.7968616485595703, "learning_rate": 1.5636899643778406e-05, "loss": 0.3955608904361725, "step": 1877 }, { "epoch": 0.22788496541681835, "grad_norm": 1.5010136365890503, "learning_rate": 1.563444294312738e-05, "loss": 0.13125336170196533, "step": 1878 }, { "epoch": 0.2280063099138454, "grad_norm": 1.6493403911590576, "learning_rate": 1.5631986242476355e-05, "loss": 0.2688809335231781, "step": 1879 }, { "epoch": 0.22812765441087246, "grad_norm": 3.2882649898529053, "learning_rate": 1.562952954182533e-05, "loss": 0.47903305292129517, "step": 1880 }, { "epoch": 0.22824899890789951, "grad_norm": 2.847830295562744, "learning_rate": 1.5627072841174303e-05, "loss": 0.2155795842409134, "step": 1881 }, { "epoch": 0.2283703434049266, "grad_norm": 1.9242929220199585, "learning_rate": 1.5624616140523277e-05, "loss": 0.35135090351104736, "step": 1882 }, { "epoch": 0.22849168790195365, "grad_norm": 3.96661114692688, "learning_rate": 1.5622159439872252e-05, "loss": 0.11496228724718094, "step": 1883 }, { "epoch": 0.2286130323989807, "grad_norm": 3.5542078018188477, "learning_rate": 1.5619702739221226e-05, "loss": 0.2090737372636795, "step": 1884 }, { "epoch": 0.22873437689600776, "grad_norm": 2.155834436416626, "learning_rate": 1.56172460385702e-05, "loss": 0.1563049852848053, "step": 1885 }, { "epoch": 0.22885572139303484, "grad_norm": 2.6029043197631836, "learning_rate": 1.5614789337919174e-05, "loss": 0.1502380073070526, "step": 1886 }, { "epoch": 0.2289770658900619, "grad_norm": 5.526102542877197, "learning_rate": 1.5612332637268152e-05, "loss": 0.5732027888298035, "step": 1887 }, { "epoch": 0.22909841038708895, "grad_norm": 2.530214548110962, "learning_rate": 1.5609875936617126e-05, "loss": 0.4119799733161926, "step": 1888 }, { "epoch": 0.229219754884116, "grad_norm": 1.4703720808029175, "learning_rate": 1.56074192359661e-05, "loss": 0.14894983172416687, "step": 1889 }, { "epoch": 0.22934109938114305, "grad_norm": 1.975054383277893, "learning_rate": 1.5604962535315075e-05, "loss": 0.20313455164432526, "step": 1890 }, { "epoch": 0.22946244387817014, "grad_norm": 1.664771318435669, "learning_rate": 1.560250583466405e-05, "loss": 0.23257260024547577, "step": 1891 }, { "epoch": 0.2295837883751972, "grad_norm": 2.094118118286133, "learning_rate": 1.5600049134013023e-05, "loss": 0.22651207447052002, "step": 1892 }, { "epoch": 0.22970513287222424, "grad_norm": 1.761396884918213, "learning_rate": 1.5597592433361998e-05, "loss": 0.2868579030036926, "step": 1893 }, { "epoch": 0.2298264773692513, "grad_norm": 2.3177692890167236, "learning_rate": 1.5595135732710972e-05, "loss": 0.1699366718530655, "step": 1894 }, { "epoch": 0.22994782186627835, "grad_norm": 2.4176840782165527, "learning_rate": 1.5592679032059946e-05, "loss": 0.35694047808647156, "step": 1895 }, { "epoch": 0.23006916636330543, "grad_norm": 2.576108455657959, "learning_rate": 1.559022233140892e-05, "loss": 0.213277667760849, "step": 1896 }, { "epoch": 0.2301905108603325, "grad_norm": 1.9640555381774902, "learning_rate": 1.5587765630757895e-05, "loss": 0.18581807613372803, "step": 1897 }, { "epoch": 0.23031185535735954, "grad_norm": 2.2211499214172363, "learning_rate": 1.558530893010687e-05, "loss": 0.3798431158065796, "step": 1898 }, { "epoch": 0.2304331998543866, "grad_norm": 4.352818012237549, "learning_rate": 1.5582852229455843e-05, "loss": 0.49065476655960083, "step": 1899 }, { "epoch": 0.23055454435141368, "grad_norm": 1.9557774066925049, "learning_rate": 1.5580395528804817e-05, "loss": 0.3938944935798645, "step": 1900 }, { "epoch": 0.23067588884844073, "grad_norm": 1.8748207092285156, "learning_rate": 1.5577938828153792e-05, "loss": 0.5157500505447388, "step": 1901 }, { "epoch": 0.23079723334546778, "grad_norm": 1.7990689277648926, "learning_rate": 1.5575482127502766e-05, "loss": 0.2987160384654999, "step": 1902 }, { "epoch": 0.23091857784249484, "grad_norm": 1.7470693588256836, "learning_rate": 1.557302542685174e-05, "loss": 0.16994519531726837, "step": 1903 }, { "epoch": 0.2310399223395219, "grad_norm": 2.477071523666382, "learning_rate": 1.5570568726200715e-05, "loss": 0.48906996846199036, "step": 1904 }, { "epoch": 0.23116126683654897, "grad_norm": 3.2109107971191406, "learning_rate": 1.556811202554969e-05, "loss": 0.5909616947174072, "step": 1905 }, { "epoch": 0.23128261133357603, "grad_norm": 1.8098355531692505, "learning_rate": 1.5565655324898663e-05, "loss": 0.4754788279533386, "step": 1906 }, { "epoch": 0.23140395583060308, "grad_norm": 0.7618553638458252, "learning_rate": 1.5563198624247637e-05, "loss": 0.0402400940656662, "step": 1907 }, { "epoch": 0.23152530032763013, "grad_norm": 2.028736114501953, "learning_rate": 1.556074192359661e-05, "loss": 0.2924274802207947, "step": 1908 }, { "epoch": 0.2316466448246572, "grad_norm": 2.1117236614227295, "learning_rate": 1.5558285222945586e-05, "loss": 0.26550453901290894, "step": 1909 }, { "epoch": 0.23176798932168427, "grad_norm": 2.282334566116333, "learning_rate": 1.555582852229456e-05, "loss": 0.27888938784599304, "step": 1910 }, { "epoch": 0.23188933381871132, "grad_norm": 2.4370946884155273, "learning_rate": 1.5553371821643534e-05, "loss": 0.4627356231212616, "step": 1911 }, { "epoch": 0.23201067831573838, "grad_norm": 0.9429489970207214, "learning_rate": 1.555091512099251e-05, "loss": 0.032071638852357864, "step": 1912 }, { "epoch": 0.23213202281276543, "grad_norm": 1.9986335039138794, "learning_rate": 1.5548458420341483e-05, "loss": 0.30108609795570374, "step": 1913 }, { "epoch": 0.2322533673097925, "grad_norm": 1.6694214344024658, "learning_rate": 1.5546001719690457e-05, "loss": 0.24282769858837128, "step": 1914 }, { "epoch": 0.23237471180681957, "grad_norm": 1.9311928749084473, "learning_rate": 1.554354501903943e-05, "loss": 0.24997027218341827, "step": 1915 }, { "epoch": 0.23249605630384662, "grad_norm": 1.8494151830673218, "learning_rate": 1.5541088318388406e-05, "loss": 0.3138563632965088, "step": 1916 }, { "epoch": 0.23261740080087367, "grad_norm": 2.9844398498535156, "learning_rate": 1.553863161773738e-05, "loss": 0.3842235803604126, "step": 1917 }, { "epoch": 0.23273874529790073, "grad_norm": 1.881724238395691, "learning_rate": 1.5536174917086354e-05, "loss": 0.12849420309066772, "step": 1918 }, { "epoch": 0.2328600897949278, "grad_norm": 2.011195421218872, "learning_rate": 1.553371821643533e-05, "loss": 0.26487109065055847, "step": 1919 }, { "epoch": 0.23298143429195486, "grad_norm": 2.0115201473236084, "learning_rate": 1.5531261515784303e-05, "loss": 0.3657824397087097, "step": 1920 }, { "epoch": 0.23310277878898192, "grad_norm": 2.71881365776062, "learning_rate": 1.5528804815133277e-05, "loss": 0.4061950147151947, "step": 1921 }, { "epoch": 0.23322412328600897, "grad_norm": 1.1871726512908936, "learning_rate": 1.552634811448225e-05, "loss": 0.21809077262878418, "step": 1922 }, { "epoch": 0.23334546778303603, "grad_norm": 1.5245609283447266, "learning_rate": 1.5523891413831225e-05, "loss": 0.1552489697933197, "step": 1923 }, { "epoch": 0.2334668122800631, "grad_norm": 1.666669487953186, "learning_rate": 1.55214347131802e-05, "loss": 0.10551527887582779, "step": 1924 }, { "epoch": 0.23358815677709016, "grad_norm": 1.1993343830108643, "learning_rate": 1.5518978012529174e-05, "loss": 0.06786657869815826, "step": 1925 }, { "epoch": 0.23370950127411722, "grad_norm": 2.8402671813964844, "learning_rate": 1.551652131187815e-05, "loss": 0.2608323097229004, "step": 1926 }, { "epoch": 0.23383084577114427, "grad_norm": 1.042061448097229, "learning_rate": 1.5514064611227126e-05, "loss": 0.03161429241299629, "step": 1927 }, { "epoch": 0.23395219026817135, "grad_norm": 1.6204326152801514, "learning_rate": 1.55116079105761e-05, "loss": 0.28808191418647766, "step": 1928 }, { "epoch": 0.2340735347651984, "grad_norm": 1.9779497385025024, "learning_rate": 1.5509151209925074e-05, "loss": 0.30647483468055725, "step": 1929 }, { "epoch": 0.23419487926222546, "grad_norm": 1.22590970993042, "learning_rate": 1.550669450927405e-05, "loss": 0.07887299358844757, "step": 1930 }, { "epoch": 0.2343162237592525, "grad_norm": 1.897480845451355, "learning_rate": 1.5504237808623023e-05, "loss": 0.45204317569732666, "step": 1931 }, { "epoch": 0.23443756825627957, "grad_norm": 2.028043031692505, "learning_rate": 1.5501781107971997e-05, "loss": 0.16453605890274048, "step": 1932 }, { "epoch": 0.23455891275330665, "grad_norm": 2.369666576385498, "learning_rate": 1.5499324407320968e-05, "loss": 0.3857429325580597, "step": 1933 }, { "epoch": 0.2346802572503337, "grad_norm": 0.9054241180419922, "learning_rate": 1.5496867706669942e-05, "loss": 0.06322108954191208, "step": 1934 }, { "epoch": 0.23480160174736076, "grad_norm": 1.2953579425811768, "learning_rate": 1.5494411006018917e-05, "loss": 0.09342847019433975, "step": 1935 }, { "epoch": 0.2349229462443878, "grad_norm": 3.134705066680908, "learning_rate": 1.549195430536789e-05, "loss": 0.49385154247283936, "step": 1936 }, { "epoch": 0.23504429074141486, "grad_norm": 1.4176846742630005, "learning_rate": 1.5489497604716865e-05, "loss": 0.1273263841867447, "step": 1937 }, { "epoch": 0.23516563523844194, "grad_norm": 0.8674216866493225, "learning_rate": 1.548704090406584e-05, "loss": 0.033561233431100845, "step": 1938 }, { "epoch": 0.235286979735469, "grad_norm": 2.134188413619995, "learning_rate": 1.5484584203414814e-05, "loss": 0.5493992567062378, "step": 1939 }, { "epoch": 0.23540832423249605, "grad_norm": 1.9525834321975708, "learning_rate": 1.5482127502763788e-05, "loss": 0.171519473195076, "step": 1940 }, { "epoch": 0.2355296687295231, "grad_norm": 1.428519368171692, "learning_rate": 1.5479670802112762e-05, "loss": 0.33162039518356323, "step": 1941 }, { "epoch": 0.2356510132265502, "grad_norm": 2.767943859100342, "learning_rate": 1.5477214101461736e-05, "loss": 0.3828122913837433, "step": 1942 }, { "epoch": 0.23577235772357724, "grad_norm": 2.6053030490875244, "learning_rate": 1.547475740081071e-05, "loss": 0.24867229163646698, "step": 1943 }, { "epoch": 0.2358937022206043, "grad_norm": 2.204803228378296, "learning_rate": 1.5472300700159685e-05, "loss": 0.40587592124938965, "step": 1944 }, { "epoch": 0.23601504671763135, "grad_norm": 2.1212689876556396, "learning_rate": 1.546984399950866e-05, "loss": 0.2930845320224762, "step": 1945 }, { "epoch": 0.2361363912146584, "grad_norm": 1.3296728134155273, "learning_rate": 1.5467387298857633e-05, "loss": 0.12678320705890656, "step": 1946 }, { "epoch": 0.23625773571168548, "grad_norm": 2.986150026321411, "learning_rate": 1.5464930598206608e-05, "loss": 0.577646017074585, "step": 1947 }, { "epoch": 0.23637908020871254, "grad_norm": 1.8417999744415283, "learning_rate": 1.5462473897555582e-05, "loss": 0.1973411589860916, "step": 1948 }, { "epoch": 0.2365004247057396, "grad_norm": 2.9808263778686523, "learning_rate": 1.5460017196904556e-05, "loss": 0.3182407319545746, "step": 1949 }, { "epoch": 0.23662176920276665, "grad_norm": 1.6840883493423462, "learning_rate": 1.545756049625353e-05, "loss": 0.22510495781898499, "step": 1950 }, { "epoch": 0.23674311369979373, "grad_norm": 3.430837392807007, "learning_rate": 1.5455103795602505e-05, "loss": 0.2617456018924713, "step": 1951 }, { "epoch": 0.23686445819682078, "grad_norm": 2.058379650115967, "learning_rate": 1.5452647094951482e-05, "loss": 0.2380506843328476, "step": 1952 }, { "epoch": 0.23698580269384784, "grad_norm": 0.942528247833252, "learning_rate": 1.5450190394300457e-05, "loss": 0.14793919026851654, "step": 1953 }, { "epoch": 0.2371071471908749, "grad_norm": 2.2082934379577637, "learning_rate": 1.544773369364943e-05, "loss": 0.1505918800830841, "step": 1954 }, { "epoch": 0.23722849168790194, "grad_norm": 3.3055496215820312, "learning_rate": 1.5445276992998405e-05, "loss": 0.209852397441864, "step": 1955 }, { "epoch": 0.23734983618492903, "grad_norm": 2.0881266593933105, "learning_rate": 1.544282029234738e-05, "loss": 0.2559841275215149, "step": 1956 }, { "epoch": 0.23747118068195608, "grad_norm": 1.9247335195541382, "learning_rate": 1.5440363591696354e-05, "loss": 0.2522317171096802, "step": 1957 }, { "epoch": 0.23759252517898313, "grad_norm": 2.6329305171966553, "learning_rate": 1.5437906891045328e-05, "loss": 0.36928677558898926, "step": 1958 }, { "epoch": 0.2377138696760102, "grad_norm": 1.8593052625656128, "learning_rate": 1.5435450190394302e-05, "loss": 0.15176129341125488, "step": 1959 }, { "epoch": 0.23783521417303724, "grad_norm": 1.2760701179504395, "learning_rate": 1.5432993489743276e-05, "loss": 0.09813748300075531, "step": 1960 }, { "epoch": 0.23795655867006432, "grad_norm": 3.074573516845703, "learning_rate": 1.543053678909225e-05, "loss": 0.241357684135437, "step": 1961 }, { "epoch": 0.23807790316709138, "grad_norm": 2.4001972675323486, "learning_rate": 1.5428080088441225e-05, "loss": 0.33054935932159424, "step": 1962 }, { "epoch": 0.23819924766411843, "grad_norm": 2.2785770893096924, "learning_rate": 1.54256233877902e-05, "loss": 0.35797953605651855, "step": 1963 }, { "epoch": 0.23832059216114548, "grad_norm": 2.6213173866271973, "learning_rate": 1.5423166687139173e-05, "loss": 0.5411311984062195, "step": 1964 }, { "epoch": 0.23844193665817257, "grad_norm": 1.7875020503997803, "learning_rate": 1.5420709986488148e-05, "loss": 0.35356011986732483, "step": 1965 }, { "epoch": 0.23856328115519962, "grad_norm": 1.8374285697937012, "learning_rate": 1.5418253285837122e-05, "loss": 0.10449785739183426, "step": 1966 }, { "epoch": 0.23868462565222667, "grad_norm": 0.0015914601972326636, "learning_rate": 1.5415796585186096e-05, "loss": 4.027661634609103e-05, "step": 1967 }, { "epoch": 0.23880597014925373, "grad_norm": 2.4419233798980713, "learning_rate": 1.541333988453507e-05, "loss": 0.36124101281166077, "step": 1968 }, { "epoch": 0.23892731464628078, "grad_norm": 1.531554937362671, "learning_rate": 1.5410883183884045e-05, "loss": 0.1830349862575531, "step": 1969 }, { "epoch": 0.23904865914330786, "grad_norm": 2.351914644241333, "learning_rate": 1.540842648323302e-05, "loss": 0.315641313791275, "step": 1970 }, { "epoch": 0.23917000364033492, "grad_norm": 1.7109310626983643, "learning_rate": 1.5405969782581993e-05, "loss": 0.3653964698314667, "step": 1971 }, { "epoch": 0.23929134813736197, "grad_norm": 2.335442066192627, "learning_rate": 1.5403513081930967e-05, "loss": 0.3463093638420105, "step": 1972 }, { "epoch": 0.23941269263438902, "grad_norm": 1.2720097303390503, "learning_rate": 1.5401056381279942e-05, "loss": 0.2935695946216583, "step": 1973 }, { "epoch": 0.23953403713141608, "grad_norm": 2.660950183868408, "learning_rate": 1.5398599680628916e-05, "loss": 0.4685511291027069, "step": 1974 }, { "epoch": 0.23965538162844316, "grad_norm": 1.722851276397705, "learning_rate": 1.539614297997789e-05, "loss": 0.13476943969726562, "step": 1975 }, { "epoch": 0.2397767261254702, "grad_norm": 1.1071078777313232, "learning_rate": 1.5393686279326864e-05, "loss": 0.010801886208355427, "step": 1976 }, { "epoch": 0.23989807062249727, "grad_norm": 3.19441819190979, "learning_rate": 1.539122957867584e-05, "loss": 0.4961863160133362, "step": 1977 }, { "epoch": 0.24001941511952432, "grad_norm": 1.6807912588119507, "learning_rate": 1.5388772878024813e-05, "loss": 0.49341505765914917, "step": 1978 }, { "epoch": 0.2401407596165514, "grad_norm": 1.4445300102233887, "learning_rate": 1.5386316177373787e-05, "loss": 0.497768759727478, "step": 1979 }, { "epoch": 0.24026210411357846, "grad_norm": 2.9250829219818115, "learning_rate": 1.538385947672276e-05, "loss": 0.5877270698547363, "step": 1980 }, { "epoch": 0.2403834486106055, "grad_norm": 1.8087142705917358, "learning_rate": 1.5381402776071736e-05, "loss": 0.3921996057033539, "step": 1981 }, { "epoch": 0.24050479310763256, "grad_norm": 2.8521523475646973, "learning_rate": 1.537894607542071e-05, "loss": 0.268110066652298, "step": 1982 }, { "epoch": 0.24062613760465962, "grad_norm": 3.089811086654663, "learning_rate": 1.5376489374769684e-05, "loss": 0.6380707621574402, "step": 1983 }, { "epoch": 0.2407474821016867, "grad_norm": 1.431771159172058, "learning_rate": 1.537403267411866e-05, "loss": 0.035007551312446594, "step": 1984 }, { "epoch": 0.24086882659871375, "grad_norm": 2.0163891315460205, "learning_rate": 1.5371575973467633e-05, "loss": 0.41505101323127747, "step": 1985 }, { "epoch": 0.2409901710957408, "grad_norm": 2.951434850692749, "learning_rate": 1.5369119272816607e-05, "loss": 0.3437270522117615, "step": 1986 }, { "epoch": 0.24111151559276786, "grad_norm": 2.0333163738250732, "learning_rate": 1.536666257216558e-05, "loss": 0.4240697920322418, "step": 1987 }, { "epoch": 0.24123286008979491, "grad_norm": 2.527355670928955, "learning_rate": 1.5364205871514556e-05, "loss": 0.4679162800312042, "step": 1988 }, { "epoch": 0.241354204586822, "grad_norm": 1.9801870584487915, "learning_rate": 1.536174917086353e-05, "loss": 0.5906752347946167, "step": 1989 }, { "epoch": 0.24147554908384905, "grad_norm": 3.036198377609253, "learning_rate": 1.5359292470212504e-05, "loss": 0.35773366689682007, "step": 1990 }, { "epoch": 0.2415968935808761, "grad_norm": 4.780759334564209, "learning_rate": 1.535683576956148e-05, "loss": 0.15195998549461365, "step": 1991 }, { "epoch": 0.24171823807790316, "grad_norm": 1.809084415435791, "learning_rate": 1.5354379068910456e-05, "loss": 0.4638228714466095, "step": 1992 }, { "epoch": 0.24183958257493024, "grad_norm": 3.004784345626831, "learning_rate": 1.535192236825943e-05, "loss": 0.3469436764717102, "step": 1993 }, { "epoch": 0.2419609270719573, "grad_norm": 1.9272234439849854, "learning_rate": 1.5349465667608405e-05, "loss": 0.316535085439682, "step": 1994 }, { "epoch": 0.24208227156898435, "grad_norm": 1.9831242561340332, "learning_rate": 1.534700896695738e-05, "loss": 0.21885432302951813, "step": 1995 }, { "epoch": 0.2422036160660114, "grad_norm": 1.7284822463989258, "learning_rate": 1.5344552266306353e-05, "loss": 0.2139614224433899, "step": 1996 }, { "epoch": 0.24232496056303846, "grad_norm": 4.944525241851807, "learning_rate": 1.5342095565655327e-05, "loss": 0.4055936932563782, "step": 1997 }, { "epoch": 0.24244630506006554, "grad_norm": 4.0585618019104, "learning_rate": 1.53396388650043e-05, "loss": 0.6400541663169861, "step": 1998 }, { "epoch": 0.2425676495570926, "grad_norm": 3.2324647903442383, "learning_rate": 1.5337182164353276e-05, "loss": 0.29065656661987305, "step": 1999 }, { "epoch": 0.24268899405411964, "grad_norm": 2.2216804027557373, "learning_rate": 1.533472546370225e-05, "loss": 0.3676172196865082, "step": 2000 }, { "epoch": 0.2428103385511467, "grad_norm": 2.722198247909546, "learning_rate": 1.5332268763051224e-05, "loss": 0.3258315920829773, "step": 2001 }, { "epoch": 0.24293168304817375, "grad_norm": 2.4563684463500977, "learning_rate": 1.53298120624002e-05, "loss": 0.3917081654071808, "step": 2002 }, { "epoch": 0.24305302754520083, "grad_norm": 2.719510078430176, "learning_rate": 1.5327355361749173e-05, "loss": 0.37439966201782227, "step": 2003 }, { "epoch": 0.2431743720422279, "grad_norm": 1.5176633596420288, "learning_rate": 1.5324898661098147e-05, "loss": 0.28591275215148926, "step": 2004 }, { "epoch": 0.24329571653925494, "grad_norm": 1.9752401113510132, "learning_rate": 1.532244196044712e-05, "loss": 0.4649636745452881, "step": 2005 }, { "epoch": 0.243417061036282, "grad_norm": 1.897263765335083, "learning_rate": 1.5319985259796096e-05, "loss": 0.33651724457740784, "step": 2006 }, { "epoch": 0.24353840553330908, "grad_norm": 2.2642662525177, "learning_rate": 1.531752855914507e-05, "loss": 0.19593358039855957, "step": 2007 }, { "epoch": 0.24365975003033613, "grad_norm": 1.0743077993392944, "learning_rate": 1.5315071858494044e-05, "loss": 0.07533052563667297, "step": 2008 }, { "epoch": 0.24378109452736318, "grad_norm": 1.6566965579986572, "learning_rate": 1.531261515784302e-05, "loss": 0.5573430061340332, "step": 2009 }, { "epoch": 0.24390243902439024, "grad_norm": 1.9225425720214844, "learning_rate": 1.5310158457191993e-05, "loss": 0.3474563956260681, "step": 2010 }, { "epoch": 0.2440237835214173, "grad_norm": 1.7132405042648315, "learning_rate": 1.5307701756540967e-05, "loss": 0.09912965446710587, "step": 2011 }, { "epoch": 0.24414512801844437, "grad_norm": 1.9798846244812012, "learning_rate": 1.530524505588994e-05, "loss": 0.1815606951713562, "step": 2012 }, { "epoch": 0.24426647251547143, "grad_norm": 1.9553190469741821, "learning_rate": 1.5302788355238915e-05, "loss": 0.08286681771278381, "step": 2013 }, { "epoch": 0.24438781701249848, "grad_norm": 3.6544125080108643, "learning_rate": 1.530033165458789e-05, "loss": 0.746819257736206, "step": 2014 }, { "epoch": 0.24450916150952554, "grad_norm": 2.1808061599731445, "learning_rate": 1.5297874953936864e-05, "loss": 0.6136137843132019, "step": 2015 }, { "epoch": 0.2446305060065526, "grad_norm": 1.8095557689666748, "learning_rate": 1.5295418253285838e-05, "loss": 0.3068550229072571, "step": 2016 }, { "epoch": 0.24475185050357967, "grad_norm": 0.7586051225662231, "learning_rate": 1.5292961552634812e-05, "loss": 0.004989935085177422, "step": 2017 }, { "epoch": 0.24487319500060672, "grad_norm": 1.9498528242111206, "learning_rate": 1.5290504851983787e-05, "loss": 0.19964951276779175, "step": 2018 }, { "epoch": 0.24499453949763378, "grad_norm": 2.4533638954162598, "learning_rate": 1.528804815133276e-05, "loss": 0.2574433982372284, "step": 2019 }, { "epoch": 0.24511588399466083, "grad_norm": 1.9604499340057373, "learning_rate": 1.5285591450681735e-05, "loss": 0.14973944425582886, "step": 2020 }, { "epoch": 0.24523722849168791, "grad_norm": 2.5714564323425293, "learning_rate": 1.528313475003071e-05, "loss": 0.33707791566848755, "step": 2021 }, { "epoch": 0.24535857298871497, "grad_norm": 1.7424302101135254, "learning_rate": 1.5280678049379684e-05, "loss": 0.11994636058807373, "step": 2022 }, { "epoch": 0.24547991748574202, "grad_norm": 1.7286163568496704, "learning_rate": 1.5278221348728658e-05, "loss": 0.20991051197052002, "step": 2023 }, { "epoch": 0.24560126198276908, "grad_norm": 2.8101806640625, "learning_rate": 1.5275764648077632e-05, "loss": 0.5377737283706665, "step": 2024 }, { "epoch": 0.24572260647979613, "grad_norm": 2.4450466632843018, "learning_rate": 1.5273307947426607e-05, "loss": 0.2972002625465393, "step": 2025 }, { "epoch": 0.2458439509768232, "grad_norm": 2.2055904865264893, "learning_rate": 1.527085124677558e-05, "loss": 0.4898790121078491, "step": 2026 }, { "epoch": 0.24596529547385027, "grad_norm": 1.6781805753707886, "learning_rate": 1.5268394546124555e-05, "loss": 0.227199986577034, "step": 2027 }, { "epoch": 0.24608663997087732, "grad_norm": 1.2776950597763062, "learning_rate": 1.526593784547353e-05, "loss": 0.05490413308143616, "step": 2028 }, { "epoch": 0.24620798446790437, "grad_norm": 1.8420385122299194, "learning_rate": 1.5263481144822504e-05, "loss": 0.5011781454086304, "step": 2029 }, { "epoch": 0.24632932896493143, "grad_norm": 1.7983120679855347, "learning_rate": 1.5261024444171478e-05, "loss": 0.30248773097991943, "step": 2030 }, { "epoch": 0.2464506734619585, "grad_norm": 1.9787551164627075, "learning_rate": 1.5258567743520454e-05, "loss": 0.27914515137672424, "step": 2031 }, { "epoch": 0.24657201795898556, "grad_norm": 2.571229934692383, "learning_rate": 1.5256111042869428e-05, "loss": 0.26840049028396606, "step": 2032 }, { "epoch": 0.24669336245601262, "grad_norm": 2.642086982727051, "learning_rate": 1.5253654342218402e-05, "loss": 0.6005504727363586, "step": 2033 }, { "epoch": 0.24681470695303967, "grad_norm": 2.7929725646972656, "learning_rate": 1.5251197641567377e-05, "loss": 0.20075125992298126, "step": 2034 }, { "epoch": 0.24693605145006675, "grad_norm": 1.746235966682434, "learning_rate": 1.524874094091635e-05, "loss": 0.21847037971019745, "step": 2035 }, { "epoch": 0.2470573959470938, "grad_norm": 2.4493091106414795, "learning_rate": 1.5246284240265325e-05, "loss": 0.22630271315574646, "step": 2036 }, { "epoch": 0.24717874044412086, "grad_norm": 3.2244346141815186, "learning_rate": 1.52438275396143e-05, "loss": 0.3294290006160736, "step": 2037 }, { "epoch": 0.2473000849411479, "grad_norm": 2.499692916870117, "learning_rate": 1.5241370838963274e-05, "loss": 0.0979139655828476, "step": 2038 }, { "epoch": 0.24742142943817497, "grad_norm": 1.6712913513183594, "learning_rate": 1.5238914138312248e-05, "loss": 0.03487418591976166, "step": 2039 }, { "epoch": 0.24754277393520205, "grad_norm": 2.223564863204956, "learning_rate": 1.5236457437661222e-05, "loss": 0.22165976464748383, "step": 2040 }, { "epoch": 0.2476641184322291, "grad_norm": 2.2923712730407715, "learning_rate": 1.5234000737010198e-05, "loss": 0.48853620886802673, "step": 2041 }, { "epoch": 0.24778546292925616, "grad_norm": 1.7251356840133667, "learning_rate": 1.5231544036359172e-05, "loss": 0.36525624990463257, "step": 2042 }, { "epoch": 0.2479068074262832, "grad_norm": 0.03325257450342178, "learning_rate": 1.5229087335708147e-05, "loss": 0.0005018762894906104, "step": 2043 }, { "epoch": 0.2480281519233103, "grad_norm": 2.6671509742736816, "learning_rate": 1.522663063505712e-05, "loss": 0.2062726467847824, "step": 2044 }, { "epoch": 0.24814949642033735, "grad_norm": 1.7973912954330444, "learning_rate": 1.5224173934406095e-05, "loss": 0.08589420467615128, "step": 2045 }, { "epoch": 0.2482708409173644, "grad_norm": 0.27702754735946655, "learning_rate": 1.522171723375507e-05, "loss": 0.010546392761170864, "step": 2046 }, { "epoch": 0.24839218541439145, "grad_norm": 1.957955241203308, "learning_rate": 1.5219260533104044e-05, "loss": 0.13728350400924683, "step": 2047 }, { "epoch": 0.2485135299114185, "grad_norm": 0.853355348110199, "learning_rate": 1.5216803832453018e-05, "loss": 0.037357147783041, "step": 2048 }, { "epoch": 0.2486348744084456, "grad_norm": 2.572986364364624, "learning_rate": 1.5214347131801992e-05, "loss": 0.4272196888923645, "step": 2049 }, { "epoch": 0.24875621890547264, "grad_norm": 2.455547571182251, "learning_rate": 1.5211890431150966e-05, "loss": 0.1457350105047226, "step": 2050 }, { "epoch": 0.2488775634024997, "grad_norm": 3.401978015899658, "learning_rate": 1.520943373049994e-05, "loss": 0.33558374643325806, "step": 2051 }, { "epoch": 0.24899890789952675, "grad_norm": 3.3851821422576904, "learning_rate": 1.5206977029848915e-05, "loss": 0.2722301483154297, "step": 2052 }, { "epoch": 0.2491202523965538, "grad_norm": 1.254441261291504, "learning_rate": 1.5204520329197889e-05, "loss": 0.10501188039779663, "step": 2053 }, { "epoch": 0.24924159689358089, "grad_norm": 2.4674789905548096, "learning_rate": 1.5202063628546863e-05, "loss": 0.4321037232875824, "step": 2054 }, { "epoch": 0.24936294139060794, "grad_norm": 2.63444185256958, "learning_rate": 1.5199606927895838e-05, "loss": 0.8152517080307007, "step": 2055 }, { "epoch": 0.249484285887635, "grad_norm": 2.0867371559143066, "learning_rate": 1.5197150227244812e-05, "loss": 0.27917084097862244, "step": 2056 }, { "epoch": 0.24960563038466205, "grad_norm": 1.4733816385269165, "learning_rate": 1.5194693526593786e-05, "loss": 0.08613657206296921, "step": 2057 }, { "epoch": 0.24972697488168913, "grad_norm": 2.284787893295288, "learning_rate": 1.519223682594276e-05, "loss": 0.20989270508289337, "step": 2058 }, { "epoch": 0.24984831937871618, "grad_norm": 2.9490373134613037, "learning_rate": 1.5189780125291735e-05, "loss": 0.30377861857414246, "step": 2059 }, { "epoch": 0.24996966387574324, "grad_norm": 1.8254625797271729, "learning_rate": 1.5187323424640709e-05, "loss": 0.13897734880447388, "step": 2060 }, { "epoch": 0.2500910083727703, "grad_norm": 2.1339690685272217, "learning_rate": 1.5184866723989685e-05, "loss": 0.1870119720697403, "step": 2061 }, { "epoch": 0.25021235286979737, "grad_norm": 2.154693365097046, "learning_rate": 1.518241002333866e-05, "loss": 0.32715970277786255, "step": 2062 }, { "epoch": 0.2503336973668244, "grad_norm": 2.3708536624908447, "learning_rate": 1.5179953322687633e-05, "loss": 0.5419004559516907, "step": 2063 }, { "epoch": 0.2504550418638515, "grad_norm": 2.2447872161865234, "learning_rate": 1.5177496622036608e-05, "loss": 0.3017771244049072, "step": 2064 }, { "epoch": 0.25057638636087853, "grad_norm": 2.2402448654174805, "learning_rate": 1.5175039921385582e-05, "loss": 0.31448161602020264, "step": 2065 }, { "epoch": 0.2506977308579056, "grad_norm": 2.5074141025543213, "learning_rate": 1.5172583220734556e-05, "loss": 0.6035457253456116, "step": 2066 }, { "epoch": 0.25081907535493264, "grad_norm": 2.2186553478240967, "learning_rate": 1.517012652008353e-05, "loss": 0.586137056350708, "step": 2067 }, { "epoch": 0.2509404198519597, "grad_norm": 2.2771787643432617, "learning_rate": 1.5167669819432505e-05, "loss": 0.24772994220256805, "step": 2068 }, { "epoch": 0.25106176434898675, "grad_norm": 1.10565984249115, "learning_rate": 1.5165213118781477e-05, "loss": 0.04752576723694801, "step": 2069 }, { "epoch": 0.25118310884601386, "grad_norm": 1.4915709495544434, "learning_rate": 1.5162756418130452e-05, "loss": 0.2911011576652527, "step": 2070 }, { "epoch": 0.2513044533430409, "grad_norm": 3.409348249435425, "learning_rate": 1.5160299717479426e-05, "loss": 0.5246961712837219, "step": 2071 }, { "epoch": 0.25142579784006797, "grad_norm": 2.192699432373047, "learning_rate": 1.51578430168284e-05, "loss": 0.60477614402771, "step": 2072 }, { "epoch": 0.251547142337095, "grad_norm": 2.0602874755859375, "learning_rate": 1.5155386316177374e-05, "loss": 0.26899605989456177, "step": 2073 }, { "epoch": 0.2516684868341221, "grad_norm": 4.965397834777832, "learning_rate": 1.5152929615526349e-05, "loss": 0.27953648567199707, "step": 2074 }, { "epoch": 0.2517898313311491, "grad_norm": 1.779467225074768, "learning_rate": 1.5150472914875323e-05, "loss": 0.2358415126800537, "step": 2075 }, { "epoch": 0.2519111758281762, "grad_norm": 1.53863525390625, "learning_rate": 1.5148016214224297e-05, "loss": 0.0970386490225792, "step": 2076 }, { "epoch": 0.25203252032520324, "grad_norm": 2.409815788269043, "learning_rate": 1.5145559513573271e-05, "loss": 0.29230624437332153, "step": 2077 }, { "epoch": 0.2521538648222303, "grad_norm": 2.354555130004883, "learning_rate": 1.5143102812922246e-05, "loss": 0.139778271317482, "step": 2078 }, { "epoch": 0.2522752093192574, "grad_norm": 2.3420677185058594, "learning_rate": 1.514064611227122e-05, "loss": 0.31483131647109985, "step": 2079 }, { "epoch": 0.25239655381628445, "grad_norm": 1.7981594800949097, "learning_rate": 1.5138189411620194e-05, "loss": 0.18265822529792786, "step": 2080 }, { "epoch": 0.2525178983133115, "grad_norm": 1.9754524230957031, "learning_rate": 1.5135732710969168e-05, "loss": 0.21006661653518677, "step": 2081 }, { "epoch": 0.25263924281033856, "grad_norm": 2.199647903442383, "learning_rate": 1.5133276010318143e-05, "loss": 0.3640359938144684, "step": 2082 }, { "epoch": 0.2527605873073656, "grad_norm": 2.174180030822754, "learning_rate": 1.5130819309667117e-05, "loss": 0.2530081868171692, "step": 2083 }, { "epoch": 0.25288193180439267, "grad_norm": 2.420414686203003, "learning_rate": 1.5128362609016091e-05, "loss": 0.2343895435333252, "step": 2084 }, { "epoch": 0.2530032763014197, "grad_norm": 2.9052069187164307, "learning_rate": 1.5125905908365065e-05, "loss": 0.2803361713886261, "step": 2085 }, { "epoch": 0.2531246207984468, "grad_norm": 2.1363558769226074, "learning_rate": 1.512344920771404e-05, "loss": 0.2120419144630432, "step": 2086 }, { "epoch": 0.25324596529547383, "grad_norm": 3.7581350803375244, "learning_rate": 1.5120992507063016e-05, "loss": 0.5633907318115234, "step": 2087 }, { "epoch": 0.2533673097925009, "grad_norm": 2.090036392211914, "learning_rate": 1.511853580641199e-05, "loss": 0.6909904479980469, "step": 2088 }, { "epoch": 0.253488654289528, "grad_norm": 1.761902093887329, "learning_rate": 1.5116079105760964e-05, "loss": 0.30952948331832886, "step": 2089 }, { "epoch": 0.25360999878655505, "grad_norm": 2.110933780670166, "learning_rate": 1.5113622405109938e-05, "loss": 0.286069393157959, "step": 2090 }, { "epoch": 0.2537313432835821, "grad_norm": 1.956461787223816, "learning_rate": 1.5111165704458913e-05, "loss": 0.13381141424179077, "step": 2091 }, { "epoch": 0.25385268778060915, "grad_norm": 1.5321189165115356, "learning_rate": 1.5108709003807887e-05, "loss": 0.10883570462465286, "step": 2092 }, { "epoch": 0.2539740322776362, "grad_norm": 1.6738094091415405, "learning_rate": 1.5106252303156861e-05, "loss": 0.3568721115589142, "step": 2093 }, { "epoch": 0.25409537677466326, "grad_norm": 3.5300426483154297, "learning_rate": 1.5103795602505835e-05, "loss": 0.15414369106292725, "step": 2094 }, { "epoch": 0.2542167212716903, "grad_norm": 2.9318861961364746, "learning_rate": 1.510133890185481e-05, "loss": 0.2508835196495056, "step": 2095 }, { "epoch": 0.25433806576871737, "grad_norm": 1.9397698640823364, "learning_rate": 1.5098882201203784e-05, "loss": 0.43463265895843506, "step": 2096 }, { "epoch": 0.2544594102657444, "grad_norm": 3.07175612449646, "learning_rate": 1.5096425500552758e-05, "loss": 0.4037576913833618, "step": 2097 }, { "epoch": 0.25458075476277153, "grad_norm": 2.9463930130004883, "learning_rate": 1.5093968799901732e-05, "loss": 0.40242353081703186, "step": 2098 }, { "epoch": 0.2547020992597986, "grad_norm": 3.273380994796753, "learning_rate": 1.5091512099250707e-05, "loss": 0.4325547218322754, "step": 2099 }, { "epoch": 0.25482344375682564, "grad_norm": 3.8387303352355957, "learning_rate": 1.5089055398599681e-05, "loss": 0.19408497214317322, "step": 2100 }, { "epoch": 0.2549447882538527, "grad_norm": 1.8059979677200317, "learning_rate": 1.5086598697948655e-05, "loss": 0.3167189955711365, "step": 2101 }, { "epoch": 0.25506613275087975, "grad_norm": 2.231421947479248, "learning_rate": 1.508414199729763e-05, "loss": 0.3446089029312134, "step": 2102 }, { "epoch": 0.2551874772479068, "grad_norm": 2.24876070022583, "learning_rate": 1.5081685296646604e-05, "loss": 0.27822139859199524, "step": 2103 }, { "epoch": 0.25530882174493386, "grad_norm": 1.2263654470443726, "learning_rate": 1.5079228595995578e-05, "loss": 0.1035546362400055, "step": 2104 }, { "epoch": 0.2554301662419609, "grad_norm": 2.2528395652770996, "learning_rate": 1.5076771895344552e-05, "loss": 0.2142188996076584, "step": 2105 }, { "epoch": 0.25555151073898796, "grad_norm": 2.0527801513671875, "learning_rate": 1.5074315194693527e-05, "loss": 0.17007258534431458, "step": 2106 }, { "epoch": 0.2556728552360151, "grad_norm": 1.6478774547576904, "learning_rate": 1.5071858494042502e-05, "loss": 0.10429070889949799, "step": 2107 }, { "epoch": 0.2557941997330421, "grad_norm": 1.3817076683044434, "learning_rate": 1.5069401793391477e-05, "loss": 0.08755984902381897, "step": 2108 }, { "epoch": 0.2559155442300692, "grad_norm": 3.489661931991577, "learning_rate": 1.5066945092740451e-05, "loss": 0.6258473992347717, "step": 2109 }, { "epoch": 0.25603688872709623, "grad_norm": 2.4805729389190674, "learning_rate": 1.5064488392089425e-05, "loss": 0.38748401403427124, "step": 2110 }, { "epoch": 0.2561582332241233, "grad_norm": 2.485771656036377, "learning_rate": 1.50620316914384e-05, "loss": 0.6375660300254822, "step": 2111 }, { "epoch": 0.25627957772115034, "grad_norm": 2.3500325679779053, "learning_rate": 1.5059574990787374e-05, "loss": 0.43053320050239563, "step": 2112 }, { "epoch": 0.2564009222181774, "grad_norm": 0.9563068747520447, "learning_rate": 1.5057118290136348e-05, "loss": 0.068636953830719, "step": 2113 }, { "epoch": 0.25652226671520445, "grad_norm": 1.7979881763458252, "learning_rate": 1.5054661589485322e-05, "loss": 0.2505972683429718, "step": 2114 }, { "epoch": 0.2566436112122315, "grad_norm": 1.7445679903030396, "learning_rate": 1.5052204888834297e-05, "loss": 0.15084236860275269, "step": 2115 }, { "epoch": 0.25676495570925856, "grad_norm": 1.9328049421310425, "learning_rate": 1.504974818818327e-05, "loss": 0.293321430683136, "step": 2116 }, { "epoch": 0.25688630020628567, "grad_norm": 0.001952366204932332, "learning_rate": 1.5047291487532245e-05, "loss": 3.136243685730733e-05, "step": 2117 }, { "epoch": 0.2570076447033127, "grad_norm": 1.4848837852478027, "learning_rate": 1.504483478688122e-05, "loss": 0.31598547101020813, "step": 2118 }, { "epoch": 0.2571289892003398, "grad_norm": 1.692036509513855, "learning_rate": 1.5042378086230194e-05, "loss": 0.3037584722042084, "step": 2119 }, { "epoch": 0.25725033369736683, "grad_norm": 2.918186664581299, "learning_rate": 1.5039921385579168e-05, "loss": 0.25492578744888306, "step": 2120 }, { "epoch": 0.2573716781943939, "grad_norm": 1.0130181312561035, "learning_rate": 1.5037464684928142e-05, "loss": 0.03894425556063652, "step": 2121 }, { "epoch": 0.25749302269142094, "grad_norm": 1.9439772367477417, "learning_rate": 1.5035007984277116e-05, "loss": 0.4366956651210785, "step": 2122 }, { "epoch": 0.257614367188448, "grad_norm": 2.389458656311035, "learning_rate": 1.503255128362609e-05, "loss": 0.2752874195575714, "step": 2123 }, { "epoch": 0.25773571168547504, "grad_norm": 3.298961639404297, "learning_rate": 1.5030094582975065e-05, "loss": 0.21089746057987213, "step": 2124 }, { "epoch": 0.2578570561825021, "grad_norm": 1.609940767288208, "learning_rate": 1.5027637882324039e-05, "loss": 0.4226279854774475, "step": 2125 }, { "epoch": 0.2579784006795292, "grad_norm": 2.3028876781463623, "learning_rate": 1.5025181181673013e-05, "loss": 0.31807318329811096, "step": 2126 }, { "epoch": 0.25809974517655626, "grad_norm": 1.385694146156311, "learning_rate": 1.502272448102199e-05, "loss": 0.12783929705619812, "step": 2127 }, { "epoch": 0.2582210896735833, "grad_norm": 2.2158091068267822, "learning_rate": 1.5020267780370964e-05, "loss": 0.3851372003555298, "step": 2128 }, { "epoch": 0.25834243417061037, "grad_norm": 1.4377362728118896, "learning_rate": 1.5017811079719938e-05, "loss": 0.11848616600036621, "step": 2129 }, { "epoch": 0.2584637786676374, "grad_norm": 3.7586557865142822, "learning_rate": 1.5015354379068912e-05, "loss": 0.4604339599609375, "step": 2130 }, { "epoch": 0.2585851231646645, "grad_norm": 3.421212673187256, "learning_rate": 1.5012897678417886e-05, "loss": 0.4293072819709778, "step": 2131 }, { "epoch": 0.25870646766169153, "grad_norm": 1.0327222347259521, "learning_rate": 1.501044097776686e-05, "loss": 0.03275943547487259, "step": 2132 }, { "epoch": 0.2588278121587186, "grad_norm": 2.4123880863189697, "learning_rate": 1.5007984277115835e-05, "loss": 0.4415239691734314, "step": 2133 }, { "epoch": 0.25894915665574564, "grad_norm": 3.552459955215454, "learning_rate": 1.5005527576464809e-05, "loss": 0.25388628244400024, "step": 2134 }, { "epoch": 0.25907050115277275, "grad_norm": 2.9123752117156982, "learning_rate": 1.5003070875813783e-05, "loss": 0.4704461395740509, "step": 2135 }, { "epoch": 0.2591918456497998, "grad_norm": 2.8575401306152344, "learning_rate": 1.5000614175162758e-05, "loss": 0.2883625626564026, "step": 2136 }, { "epoch": 0.25931319014682686, "grad_norm": 2.4062740802764893, "learning_rate": 1.4998157474511732e-05, "loss": 0.6726546883583069, "step": 2137 }, { "epoch": 0.2594345346438539, "grad_norm": 1.805238962173462, "learning_rate": 1.4995700773860706e-05, "loss": 0.1653829663991928, "step": 2138 }, { "epoch": 0.25955587914088096, "grad_norm": 1.1520496606826782, "learning_rate": 1.499324407320968e-05, "loss": 0.14368143677711487, "step": 2139 }, { "epoch": 0.259677223637908, "grad_norm": 1.4968868494033813, "learning_rate": 1.4990787372558655e-05, "loss": 0.24565939605236053, "step": 2140 }, { "epoch": 0.25979856813493507, "grad_norm": 1.9920177459716797, "learning_rate": 1.4988330671907629e-05, "loss": 0.2082982361316681, "step": 2141 }, { "epoch": 0.2599199126319621, "grad_norm": 2.7933566570281982, "learning_rate": 1.4985873971256603e-05, "loss": 0.9215075969696045, "step": 2142 }, { "epoch": 0.2600412571289892, "grad_norm": 1.636723518371582, "learning_rate": 1.4983417270605577e-05, "loss": 0.08533577620983124, "step": 2143 }, { "epoch": 0.2601626016260163, "grad_norm": 3.587369441986084, "learning_rate": 1.4980960569954552e-05, "loss": 0.6961395144462585, "step": 2144 }, { "epoch": 0.26028394612304334, "grad_norm": 2.541285991668701, "learning_rate": 1.4978503869303526e-05, "loss": 0.0697111263871193, "step": 2145 }, { "epoch": 0.2604052906200704, "grad_norm": 1.8340191841125488, "learning_rate": 1.49760471686525e-05, "loss": 0.14525513350963593, "step": 2146 }, { "epoch": 0.26052663511709745, "grad_norm": 2.301511526107788, "learning_rate": 1.4973590468001476e-05, "loss": 0.44270119071006775, "step": 2147 }, { "epoch": 0.2606479796141245, "grad_norm": 2.5070643424987793, "learning_rate": 1.497113376735045e-05, "loss": 0.47159114480018616, "step": 2148 }, { "epoch": 0.26076932411115156, "grad_norm": 2.2494490146636963, "learning_rate": 1.4968677066699425e-05, "loss": 0.22947777807712555, "step": 2149 }, { "epoch": 0.2608906686081786, "grad_norm": 2.050786018371582, "learning_rate": 1.4966220366048399e-05, "loss": 0.49438372254371643, "step": 2150 }, { "epoch": 0.26101201310520566, "grad_norm": 2.4287447929382324, "learning_rate": 1.4963763665397373e-05, "loss": 0.20165540277957916, "step": 2151 }, { "epoch": 0.2611333576022327, "grad_norm": 2.0431525707244873, "learning_rate": 1.4961306964746347e-05, "loss": 0.2393646389245987, "step": 2152 }, { "epoch": 0.2612547020992598, "grad_norm": 0.9281308650970459, "learning_rate": 1.4958850264095322e-05, "loss": 0.020468752831220627, "step": 2153 }, { "epoch": 0.2613760465962869, "grad_norm": 2.2387876510620117, "learning_rate": 1.4956393563444296e-05, "loss": 0.27769941091537476, "step": 2154 }, { "epoch": 0.26149739109331394, "grad_norm": 2.1103501319885254, "learning_rate": 1.495393686279327e-05, "loss": 0.4620177745819092, "step": 2155 }, { "epoch": 0.261618735590341, "grad_norm": 2.1693103313446045, "learning_rate": 1.4951480162142245e-05, "loss": 0.488872230052948, "step": 2156 }, { "epoch": 0.26174008008736804, "grad_norm": 2.1997673511505127, "learning_rate": 1.4949023461491219e-05, "loss": 0.2967035472393036, "step": 2157 }, { "epoch": 0.2618614245843951, "grad_norm": 1.9877324104309082, "learning_rate": 1.4946566760840193e-05, "loss": 0.39621827006340027, "step": 2158 }, { "epoch": 0.26198276908142215, "grad_norm": 2.1811094284057617, "learning_rate": 1.4944110060189167e-05, "loss": 0.4217057228088379, "step": 2159 }, { "epoch": 0.2621041135784492, "grad_norm": 1.8478000164031982, "learning_rate": 1.4941653359538142e-05, "loss": 0.25536009669303894, "step": 2160 }, { "epoch": 0.26222545807547626, "grad_norm": 3.0081064701080322, "learning_rate": 1.4939196658887116e-05, "loss": 0.44147688150405884, "step": 2161 }, { "epoch": 0.2623468025725033, "grad_norm": 3.5933215618133545, "learning_rate": 1.493673995823609e-05, "loss": 0.29955124855041504, "step": 2162 }, { "epoch": 0.2624681470695304, "grad_norm": 2.178781270980835, "learning_rate": 1.4934283257585064e-05, "loss": 0.31088438630104065, "step": 2163 }, { "epoch": 0.2625894915665575, "grad_norm": 0.008322947658598423, "learning_rate": 1.4931826556934039e-05, "loss": 0.0001154024648712948, "step": 2164 }, { "epoch": 0.26271083606358453, "grad_norm": 2.786419153213501, "learning_rate": 1.4929369856283013e-05, "loss": 0.355583518743515, "step": 2165 }, { "epoch": 0.2628321805606116, "grad_norm": 2.188692808151245, "learning_rate": 1.4926913155631989e-05, "loss": 0.05528976395726204, "step": 2166 }, { "epoch": 0.26295352505763864, "grad_norm": 1.79579496383667, "learning_rate": 1.4924456454980963e-05, "loss": 0.27237698435783386, "step": 2167 }, { "epoch": 0.2630748695546657, "grad_norm": 1.7321343421936035, "learning_rate": 1.4921999754329937e-05, "loss": 0.12968100607395172, "step": 2168 }, { "epoch": 0.26319621405169275, "grad_norm": 3.122284173965454, "learning_rate": 1.4919543053678912e-05, "loss": 0.17153069376945496, "step": 2169 }, { "epoch": 0.2633175585487198, "grad_norm": 1.6060526371002197, "learning_rate": 1.4917086353027886e-05, "loss": 0.09631694853305817, "step": 2170 }, { "epoch": 0.26343890304574685, "grad_norm": 2.729292154312134, "learning_rate": 1.491462965237686e-05, "loss": 0.16976892948150635, "step": 2171 }, { "epoch": 0.26356024754277396, "grad_norm": 1.546196699142456, "learning_rate": 1.4912172951725834e-05, "loss": 0.11652785539627075, "step": 2172 }, { "epoch": 0.263681592039801, "grad_norm": 1.605636715888977, "learning_rate": 1.4909716251074809e-05, "loss": 0.17108720541000366, "step": 2173 }, { "epoch": 0.26380293653682807, "grad_norm": 1.3509597778320312, "learning_rate": 1.4907259550423783e-05, "loss": 0.11237197369337082, "step": 2174 }, { "epoch": 0.2639242810338551, "grad_norm": 1.7617028951644897, "learning_rate": 1.4904802849772757e-05, "loss": 0.3738328516483307, "step": 2175 }, { "epoch": 0.2640456255308822, "grad_norm": 2.5859122276306152, "learning_rate": 1.4902346149121731e-05, "loss": 0.14145395159721375, "step": 2176 }, { "epoch": 0.26416697002790923, "grad_norm": 2.6692991256713867, "learning_rate": 1.4899889448470706e-05, "loss": 0.2799850404262543, "step": 2177 }, { "epoch": 0.2642883145249363, "grad_norm": 2.333256244659424, "learning_rate": 1.489743274781968e-05, "loss": 0.76203852891922, "step": 2178 }, { "epoch": 0.26440965902196334, "grad_norm": 0.8633162379264832, "learning_rate": 1.4894976047168654e-05, "loss": 0.0584387369453907, "step": 2179 }, { "epoch": 0.2645310035189904, "grad_norm": 3.16007137298584, "learning_rate": 1.4892519346517628e-05, "loss": 0.3512621223926544, "step": 2180 }, { "epoch": 0.26465234801601745, "grad_norm": 1.7493587732315063, "learning_rate": 1.4890062645866603e-05, "loss": 0.5163399577140808, "step": 2181 }, { "epoch": 0.26477369251304456, "grad_norm": 2.6557040214538574, "learning_rate": 1.4887605945215577e-05, "loss": 0.26694709062576294, "step": 2182 }, { "epoch": 0.2648950370100716, "grad_norm": 2.3680715560913086, "learning_rate": 1.4885149244564551e-05, "loss": 0.23716874420642853, "step": 2183 }, { "epoch": 0.26501638150709866, "grad_norm": 3.0584466457366943, "learning_rate": 1.4882692543913525e-05, "loss": 0.5385470986366272, "step": 2184 }, { "epoch": 0.2651377260041257, "grad_norm": 1.8691861629486084, "learning_rate": 1.48802358432625e-05, "loss": 0.09475544095039368, "step": 2185 }, { "epoch": 0.26525907050115277, "grad_norm": 2.55315899848938, "learning_rate": 1.4877779142611476e-05, "loss": 0.21837680041790009, "step": 2186 }, { "epoch": 0.2653804149981798, "grad_norm": 2.546182632446289, "learning_rate": 1.487532244196045e-05, "loss": 0.37429964542388916, "step": 2187 }, { "epoch": 0.2655017594952069, "grad_norm": 2.265850305557251, "learning_rate": 1.4872865741309424e-05, "loss": 0.4278375506401062, "step": 2188 }, { "epoch": 0.26562310399223393, "grad_norm": 2.3295276165008545, "learning_rate": 1.4870409040658398e-05, "loss": 0.3769855499267578, "step": 2189 }, { "epoch": 0.265744448489261, "grad_norm": 1.795820951461792, "learning_rate": 1.4867952340007373e-05, "loss": 0.2219768464565277, "step": 2190 }, { "epoch": 0.2658657929862881, "grad_norm": 2.0393049716949463, "learning_rate": 1.4865495639356347e-05, "loss": 0.2658393085002899, "step": 2191 }, { "epoch": 0.26598713748331515, "grad_norm": 2.258559465408325, "learning_rate": 1.4863038938705321e-05, "loss": 0.25479304790496826, "step": 2192 }, { "epoch": 0.2661084819803422, "grad_norm": 2.1604347229003906, "learning_rate": 1.4860582238054295e-05, "loss": 0.39344412088394165, "step": 2193 }, { "epoch": 0.26622982647736926, "grad_norm": 2.4166760444641113, "learning_rate": 1.485812553740327e-05, "loss": 0.17573045194149017, "step": 2194 }, { "epoch": 0.2663511709743963, "grad_norm": 1.7303146123886108, "learning_rate": 1.4855668836752244e-05, "loss": 0.13295504450798035, "step": 2195 }, { "epoch": 0.26647251547142337, "grad_norm": 2.6552679538726807, "learning_rate": 1.4853212136101218e-05, "loss": 0.3497859537601471, "step": 2196 }, { "epoch": 0.2665938599684504, "grad_norm": 4.82560396194458, "learning_rate": 1.4850755435450192e-05, "loss": 0.34919238090515137, "step": 2197 }, { "epoch": 0.2667152044654775, "grad_norm": 1.834449052810669, "learning_rate": 1.4848298734799167e-05, "loss": 0.1825961172580719, "step": 2198 }, { "epoch": 0.2668365489625045, "grad_norm": 2.188383102416992, "learning_rate": 1.4845842034148141e-05, "loss": 0.4335808753967285, "step": 2199 }, { "epoch": 0.26695789345953164, "grad_norm": 1.5220776796340942, "learning_rate": 1.4843385333497115e-05, "loss": 0.15386682748794556, "step": 2200 }, { "epoch": 0.2670792379565587, "grad_norm": 1.5477015972137451, "learning_rate": 1.484092863284609e-05, "loss": 0.12548573315143585, "step": 2201 }, { "epoch": 0.26720058245358574, "grad_norm": 2.084110975265503, "learning_rate": 1.4838471932195064e-05, "loss": 0.5170855522155762, "step": 2202 }, { "epoch": 0.2673219269506128, "grad_norm": 1.241639256477356, "learning_rate": 1.4836015231544038e-05, "loss": 0.1204286739230156, "step": 2203 }, { "epoch": 0.26744327144763985, "grad_norm": 2.8557798862457275, "learning_rate": 1.4833558530893012e-05, "loss": 0.5340369939804077, "step": 2204 }, { "epoch": 0.2675646159446669, "grad_norm": 2.8909528255462646, "learning_rate": 1.4831101830241985e-05, "loss": 0.512728750705719, "step": 2205 }, { "epoch": 0.26768596044169396, "grad_norm": 2.8460562229156494, "learning_rate": 1.4828645129590959e-05, "loss": 0.5244192481040955, "step": 2206 }, { "epoch": 0.267807304938721, "grad_norm": 1.083155870437622, "learning_rate": 1.4826188428939933e-05, "loss": 0.08616457134485245, "step": 2207 }, { "epoch": 0.26792864943574807, "grad_norm": 2.8009111881256104, "learning_rate": 1.4823731728288908e-05, "loss": 0.4594787657260895, "step": 2208 }, { "epoch": 0.2680499939327751, "grad_norm": 2.523655652999878, "learning_rate": 1.4821275027637882e-05, "loss": 0.17828914523124695, "step": 2209 }, { "epoch": 0.26817133842980223, "grad_norm": 3.436047315597534, "learning_rate": 1.4818818326986856e-05, "loss": 0.576928973197937, "step": 2210 }, { "epoch": 0.2682926829268293, "grad_norm": 1.741632342338562, "learning_rate": 1.481636162633583e-05, "loss": 0.08921710401773453, "step": 2211 }, { "epoch": 0.26841402742385634, "grad_norm": 0.03540222719311714, "learning_rate": 1.4813904925684806e-05, "loss": 0.0003073185798712075, "step": 2212 }, { "epoch": 0.2685353719208834, "grad_norm": 2.1982312202453613, "learning_rate": 1.481144822503378e-05, "loss": 0.2795671820640564, "step": 2213 }, { "epoch": 0.26865671641791045, "grad_norm": 2.006270170211792, "learning_rate": 1.4808991524382755e-05, "loss": 0.341614305973053, "step": 2214 }, { "epoch": 0.2687780609149375, "grad_norm": 2.3411900997161865, "learning_rate": 1.4806534823731729e-05, "loss": 0.23282599449157715, "step": 2215 }, { "epoch": 0.26889940541196455, "grad_norm": 2.9898903369903564, "learning_rate": 1.4804078123080703e-05, "loss": 0.32836875319480896, "step": 2216 }, { "epoch": 0.2690207499089916, "grad_norm": 1.7510194778442383, "learning_rate": 1.4801621422429678e-05, "loss": 0.1715484857559204, "step": 2217 }, { "epoch": 0.26914209440601866, "grad_norm": 1.937170147895813, "learning_rate": 1.4799164721778652e-05, "loss": 0.2645847201347351, "step": 2218 }, { "epoch": 0.26926343890304577, "grad_norm": 3.1905677318573, "learning_rate": 1.4796708021127626e-05, "loss": 0.2771417200565338, "step": 2219 }, { "epoch": 0.2693847834000728, "grad_norm": 2.6751186847686768, "learning_rate": 1.47942513204766e-05, "loss": 0.22138531506061554, "step": 2220 }, { "epoch": 0.2695061278970999, "grad_norm": 1.834376573562622, "learning_rate": 1.4791794619825575e-05, "loss": 0.2948099374771118, "step": 2221 }, { "epoch": 0.26962747239412693, "grad_norm": 2.072049379348755, "learning_rate": 1.4789337919174549e-05, "loss": 0.2355620414018631, "step": 2222 }, { "epoch": 0.269748816891154, "grad_norm": 2.5111448764801025, "learning_rate": 1.4786881218523523e-05, "loss": 0.27840733528137207, "step": 2223 }, { "epoch": 0.26987016138818104, "grad_norm": 1.1506294012069702, "learning_rate": 1.4784424517872497e-05, "loss": 0.011712668463587761, "step": 2224 }, { "epoch": 0.2699915058852081, "grad_norm": 2.4833226203918457, "learning_rate": 1.4781967817221472e-05, "loss": 0.33597809076309204, "step": 2225 }, { "epoch": 0.27011285038223515, "grad_norm": 2.3968794345855713, "learning_rate": 1.4779511116570446e-05, "loss": 0.10020019114017487, "step": 2226 }, { "epoch": 0.2702341948792622, "grad_norm": 3.9725828170776367, "learning_rate": 1.477705441591942e-05, "loss": 0.5326024293899536, "step": 2227 }, { "epoch": 0.2703555393762893, "grad_norm": 2.1287968158721924, "learning_rate": 1.4774597715268394e-05, "loss": 0.2615710198879242, "step": 2228 }, { "epoch": 0.27047688387331636, "grad_norm": 2.4575917720794678, "learning_rate": 1.4772141014617369e-05, "loss": 0.6502370834350586, "step": 2229 }, { "epoch": 0.2705982283703434, "grad_norm": 3.2591922283172607, "learning_rate": 1.4769684313966343e-05, "loss": 0.3786463737487793, "step": 2230 }, { "epoch": 0.2707195728673705, "grad_norm": 2.3155269622802734, "learning_rate": 1.4767227613315317e-05, "loss": 0.35610049962997437, "step": 2231 }, { "epoch": 0.2708409173643975, "grad_norm": 2.6793532371520996, "learning_rate": 1.4764770912664293e-05, "loss": 0.378531277179718, "step": 2232 }, { "epoch": 0.2709622618614246, "grad_norm": 1.7378513813018799, "learning_rate": 1.4762314212013267e-05, "loss": 0.14084599912166595, "step": 2233 }, { "epoch": 0.27108360635845163, "grad_norm": 2.425596237182617, "learning_rate": 1.4759857511362242e-05, "loss": 0.3366726040840149, "step": 2234 }, { "epoch": 0.2712049508554787, "grad_norm": 1.7024770975112915, "learning_rate": 1.4757400810711216e-05, "loss": 0.08021130412817001, "step": 2235 }, { "epoch": 0.27132629535250574, "grad_norm": 2.109647035598755, "learning_rate": 1.475494411006019e-05, "loss": 0.4019131660461426, "step": 2236 }, { "epoch": 0.27144763984953285, "grad_norm": 1.5787715911865234, "learning_rate": 1.4752487409409165e-05, "loss": 0.07558346539735794, "step": 2237 }, { "epoch": 0.2715689843465599, "grad_norm": 1.6632368564605713, "learning_rate": 1.4750030708758139e-05, "loss": 0.2343292236328125, "step": 2238 }, { "epoch": 0.27169032884358696, "grad_norm": 2.395317316055298, "learning_rate": 1.4747574008107113e-05, "loss": 0.5279711484909058, "step": 2239 }, { "epoch": 0.271811673340614, "grad_norm": 1.4840434789657593, "learning_rate": 1.4745117307456087e-05, "loss": 0.09839776903390884, "step": 2240 }, { "epoch": 0.27193301783764107, "grad_norm": 2.1698062419891357, "learning_rate": 1.4742660606805062e-05, "loss": 0.36876392364501953, "step": 2241 }, { "epoch": 0.2720543623346681, "grad_norm": 3.7944204807281494, "learning_rate": 1.4740203906154036e-05, "loss": 0.2219565361738205, "step": 2242 }, { "epoch": 0.2721757068316952, "grad_norm": 2.875039577484131, "learning_rate": 1.473774720550301e-05, "loss": 0.2375985085964203, "step": 2243 }, { "epoch": 0.27229705132872223, "grad_norm": 1.9923136234283447, "learning_rate": 1.4735290504851984e-05, "loss": 0.333662211894989, "step": 2244 }, { "epoch": 0.2724183958257493, "grad_norm": 1.5942877531051636, "learning_rate": 1.4732833804200959e-05, "loss": 0.24374574422836304, "step": 2245 }, { "epoch": 0.27253974032277634, "grad_norm": 2.685206651687622, "learning_rate": 1.4730377103549933e-05, "loss": 0.2717750072479248, "step": 2246 }, { "epoch": 0.27266108481980345, "grad_norm": 2.2949953079223633, "learning_rate": 1.4727920402898907e-05, "loss": 0.6460723876953125, "step": 2247 }, { "epoch": 0.2727824293168305, "grad_norm": 2.3787600994110107, "learning_rate": 1.4725463702247881e-05, "loss": 0.269961416721344, "step": 2248 }, { "epoch": 0.27290377381385755, "grad_norm": 2.749967336654663, "learning_rate": 1.4723007001596856e-05, "loss": 0.25449487566947937, "step": 2249 }, { "epoch": 0.2730251183108846, "grad_norm": 2.1262457370758057, "learning_rate": 1.472055030094583e-05, "loss": 0.2170713096857071, "step": 2250 }, { "epoch": 0.27314646280791166, "grad_norm": 3.3156375885009766, "learning_rate": 1.4718093600294804e-05, "loss": 0.3634417951107025, "step": 2251 }, { "epoch": 0.2732678073049387, "grad_norm": 2.9557299613952637, "learning_rate": 1.471563689964378e-05, "loss": 0.33523958921432495, "step": 2252 }, { "epoch": 0.27338915180196577, "grad_norm": 1.2465989589691162, "learning_rate": 1.4713180198992754e-05, "loss": 0.10973364859819412, "step": 2253 }, { "epoch": 0.2735104962989928, "grad_norm": 2.5796959400177, "learning_rate": 1.4710723498341729e-05, "loss": 0.17034733295440674, "step": 2254 }, { "epoch": 0.2736318407960199, "grad_norm": 2.6189379692077637, "learning_rate": 1.4708266797690703e-05, "loss": 0.3938886225223541, "step": 2255 }, { "epoch": 0.273753185293047, "grad_norm": 1.9602938890457153, "learning_rate": 1.4705810097039677e-05, "loss": 0.06322696805000305, "step": 2256 }, { "epoch": 0.27387452979007404, "grad_norm": 2.8924646377563477, "learning_rate": 1.4703353396388651e-05, "loss": 0.20917552709579468, "step": 2257 }, { "epoch": 0.2739958742871011, "grad_norm": 3.3789329528808594, "learning_rate": 1.4700896695737626e-05, "loss": 0.5144970417022705, "step": 2258 }, { "epoch": 0.27411721878412815, "grad_norm": 1.824726939201355, "learning_rate": 1.46984399950866e-05, "loss": 0.06435342133045197, "step": 2259 }, { "epoch": 0.2742385632811552, "grad_norm": 2.550523519515991, "learning_rate": 1.4695983294435574e-05, "loss": 0.6688740849494934, "step": 2260 }, { "epoch": 0.27435990777818225, "grad_norm": 3.7656705379486084, "learning_rate": 1.4693526593784548e-05, "loss": 0.3615667223930359, "step": 2261 }, { "epoch": 0.2744812522752093, "grad_norm": 2.5979697704315186, "learning_rate": 1.4691069893133523e-05, "loss": 0.24014034867286682, "step": 2262 }, { "epoch": 0.27460259677223636, "grad_norm": 2.1243038177490234, "learning_rate": 1.4688613192482497e-05, "loss": 0.28634655475616455, "step": 2263 }, { "epoch": 0.2747239412692634, "grad_norm": 1.4423234462738037, "learning_rate": 1.4686156491831471e-05, "loss": 0.05492861941456795, "step": 2264 }, { "epoch": 0.2748452857662905, "grad_norm": 2.3820173740386963, "learning_rate": 1.4683699791180445e-05, "loss": 0.33802103996276855, "step": 2265 }, { "epoch": 0.2749666302633176, "grad_norm": 1.9587498903274536, "learning_rate": 1.468124309052942e-05, "loss": 0.20275847613811493, "step": 2266 }, { "epoch": 0.27508797476034463, "grad_norm": 1.5850027799606323, "learning_rate": 1.4678786389878394e-05, "loss": 0.174808070063591, "step": 2267 }, { "epoch": 0.2752093192573717, "grad_norm": 2.649083375930786, "learning_rate": 1.4676329689227368e-05, "loss": 0.23923219740390778, "step": 2268 }, { "epoch": 0.27533066375439874, "grad_norm": 4.099024772644043, "learning_rate": 1.4673872988576342e-05, "loss": 0.4453088045120239, "step": 2269 }, { "epoch": 0.2754520082514258, "grad_norm": 3.006272315979004, "learning_rate": 1.4671416287925317e-05, "loss": 0.4450489282608032, "step": 2270 }, { "epoch": 0.27557335274845285, "grad_norm": 2.6077582836151123, "learning_rate": 1.4668959587274291e-05, "loss": 0.1065952479839325, "step": 2271 }, { "epoch": 0.2756946972454799, "grad_norm": 2.509742021560669, "learning_rate": 1.4666502886623267e-05, "loss": 0.08338365703821182, "step": 2272 }, { "epoch": 0.27581604174250696, "grad_norm": 2.725430965423584, "learning_rate": 1.4664046185972241e-05, "loss": 0.254682719707489, "step": 2273 }, { "epoch": 0.275937386239534, "grad_norm": 2.45782470703125, "learning_rate": 1.4661589485321215e-05, "loss": 0.2728583514690399, "step": 2274 }, { "epoch": 0.2760587307365611, "grad_norm": 2.486548900604248, "learning_rate": 1.465913278467019e-05, "loss": 0.16687417030334473, "step": 2275 }, { "epoch": 0.2761800752335882, "grad_norm": 2.0167322158813477, "learning_rate": 1.4656676084019164e-05, "loss": 0.2224939465522766, "step": 2276 }, { "epoch": 0.2763014197306152, "grad_norm": 4.493525505065918, "learning_rate": 1.4654219383368138e-05, "loss": 0.19643045961856842, "step": 2277 }, { "epoch": 0.2764227642276423, "grad_norm": 2.704180955886841, "learning_rate": 1.4651762682717112e-05, "loss": 0.6486481428146362, "step": 2278 }, { "epoch": 0.27654410872466934, "grad_norm": 2.1249678134918213, "learning_rate": 1.4649305982066087e-05, "loss": 0.11037565767765045, "step": 2279 }, { "epoch": 0.2766654532216964, "grad_norm": 6.904737949371338, "learning_rate": 1.4646849281415061e-05, "loss": 0.2306111752986908, "step": 2280 }, { "epoch": 0.27678679771872344, "grad_norm": 1.558171033859253, "learning_rate": 1.4644392580764035e-05, "loss": 0.14664669334888458, "step": 2281 }, { "epoch": 0.2769081422157505, "grad_norm": 3.1155104637145996, "learning_rate": 1.464193588011301e-05, "loss": 0.48484039306640625, "step": 2282 }, { "epoch": 0.27702948671277755, "grad_norm": 1.796221137046814, "learning_rate": 1.4639479179461984e-05, "loss": 0.1460006982088089, "step": 2283 }, { "epoch": 0.27715083120980466, "grad_norm": 2.843824863433838, "learning_rate": 1.4637022478810958e-05, "loss": 0.2773866653442383, "step": 2284 }, { "epoch": 0.2772721757068317, "grad_norm": 2.0969021320343018, "learning_rate": 1.4634565778159932e-05, "loss": 0.1800985485315323, "step": 2285 }, { "epoch": 0.27739352020385877, "grad_norm": 2.214677095413208, "learning_rate": 1.4632109077508907e-05, "loss": 0.14064058661460876, "step": 2286 }, { "epoch": 0.2775148647008858, "grad_norm": 2.574021816253662, "learning_rate": 1.462965237685788e-05, "loss": 0.363839715719223, "step": 2287 }, { "epoch": 0.2776362091979129, "grad_norm": 2.657700538635254, "learning_rate": 1.4627195676206855e-05, "loss": 0.26481837034225464, "step": 2288 }, { "epoch": 0.27775755369493993, "grad_norm": 1.2845975160598755, "learning_rate": 1.462473897555583e-05, "loss": 0.06213737279176712, "step": 2289 }, { "epoch": 0.277878898191967, "grad_norm": 1.7377910614013672, "learning_rate": 1.4622282274904804e-05, "loss": 0.25193387269973755, "step": 2290 }, { "epoch": 0.27800024268899404, "grad_norm": 1.1945483684539795, "learning_rate": 1.4619825574253778e-05, "loss": 0.05514277145266533, "step": 2291 }, { "epoch": 0.2781215871860211, "grad_norm": 1.3370293378829956, "learning_rate": 1.4617368873602754e-05, "loss": 0.26924511790275574, "step": 2292 }, { "epoch": 0.2782429316830482, "grad_norm": 3.4798552989959717, "learning_rate": 1.4614912172951728e-05, "loss": 0.491172194480896, "step": 2293 }, { "epoch": 0.27836427618007525, "grad_norm": 2.978219509124756, "learning_rate": 1.4612455472300702e-05, "loss": 0.12911397218704224, "step": 2294 }, { "epoch": 0.2784856206771023, "grad_norm": 3.0245933532714844, "learning_rate": 1.4609998771649677e-05, "loss": 0.25463297963142395, "step": 2295 }, { "epoch": 0.27860696517412936, "grad_norm": 0.0010776594281196594, "learning_rate": 1.460754207099865e-05, "loss": 2.679898898350075e-05, "step": 2296 }, { "epoch": 0.2787283096711564, "grad_norm": 2.9437077045440674, "learning_rate": 1.4605085370347625e-05, "loss": 0.15635134279727936, "step": 2297 }, { "epoch": 0.27884965416818347, "grad_norm": 2.673121452331543, "learning_rate": 1.46026286696966e-05, "loss": 0.13004031777381897, "step": 2298 }, { "epoch": 0.2789709986652105, "grad_norm": 2.7737526893615723, "learning_rate": 1.4600171969045574e-05, "loss": 0.5386815667152405, "step": 2299 }, { "epoch": 0.2790923431622376, "grad_norm": 3.677978038787842, "learning_rate": 1.4597715268394548e-05, "loss": 0.24360714852809906, "step": 2300 }, { "epoch": 0.27921368765926463, "grad_norm": 2.476017475128174, "learning_rate": 1.4595258567743522e-05, "loss": 0.3138554096221924, "step": 2301 }, { "epoch": 0.2793350321562917, "grad_norm": 3.1135735511779785, "learning_rate": 1.4592801867092496e-05, "loss": 0.3930318355560303, "step": 2302 }, { "epoch": 0.2794563766533188, "grad_norm": 1.6521217823028564, "learning_rate": 1.459034516644147e-05, "loss": 0.3495832681655884, "step": 2303 }, { "epoch": 0.27957772115034585, "grad_norm": 2.9899768829345703, "learning_rate": 1.4587888465790445e-05, "loss": 0.35416388511657715, "step": 2304 }, { "epoch": 0.2796990656473729, "grad_norm": 2.8638105392456055, "learning_rate": 1.458543176513942e-05, "loss": 0.5694817304611206, "step": 2305 }, { "epoch": 0.27982041014439996, "grad_norm": 2.0999724864959717, "learning_rate": 1.4582975064488393e-05, "loss": 0.12378177046775818, "step": 2306 }, { "epoch": 0.279941754641427, "grad_norm": 0.044359240680933, "learning_rate": 1.4580518363837368e-05, "loss": 0.0005623744218610227, "step": 2307 }, { "epoch": 0.28006309913845406, "grad_norm": 3.165572166442871, "learning_rate": 1.4578061663186342e-05, "loss": 0.16220404207706451, "step": 2308 }, { "epoch": 0.2801844436354811, "grad_norm": 2.750955820083618, "learning_rate": 1.4575604962535316e-05, "loss": 0.2112129181623459, "step": 2309 }, { "epoch": 0.28030578813250817, "grad_norm": 2.1458773612976074, "learning_rate": 1.457314826188429e-05, "loss": 0.11941893398761749, "step": 2310 }, { "epoch": 0.2804271326295352, "grad_norm": 1.2971408367156982, "learning_rate": 1.4570691561233266e-05, "loss": 0.1420069932937622, "step": 2311 }, { "epoch": 0.28054847712656233, "grad_norm": 2.0736007690429688, "learning_rate": 1.456823486058224e-05, "loss": 0.19403484463691711, "step": 2312 }, { "epoch": 0.2806698216235894, "grad_norm": 1.8388627767562866, "learning_rate": 1.4565778159931215e-05, "loss": 0.46471458673477173, "step": 2313 }, { "epoch": 0.28079116612061644, "grad_norm": 2.6409003734588623, "learning_rate": 1.456332145928019e-05, "loss": 0.25268906354904175, "step": 2314 }, { "epoch": 0.2809125106176435, "grad_norm": 1.8578016757965088, "learning_rate": 1.4560864758629163e-05, "loss": 0.31751006841659546, "step": 2315 }, { "epoch": 0.28103385511467055, "grad_norm": 2.8499512672424316, "learning_rate": 1.4558408057978138e-05, "loss": 0.22498056292533875, "step": 2316 }, { "epoch": 0.2811551996116976, "grad_norm": 1.2381291389465332, "learning_rate": 1.4555951357327112e-05, "loss": 0.2600299119949341, "step": 2317 }, { "epoch": 0.28127654410872466, "grad_norm": 2.155503034591675, "learning_rate": 1.4553494656676086e-05, "loss": 0.20116908848285675, "step": 2318 }, { "epoch": 0.2813978886057517, "grad_norm": 0.005920345429331064, "learning_rate": 1.455103795602506e-05, "loss": 0.00013091710570733994, "step": 2319 }, { "epoch": 0.28151923310277877, "grad_norm": 2.8475534915924072, "learning_rate": 1.4548581255374035e-05, "loss": 0.2616243362426758, "step": 2320 }, { "epoch": 0.2816405775998059, "grad_norm": 2.4341745376586914, "learning_rate": 1.4546124554723009e-05, "loss": 0.42221981287002563, "step": 2321 }, { "epoch": 0.28176192209683293, "grad_norm": 2.084240198135376, "learning_rate": 1.4543667854071983e-05, "loss": 0.11653836071491241, "step": 2322 }, { "epoch": 0.28188326659386, "grad_norm": 1.3616430759429932, "learning_rate": 1.4541211153420957e-05, "loss": 0.03960473835468292, "step": 2323 }, { "epoch": 0.28200461109088704, "grad_norm": 2.470994710922241, "learning_rate": 1.4538754452769932e-05, "loss": 0.1367553472518921, "step": 2324 }, { "epoch": 0.2821259555879141, "grad_norm": 1.8862669467926025, "learning_rate": 1.4536297752118906e-05, "loss": 0.46348825097084045, "step": 2325 }, { "epoch": 0.28224730008494114, "grad_norm": 2.269005537033081, "learning_rate": 1.453384105146788e-05, "loss": 0.698644757270813, "step": 2326 }, { "epoch": 0.2823686445819682, "grad_norm": 2.1463327407836914, "learning_rate": 1.4531384350816855e-05, "loss": 0.3938106894493103, "step": 2327 }, { "epoch": 0.28248998907899525, "grad_norm": 4.106855869293213, "learning_rate": 1.4528927650165829e-05, "loss": 0.2572036683559418, "step": 2328 }, { "epoch": 0.2826113335760223, "grad_norm": 3.1966500282287598, "learning_rate": 1.4526470949514803e-05, "loss": 0.5454995632171631, "step": 2329 }, { "epoch": 0.2827326780730494, "grad_norm": 2.915327548980713, "learning_rate": 1.4524014248863777e-05, "loss": 0.2935107946395874, "step": 2330 }, { "epoch": 0.28285402257007647, "grad_norm": 2.243208885192871, "learning_rate": 1.4521557548212753e-05, "loss": 0.18108990788459778, "step": 2331 }, { "epoch": 0.2829753670671035, "grad_norm": 2.35526704788208, "learning_rate": 1.4519100847561728e-05, "loss": 0.40194523334503174, "step": 2332 }, { "epoch": 0.2830967115641306, "grad_norm": 3.389552593231201, "learning_rate": 1.4516644146910702e-05, "loss": 0.12860919535160065, "step": 2333 }, { "epoch": 0.28321805606115763, "grad_norm": 2.883084774017334, "learning_rate": 1.4514187446259676e-05, "loss": 0.768912672996521, "step": 2334 }, { "epoch": 0.2833394005581847, "grad_norm": 2.719496250152588, "learning_rate": 1.451173074560865e-05, "loss": 0.28342047333717346, "step": 2335 }, { "epoch": 0.28346074505521174, "grad_norm": 1.3616071939468384, "learning_rate": 1.4509274044957625e-05, "loss": 0.037327129393815994, "step": 2336 }, { "epoch": 0.2835820895522388, "grad_norm": 2.3787996768951416, "learning_rate": 1.4506817344306599e-05, "loss": 0.27518394589424133, "step": 2337 }, { "epoch": 0.28370343404926585, "grad_norm": 1.4686760902404785, "learning_rate": 1.4504360643655573e-05, "loss": 0.030330002307891846, "step": 2338 }, { "epoch": 0.2838247785462929, "grad_norm": 2.7788968086242676, "learning_rate": 1.4501903943004547e-05, "loss": 0.29226791858673096, "step": 2339 }, { "epoch": 0.28394612304332, "grad_norm": 3.7886874675750732, "learning_rate": 1.449944724235352e-05, "loss": 0.434556245803833, "step": 2340 }, { "epoch": 0.28406746754034706, "grad_norm": 2.317943572998047, "learning_rate": 1.4496990541702494e-05, "loss": 0.22898314893245697, "step": 2341 }, { "epoch": 0.2841888120373741, "grad_norm": 1.6351468563079834, "learning_rate": 1.4494533841051468e-05, "loss": 0.36461710929870605, "step": 2342 }, { "epoch": 0.28431015653440117, "grad_norm": 2.5408875942230225, "learning_rate": 1.4492077140400443e-05, "loss": 0.21778815984725952, "step": 2343 }, { "epoch": 0.2844315010314282, "grad_norm": 1.5979942083358765, "learning_rate": 1.4489620439749417e-05, "loss": 0.19117160141468048, "step": 2344 }, { "epoch": 0.2845528455284553, "grad_norm": 2.846651315689087, "learning_rate": 1.4487163739098391e-05, "loss": 0.3508620262145996, "step": 2345 }, { "epoch": 0.28467419002548233, "grad_norm": 0.7627307176589966, "learning_rate": 1.4484707038447365e-05, "loss": 0.01792456954717636, "step": 2346 }, { "epoch": 0.2847955345225094, "grad_norm": 2.633852481842041, "learning_rate": 1.448225033779634e-05, "loss": 0.16636960208415985, "step": 2347 }, { "epoch": 0.28491687901953644, "grad_norm": 2.0512325763702393, "learning_rate": 1.4479793637145314e-05, "loss": 0.33269551396369934, "step": 2348 }, { "epoch": 0.28503822351656355, "grad_norm": 1.6394566297531128, "learning_rate": 1.4477336936494288e-05, "loss": 0.225246861577034, "step": 2349 }, { "epoch": 0.2851595680135906, "grad_norm": 2.5464892387390137, "learning_rate": 1.4474880235843262e-05, "loss": 0.19317497313022614, "step": 2350 }, { "epoch": 0.28528091251061766, "grad_norm": 1.4693812131881714, "learning_rate": 1.4472423535192237e-05, "loss": 0.09994740784168243, "step": 2351 }, { "epoch": 0.2854022570076447, "grad_norm": 1.9715471267700195, "learning_rate": 1.4469966834541211e-05, "loss": 0.2948615849018097, "step": 2352 }, { "epoch": 0.28552360150467176, "grad_norm": 1.835134506225586, "learning_rate": 1.4467510133890185e-05, "loss": 0.35654348134994507, "step": 2353 }, { "epoch": 0.2856449460016988, "grad_norm": 2.886162519454956, "learning_rate": 1.446505343323916e-05, "loss": 0.445361465215683, "step": 2354 }, { "epoch": 0.2857662904987259, "grad_norm": 6.913744926452637, "learning_rate": 1.4462596732588134e-05, "loss": 0.2596283555030823, "step": 2355 }, { "epoch": 0.2858876349957529, "grad_norm": 3.093846082687378, "learning_rate": 1.4460140031937108e-05, "loss": 0.33309924602508545, "step": 2356 }, { "epoch": 0.28600897949278, "grad_norm": 2.4137792587280273, "learning_rate": 1.4457683331286084e-05, "loss": 0.3461116850376129, "step": 2357 }, { "epoch": 0.2861303239898071, "grad_norm": 1.9789154529571533, "learning_rate": 1.4455226630635058e-05, "loss": 0.47336727380752563, "step": 2358 }, { "epoch": 0.28625166848683414, "grad_norm": 2.6370766162872314, "learning_rate": 1.4452769929984032e-05, "loss": 0.5336825251579285, "step": 2359 }, { "epoch": 0.2863730129838612, "grad_norm": 2.2987093925476074, "learning_rate": 1.4450313229333007e-05, "loss": 0.16392672061920166, "step": 2360 }, { "epoch": 0.28649435748088825, "grad_norm": 1.725595235824585, "learning_rate": 1.4447856528681981e-05, "loss": 0.10611050575971603, "step": 2361 }, { "epoch": 0.2866157019779153, "grad_norm": 1.7771233320236206, "learning_rate": 1.4445399828030955e-05, "loss": 0.17881464958190918, "step": 2362 }, { "epoch": 0.28673704647494236, "grad_norm": 1.7786450386047363, "learning_rate": 1.444294312737993e-05, "loss": 0.11273477971553802, "step": 2363 }, { "epoch": 0.2868583909719694, "grad_norm": 3.6905012130737305, "learning_rate": 1.4440486426728904e-05, "loss": 0.3938071131706238, "step": 2364 }, { "epoch": 0.28697973546899647, "grad_norm": 1.7522951364517212, "learning_rate": 1.4438029726077878e-05, "loss": 0.062087565660476685, "step": 2365 }, { "epoch": 0.2871010799660235, "grad_norm": 3.22233510017395, "learning_rate": 1.4435573025426852e-05, "loss": 0.30694952607154846, "step": 2366 }, { "epoch": 0.2872224244630506, "grad_norm": 1.6954015493392944, "learning_rate": 1.4433116324775827e-05, "loss": 0.1827646791934967, "step": 2367 }, { "epoch": 0.2873437689600777, "grad_norm": 2.5658228397369385, "learning_rate": 1.44306596241248e-05, "loss": 0.7508528232574463, "step": 2368 }, { "epoch": 0.28746511345710474, "grad_norm": 2.071087121963501, "learning_rate": 1.4428202923473775e-05, "loss": 0.19768717885017395, "step": 2369 }, { "epoch": 0.2875864579541318, "grad_norm": 3.3089733123779297, "learning_rate": 1.442574622282275e-05, "loss": 0.37479057908058167, "step": 2370 }, { "epoch": 0.28770780245115884, "grad_norm": 2.419912576675415, "learning_rate": 1.4423289522171724e-05, "loss": 0.14660024642944336, "step": 2371 }, { "epoch": 0.2878291469481859, "grad_norm": 2.0513293743133545, "learning_rate": 1.4420832821520698e-05, "loss": 0.301160991191864, "step": 2372 }, { "epoch": 0.28795049144521295, "grad_norm": 1.7172777652740479, "learning_rate": 1.4418376120869672e-05, "loss": 0.08773735910654068, "step": 2373 }, { "epoch": 0.28807183594224, "grad_norm": 3.0460121631622314, "learning_rate": 1.4415919420218646e-05, "loss": 0.3944966793060303, "step": 2374 }, { "epoch": 0.28819318043926706, "grad_norm": 2.4737274646759033, "learning_rate": 1.441346271956762e-05, "loss": 0.14740464091300964, "step": 2375 }, { "epoch": 0.2883145249362941, "grad_norm": 2.25986385345459, "learning_rate": 1.4411006018916595e-05, "loss": 0.16227565705776215, "step": 2376 }, { "epoch": 0.2884358694333212, "grad_norm": 3.9188077449798584, "learning_rate": 1.440854931826557e-05, "loss": 0.174227774143219, "step": 2377 }, { "epoch": 0.2885572139303483, "grad_norm": 2.5465247631073, "learning_rate": 1.4406092617614545e-05, "loss": 0.07011450082063675, "step": 2378 }, { "epoch": 0.28867855842737533, "grad_norm": 2.8433055877685547, "learning_rate": 1.440363591696352e-05, "loss": 0.41096341609954834, "step": 2379 }, { "epoch": 0.2887999029244024, "grad_norm": 2.99009108543396, "learning_rate": 1.4401179216312494e-05, "loss": 0.13420191407203674, "step": 2380 }, { "epoch": 0.28892124742142944, "grad_norm": 4.198729991912842, "learning_rate": 1.4398722515661468e-05, "loss": 0.17982061207294464, "step": 2381 }, { "epoch": 0.2890425919184565, "grad_norm": 2.9838645458221436, "learning_rate": 1.4396265815010442e-05, "loss": 0.4002196490764618, "step": 2382 }, { "epoch": 0.28916393641548355, "grad_norm": 2.1307244300842285, "learning_rate": 1.4393809114359416e-05, "loss": 0.34108394384384155, "step": 2383 }, { "epoch": 0.2892852809125106, "grad_norm": 3.062396287918091, "learning_rate": 1.439135241370839e-05, "loss": 0.19610823690891266, "step": 2384 }, { "epoch": 0.28940662540953765, "grad_norm": 3.283600091934204, "learning_rate": 1.4388895713057365e-05, "loss": 0.291292667388916, "step": 2385 }, { "epoch": 0.28952796990656476, "grad_norm": 1.4206572771072388, "learning_rate": 1.4386439012406339e-05, "loss": 0.10331039875745773, "step": 2386 }, { "epoch": 0.2896493144035918, "grad_norm": 2.4822564125061035, "learning_rate": 1.4383982311755313e-05, "loss": 0.7069910168647766, "step": 2387 }, { "epoch": 0.28977065890061887, "grad_norm": 2.143632173538208, "learning_rate": 1.4381525611104288e-05, "loss": 0.11336402595043182, "step": 2388 }, { "epoch": 0.2898920033976459, "grad_norm": 3.152005910873413, "learning_rate": 1.4379068910453262e-05, "loss": 0.1477632224559784, "step": 2389 }, { "epoch": 0.290013347894673, "grad_norm": 2.4455134868621826, "learning_rate": 1.4376612209802236e-05, "loss": 0.3816097378730774, "step": 2390 }, { "epoch": 0.29013469239170003, "grad_norm": 3.482804775238037, "learning_rate": 1.437415550915121e-05, "loss": 0.7042055726051331, "step": 2391 }, { "epoch": 0.2902560368887271, "grad_norm": 2.1608364582061768, "learning_rate": 1.4371698808500185e-05, "loss": 0.13173907995224, "step": 2392 }, { "epoch": 0.29037738138575414, "grad_norm": 2.268423557281494, "learning_rate": 1.4369242107849159e-05, "loss": 0.2680203914642334, "step": 2393 }, { "epoch": 0.2904987258827812, "grad_norm": 13.093183517456055, "learning_rate": 1.4366785407198133e-05, "loss": 0.3524789810180664, "step": 2394 }, { "epoch": 0.29062007037980825, "grad_norm": 1.6701669692993164, "learning_rate": 1.4364328706547107e-05, "loss": 0.23719149827957153, "step": 2395 }, { "epoch": 0.29074141487683536, "grad_norm": 2.475621461868286, "learning_rate": 1.4361872005896082e-05, "loss": 0.24349063634872437, "step": 2396 }, { "epoch": 0.2908627593738624, "grad_norm": 1.499640703201294, "learning_rate": 1.4359415305245058e-05, "loss": 0.02156628668308258, "step": 2397 }, { "epoch": 0.29098410387088947, "grad_norm": 0.6849834322929382, "learning_rate": 1.4356958604594032e-05, "loss": 0.039487432688474655, "step": 2398 }, { "epoch": 0.2911054483679165, "grad_norm": 1.7018049955368042, "learning_rate": 1.4354501903943006e-05, "loss": 0.5675016641616821, "step": 2399 }, { "epoch": 0.2912267928649436, "grad_norm": 2.959660530090332, "learning_rate": 1.435204520329198e-05, "loss": 0.5447296500205994, "step": 2400 }, { "epoch": 0.2913481373619706, "grad_norm": 3.4573299884796143, "learning_rate": 1.4349588502640955e-05, "loss": 0.47932490706443787, "step": 2401 }, { "epoch": 0.2914694818589977, "grad_norm": 4.334619045257568, "learning_rate": 1.4347131801989929e-05, "loss": 0.2604002356529236, "step": 2402 }, { "epoch": 0.29159082635602473, "grad_norm": 1.2521309852600098, "learning_rate": 1.4344675101338903e-05, "loss": 0.03895732760429382, "step": 2403 }, { "epoch": 0.2917121708530518, "grad_norm": 2.6345105171203613, "learning_rate": 1.4342218400687877e-05, "loss": 0.18209978938102722, "step": 2404 }, { "epoch": 0.2918335153500789, "grad_norm": 6.25616455078125, "learning_rate": 1.4339761700036852e-05, "loss": 0.26063674688339233, "step": 2405 }, { "epoch": 0.29195485984710595, "grad_norm": 2.0457608699798584, "learning_rate": 1.4337304999385826e-05, "loss": 0.17444831132888794, "step": 2406 }, { "epoch": 0.292076204344133, "grad_norm": 1.9592281579971313, "learning_rate": 1.43348482987348e-05, "loss": 0.40388017892837524, "step": 2407 }, { "epoch": 0.29219754884116006, "grad_norm": 1.884320855140686, "learning_rate": 1.4332391598083775e-05, "loss": 0.39805707335472107, "step": 2408 }, { "epoch": 0.2923188933381871, "grad_norm": 2.758399248123169, "learning_rate": 1.4329934897432749e-05, "loss": 0.23270559310913086, "step": 2409 }, { "epoch": 0.29244023783521417, "grad_norm": 1.7793257236480713, "learning_rate": 1.4327478196781723e-05, "loss": 0.22666704654693604, "step": 2410 }, { "epoch": 0.2925615823322412, "grad_norm": 2.531182289123535, "learning_rate": 1.4325021496130697e-05, "loss": 0.24797149002552032, "step": 2411 }, { "epoch": 0.2926829268292683, "grad_norm": 0.8958587646484375, "learning_rate": 1.4322564795479672e-05, "loss": 0.03709115460515022, "step": 2412 }, { "epoch": 0.29280427132629533, "grad_norm": 1.8994909524917603, "learning_rate": 1.4320108094828646e-05, "loss": 0.3536483943462372, "step": 2413 }, { "epoch": 0.29292561582332244, "grad_norm": 1.8767240047454834, "learning_rate": 1.431765139417762e-05, "loss": 0.4431970417499542, "step": 2414 }, { "epoch": 0.2930469603203495, "grad_norm": 2.977033853530884, "learning_rate": 1.4315194693526594e-05, "loss": 0.6355453133583069, "step": 2415 }, { "epoch": 0.29316830481737655, "grad_norm": 1.2712355852127075, "learning_rate": 1.4312737992875569e-05, "loss": 0.3493037819862366, "step": 2416 }, { "epoch": 0.2932896493144036, "grad_norm": 2.980384588241577, "learning_rate": 1.4310281292224545e-05, "loss": 0.7393550276756287, "step": 2417 }, { "epoch": 0.29341099381143065, "grad_norm": 2.503192186355591, "learning_rate": 1.4307824591573519e-05, "loss": 0.43294692039489746, "step": 2418 }, { "epoch": 0.2935323383084577, "grad_norm": 1.8902491331100464, "learning_rate": 1.4305367890922493e-05, "loss": 0.31721875071525574, "step": 2419 }, { "epoch": 0.29365368280548476, "grad_norm": 1.9838643074035645, "learning_rate": 1.4302911190271467e-05, "loss": 0.270327091217041, "step": 2420 }, { "epoch": 0.2937750273025118, "grad_norm": 1.4788776636123657, "learning_rate": 1.4300454489620442e-05, "loss": 0.1401694118976593, "step": 2421 }, { "epoch": 0.29389637179953887, "grad_norm": 1.731963038444519, "learning_rate": 1.4297997788969416e-05, "loss": 0.1834172010421753, "step": 2422 }, { "epoch": 0.2940177162965659, "grad_norm": 1.6887357234954834, "learning_rate": 1.429554108831839e-05, "loss": 0.1098705381155014, "step": 2423 }, { "epoch": 0.29413906079359303, "grad_norm": 3.4765584468841553, "learning_rate": 1.4293084387667364e-05, "loss": 0.34448036551475525, "step": 2424 }, { "epoch": 0.2942604052906201, "grad_norm": 2.472482204437256, "learning_rate": 1.4290627687016339e-05, "loss": 0.6094886064529419, "step": 2425 }, { "epoch": 0.29438174978764714, "grad_norm": 2.6117072105407715, "learning_rate": 1.4288170986365313e-05, "loss": 0.3450254797935486, "step": 2426 }, { "epoch": 0.2945030942846742, "grad_norm": 2.193211317062378, "learning_rate": 1.4285714285714287e-05, "loss": 0.6642693281173706, "step": 2427 }, { "epoch": 0.29462443878170125, "grad_norm": 1.7561453580856323, "learning_rate": 1.4283257585063261e-05, "loss": 0.3937324583530426, "step": 2428 }, { "epoch": 0.2947457832787283, "grad_norm": 1.7868642807006836, "learning_rate": 1.4280800884412236e-05, "loss": 0.12268880009651184, "step": 2429 }, { "epoch": 0.29486712777575536, "grad_norm": 1.5349853038787842, "learning_rate": 1.427834418376121e-05, "loss": 0.09088528156280518, "step": 2430 }, { "epoch": 0.2949884722727824, "grad_norm": 2.605236291885376, "learning_rate": 1.4275887483110184e-05, "loss": 0.25133824348449707, "step": 2431 }, { "epoch": 0.29510981676980946, "grad_norm": 1.354604959487915, "learning_rate": 1.4273430782459158e-05, "loss": 0.043326713144779205, "step": 2432 }, { "epoch": 0.2952311612668366, "grad_norm": 2.319394111633301, "learning_rate": 1.4270974081808133e-05, "loss": 0.27239248156547546, "step": 2433 }, { "epoch": 0.2953525057638636, "grad_norm": 0.021819638088345528, "learning_rate": 1.4268517381157107e-05, "loss": 0.0002714463334996253, "step": 2434 }, { "epoch": 0.2954738502608907, "grad_norm": 1.6144486665725708, "learning_rate": 1.4266060680506081e-05, "loss": 0.12072954326868057, "step": 2435 }, { "epoch": 0.29559519475791773, "grad_norm": 3.3453211784362793, "learning_rate": 1.4263603979855055e-05, "loss": 0.24734504520893097, "step": 2436 }, { "epoch": 0.2957165392549448, "grad_norm": 0.917447566986084, "learning_rate": 1.4261147279204031e-05, "loss": 0.012377920560538769, "step": 2437 }, { "epoch": 0.29583788375197184, "grad_norm": 1.9250411987304688, "learning_rate": 1.4258690578553006e-05, "loss": 0.23009344935417175, "step": 2438 }, { "epoch": 0.2959592282489989, "grad_norm": 1.2952210903167725, "learning_rate": 1.425623387790198e-05, "loss": 0.136221244931221, "step": 2439 }, { "epoch": 0.29608057274602595, "grad_norm": 2.7569329738616943, "learning_rate": 1.4253777177250954e-05, "loss": 0.3644600510597229, "step": 2440 }, { "epoch": 0.296201917243053, "grad_norm": 2.0895564556121826, "learning_rate": 1.4251320476599928e-05, "loss": 0.11592680215835571, "step": 2441 }, { "epoch": 0.2963232617400801, "grad_norm": 2.5663390159606934, "learning_rate": 1.4248863775948903e-05, "loss": 0.3135583996772766, "step": 2442 }, { "epoch": 0.29644460623710717, "grad_norm": 2.4366345405578613, "learning_rate": 1.4246407075297877e-05, "loss": 0.5698183178901672, "step": 2443 }, { "epoch": 0.2965659507341342, "grad_norm": 3.226321220397949, "learning_rate": 1.4243950374646851e-05, "loss": 0.4014982283115387, "step": 2444 }, { "epoch": 0.2966872952311613, "grad_norm": 2.4463469982147217, "learning_rate": 1.4241493673995825e-05, "loss": 0.5382885932922363, "step": 2445 }, { "epoch": 0.29680863972818833, "grad_norm": 2.7886500358581543, "learning_rate": 1.42390369733448e-05, "loss": 0.1627078503370285, "step": 2446 }, { "epoch": 0.2969299842252154, "grad_norm": 2.2656824588775635, "learning_rate": 1.4236580272693774e-05, "loss": 0.32474666833877563, "step": 2447 }, { "epoch": 0.29705132872224244, "grad_norm": 1.7347723245620728, "learning_rate": 1.4234123572042748e-05, "loss": 0.1043761670589447, "step": 2448 }, { "epoch": 0.2971726732192695, "grad_norm": 1.877159833908081, "learning_rate": 1.4231666871391722e-05, "loss": 0.3229933977127075, "step": 2449 }, { "epoch": 0.29729401771629654, "grad_norm": 1.9499645233154297, "learning_rate": 1.4229210170740697e-05, "loss": 0.4199894666671753, "step": 2450 }, { "epoch": 0.29741536221332365, "grad_norm": 1.3216056823730469, "learning_rate": 1.4226753470089671e-05, "loss": 0.048775821924209595, "step": 2451 }, { "epoch": 0.2975367067103507, "grad_norm": 2.8225107192993164, "learning_rate": 1.4224296769438645e-05, "loss": 0.23263612389564514, "step": 2452 }, { "epoch": 0.29765805120737776, "grad_norm": 2.0464394092559814, "learning_rate": 1.422184006878762e-05, "loss": 0.3901311755180359, "step": 2453 }, { "epoch": 0.2977793957044048, "grad_norm": 2.7550644874572754, "learning_rate": 1.4219383368136594e-05, "loss": 0.42294594645500183, "step": 2454 }, { "epoch": 0.29790074020143187, "grad_norm": 2.5105109214782715, "learning_rate": 1.4216926667485568e-05, "loss": 0.5021324157714844, "step": 2455 }, { "epoch": 0.2980220846984589, "grad_norm": 1.6055526733398438, "learning_rate": 1.4214469966834544e-05, "loss": 0.20961758494377136, "step": 2456 }, { "epoch": 0.298143429195486, "grad_norm": 2.4210283756256104, "learning_rate": 1.4212013266183518e-05, "loss": 0.25656208395957947, "step": 2457 }, { "epoch": 0.29826477369251303, "grad_norm": 2.8255374431610107, "learning_rate": 1.4209556565532493e-05, "loss": 0.302267849445343, "step": 2458 }, { "epoch": 0.2983861181895401, "grad_norm": 3.174891471862793, "learning_rate": 1.4207099864881467e-05, "loss": 0.2618406414985657, "step": 2459 }, { "epoch": 0.29850746268656714, "grad_norm": 4.338184833526611, "learning_rate": 1.4204643164230441e-05, "loss": 0.5188778638839722, "step": 2460 }, { "epoch": 0.29862880718359425, "grad_norm": 3.044785261154175, "learning_rate": 1.4202186463579415e-05, "loss": 0.2174786925315857, "step": 2461 }, { "epoch": 0.2987501516806213, "grad_norm": 3.8430070877075195, "learning_rate": 1.419972976292839e-05, "loss": 0.6191221475601196, "step": 2462 }, { "epoch": 0.29887149617764835, "grad_norm": 2.5778520107269287, "learning_rate": 1.4197273062277364e-05, "loss": 0.25278812646865845, "step": 2463 }, { "epoch": 0.2989928406746754, "grad_norm": 2.2133378982543945, "learning_rate": 1.4194816361626338e-05, "loss": 0.30241888761520386, "step": 2464 }, { "epoch": 0.29911418517170246, "grad_norm": 2.2290940284729004, "learning_rate": 1.4192359660975312e-05, "loss": 0.26590943336486816, "step": 2465 }, { "epoch": 0.2992355296687295, "grad_norm": 3.7877848148345947, "learning_rate": 1.4189902960324287e-05, "loss": 0.3344946801662445, "step": 2466 }, { "epoch": 0.29935687416575657, "grad_norm": 3.0269033908843994, "learning_rate": 1.418744625967326e-05, "loss": 0.4494978189468384, "step": 2467 }, { "epoch": 0.2994782186627836, "grad_norm": 1.7358289957046509, "learning_rate": 1.4184989559022235e-05, "loss": 0.36531975865364075, "step": 2468 }, { "epoch": 0.2995995631598107, "grad_norm": 2.7170467376708984, "learning_rate": 1.418253285837121e-05, "loss": 0.12027442455291748, "step": 2469 }, { "epoch": 0.2997209076568378, "grad_norm": 2.8832855224609375, "learning_rate": 1.4180076157720184e-05, "loss": 0.5412907600402832, "step": 2470 }, { "epoch": 0.29984225215386484, "grad_norm": 2.934298515319824, "learning_rate": 1.4177619457069158e-05, "loss": 0.4732085168361664, "step": 2471 }, { "epoch": 0.2999635966508919, "grad_norm": 1.8136005401611328, "learning_rate": 1.4175162756418132e-05, "loss": 0.1498020738363266, "step": 2472 }, { "epoch": 0.30008494114791895, "grad_norm": 2.257565975189209, "learning_rate": 1.4172706055767106e-05, "loss": 0.13816264271736145, "step": 2473 }, { "epoch": 0.300206285644946, "grad_norm": 1.8980768918991089, "learning_rate": 1.417024935511608e-05, "loss": 0.21113350987434387, "step": 2474 }, { "epoch": 0.30032763014197306, "grad_norm": 1.987173080444336, "learning_rate": 1.4167792654465055e-05, "loss": 0.1198820099234581, "step": 2475 }, { "epoch": 0.3004489746390001, "grad_norm": 2.3875110149383545, "learning_rate": 1.4165335953814027e-05, "loss": 0.3360016942024231, "step": 2476 }, { "epoch": 0.30057031913602716, "grad_norm": 2.5721499919891357, "learning_rate": 1.4162879253163002e-05, "loss": 0.39307811856269836, "step": 2477 }, { "epoch": 0.3006916636330542, "grad_norm": 1.5788015127182007, "learning_rate": 1.4160422552511976e-05, "loss": 0.18997590243816376, "step": 2478 }, { "epoch": 0.3008130081300813, "grad_norm": 1.8559566736221313, "learning_rate": 1.415796585186095e-05, "loss": 0.39030617475509644, "step": 2479 }, { "epoch": 0.3009343526271084, "grad_norm": 2.378830909729004, "learning_rate": 1.4155509151209925e-05, "loss": 0.22130689024925232, "step": 2480 }, { "epoch": 0.30105569712413544, "grad_norm": 2.7740321159362793, "learning_rate": 1.4153052450558899e-05, "loss": 0.21331149339675903, "step": 2481 }, { "epoch": 0.3011770416211625, "grad_norm": 1.0219964981079102, "learning_rate": 1.4150595749907875e-05, "loss": 0.014357716776430607, "step": 2482 }, { "epoch": 0.30129838611818954, "grad_norm": 2.1695570945739746, "learning_rate": 1.4148139049256849e-05, "loss": 0.3297964930534363, "step": 2483 }, { "epoch": 0.3014197306152166, "grad_norm": 2.3358898162841797, "learning_rate": 1.4145682348605823e-05, "loss": 0.5272507667541504, "step": 2484 }, { "epoch": 0.30154107511224365, "grad_norm": 2.6879382133483887, "learning_rate": 1.4143225647954797e-05, "loss": 0.16552339494228363, "step": 2485 }, { "epoch": 0.3016624196092707, "grad_norm": 0.8002064824104309, "learning_rate": 1.4140768947303772e-05, "loss": 0.024760831147432327, "step": 2486 }, { "epoch": 0.30178376410629776, "grad_norm": 2.4009058475494385, "learning_rate": 1.4138312246652746e-05, "loss": 0.28254228830337524, "step": 2487 }, { "epoch": 0.3019051086033248, "grad_norm": 2.0572311878204346, "learning_rate": 1.413585554600172e-05, "loss": 0.11042163521051407, "step": 2488 }, { "epoch": 0.3020264531003519, "grad_norm": 2.5954744815826416, "learning_rate": 1.4133398845350695e-05, "loss": 0.3538762331008911, "step": 2489 }, { "epoch": 0.302147797597379, "grad_norm": 1.7822202444076538, "learning_rate": 1.4130942144699669e-05, "loss": 0.16841650009155273, "step": 2490 }, { "epoch": 0.30226914209440603, "grad_norm": 1.5831629037857056, "learning_rate": 1.4128485444048643e-05, "loss": 0.1426149308681488, "step": 2491 }, { "epoch": 0.3023904865914331, "grad_norm": 1.7833654880523682, "learning_rate": 1.4126028743397617e-05, "loss": 0.06926427036523819, "step": 2492 }, { "epoch": 0.30251183108846014, "grad_norm": 1.857125997543335, "learning_rate": 1.4123572042746592e-05, "loss": 0.38097965717315674, "step": 2493 }, { "epoch": 0.3026331755854872, "grad_norm": 2.433147430419922, "learning_rate": 1.4121115342095566e-05, "loss": 0.4175128936767578, "step": 2494 }, { "epoch": 0.30275452008251424, "grad_norm": 1.8602956533432007, "learning_rate": 1.411865864144454e-05, "loss": 0.16847746074199677, "step": 2495 }, { "epoch": 0.3028758645795413, "grad_norm": 2.69030499458313, "learning_rate": 1.4116201940793514e-05, "loss": 0.2057284712791443, "step": 2496 }, { "epoch": 0.30299720907656835, "grad_norm": 1.6462435722351074, "learning_rate": 1.4113745240142489e-05, "loss": 0.1570907086133957, "step": 2497 }, { "epoch": 0.30311855357359546, "grad_norm": 1.7094794511795044, "learning_rate": 1.4111288539491463e-05, "loss": 0.3432033956050873, "step": 2498 }, { "epoch": 0.3032398980706225, "grad_norm": 1.9925639629364014, "learning_rate": 1.4108831838840437e-05, "loss": 0.3235139846801758, "step": 2499 }, { "epoch": 0.30336124256764957, "grad_norm": 1.7073578834533691, "learning_rate": 1.4106375138189411e-05, "loss": 0.2412710338830948, "step": 2500 }, { "epoch": 0.3034825870646766, "grad_norm": 3.1426279544830322, "learning_rate": 1.4103918437538386e-05, "loss": 0.6673815846443176, "step": 2501 }, { "epoch": 0.3036039315617037, "grad_norm": 1.851920485496521, "learning_rate": 1.4101461736887362e-05, "loss": 0.14570853114128113, "step": 2502 }, { "epoch": 0.30372527605873073, "grad_norm": 2.583951234817505, "learning_rate": 1.4099005036236336e-05, "loss": 0.24237391352653503, "step": 2503 }, { "epoch": 0.3038466205557578, "grad_norm": 1.823894739151001, "learning_rate": 1.409654833558531e-05, "loss": 0.3425629734992981, "step": 2504 }, { "epoch": 0.30396796505278484, "grad_norm": 2.8365423679351807, "learning_rate": 1.4094091634934284e-05, "loss": 0.26538074016571045, "step": 2505 }, { "epoch": 0.3040893095498119, "grad_norm": 2.6532278060913086, "learning_rate": 1.4091634934283259e-05, "loss": 0.3051794767379761, "step": 2506 }, { "epoch": 0.304210654046839, "grad_norm": 1.5138508081436157, "learning_rate": 1.4089178233632233e-05, "loss": 0.05153714492917061, "step": 2507 }, { "epoch": 0.30433199854386606, "grad_norm": 2.6453068256378174, "learning_rate": 1.4086721532981207e-05, "loss": 0.5268983244895935, "step": 2508 }, { "epoch": 0.3044533430408931, "grad_norm": 2.8347370624542236, "learning_rate": 1.4084264832330181e-05, "loss": 0.624761164188385, "step": 2509 }, { "epoch": 0.30457468753792016, "grad_norm": 0.3800169825553894, "learning_rate": 1.4081808131679156e-05, "loss": 0.00921088457107544, "step": 2510 }, { "epoch": 0.3046960320349472, "grad_norm": 1.8986505270004272, "learning_rate": 1.407935143102813e-05, "loss": 0.13484662771224976, "step": 2511 }, { "epoch": 0.30481737653197427, "grad_norm": 1.785301685333252, "learning_rate": 1.4076894730377104e-05, "loss": 0.27752214670181274, "step": 2512 }, { "epoch": 0.3049387210290013, "grad_norm": 2.454313039779663, "learning_rate": 1.4074438029726078e-05, "loss": 0.3176259398460388, "step": 2513 }, { "epoch": 0.3050600655260284, "grad_norm": 2.9403469562530518, "learning_rate": 1.4071981329075053e-05, "loss": 0.29386773705482483, "step": 2514 }, { "epoch": 0.30518141002305543, "grad_norm": 1.5787403583526611, "learning_rate": 1.4069524628424027e-05, "loss": 0.5370906591415405, "step": 2515 }, { "epoch": 0.3053027545200825, "grad_norm": 1.9172804355621338, "learning_rate": 1.4067067927773001e-05, "loss": 0.23191463947296143, "step": 2516 }, { "epoch": 0.3054240990171096, "grad_norm": 1.754380464553833, "learning_rate": 1.4064611227121975e-05, "loss": 0.30815523862838745, "step": 2517 }, { "epoch": 0.30554544351413665, "grad_norm": 3.231311082839966, "learning_rate": 1.406215452647095e-05, "loss": 0.4425506591796875, "step": 2518 }, { "epoch": 0.3056667880111637, "grad_norm": 2.873631477355957, "learning_rate": 1.4059697825819924e-05, "loss": 0.11558450758457184, "step": 2519 }, { "epoch": 0.30578813250819076, "grad_norm": 2.3214457035064697, "learning_rate": 1.4057241125168898e-05, "loss": 0.22642116248607635, "step": 2520 }, { "epoch": 0.3059094770052178, "grad_norm": 2.071108341217041, "learning_rate": 1.4054784424517872e-05, "loss": 0.39317962527275085, "step": 2521 }, { "epoch": 0.30603082150224487, "grad_norm": 0.2151782512664795, "learning_rate": 1.4052327723866848e-05, "loss": 0.0017753503052517772, "step": 2522 }, { "epoch": 0.3061521659992719, "grad_norm": 2.5965371131896973, "learning_rate": 1.4049871023215823e-05, "loss": 0.246237114071846, "step": 2523 }, { "epoch": 0.306273510496299, "grad_norm": 2.4071786403656006, "learning_rate": 1.4047414322564797e-05, "loss": 0.4483923316001892, "step": 2524 }, { "epoch": 0.306394854993326, "grad_norm": 2.2549428939819336, "learning_rate": 1.4044957621913771e-05, "loss": 0.13364174962043762, "step": 2525 }, { "epoch": 0.30651619949035314, "grad_norm": 4.5541300773620605, "learning_rate": 1.4042500921262745e-05, "loss": 0.4089297950267792, "step": 2526 }, { "epoch": 0.3066375439873802, "grad_norm": 2.6574063301086426, "learning_rate": 1.404004422061172e-05, "loss": 0.20073917508125305, "step": 2527 }, { "epoch": 0.30675888848440724, "grad_norm": 2.037043333053589, "learning_rate": 1.4037587519960694e-05, "loss": 0.21796780824661255, "step": 2528 }, { "epoch": 0.3068802329814343, "grad_norm": 2.436596632003784, "learning_rate": 1.4035130819309668e-05, "loss": 0.4355173408985138, "step": 2529 }, { "epoch": 0.30700157747846135, "grad_norm": 2.3366832733154297, "learning_rate": 1.4032674118658642e-05, "loss": 0.13320335745811462, "step": 2530 }, { "epoch": 0.3071229219754884, "grad_norm": 2.294221878051758, "learning_rate": 1.4030217418007617e-05, "loss": 0.3165469467639923, "step": 2531 }, { "epoch": 0.30724426647251546, "grad_norm": 5.119211196899414, "learning_rate": 1.4027760717356591e-05, "loss": 0.9067642688751221, "step": 2532 }, { "epoch": 0.3073656109695425, "grad_norm": 2.33833909034729, "learning_rate": 1.4025304016705565e-05, "loss": 0.284976065158844, "step": 2533 }, { "epoch": 0.30748695546656957, "grad_norm": 3.3833560943603516, "learning_rate": 1.402284731605454e-05, "loss": 0.16957956552505493, "step": 2534 }, { "epoch": 0.3076082999635967, "grad_norm": 1.6084033250808716, "learning_rate": 1.4020390615403514e-05, "loss": 0.49287718534469604, "step": 2535 }, { "epoch": 0.30772964446062373, "grad_norm": 3.138307809829712, "learning_rate": 1.4017933914752488e-05, "loss": 0.3367580473423004, "step": 2536 }, { "epoch": 0.3078509889576508, "grad_norm": 2.3280084133148193, "learning_rate": 1.4015477214101462e-05, "loss": 0.5034416913986206, "step": 2537 }, { "epoch": 0.30797233345467784, "grad_norm": 2.856696605682373, "learning_rate": 1.4013020513450437e-05, "loss": 0.1836393177509308, "step": 2538 }, { "epoch": 0.3080936779517049, "grad_norm": 2.4697892665863037, "learning_rate": 1.401056381279941e-05, "loss": 0.16828244924545288, "step": 2539 }, { "epoch": 0.30821502244873195, "grad_norm": 2.539705753326416, "learning_rate": 1.4008107112148385e-05, "loss": 0.4240834712982178, "step": 2540 }, { "epoch": 0.308336366945759, "grad_norm": 2.0786688327789307, "learning_rate": 1.400565041149736e-05, "loss": 0.10091252624988556, "step": 2541 }, { "epoch": 0.30845771144278605, "grad_norm": 3.49723219871521, "learning_rate": 1.4003193710846335e-05, "loss": 0.9220690727233887, "step": 2542 }, { "epoch": 0.3085790559398131, "grad_norm": 3.2246007919311523, "learning_rate": 1.400073701019531e-05, "loss": 0.3404640257358551, "step": 2543 }, { "epoch": 0.3087004004368402, "grad_norm": 2.174621105194092, "learning_rate": 1.3998280309544284e-05, "loss": 0.14235185086727142, "step": 2544 }, { "epoch": 0.30882174493386727, "grad_norm": 2.7580313682556152, "learning_rate": 1.3995823608893258e-05, "loss": 0.38407284021377563, "step": 2545 }, { "epoch": 0.3089430894308943, "grad_norm": 2.5175609588623047, "learning_rate": 1.3993366908242232e-05, "loss": 0.405870258808136, "step": 2546 }, { "epoch": 0.3090644339279214, "grad_norm": 2.1974775791168213, "learning_rate": 1.3990910207591207e-05, "loss": 0.31769827008247375, "step": 2547 }, { "epoch": 0.30918577842494843, "grad_norm": 2.1293437480926514, "learning_rate": 1.398845350694018e-05, "loss": 0.21596483886241913, "step": 2548 }, { "epoch": 0.3093071229219755, "grad_norm": 2.270765542984009, "learning_rate": 1.3985996806289155e-05, "loss": 0.4722200036048889, "step": 2549 }, { "epoch": 0.30942846741900254, "grad_norm": 2.49306583404541, "learning_rate": 1.398354010563813e-05, "loss": 0.26866334676742554, "step": 2550 }, { "epoch": 0.3095498119160296, "grad_norm": 2.2344727516174316, "learning_rate": 1.3981083404987104e-05, "loss": 0.489822119474411, "step": 2551 }, { "epoch": 0.30967115641305665, "grad_norm": 1.7446762323379517, "learning_rate": 1.3978626704336078e-05, "loss": 0.1108304038643837, "step": 2552 }, { "epoch": 0.3097925009100837, "grad_norm": 2.0523681640625, "learning_rate": 1.3976170003685052e-05, "loss": 0.04370498284697533, "step": 2553 }, { "epoch": 0.3099138454071108, "grad_norm": 1.4232988357543945, "learning_rate": 1.3973713303034026e-05, "loss": 0.10781297832727432, "step": 2554 }, { "epoch": 0.31003518990413786, "grad_norm": 3.349212884902954, "learning_rate": 1.3971256602383e-05, "loss": 0.06405492126941681, "step": 2555 }, { "epoch": 0.3101565344011649, "grad_norm": 1.9357292652130127, "learning_rate": 1.3968799901731975e-05, "loss": 0.3964538276195526, "step": 2556 }, { "epoch": 0.31027787889819197, "grad_norm": 2.4218809604644775, "learning_rate": 1.396634320108095e-05, "loss": 0.24902822077274323, "step": 2557 }, { "epoch": 0.310399223395219, "grad_norm": 4.099609375, "learning_rate": 1.3963886500429923e-05, "loss": 0.4399346709251404, "step": 2558 }, { "epoch": 0.3105205678922461, "grad_norm": 2.6035866737365723, "learning_rate": 1.3961429799778898e-05, "loss": 0.15043525397777557, "step": 2559 }, { "epoch": 0.31064191238927313, "grad_norm": 2.88090443611145, "learning_rate": 1.3958973099127872e-05, "loss": 0.15296348929405212, "step": 2560 }, { "epoch": 0.3107632568863002, "grad_norm": 3.220996141433716, "learning_rate": 1.3956516398476846e-05, "loss": 0.27642595767974854, "step": 2561 }, { "epoch": 0.31088460138332724, "grad_norm": 4.000929355621338, "learning_rate": 1.3954059697825822e-05, "loss": 0.29429298639297485, "step": 2562 }, { "epoch": 0.31100594588035435, "grad_norm": 2.1541647911071777, "learning_rate": 1.3951602997174796e-05, "loss": 0.4215291738510132, "step": 2563 }, { "epoch": 0.3111272903773814, "grad_norm": 2.1848764419555664, "learning_rate": 1.394914629652377e-05, "loss": 0.4602106809616089, "step": 2564 }, { "epoch": 0.31124863487440846, "grad_norm": 2.27817702293396, "learning_rate": 1.3946689595872745e-05, "loss": 0.49429088830947876, "step": 2565 }, { "epoch": 0.3113699793714355, "grad_norm": 2.1268224716186523, "learning_rate": 1.394423289522172e-05, "loss": 0.16737808287143707, "step": 2566 }, { "epoch": 0.31149132386846257, "grad_norm": 2.6273324489593506, "learning_rate": 1.3941776194570693e-05, "loss": 0.6507529616355896, "step": 2567 }, { "epoch": 0.3116126683654896, "grad_norm": 2.917470693588257, "learning_rate": 1.3939319493919668e-05, "loss": 0.6891695261001587, "step": 2568 }, { "epoch": 0.3117340128625167, "grad_norm": 5.002813339233398, "learning_rate": 1.3936862793268642e-05, "loss": 0.16554062068462372, "step": 2569 }, { "epoch": 0.31185535735954373, "grad_norm": 0.9612758159637451, "learning_rate": 1.3934406092617616e-05, "loss": 0.08712995052337646, "step": 2570 }, { "epoch": 0.3119767018565708, "grad_norm": 2.228410243988037, "learning_rate": 1.393194939196659e-05, "loss": 0.5029768347740173, "step": 2571 }, { "epoch": 0.3120980463535979, "grad_norm": 1.271191120147705, "learning_rate": 1.3929492691315565e-05, "loss": 0.05700064077973366, "step": 2572 }, { "epoch": 0.31221939085062494, "grad_norm": 3.567169189453125, "learning_rate": 1.3927035990664539e-05, "loss": 0.17123110592365265, "step": 2573 }, { "epoch": 0.312340735347652, "grad_norm": 2.542236566543579, "learning_rate": 1.3924579290013513e-05, "loss": 0.332086443901062, "step": 2574 }, { "epoch": 0.31246207984467905, "grad_norm": 1.2240946292877197, "learning_rate": 1.3922122589362488e-05, "loss": 0.04972168803215027, "step": 2575 }, { "epoch": 0.3125834243417061, "grad_norm": 2.9079947471618652, "learning_rate": 1.3919665888711462e-05, "loss": 0.6036537885665894, "step": 2576 }, { "epoch": 0.31270476883873316, "grad_norm": 2.383118152618408, "learning_rate": 1.3917209188060436e-05, "loss": 0.25742924213409424, "step": 2577 }, { "epoch": 0.3128261133357602, "grad_norm": 1.5192009210586548, "learning_rate": 1.391475248740941e-05, "loss": 0.14922620356082916, "step": 2578 }, { "epoch": 0.31294745783278727, "grad_norm": 1.5510634183883667, "learning_rate": 1.3912295786758385e-05, "loss": 0.46871283650398254, "step": 2579 }, { "epoch": 0.3130688023298143, "grad_norm": 2.901395559310913, "learning_rate": 1.3909839086107359e-05, "loss": 0.3961530923843384, "step": 2580 }, { "epoch": 0.3131901468268414, "grad_norm": 2.473994731903076, "learning_rate": 1.3907382385456335e-05, "loss": 0.06779327243566513, "step": 2581 }, { "epoch": 0.3133114913238685, "grad_norm": 2.99898362159729, "learning_rate": 1.3904925684805309e-05, "loss": 0.24095484614372253, "step": 2582 }, { "epoch": 0.31343283582089554, "grad_norm": 1.9804432392120361, "learning_rate": 1.3902468984154283e-05, "loss": 0.14461076259613037, "step": 2583 }, { "epoch": 0.3135541803179226, "grad_norm": 3.6258225440979004, "learning_rate": 1.3900012283503258e-05, "loss": 0.39698484539985657, "step": 2584 }, { "epoch": 0.31367552481494965, "grad_norm": 2.218045234680176, "learning_rate": 1.3897555582852232e-05, "loss": 0.38136905431747437, "step": 2585 }, { "epoch": 0.3137968693119767, "grad_norm": 3.269705057144165, "learning_rate": 1.3895098882201206e-05, "loss": 0.6193602085113525, "step": 2586 }, { "epoch": 0.31391821380900375, "grad_norm": 3.008082628250122, "learning_rate": 1.389264218155018e-05, "loss": 0.506243109703064, "step": 2587 }, { "epoch": 0.3140395583060308, "grad_norm": 3.4504647254943848, "learning_rate": 1.3890185480899155e-05, "loss": 0.4054690897464752, "step": 2588 }, { "epoch": 0.31416090280305786, "grad_norm": 2.2036619186401367, "learning_rate": 1.3887728780248129e-05, "loss": 0.21069829165935516, "step": 2589 }, { "epoch": 0.3142822473000849, "grad_norm": 2.6821749210357666, "learning_rate": 1.3885272079597103e-05, "loss": 0.06172182410955429, "step": 2590 }, { "epoch": 0.314403591797112, "grad_norm": 4.993224143981934, "learning_rate": 1.3882815378946077e-05, "loss": 0.4049087166786194, "step": 2591 }, { "epoch": 0.3145249362941391, "grad_norm": 2.4747045040130615, "learning_rate": 1.3880358678295052e-05, "loss": 0.20540593564510345, "step": 2592 }, { "epoch": 0.31464628079116613, "grad_norm": 1.2817527055740356, "learning_rate": 1.3877901977644026e-05, "loss": 0.07196521759033203, "step": 2593 }, { "epoch": 0.3147676252881932, "grad_norm": 2.2558658123016357, "learning_rate": 1.3875445276993e-05, "loss": 0.06774863600730896, "step": 2594 }, { "epoch": 0.31488896978522024, "grad_norm": 2.3449127674102783, "learning_rate": 1.3872988576341974e-05, "loss": 0.16595718264579773, "step": 2595 }, { "epoch": 0.3150103142822473, "grad_norm": 1.9539029598236084, "learning_rate": 1.3870531875690949e-05, "loss": 0.1529058814048767, "step": 2596 }, { "epoch": 0.31513165877927435, "grad_norm": 4.238483905792236, "learning_rate": 1.3868075175039923e-05, "loss": 0.563798189163208, "step": 2597 }, { "epoch": 0.3152530032763014, "grad_norm": 2.3801801204681396, "learning_rate": 1.3865618474388897e-05, "loss": 0.3669548034667969, "step": 2598 }, { "epoch": 0.31537434777332846, "grad_norm": 2.9926679134368896, "learning_rate": 1.3863161773737871e-05, "loss": 0.26389652490615845, "step": 2599 }, { "epoch": 0.31549569227035557, "grad_norm": 2.5304622650146484, "learning_rate": 1.3860705073086846e-05, "loss": 0.3482251465320587, "step": 2600 }, { "epoch": 0.3156170367673826, "grad_norm": 2.2760822772979736, "learning_rate": 1.3858248372435822e-05, "loss": 0.25967150926589966, "step": 2601 }, { "epoch": 0.3157383812644097, "grad_norm": 1.1612865924835205, "learning_rate": 1.3855791671784796e-05, "loss": 0.09870055317878723, "step": 2602 }, { "epoch": 0.3158597257614367, "grad_norm": 2.1818089485168457, "learning_rate": 1.385333497113377e-05, "loss": 0.24336664378643036, "step": 2603 }, { "epoch": 0.3159810702584638, "grad_norm": 3.514836072921753, "learning_rate": 1.3850878270482744e-05, "loss": 0.5613154172897339, "step": 2604 }, { "epoch": 0.31610241475549083, "grad_norm": 3.2239785194396973, "learning_rate": 1.3848421569831719e-05, "loss": 0.5051560997962952, "step": 2605 }, { "epoch": 0.3162237592525179, "grad_norm": 2.4050111770629883, "learning_rate": 1.3845964869180693e-05, "loss": 0.23731665313243866, "step": 2606 }, { "epoch": 0.31634510374954494, "grad_norm": 2.546924591064453, "learning_rate": 1.3843508168529667e-05, "loss": 0.1069282665848732, "step": 2607 }, { "epoch": 0.316466448246572, "grad_norm": 2.6186447143554688, "learning_rate": 1.3841051467878641e-05, "loss": 0.14223074913024902, "step": 2608 }, { "epoch": 0.31658779274359905, "grad_norm": 2.2534494400024414, "learning_rate": 1.3838594767227616e-05, "loss": 0.200982928276062, "step": 2609 }, { "epoch": 0.31670913724062616, "grad_norm": 3.0203487873077393, "learning_rate": 1.383613806657659e-05, "loss": 0.4329514503479004, "step": 2610 }, { "epoch": 0.3168304817376532, "grad_norm": 2.4250941276550293, "learning_rate": 1.3833681365925564e-05, "loss": 0.5131547451019287, "step": 2611 }, { "epoch": 0.31695182623468027, "grad_norm": 2.658128261566162, "learning_rate": 1.3831224665274537e-05, "loss": 0.4151817262172699, "step": 2612 }, { "epoch": 0.3170731707317073, "grad_norm": 3.0296595096588135, "learning_rate": 1.3828767964623511e-05, "loss": 0.36928361654281616, "step": 2613 }, { "epoch": 0.3171945152287344, "grad_norm": 2.8335986137390137, "learning_rate": 1.3826311263972485e-05, "loss": 0.29700344800949097, "step": 2614 }, { "epoch": 0.31731585972576143, "grad_norm": 2.5099239349365234, "learning_rate": 1.382385456332146e-05, "loss": 0.35675522685050964, "step": 2615 }, { "epoch": 0.3174372042227885, "grad_norm": 2.2245917320251465, "learning_rate": 1.3821397862670434e-05, "loss": 0.268341600894928, "step": 2616 }, { "epoch": 0.31755854871981554, "grad_norm": 2.405837297439575, "learning_rate": 1.3818941162019408e-05, "loss": 0.37409159541130066, "step": 2617 }, { "epoch": 0.3176798932168426, "grad_norm": 2.6217498779296875, "learning_rate": 1.3816484461368382e-05, "loss": 0.3058534264564514, "step": 2618 }, { "epoch": 0.3178012377138697, "grad_norm": 2.3653597831726074, "learning_rate": 1.3814027760717357e-05, "loss": 0.4587075114250183, "step": 2619 }, { "epoch": 0.31792258221089675, "grad_norm": 1.733944296836853, "learning_rate": 1.381157106006633e-05, "loss": 0.1447191834449768, "step": 2620 }, { "epoch": 0.3180439267079238, "grad_norm": 1.9843920469284058, "learning_rate": 1.3809114359415305e-05, "loss": 0.22765451669692993, "step": 2621 }, { "epoch": 0.31816527120495086, "grad_norm": 2.412477970123291, "learning_rate": 1.380665765876428e-05, "loss": 0.49215835332870483, "step": 2622 }, { "epoch": 0.3182866157019779, "grad_norm": 3.0536649227142334, "learning_rate": 1.3804200958113254e-05, "loss": 0.22170889377593994, "step": 2623 }, { "epoch": 0.31840796019900497, "grad_norm": 2.3984286785125732, "learning_rate": 1.3801744257462228e-05, "loss": 0.26721903681755066, "step": 2624 }, { "epoch": 0.318529304696032, "grad_norm": 2.1126484870910645, "learning_rate": 1.3799287556811202e-05, "loss": 0.39440837502479553, "step": 2625 }, { "epoch": 0.3186506491930591, "grad_norm": 1.7480491399765015, "learning_rate": 1.3796830856160176e-05, "loss": 0.07162956148386002, "step": 2626 }, { "epoch": 0.31877199369008613, "grad_norm": 3.1320767402648926, "learning_rate": 1.3794374155509152e-05, "loss": 0.4290386140346527, "step": 2627 }, { "epoch": 0.31889333818711324, "grad_norm": 3.318819522857666, "learning_rate": 1.3791917454858127e-05, "loss": 0.40751951932907104, "step": 2628 }, { "epoch": 0.3190146826841403, "grad_norm": 3.5489182472229004, "learning_rate": 1.37894607542071e-05, "loss": 0.22148944437503815, "step": 2629 }, { "epoch": 0.31913602718116735, "grad_norm": 2.1631932258605957, "learning_rate": 1.3787004053556075e-05, "loss": 0.43457284569740295, "step": 2630 }, { "epoch": 0.3192573716781944, "grad_norm": 3.955735445022583, "learning_rate": 1.378454735290505e-05, "loss": 0.43126925826072693, "step": 2631 }, { "epoch": 0.31937871617522146, "grad_norm": 0.3669489324092865, "learning_rate": 1.3782090652254024e-05, "loss": 0.0014458309160545468, "step": 2632 }, { "epoch": 0.3195000606722485, "grad_norm": 2.2121047973632812, "learning_rate": 1.3779633951602998e-05, "loss": 0.35696786642074585, "step": 2633 }, { "epoch": 0.31962140516927556, "grad_norm": 2.8321733474731445, "learning_rate": 1.3777177250951972e-05, "loss": 0.5759726166725159, "step": 2634 }, { "epoch": 0.3197427496663026, "grad_norm": 2.503988027572632, "learning_rate": 1.3774720550300946e-05, "loss": 0.24126414954662323, "step": 2635 }, { "epoch": 0.31986409416332967, "grad_norm": 3.8036837577819824, "learning_rate": 1.377226384964992e-05, "loss": 0.14890190958976746, "step": 2636 }, { "epoch": 0.3199854386603568, "grad_norm": 2.332839012145996, "learning_rate": 1.3769807148998895e-05, "loss": 0.4150230288505554, "step": 2637 }, { "epoch": 0.32010678315738383, "grad_norm": 1.6931169033050537, "learning_rate": 1.376735044834787e-05, "loss": 0.02355354093015194, "step": 2638 }, { "epoch": 0.3202281276544109, "grad_norm": 1.7993088960647583, "learning_rate": 1.3764893747696843e-05, "loss": 0.228868767619133, "step": 2639 }, { "epoch": 0.32034947215143794, "grad_norm": 4.86004114151001, "learning_rate": 1.3762437047045818e-05, "loss": 0.6723122000694275, "step": 2640 }, { "epoch": 0.320470816648465, "grad_norm": 2.648098945617676, "learning_rate": 1.3759980346394792e-05, "loss": 0.3872552514076233, "step": 2641 }, { "epoch": 0.32059216114549205, "grad_norm": 2.9110360145568848, "learning_rate": 1.3757523645743766e-05, "loss": 0.535675585269928, "step": 2642 }, { "epoch": 0.3207135056425191, "grad_norm": 2.2493841648101807, "learning_rate": 1.375506694509274e-05, "loss": 0.12403810024261475, "step": 2643 }, { "epoch": 0.32083485013954616, "grad_norm": 2.314582586288452, "learning_rate": 1.3752610244441715e-05, "loss": 0.17878662049770355, "step": 2644 }, { "epoch": 0.3209561946365732, "grad_norm": 2.2142958641052246, "learning_rate": 1.3750153543790689e-05, "loss": 0.23200297355651855, "step": 2645 }, { "epoch": 0.32107753913360026, "grad_norm": 2.109805107116699, "learning_rate": 1.3747696843139663e-05, "loss": 0.5256378650665283, "step": 2646 }, { "epoch": 0.3211988836306274, "grad_norm": 1.9347625970840454, "learning_rate": 1.374524014248864e-05, "loss": 0.18874908983707428, "step": 2647 }, { "epoch": 0.32132022812765443, "grad_norm": 1.4329655170440674, "learning_rate": 1.3742783441837613e-05, "loss": 0.13318997621536255, "step": 2648 }, { "epoch": 0.3214415726246815, "grad_norm": 1.443900465965271, "learning_rate": 1.3740326741186588e-05, "loss": 0.0861474797129631, "step": 2649 }, { "epoch": 0.32156291712170854, "grad_norm": 3.0313220024108887, "learning_rate": 1.3737870040535562e-05, "loss": 0.4402236342430115, "step": 2650 }, { "epoch": 0.3216842616187356, "grad_norm": 2.4113519191741943, "learning_rate": 1.3735413339884536e-05, "loss": 0.980143129825592, "step": 2651 }, { "epoch": 0.32180560611576264, "grad_norm": 2.563471794128418, "learning_rate": 1.373295663923351e-05, "loss": 0.1953703910112381, "step": 2652 }, { "epoch": 0.3219269506127897, "grad_norm": 0.014330465346574783, "learning_rate": 1.3730499938582485e-05, "loss": 0.0001082075759768486, "step": 2653 }, { "epoch": 0.32204829510981675, "grad_norm": 2.2561421394348145, "learning_rate": 1.3728043237931459e-05, "loss": 0.5709770917892456, "step": 2654 }, { "epoch": 0.3221696396068438, "grad_norm": 2.220679521560669, "learning_rate": 1.3725586537280433e-05, "loss": 0.21078574657440186, "step": 2655 }, { "epoch": 0.3222909841038709, "grad_norm": 3.595139741897583, "learning_rate": 1.3723129836629407e-05, "loss": 0.28871840238571167, "step": 2656 }, { "epoch": 0.32241232860089797, "grad_norm": 2.6884381771087646, "learning_rate": 1.3720673135978382e-05, "loss": 0.312512069940567, "step": 2657 }, { "epoch": 0.322533673097925, "grad_norm": 2.3358237743377686, "learning_rate": 1.3718216435327356e-05, "loss": 0.21305698156356812, "step": 2658 }, { "epoch": 0.3226550175949521, "grad_norm": 2.857361078262329, "learning_rate": 1.371575973467633e-05, "loss": 0.2686937749385834, "step": 2659 }, { "epoch": 0.32277636209197913, "grad_norm": 4.118078708648682, "learning_rate": 1.3713303034025305e-05, "loss": 0.22622153162956238, "step": 2660 }, { "epoch": 0.3228977065890062, "grad_norm": 1.957195520401001, "learning_rate": 1.3710846333374279e-05, "loss": 0.04407578334212303, "step": 2661 }, { "epoch": 0.32301905108603324, "grad_norm": 2.119107484817505, "learning_rate": 1.3708389632723253e-05, "loss": 0.23805676400661469, "step": 2662 }, { "epoch": 0.3231403955830603, "grad_norm": 1.9530389308929443, "learning_rate": 1.3705932932072227e-05, "loss": 0.48880478739738464, "step": 2663 }, { "epoch": 0.32326174008008735, "grad_norm": 1.2080408334732056, "learning_rate": 1.3703476231421202e-05, "loss": 0.11424320936203003, "step": 2664 }, { "epoch": 0.32338308457711445, "grad_norm": 1.7990244626998901, "learning_rate": 1.3701019530770176e-05, "loss": 0.06525573879480362, "step": 2665 }, { "epoch": 0.3235044290741415, "grad_norm": 2.054141044616699, "learning_rate": 1.369856283011915e-05, "loss": 0.365490585565567, "step": 2666 }, { "epoch": 0.32362577357116856, "grad_norm": 3.4649338722229004, "learning_rate": 1.3696106129468126e-05, "loss": 0.6358323097229004, "step": 2667 }, { "epoch": 0.3237471180681956, "grad_norm": 2.41003680229187, "learning_rate": 1.36936494288171e-05, "loss": 0.3890521824359894, "step": 2668 }, { "epoch": 0.32386846256522267, "grad_norm": 3.2424607276916504, "learning_rate": 1.3691192728166075e-05, "loss": 0.27454662322998047, "step": 2669 }, { "epoch": 0.3239898070622497, "grad_norm": 2.258336067199707, "learning_rate": 1.3688736027515049e-05, "loss": 0.1877439320087433, "step": 2670 }, { "epoch": 0.3241111515592768, "grad_norm": 2.6736388206481934, "learning_rate": 1.3686279326864023e-05, "loss": 0.4286113977432251, "step": 2671 }, { "epoch": 0.32423249605630383, "grad_norm": 1.740665078163147, "learning_rate": 1.3683822626212997e-05, "loss": 0.16415956616401672, "step": 2672 }, { "epoch": 0.3243538405533309, "grad_norm": 2.988381862640381, "learning_rate": 1.3681365925561972e-05, "loss": 0.17869721353054047, "step": 2673 }, { "epoch": 0.32447518505035794, "grad_norm": 1.8452136516571045, "learning_rate": 1.3678909224910946e-05, "loss": 0.338632196187973, "step": 2674 }, { "epoch": 0.32459652954738505, "grad_norm": 3.0976476669311523, "learning_rate": 1.367645252425992e-05, "loss": 0.12716446816921234, "step": 2675 }, { "epoch": 0.3247178740444121, "grad_norm": 2.4348232746124268, "learning_rate": 1.3673995823608894e-05, "loss": 0.038799576461315155, "step": 2676 }, { "epoch": 0.32483921854143916, "grad_norm": 6.233269691467285, "learning_rate": 1.3671539122957869e-05, "loss": 0.14202484488487244, "step": 2677 }, { "epoch": 0.3249605630384662, "grad_norm": 2.443070411682129, "learning_rate": 1.3669082422306843e-05, "loss": 0.11883317679166794, "step": 2678 }, { "epoch": 0.32508190753549326, "grad_norm": 1.7617214918136597, "learning_rate": 1.3666625721655817e-05, "loss": 0.16712503135204315, "step": 2679 }, { "epoch": 0.3252032520325203, "grad_norm": 1.9943039417266846, "learning_rate": 1.3664169021004791e-05, "loss": 0.3453546166419983, "step": 2680 }, { "epoch": 0.32532459652954737, "grad_norm": 2.232401132583618, "learning_rate": 1.3661712320353766e-05, "loss": 0.2506612241268158, "step": 2681 }, { "epoch": 0.3254459410265744, "grad_norm": 1.0962969064712524, "learning_rate": 1.365925561970274e-05, "loss": 0.06261173635721207, "step": 2682 }, { "epoch": 0.3255672855236015, "grad_norm": 2.8957111835479736, "learning_rate": 1.3656798919051714e-05, "loss": 0.2506040036678314, "step": 2683 }, { "epoch": 0.3256886300206286, "grad_norm": 3.8329215049743652, "learning_rate": 1.3654342218400688e-05, "loss": 0.2881605923175812, "step": 2684 }, { "epoch": 0.32580997451765564, "grad_norm": 1.4724440574645996, "learning_rate": 1.3651885517749663e-05, "loss": 0.03610328212380409, "step": 2685 }, { "epoch": 0.3259313190146827, "grad_norm": 2.247314929962158, "learning_rate": 1.3649428817098637e-05, "loss": 0.1618456095457077, "step": 2686 }, { "epoch": 0.32605266351170975, "grad_norm": 1.9265762567520142, "learning_rate": 1.3646972116447613e-05, "loss": 0.1461503505706787, "step": 2687 }, { "epoch": 0.3261740080087368, "grad_norm": 2.0526015758514404, "learning_rate": 1.3644515415796587e-05, "loss": 0.08651389181613922, "step": 2688 }, { "epoch": 0.32629535250576386, "grad_norm": 3.633225202560425, "learning_rate": 1.3642058715145561e-05, "loss": 0.20691147446632385, "step": 2689 }, { "epoch": 0.3264166970027909, "grad_norm": 2.8911032676696777, "learning_rate": 1.3639602014494536e-05, "loss": 0.28065308928489685, "step": 2690 }, { "epoch": 0.32653804149981797, "grad_norm": 2.2704734802246094, "learning_rate": 1.363714531384351e-05, "loss": 0.1923559606075287, "step": 2691 }, { "epoch": 0.326659385996845, "grad_norm": 2.778303384780884, "learning_rate": 1.3634688613192484e-05, "loss": 0.35757672786712646, "step": 2692 }, { "epoch": 0.32678073049387213, "grad_norm": 2.3999030590057373, "learning_rate": 1.3632231912541458e-05, "loss": 0.23560933768749237, "step": 2693 }, { "epoch": 0.3269020749908992, "grad_norm": 2.9183757305145264, "learning_rate": 1.3629775211890433e-05, "loss": 0.4588801860809326, "step": 2694 }, { "epoch": 0.32702341948792624, "grad_norm": 2.39132022857666, "learning_rate": 1.3627318511239407e-05, "loss": 0.13901375234127045, "step": 2695 }, { "epoch": 0.3271447639849533, "grad_norm": 1.504375696182251, "learning_rate": 1.3624861810588381e-05, "loss": 0.29446569085121155, "step": 2696 }, { "epoch": 0.32726610848198034, "grad_norm": 2.672706127166748, "learning_rate": 1.3622405109937355e-05, "loss": 0.1738225817680359, "step": 2697 }, { "epoch": 0.3273874529790074, "grad_norm": 2.682893991470337, "learning_rate": 1.361994840928633e-05, "loss": 0.42572760581970215, "step": 2698 }, { "epoch": 0.32750879747603445, "grad_norm": 2.149664878845215, "learning_rate": 1.3617491708635304e-05, "loss": 0.22784820199012756, "step": 2699 }, { "epoch": 0.3276301419730615, "grad_norm": 2.5921425819396973, "learning_rate": 1.3615035007984278e-05, "loss": 0.2059967815876007, "step": 2700 }, { "epoch": 0.32775148647008856, "grad_norm": 2.3024137020111084, "learning_rate": 1.3612578307333253e-05, "loss": 0.3200721740722656, "step": 2701 }, { "epoch": 0.3278728309671156, "grad_norm": 2.3466262817382812, "learning_rate": 1.3610121606682227e-05, "loss": 0.4672166109085083, "step": 2702 }, { "epoch": 0.3279941754641427, "grad_norm": 2.3088572025299072, "learning_rate": 1.3607664906031201e-05, "loss": 0.3052404224872589, "step": 2703 }, { "epoch": 0.3281155199611698, "grad_norm": 2.4651174545288086, "learning_rate": 1.3605208205380175e-05, "loss": 0.2764608860015869, "step": 2704 }, { "epoch": 0.32823686445819683, "grad_norm": 2.1033411026000977, "learning_rate": 1.360275150472915e-05, "loss": 0.15545253455638885, "step": 2705 }, { "epoch": 0.3283582089552239, "grad_norm": 1.5688694715499878, "learning_rate": 1.3600294804078124e-05, "loss": 0.22580206394195557, "step": 2706 }, { "epoch": 0.32847955345225094, "grad_norm": 1.5795338153839111, "learning_rate": 1.35978381034271e-05, "loss": 0.05386420339345932, "step": 2707 }, { "epoch": 0.328600897949278, "grad_norm": 2.356640338897705, "learning_rate": 1.3595381402776074e-05, "loss": 0.5281941294670105, "step": 2708 }, { "epoch": 0.32872224244630505, "grad_norm": 2.8616931438446045, "learning_rate": 1.3592924702125048e-05, "loss": 0.317641943693161, "step": 2709 }, { "epoch": 0.3288435869433321, "grad_norm": 3.2473084926605225, "learning_rate": 1.3590468001474023e-05, "loss": 0.32571110129356384, "step": 2710 }, { "epoch": 0.32896493144035915, "grad_norm": 1.7362678050994873, "learning_rate": 1.3588011300822997e-05, "loss": 0.08650978654623032, "step": 2711 }, { "epoch": 0.32908627593738626, "grad_norm": 2.0047483444213867, "learning_rate": 1.3585554600171971e-05, "loss": 0.2949868142604828, "step": 2712 }, { "epoch": 0.3292076204344133, "grad_norm": 1.2510021924972534, "learning_rate": 1.3583097899520945e-05, "loss": 0.05454785004258156, "step": 2713 }, { "epoch": 0.32932896493144037, "grad_norm": 2.209566831588745, "learning_rate": 1.358064119886992e-05, "loss": 0.23208726942539215, "step": 2714 }, { "epoch": 0.3294503094284674, "grad_norm": 1.2048362493515015, "learning_rate": 1.3578184498218894e-05, "loss": 0.013374043628573418, "step": 2715 }, { "epoch": 0.3295716539254945, "grad_norm": 2.5049784183502197, "learning_rate": 1.3575727797567868e-05, "loss": 0.5582481622695923, "step": 2716 }, { "epoch": 0.32969299842252153, "grad_norm": 3.1326956748962402, "learning_rate": 1.3573271096916842e-05, "loss": 0.2733760476112366, "step": 2717 }, { "epoch": 0.3298143429195486, "grad_norm": 1.2727867364883423, "learning_rate": 1.3570814396265817e-05, "loss": 0.09801354259252548, "step": 2718 }, { "epoch": 0.32993568741657564, "grad_norm": 4.572424411773682, "learning_rate": 1.356835769561479e-05, "loss": 0.5125389695167542, "step": 2719 }, { "epoch": 0.3300570319136027, "grad_norm": 3.2168471813201904, "learning_rate": 1.3565900994963765e-05, "loss": 0.7108581066131592, "step": 2720 }, { "epoch": 0.3301783764106298, "grad_norm": 2.22446608543396, "learning_rate": 1.356344429431274e-05, "loss": 0.38087382912635803, "step": 2721 }, { "epoch": 0.33029972090765686, "grad_norm": 1.1339706182479858, "learning_rate": 1.3560987593661714e-05, "loss": 0.2368617057800293, "step": 2722 }, { "epoch": 0.3304210654046839, "grad_norm": 1.8774468898773193, "learning_rate": 1.3558530893010688e-05, "loss": 0.298879474401474, "step": 2723 }, { "epoch": 0.33054240990171097, "grad_norm": 2.565373420715332, "learning_rate": 1.3556074192359662e-05, "loss": 0.3679310977458954, "step": 2724 }, { "epoch": 0.330663754398738, "grad_norm": 2.55824613571167, "learning_rate": 1.3553617491708636e-05, "loss": 0.40981119871139526, "step": 2725 }, { "epoch": 0.3307850988957651, "grad_norm": 1.0780901908874512, "learning_rate": 1.3551160791057612e-05, "loss": 0.09795771539211273, "step": 2726 }, { "epoch": 0.3309064433927921, "grad_norm": 2.007880210876465, "learning_rate": 1.3548704090406587e-05, "loss": 0.12057384848594666, "step": 2727 }, { "epoch": 0.3310277878898192, "grad_norm": 2.1274709701538086, "learning_rate": 1.3546247389755561e-05, "loss": 0.25686660408973694, "step": 2728 }, { "epoch": 0.33114913238684623, "grad_norm": 4.40798282623291, "learning_rate": 1.3543790689104535e-05, "loss": 0.16807983815670013, "step": 2729 }, { "epoch": 0.33127047688387334, "grad_norm": 2.4525368213653564, "learning_rate": 1.354133398845351e-05, "loss": 0.5694208145141602, "step": 2730 }, { "epoch": 0.3313918213809004, "grad_norm": 1.2971670627593994, "learning_rate": 1.3538877287802484e-05, "loss": 0.044074006378650665, "step": 2731 }, { "epoch": 0.33151316587792745, "grad_norm": 4.044321060180664, "learning_rate": 1.3536420587151458e-05, "loss": 0.32732656598091125, "step": 2732 }, { "epoch": 0.3316345103749545, "grad_norm": 3.670825242996216, "learning_rate": 1.3533963886500432e-05, "loss": 0.9322635531425476, "step": 2733 }, { "epoch": 0.33175585487198156, "grad_norm": 1.3728653192520142, "learning_rate": 1.3531507185849406e-05, "loss": 0.01705666445195675, "step": 2734 }, { "epoch": 0.3318771993690086, "grad_norm": 0.8902427554130554, "learning_rate": 1.352905048519838e-05, "loss": 0.03738236054778099, "step": 2735 }, { "epoch": 0.33199854386603567, "grad_norm": 1.3336197137832642, "learning_rate": 1.3526593784547355e-05, "loss": 0.13155893981456757, "step": 2736 }, { "epoch": 0.3321198883630627, "grad_norm": 1.853929877281189, "learning_rate": 1.352413708389633e-05, "loss": 0.08310995995998383, "step": 2737 }, { "epoch": 0.3322412328600898, "grad_norm": 2.526223659515381, "learning_rate": 1.3521680383245303e-05, "loss": 0.28907132148742676, "step": 2738 }, { "epoch": 0.33236257735711683, "grad_norm": 3.0757598876953125, "learning_rate": 1.3519223682594278e-05, "loss": 0.3350384831428528, "step": 2739 }, { "epoch": 0.33248392185414394, "grad_norm": 2.2847514152526855, "learning_rate": 1.3516766981943252e-05, "loss": 0.19784024357795715, "step": 2740 }, { "epoch": 0.332605266351171, "grad_norm": 4.3813042640686035, "learning_rate": 1.3514310281292226e-05, "loss": 0.29084765911102295, "step": 2741 }, { "epoch": 0.33272661084819805, "grad_norm": 1.7603188753128052, "learning_rate": 1.35118535806412e-05, "loss": 0.261428564786911, "step": 2742 }, { "epoch": 0.3328479553452251, "grad_norm": 1.2502996921539307, "learning_rate": 1.3509396879990175e-05, "loss": 0.07206853479146957, "step": 2743 }, { "epoch": 0.33296929984225215, "grad_norm": 2.522667169570923, "learning_rate": 1.3506940179339149e-05, "loss": 0.3589305877685547, "step": 2744 }, { "epoch": 0.3330906443392792, "grad_norm": 2.1586971282958984, "learning_rate": 1.3504483478688123e-05, "loss": 0.17322710156440735, "step": 2745 }, { "epoch": 0.33321198883630626, "grad_norm": 1.3340508937835693, "learning_rate": 1.35020267780371e-05, "loss": 0.08164606988430023, "step": 2746 }, { "epoch": 0.3333333333333333, "grad_norm": 2.6283721923828125, "learning_rate": 1.349957007738607e-05, "loss": 0.24979859590530396, "step": 2747 }, { "epoch": 0.33345467783036037, "grad_norm": 3.0438013076782227, "learning_rate": 1.3497113376735044e-05, "loss": 0.17684221267700195, "step": 2748 }, { "epoch": 0.3335760223273875, "grad_norm": 2.437587022781372, "learning_rate": 1.3494656676084019e-05, "loss": 0.06924796104431152, "step": 2749 }, { "epoch": 0.33369736682441453, "grad_norm": 3.365072011947632, "learning_rate": 1.3492199975432993e-05, "loss": 0.19792334735393524, "step": 2750 }, { "epoch": 0.3338187113214416, "grad_norm": 3.1566526889801025, "learning_rate": 1.3489743274781967e-05, "loss": 0.30358290672302246, "step": 2751 }, { "epoch": 0.33394005581846864, "grad_norm": 1.8867077827453613, "learning_rate": 1.3487286574130943e-05, "loss": 0.4433608949184418, "step": 2752 }, { "epoch": 0.3340614003154957, "grad_norm": 1.9975477457046509, "learning_rate": 1.3484829873479917e-05, "loss": 0.03996870666742325, "step": 2753 }, { "epoch": 0.33418274481252275, "grad_norm": 2.1786231994628906, "learning_rate": 1.3482373172828892e-05, "loss": 0.4242089092731476, "step": 2754 }, { "epoch": 0.3343040893095498, "grad_norm": 3.0105373859405518, "learning_rate": 1.3479916472177866e-05, "loss": 0.14656175673007965, "step": 2755 }, { "epoch": 0.33442543380657686, "grad_norm": 2.8930420875549316, "learning_rate": 1.347745977152684e-05, "loss": 0.4321075677871704, "step": 2756 }, { "epoch": 0.3345467783036039, "grad_norm": 3.0688719749450684, "learning_rate": 1.3475003070875814e-05, "loss": 0.4075854420661926, "step": 2757 }, { "epoch": 0.334668122800631, "grad_norm": 2.5898208618164062, "learning_rate": 1.3472546370224789e-05, "loss": 0.2931281626224518, "step": 2758 }, { "epoch": 0.33478946729765807, "grad_norm": 3.415714740753174, "learning_rate": 1.3470089669573763e-05, "loss": 0.3117362856864929, "step": 2759 }, { "epoch": 0.3349108117946851, "grad_norm": 2.4045135974884033, "learning_rate": 1.3467632968922737e-05, "loss": 0.6036303043365479, "step": 2760 }, { "epoch": 0.3350321562917122, "grad_norm": 3.6322500705718994, "learning_rate": 1.3465176268271711e-05, "loss": 0.2127571851015091, "step": 2761 }, { "epoch": 0.33515350078873923, "grad_norm": 1.8815101385116577, "learning_rate": 1.3462719567620686e-05, "loss": 0.33295542001724243, "step": 2762 }, { "epoch": 0.3352748452857663, "grad_norm": 3.2687511444091797, "learning_rate": 1.346026286696966e-05, "loss": 0.22823671996593475, "step": 2763 }, { "epoch": 0.33539618978279334, "grad_norm": 2.9881391525268555, "learning_rate": 1.3457806166318634e-05, "loss": 0.20742876827716827, "step": 2764 }, { "epoch": 0.3355175342798204, "grad_norm": 3.587043523788452, "learning_rate": 1.3455349465667608e-05, "loss": 0.26324501633644104, "step": 2765 }, { "epoch": 0.33563887877684745, "grad_norm": 2.8624846935272217, "learning_rate": 1.3452892765016583e-05, "loss": 0.31277331709861755, "step": 2766 }, { "epoch": 0.3357602232738745, "grad_norm": 2.4092533588409424, "learning_rate": 1.3450436064365557e-05, "loss": 0.3587488532066345, "step": 2767 }, { "epoch": 0.3358815677709016, "grad_norm": 2.2200207710266113, "learning_rate": 1.3447979363714531e-05, "loss": 0.3722842335700989, "step": 2768 }, { "epoch": 0.33600291226792867, "grad_norm": 2.3017053604125977, "learning_rate": 1.3445522663063505e-05, "loss": 0.4437088966369629, "step": 2769 }, { "epoch": 0.3361242567649557, "grad_norm": 2.613037586212158, "learning_rate": 1.344306596241248e-05, "loss": 0.44210320711135864, "step": 2770 }, { "epoch": 0.3362456012619828, "grad_norm": 1.9856221675872803, "learning_rate": 1.3440609261761454e-05, "loss": 0.5379482507705688, "step": 2771 }, { "epoch": 0.3363669457590098, "grad_norm": 4.091605186462402, "learning_rate": 1.343815256111043e-05, "loss": 0.26920321583747864, "step": 2772 }, { "epoch": 0.3364882902560369, "grad_norm": 2.7099874019622803, "learning_rate": 1.3435695860459404e-05, "loss": 0.4916417598724365, "step": 2773 }, { "epoch": 0.33660963475306394, "grad_norm": 3.850006580352783, "learning_rate": 1.3433239159808378e-05, "loss": 0.44688522815704346, "step": 2774 }, { "epoch": 0.336730979250091, "grad_norm": 2.5527663230895996, "learning_rate": 1.3430782459157353e-05, "loss": 0.5459522604942322, "step": 2775 }, { "epoch": 0.33685232374711804, "grad_norm": 2.5261878967285156, "learning_rate": 1.3428325758506327e-05, "loss": 0.29146242141723633, "step": 2776 }, { "epoch": 0.33697366824414515, "grad_norm": 1.816943645477295, "learning_rate": 1.3425869057855301e-05, "loss": 0.24747847020626068, "step": 2777 }, { "epoch": 0.3370950127411722, "grad_norm": 3.2097055912017822, "learning_rate": 1.3423412357204275e-05, "loss": 0.2549365758895874, "step": 2778 }, { "epoch": 0.33721635723819926, "grad_norm": 1.550027847290039, "learning_rate": 1.342095565655325e-05, "loss": 0.12433695793151855, "step": 2779 }, { "epoch": 0.3373377017352263, "grad_norm": 1.417708158493042, "learning_rate": 1.3418498955902224e-05, "loss": 0.05040618032217026, "step": 2780 }, { "epoch": 0.33745904623225337, "grad_norm": 2.225236415863037, "learning_rate": 1.3416042255251198e-05, "loss": 0.1814093440771103, "step": 2781 }, { "epoch": 0.3375803907292804, "grad_norm": 1.977687120437622, "learning_rate": 1.3413585554600172e-05, "loss": 0.2293473184108734, "step": 2782 }, { "epoch": 0.3377017352263075, "grad_norm": 1.7540130615234375, "learning_rate": 1.3411128853949147e-05, "loss": 0.18827372789382935, "step": 2783 }, { "epoch": 0.33782307972333453, "grad_norm": 2.545426845550537, "learning_rate": 1.3408672153298121e-05, "loss": 0.3193444609642029, "step": 2784 }, { "epoch": 0.3379444242203616, "grad_norm": 1.901411533355713, "learning_rate": 1.3406215452647095e-05, "loss": 0.048846084624528885, "step": 2785 }, { "epoch": 0.3380657687173887, "grad_norm": 1.747877597808838, "learning_rate": 1.340375875199607e-05, "loss": 0.4019920229911804, "step": 2786 }, { "epoch": 0.33818711321441575, "grad_norm": 2.040862798690796, "learning_rate": 1.3401302051345044e-05, "loss": 0.3238888084888458, "step": 2787 }, { "epoch": 0.3383084577114428, "grad_norm": 1.6558022499084473, "learning_rate": 1.3398845350694018e-05, "loss": 0.1995115578174591, "step": 2788 }, { "epoch": 0.33842980220846985, "grad_norm": 2.0918025970458984, "learning_rate": 1.3396388650042992e-05, "loss": 0.2865673303604126, "step": 2789 }, { "epoch": 0.3385511467054969, "grad_norm": 2.004918098449707, "learning_rate": 1.3393931949391967e-05, "loss": 0.19865933060646057, "step": 2790 }, { "epoch": 0.33867249120252396, "grad_norm": 2.260880947113037, "learning_rate": 1.339147524874094e-05, "loss": 0.516450047492981, "step": 2791 }, { "epoch": 0.338793835699551, "grad_norm": 1.7905795574188232, "learning_rate": 1.3389018548089917e-05, "loss": 0.17964567244052887, "step": 2792 }, { "epoch": 0.33891518019657807, "grad_norm": 1.4596978425979614, "learning_rate": 1.3386561847438891e-05, "loss": 0.2544354200363159, "step": 2793 }, { "epoch": 0.3390365246936051, "grad_norm": 3.439584732055664, "learning_rate": 1.3384105146787865e-05, "loss": 0.2773582637310028, "step": 2794 }, { "epoch": 0.3391578691906322, "grad_norm": 0.0027794241905212402, "learning_rate": 1.338164844613684e-05, "loss": 4.521989467320964e-05, "step": 2795 }, { "epoch": 0.3392792136876593, "grad_norm": 1.4505723714828491, "learning_rate": 1.3379191745485814e-05, "loss": 0.07739366590976715, "step": 2796 }, { "epoch": 0.33940055818468634, "grad_norm": 2.9833128452301025, "learning_rate": 1.3376735044834788e-05, "loss": 0.4213433563709259, "step": 2797 }, { "epoch": 0.3395219026817134, "grad_norm": 2.603236436843872, "learning_rate": 1.3374278344183762e-05, "loss": 0.4406318962574005, "step": 2798 }, { "epoch": 0.33964324717874045, "grad_norm": 1.4903669357299805, "learning_rate": 1.3371821643532737e-05, "loss": 0.07684893161058426, "step": 2799 }, { "epoch": 0.3397645916757675, "grad_norm": 1.9389923810958862, "learning_rate": 1.336936494288171e-05, "loss": 0.2516050338745117, "step": 2800 }, { "epoch": 0.33988593617279456, "grad_norm": 2.0152909755706787, "learning_rate": 1.3366908242230685e-05, "loss": 0.15068350732326508, "step": 2801 }, { "epoch": 0.3400072806698216, "grad_norm": 2.715830087661743, "learning_rate": 1.336445154157966e-05, "loss": 0.21225924789905548, "step": 2802 }, { "epoch": 0.34012862516684866, "grad_norm": 1.9526959657669067, "learning_rate": 1.3361994840928634e-05, "loss": 0.1875372678041458, "step": 2803 }, { "epoch": 0.3402499696638757, "grad_norm": 2.714557409286499, "learning_rate": 1.3359538140277608e-05, "loss": 0.11924070864915848, "step": 2804 }, { "epoch": 0.3403713141609028, "grad_norm": 2.0488007068634033, "learning_rate": 1.3357081439626582e-05, "loss": 0.17785635590553284, "step": 2805 }, { "epoch": 0.3404926586579299, "grad_norm": 3.2751269340515137, "learning_rate": 1.3354624738975556e-05, "loss": 0.3669911026954651, "step": 2806 }, { "epoch": 0.34061400315495693, "grad_norm": 2.7896599769592285, "learning_rate": 1.335216803832453e-05, "loss": 0.5928270816802979, "step": 2807 }, { "epoch": 0.340735347651984, "grad_norm": 2.9548301696777344, "learning_rate": 1.3349711337673505e-05, "loss": 0.1790076196193695, "step": 2808 }, { "epoch": 0.34085669214901104, "grad_norm": 2.8890271186828613, "learning_rate": 1.334725463702248e-05, "loss": 0.24262064695358276, "step": 2809 }, { "epoch": 0.3409780366460381, "grad_norm": 2.664588212966919, "learning_rate": 1.3344797936371453e-05, "loss": 0.2634744346141815, "step": 2810 }, { "epoch": 0.34109938114306515, "grad_norm": 2.912458658218384, "learning_rate": 1.3342341235720428e-05, "loss": 0.16553360223770142, "step": 2811 }, { "epoch": 0.3412207256400922, "grad_norm": 1.9930040836334229, "learning_rate": 1.3339884535069404e-05, "loss": 0.22839638590812683, "step": 2812 }, { "epoch": 0.34134207013711926, "grad_norm": 3.27184796333313, "learning_rate": 1.3337427834418378e-05, "loss": 0.17788903415203094, "step": 2813 }, { "epoch": 0.34146341463414637, "grad_norm": 2.9638190269470215, "learning_rate": 1.3334971133767352e-05, "loss": 0.39199915528297424, "step": 2814 }, { "epoch": 0.3415847591311734, "grad_norm": 1.9852628707885742, "learning_rate": 1.3332514433116326e-05, "loss": 0.09361261874437332, "step": 2815 }, { "epoch": 0.3417061036282005, "grad_norm": 2.0682806968688965, "learning_rate": 1.33300577324653e-05, "loss": 0.6686457991600037, "step": 2816 }, { "epoch": 0.34182744812522753, "grad_norm": 2.4491777420043945, "learning_rate": 1.3327601031814275e-05, "loss": 0.22851845622062683, "step": 2817 }, { "epoch": 0.3419487926222546, "grad_norm": 2.942040205001831, "learning_rate": 1.332514433116325e-05, "loss": 0.33597275614738464, "step": 2818 }, { "epoch": 0.34207013711928164, "grad_norm": 3.0147740840911865, "learning_rate": 1.3322687630512223e-05, "loss": 0.2704648971557617, "step": 2819 }, { "epoch": 0.3421914816163087, "grad_norm": 2.686281442642212, "learning_rate": 1.3320230929861198e-05, "loss": 0.18692731857299805, "step": 2820 }, { "epoch": 0.34231282611333574, "grad_norm": 0.9338500499725342, "learning_rate": 1.3317774229210172e-05, "loss": 0.026601918041706085, "step": 2821 }, { "epoch": 0.3424341706103628, "grad_norm": 2.4016454219818115, "learning_rate": 1.3315317528559146e-05, "loss": 0.15437975525856018, "step": 2822 }, { "epoch": 0.3425555151073899, "grad_norm": 1.8265492916107178, "learning_rate": 1.331286082790812e-05, "loss": 0.5835421681404114, "step": 2823 }, { "epoch": 0.34267685960441696, "grad_norm": 1.672721266746521, "learning_rate": 1.3310404127257095e-05, "loss": 0.20360293984413147, "step": 2824 }, { "epoch": 0.342798204101444, "grad_norm": 2.058891534805298, "learning_rate": 1.3307947426606069e-05, "loss": 0.2055591642856598, "step": 2825 }, { "epoch": 0.34291954859847107, "grad_norm": 1.4964263439178467, "learning_rate": 1.3305490725955043e-05, "loss": 0.05043473467230797, "step": 2826 }, { "epoch": 0.3430408930954981, "grad_norm": 1.9713994264602661, "learning_rate": 1.3303034025304018e-05, "loss": 0.250923216342926, "step": 2827 }, { "epoch": 0.3431622375925252, "grad_norm": 2.310774564743042, "learning_rate": 1.3300577324652992e-05, "loss": 0.35582536458969116, "step": 2828 }, { "epoch": 0.34328358208955223, "grad_norm": 3.157845973968506, "learning_rate": 1.3298120624001966e-05, "loss": 0.4011169672012329, "step": 2829 }, { "epoch": 0.3434049265865793, "grad_norm": 2.0145437717437744, "learning_rate": 1.329566392335094e-05, "loss": 0.13335394859313965, "step": 2830 }, { "epoch": 0.34352627108360634, "grad_norm": 1.5914976596832275, "learning_rate": 1.3293207222699915e-05, "loss": 0.12164802849292755, "step": 2831 }, { "epoch": 0.3436476155806334, "grad_norm": 2.443269729614258, "learning_rate": 1.329075052204889e-05, "loss": 0.29157182574272156, "step": 2832 }, { "epoch": 0.3437689600776605, "grad_norm": 2.2892653942108154, "learning_rate": 1.3288293821397865e-05, "loss": 0.23905837535858154, "step": 2833 }, { "epoch": 0.34389030457468756, "grad_norm": 1.754455804824829, "learning_rate": 1.3285837120746839e-05, "loss": 0.30073070526123047, "step": 2834 }, { "epoch": 0.3440116490717146, "grad_norm": 1.9648005962371826, "learning_rate": 1.3283380420095813e-05, "loss": 0.18791383504867554, "step": 2835 }, { "epoch": 0.34413299356874166, "grad_norm": 2.0399529933929443, "learning_rate": 1.3280923719444788e-05, "loss": 0.21662276983261108, "step": 2836 }, { "epoch": 0.3442543380657687, "grad_norm": 2.2935118675231934, "learning_rate": 1.3278467018793762e-05, "loss": 0.4054362177848816, "step": 2837 }, { "epoch": 0.34437568256279577, "grad_norm": 1.9891663789749146, "learning_rate": 1.3276010318142736e-05, "loss": 0.2039833813905716, "step": 2838 }, { "epoch": 0.3444970270598228, "grad_norm": 2.4605650901794434, "learning_rate": 1.327355361749171e-05, "loss": 0.6762115359306335, "step": 2839 }, { "epoch": 0.3446183715568499, "grad_norm": 2.63932728767395, "learning_rate": 1.3271096916840685e-05, "loss": 0.6073483228683472, "step": 2840 }, { "epoch": 0.34473971605387693, "grad_norm": 3.1658761501312256, "learning_rate": 1.3268640216189659e-05, "loss": 0.31008243560791016, "step": 2841 }, { "epoch": 0.34486106055090404, "grad_norm": 1.9690529108047485, "learning_rate": 1.3266183515538633e-05, "loss": 0.20550884306430817, "step": 2842 }, { "epoch": 0.3449824050479311, "grad_norm": 2.4023427963256836, "learning_rate": 1.3263726814887607e-05, "loss": 0.16229194402694702, "step": 2843 }, { "epoch": 0.34510374954495815, "grad_norm": 2.4014225006103516, "learning_rate": 1.3261270114236582e-05, "loss": 0.5345857739448547, "step": 2844 }, { "epoch": 0.3452250940419852, "grad_norm": 3.2167434692382812, "learning_rate": 1.3258813413585556e-05, "loss": 0.5057387351989746, "step": 2845 }, { "epoch": 0.34534643853901226, "grad_norm": 2.031100034713745, "learning_rate": 1.325635671293453e-05, "loss": 0.2413824498653412, "step": 2846 }, { "epoch": 0.3454677830360393, "grad_norm": 1.7678101062774658, "learning_rate": 1.3253900012283504e-05, "loss": 0.1743219941854477, "step": 2847 }, { "epoch": 0.34558912753306636, "grad_norm": 2.502016067504883, "learning_rate": 1.3251443311632479e-05, "loss": 0.1482171267271042, "step": 2848 }, { "epoch": 0.3457104720300934, "grad_norm": 2.780811071395874, "learning_rate": 1.3248986610981453e-05, "loss": 0.5652299523353577, "step": 2849 }, { "epoch": 0.3458318165271205, "grad_norm": 2.4874913692474365, "learning_rate": 1.3246529910330427e-05, "loss": 0.36608320474624634, "step": 2850 }, { "epoch": 0.3459531610241476, "grad_norm": 2.2988367080688477, "learning_rate": 1.3244073209679401e-05, "loss": 0.33672210574150085, "step": 2851 }, { "epoch": 0.34607450552117464, "grad_norm": 2.0662240982055664, "learning_rate": 1.3241616509028377e-05, "loss": 0.2632628083229065, "step": 2852 }, { "epoch": 0.3461958500182017, "grad_norm": 2.669175624847412, "learning_rate": 1.3239159808377352e-05, "loss": 0.2074550986289978, "step": 2853 }, { "epoch": 0.34631719451522874, "grad_norm": 1.6437294483184814, "learning_rate": 1.3236703107726326e-05, "loss": 0.15819406509399414, "step": 2854 }, { "epoch": 0.3464385390122558, "grad_norm": 3.062838554382324, "learning_rate": 1.32342464070753e-05, "loss": 0.11034675687551498, "step": 2855 }, { "epoch": 0.34655988350928285, "grad_norm": 4.433793067932129, "learning_rate": 1.3231789706424274e-05, "loss": 0.4895234704017639, "step": 2856 }, { "epoch": 0.3466812280063099, "grad_norm": 2.2202999591827393, "learning_rate": 1.3229333005773249e-05, "loss": 0.1974233090877533, "step": 2857 }, { "epoch": 0.34680257250333696, "grad_norm": 2.6762070655822754, "learning_rate": 1.3226876305122223e-05, "loss": 0.3637422025203705, "step": 2858 }, { "epoch": 0.346923917000364, "grad_norm": 3.210794687271118, "learning_rate": 1.3224419604471197e-05, "loss": 0.34235748648643494, "step": 2859 }, { "epoch": 0.34704526149739107, "grad_norm": 2.9384000301361084, "learning_rate": 1.3221962903820171e-05, "loss": 0.5411485433578491, "step": 2860 }, { "epoch": 0.3471666059944182, "grad_norm": 2.98555064201355, "learning_rate": 1.3219506203169146e-05, "loss": 0.23736192286014557, "step": 2861 }, { "epoch": 0.34728795049144523, "grad_norm": 2.6572117805480957, "learning_rate": 1.321704950251812e-05, "loss": 0.4607403874397278, "step": 2862 }, { "epoch": 0.3474092949884723, "grad_norm": 4.189919948577881, "learning_rate": 1.3214592801867094e-05, "loss": 0.19775786995887756, "step": 2863 }, { "epoch": 0.34753063948549934, "grad_norm": 1.5939427614212036, "learning_rate": 1.3212136101216068e-05, "loss": 0.09462389349937439, "step": 2864 }, { "epoch": 0.3476519839825264, "grad_norm": 3.5034804344177246, "learning_rate": 1.3209679400565043e-05, "loss": 0.24761658906936646, "step": 2865 }, { "epoch": 0.34777332847955345, "grad_norm": 3.050703287124634, "learning_rate": 1.3207222699914017e-05, "loss": 0.49068590998649597, "step": 2866 }, { "epoch": 0.3478946729765805, "grad_norm": 1.9454255104064941, "learning_rate": 1.3204765999262991e-05, "loss": 0.1986999660730362, "step": 2867 }, { "epoch": 0.34801601747360755, "grad_norm": 2.60258150100708, "learning_rate": 1.3202309298611965e-05, "loss": 0.35261183977127075, "step": 2868 }, { "epoch": 0.3481373619706346, "grad_norm": 1.4820805788040161, "learning_rate": 1.319985259796094e-05, "loss": 0.09027974307537079, "step": 2869 }, { "epoch": 0.3482587064676617, "grad_norm": 2.0941667556762695, "learning_rate": 1.3197395897309914e-05, "loss": 0.2873879671096802, "step": 2870 }, { "epoch": 0.34838005096468877, "grad_norm": 1.7293140888214111, "learning_rate": 1.319493919665889e-05, "loss": 0.18081168830394745, "step": 2871 }, { "epoch": 0.3485013954617158, "grad_norm": 4.354601860046387, "learning_rate": 1.3192482496007864e-05, "loss": 0.4792991280555725, "step": 2872 }, { "epoch": 0.3486227399587429, "grad_norm": 2.6210784912109375, "learning_rate": 1.3190025795356838e-05, "loss": 0.1830996870994568, "step": 2873 }, { "epoch": 0.34874408445576993, "grad_norm": 4.812962055206299, "learning_rate": 1.3187569094705813e-05, "loss": 0.7624297142028809, "step": 2874 }, { "epoch": 0.348865428952797, "grad_norm": 2.0475847721099854, "learning_rate": 1.3185112394054787e-05, "loss": 0.2510002851486206, "step": 2875 }, { "epoch": 0.34898677344982404, "grad_norm": 4.413797378540039, "learning_rate": 1.3182655693403761e-05, "loss": 0.516799807548523, "step": 2876 }, { "epoch": 0.3491081179468511, "grad_norm": 1.7577311992645264, "learning_rate": 1.3180198992752735e-05, "loss": 0.12230469286441803, "step": 2877 }, { "epoch": 0.34922946244387815, "grad_norm": 1.4777488708496094, "learning_rate": 1.317774229210171e-05, "loss": 0.33226674795150757, "step": 2878 }, { "epoch": 0.34935080694090526, "grad_norm": 3.4159419536590576, "learning_rate": 1.3175285591450684e-05, "loss": 0.49763816595077515, "step": 2879 }, { "epoch": 0.3494721514379323, "grad_norm": 2.3247647285461426, "learning_rate": 1.3172828890799658e-05, "loss": 0.11598958820104599, "step": 2880 }, { "epoch": 0.34959349593495936, "grad_norm": 2.48176908493042, "learning_rate": 1.3170372190148633e-05, "loss": 0.6656193733215332, "step": 2881 }, { "epoch": 0.3497148404319864, "grad_norm": 3.7606394290924072, "learning_rate": 1.3167915489497607e-05, "loss": 0.6240760087966919, "step": 2882 }, { "epoch": 0.34983618492901347, "grad_norm": 2.8003673553466797, "learning_rate": 1.316545878884658e-05, "loss": 0.2076607495546341, "step": 2883 }, { "epoch": 0.3499575294260405, "grad_norm": 2.08559250831604, "learning_rate": 1.3163002088195554e-05, "loss": 0.24259454011917114, "step": 2884 }, { "epoch": 0.3500788739230676, "grad_norm": 0.9130677580833435, "learning_rate": 1.3160545387544528e-05, "loss": 0.03686123341321945, "step": 2885 }, { "epoch": 0.35020021842009463, "grad_norm": 2.195913314819336, "learning_rate": 1.3158088686893502e-05, "loss": 0.34596142172813416, "step": 2886 }, { "epoch": 0.3503215629171217, "grad_norm": 2.618661642074585, "learning_rate": 1.3155631986242476e-05, "loss": 0.3908511996269226, "step": 2887 }, { "epoch": 0.35044290741414874, "grad_norm": 3.42486834526062, "learning_rate": 1.315317528559145e-05, "loss": 0.11605776101350784, "step": 2888 }, { "epoch": 0.35056425191117585, "grad_norm": 2.014707326889038, "learning_rate": 1.3150718584940425e-05, "loss": 0.37118685245513916, "step": 2889 }, { "epoch": 0.3506855964082029, "grad_norm": 1.9236880540847778, "learning_rate": 1.31482618842894e-05, "loss": 0.4760400950908661, "step": 2890 }, { "epoch": 0.35080694090522996, "grad_norm": 2.5255494117736816, "learning_rate": 1.3145805183638373e-05, "loss": 0.2357504814863205, "step": 2891 }, { "epoch": 0.350928285402257, "grad_norm": 1.822582483291626, "learning_rate": 1.3143348482987348e-05, "loss": 0.0703888088464737, "step": 2892 }, { "epoch": 0.35104962989928407, "grad_norm": 2.847137451171875, "learning_rate": 1.3140891782336322e-05, "loss": 0.2367681860923767, "step": 2893 }, { "epoch": 0.3511709743963111, "grad_norm": 3.9951229095458984, "learning_rate": 1.3138435081685296e-05, "loss": 0.1303313970565796, "step": 2894 }, { "epoch": 0.3512923188933382, "grad_norm": 1.6860606670379639, "learning_rate": 1.313597838103427e-05, "loss": 0.18127286434173584, "step": 2895 }, { "epoch": 0.3514136633903652, "grad_norm": 1.8364192247390747, "learning_rate": 1.3133521680383245e-05, "loss": 0.12500934302806854, "step": 2896 }, { "epoch": 0.3515350078873923, "grad_norm": 2.4998505115509033, "learning_rate": 1.313106497973222e-05, "loss": 0.3593692481517792, "step": 2897 }, { "epoch": 0.3516563523844194, "grad_norm": 2.5611114501953125, "learning_rate": 1.3128608279081195e-05, "loss": 0.4094930589199066, "step": 2898 }, { "epoch": 0.35177769688144644, "grad_norm": 3.476863384246826, "learning_rate": 1.312615157843017e-05, "loss": 0.23066210746765137, "step": 2899 }, { "epoch": 0.3518990413784735, "grad_norm": 2.578794479370117, "learning_rate": 1.3123694877779143e-05, "loss": 0.05409723147749901, "step": 2900 }, { "epoch": 0.35202038587550055, "grad_norm": 2.6811587810516357, "learning_rate": 1.3121238177128118e-05, "loss": 0.2996734380722046, "step": 2901 }, { "epoch": 0.3521417303725276, "grad_norm": 2.539841413497925, "learning_rate": 1.3118781476477092e-05, "loss": 0.2994079887866974, "step": 2902 }, { "epoch": 0.35226307486955466, "grad_norm": 2.050487756729126, "learning_rate": 1.3116324775826066e-05, "loss": 0.46317172050476074, "step": 2903 }, { "epoch": 0.3523844193665817, "grad_norm": 2.817878484725952, "learning_rate": 1.311386807517504e-05, "loss": 0.2869478464126587, "step": 2904 }, { "epoch": 0.35250576386360877, "grad_norm": 2.065239191055298, "learning_rate": 1.3111411374524015e-05, "loss": 0.2752199172973633, "step": 2905 }, { "epoch": 0.3526271083606358, "grad_norm": 3.927312135696411, "learning_rate": 1.3108954673872989e-05, "loss": 0.23858079314231873, "step": 2906 }, { "epoch": 0.35274845285766293, "grad_norm": 2.510765552520752, "learning_rate": 1.3106497973221963e-05, "loss": 0.578784704208374, "step": 2907 }, { "epoch": 0.35286979735469, "grad_norm": 2.7408816814422607, "learning_rate": 1.3104041272570938e-05, "loss": 0.4173663854598999, "step": 2908 }, { "epoch": 0.35299114185171704, "grad_norm": 2.8283655643463135, "learning_rate": 1.3101584571919912e-05, "loss": 0.17574214935302734, "step": 2909 }, { "epoch": 0.3531124863487441, "grad_norm": 2.037839412689209, "learning_rate": 1.3099127871268886e-05, "loss": 0.10666762292385101, "step": 2910 }, { "epoch": 0.35323383084577115, "grad_norm": 3.1735360622406006, "learning_rate": 1.309667117061786e-05, "loss": 0.2770954668521881, "step": 2911 }, { "epoch": 0.3533551753427982, "grad_norm": 1.4993237257003784, "learning_rate": 1.3094214469966835e-05, "loss": 0.06719283759593964, "step": 2912 }, { "epoch": 0.35347651983982525, "grad_norm": 1.993152141571045, "learning_rate": 1.3091757769315809e-05, "loss": 0.2014002799987793, "step": 2913 }, { "epoch": 0.3535978643368523, "grad_norm": 2.437696695327759, "learning_rate": 1.3089301068664783e-05, "loss": 0.3466431498527527, "step": 2914 }, { "epoch": 0.35371920883387936, "grad_norm": 2.866640329360962, "learning_rate": 1.3086844368013757e-05, "loss": 0.3083437383174896, "step": 2915 }, { "epoch": 0.35384055333090647, "grad_norm": 2.857501983642578, "learning_rate": 1.3084387667362732e-05, "loss": 0.5835140347480774, "step": 2916 }, { "epoch": 0.3539618978279335, "grad_norm": 2.6366968154907227, "learning_rate": 1.3081930966711708e-05, "loss": 0.3993077576160431, "step": 2917 }, { "epoch": 0.3540832423249606, "grad_norm": 1.1135361194610596, "learning_rate": 1.3079474266060682e-05, "loss": 0.22319062054157257, "step": 2918 }, { "epoch": 0.35420458682198763, "grad_norm": 1.3128582239151, "learning_rate": 1.3077017565409656e-05, "loss": 0.12067007273435593, "step": 2919 }, { "epoch": 0.3543259313190147, "grad_norm": 3.6358463764190674, "learning_rate": 1.307456086475863e-05, "loss": 0.3024868965148926, "step": 2920 }, { "epoch": 0.35444727581604174, "grad_norm": 1.5476981401443481, "learning_rate": 1.3072104164107605e-05, "loss": 0.07749363780021667, "step": 2921 }, { "epoch": 0.3545686203130688, "grad_norm": 1.784947395324707, "learning_rate": 1.3069647463456579e-05, "loss": 0.18837328255176544, "step": 2922 }, { "epoch": 0.35468996481009585, "grad_norm": 2.081301212310791, "learning_rate": 1.3067190762805553e-05, "loss": 0.14046838879585266, "step": 2923 }, { "epoch": 0.3548113093071229, "grad_norm": 1.8582570552825928, "learning_rate": 1.3064734062154527e-05, "loss": 0.17502886056900024, "step": 2924 }, { "epoch": 0.35493265380414996, "grad_norm": 2.0138418674468994, "learning_rate": 1.3062277361503502e-05, "loss": 0.08949162065982819, "step": 2925 }, { "epoch": 0.35505399830117707, "grad_norm": 1.6978049278259277, "learning_rate": 1.3059820660852476e-05, "loss": 0.19336704909801483, "step": 2926 }, { "epoch": 0.3551753427982041, "grad_norm": 4.394315242767334, "learning_rate": 1.305736396020145e-05, "loss": 0.43278732895851135, "step": 2927 }, { "epoch": 0.3552966872952312, "grad_norm": 2.5735270977020264, "learning_rate": 1.3054907259550424e-05, "loss": 0.38995563983917236, "step": 2928 }, { "epoch": 0.3554180317922582, "grad_norm": 3.6233863830566406, "learning_rate": 1.3052450558899399e-05, "loss": 0.14834274351596832, "step": 2929 }, { "epoch": 0.3555393762892853, "grad_norm": 1.9931936264038086, "learning_rate": 1.3049993858248373e-05, "loss": 0.16212575137615204, "step": 2930 }, { "epoch": 0.35566072078631233, "grad_norm": 2.0874016284942627, "learning_rate": 1.3047537157597347e-05, "loss": 0.2454877644777298, "step": 2931 }, { "epoch": 0.3557820652833394, "grad_norm": 1.7809704542160034, "learning_rate": 1.3045080456946321e-05, "loss": 0.2872205078601837, "step": 2932 }, { "epoch": 0.35590340978036644, "grad_norm": 2.3312153816223145, "learning_rate": 1.3042623756295296e-05, "loss": 0.21331094205379486, "step": 2933 }, { "epoch": 0.3560247542773935, "grad_norm": 2.0230391025543213, "learning_rate": 1.304016705564427e-05, "loss": 0.23937641084194183, "step": 2934 }, { "epoch": 0.3561460987744206, "grad_norm": 0.25450608134269714, "learning_rate": 1.3037710354993244e-05, "loss": 0.002068326575681567, "step": 2935 }, { "epoch": 0.35626744327144766, "grad_norm": 2.4915611743927, "learning_rate": 1.3035253654342218e-05, "loss": 0.48472559452056885, "step": 2936 }, { "epoch": 0.3563887877684747, "grad_norm": 2.8751635551452637, "learning_rate": 1.3032796953691194e-05, "loss": 0.3046378791332245, "step": 2937 }, { "epoch": 0.35651013226550177, "grad_norm": 3.4425442218780518, "learning_rate": 1.3030340253040169e-05, "loss": 0.3476297855377197, "step": 2938 }, { "epoch": 0.3566314767625288, "grad_norm": 1.4276381731033325, "learning_rate": 1.3027883552389143e-05, "loss": 0.06332909315824509, "step": 2939 }, { "epoch": 0.3567528212595559, "grad_norm": 2.643845558166504, "learning_rate": 1.3025426851738117e-05, "loss": 0.26920342445373535, "step": 2940 }, { "epoch": 0.35687416575658293, "grad_norm": 1.530829668045044, "learning_rate": 1.3022970151087091e-05, "loss": 0.0930422991514206, "step": 2941 }, { "epoch": 0.35699551025361, "grad_norm": 2.0474140644073486, "learning_rate": 1.3020513450436066e-05, "loss": 0.20393139123916626, "step": 2942 }, { "epoch": 0.35711685475063704, "grad_norm": 2.6223721504211426, "learning_rate": 1.301805674978504e-05, "loss": 0.4995730519294739, "step": 2943 }, { "epoch": 0.35723819924766415, "grad_norm": 6.0837321281433105, "learning_rate": 1.3015600049134014e-05, "loss": 0.44847533106803894, "step": 2944 }, { "epoch": 0.3573595437446912, "grad_norm": 0.8783859014511108, "learning_rate": 1.3013143348482988e-05, "loss": 0.05103648826479912, "step": 2945 }, { "epoch": 0.35748088824171825, "grad_norm": 2.090203046798706, "learning_rate": 1.3010686647831963e-05, "loss": 0.0770326629281044, "step": 2946 }, { "epoch": 0.3576022327387453, "grad_norm": 1.8669445514678955, "learning_rate": 1.3008229947180937e-05, "loss": 0.12285949289798737, "step": 2947 }, { "epoch": 0.35772357723577236, "grad_norm": 1.8406798839569092, "learning_rate": 1.3005773246529911e-05, "loss": 0.11922474205493927, "step": 2948 }, { "epoch": 0.3578449217327994, "grad_norm": 2.202354669570923, "learning_rate": 1.3003316545878885e-05, "loss": 0.15273219347000122, "step": 2949 }, { "epoch": 0.35796626622982647, "grad_norm": 2.0528676509857178, "learning_rate": 1.300085984522786e-05, "loss": 0.2751436233520508, "step": 2950 }, { "epoch": 0.3580876107268535, "grad_norm": 2.0697927474975586, "learning_rate": 1.2998403144576834e-05, "loss": 0.1616508513689041, "step": 2951 }, { "epoch": 0.3582089552238806, "grad_norm": 3.1109538078308105, "learning_rate": 1.2995946443925808e-05, "loss": 0.27921411395072937, "step": 2952 }, { "epoch": 0.35833029972090763, "grad_norm": 3.62821364402771, "learning_rate": 1.2993489743274783e-05, "loss": 0.5124978423118591, "step": 2953 }, { "epoch": 0.35845164421793474, "grad_norm": 3.2712950706481934, "learning_rate": 1.2991033042623757e-05, "loss": 0.4778803586959839, "step": 2954 }, { "epoch": 0.3585729887149618, "grad_norm": 1.9799039363861084, "learning_rate": 1.2988576341972731e-05, "loss": 0.16806745529174805, "step": 2955 }, { "epoch": 0.35869433321198885, "grad_norm": 2.7012410163879395, "learning_rate": 1.2986119641321705e-05, "loss": 0.13724809885025024, "step": 2956 }, { "epoch": 0.3588156777090159, "grad_norm": 2.23858380317688, "learning_rate": 1.2983662940670681e-05, "loss": 0.873268723487854, "step": 2957 }, { "epoch": 0.35893702220604295, "grad_norm": 2.514082670211792, "learning_rate": 1.2981206240019655e-05, "loss": 0.2670159339904785, "step": 2958 }, { "epoch": 0.35905836670307, "grad_norm": 1.5424782037734985, "learning_rate": 1.297874953936863e-05, "loss": 0.11754392087459564, "step": 2959 }, { "epoch": 0.35917971120009706, "grad_norm": 2.0172996520996094, "learning_rate": 1.2976292838717604e-05, "loss": 0.24237750470638275, "step": 2960 }, { "epoch": 0.3593010556971241, "grad_norm": 2.2911813259124756, "learning_rate": 1.2973836138066578e-05, "loss": 0.17581063508987427, "step": 2961 }, { "epoch": 0.35942240019415117, "grad_norm": 2.3171849250793457, "learning_rate": 1.2971379437415553e-05, "loss": 0.23211178183555603, "step": 2962 }, { "epoch": 0.3595437446911783, "grad_norm": 3.187316656112671, "learning_rate": 1.2968922736764527e-05, "loss": 0.35318523645401, "step": 2963 }, { "epoch": 0.35966508918820533, "grad_norm": 2.611950159072876, "learning_rate": 1.2966466036113501e-05, "loss": 0.1411629617214203, "step": 2964 }, { "epoch": 0.3597864336852324, "grad_norm": 2.4547252655029297, "learning_rate": 1.2964009335462475e-05, "loss": 0.36622804403305054, "step": 2965 }, { "epoch": 0.35990777818225944, "grad_norm": 1.1976194381713867, "learning_rate": 1.296155263481145e-05, "loss": 0.27061358094215393, "step": 2966 }, { "epoch": 0.3600291226792865, "grad_norm": 1.6270411014556885, "learning_rate": 1.2959095934160424e-05, "loss": 0.14751648902893066, "step": 2967 }, { "epoch": 0.36015046717631355, "grad_norm": 2.6941215991973877, "learning_rate": 1.2956639233509398e-05, "loss": 0.22208136320114136, "step": 2968 }, { "epoch": 0.3602718116733406, "grad_norm": 0.12202031910419464, "learning_rate": 1.2954182532858372e-05, "loss": 0.0013922062935307622, "step": 2969 }, { "epoch": 0.36039315617036766, "grad_norm": 3.520007848739624, "learning_rate": 1.2951725832207347e-05, "loss": 0.40424954891204834, "step": 2970 }, { "epoch": 0.3605145006673947, "grad_norm": 2.3271749019622803, "learning_rate": 1.2949269131556321e-05, "loss": 0.533889889717102, "step": 2971 }, { "epoch": 0.3606358451644218, "grad_norm": 2.4552700519561768, "learning_rate": 1.2946812430905295e-05, "loss": 0.5242980718612671, "step": 2972 }, { "epoch": 0.3607571896614489, "grad_norm": 1.2487123012542725, "learning_rate": 1.294435573025427e-05, "loss": 0.1968783587217331, "step": 2973 }, { "epoch": 0.3608785341584759, "grad_norm": 2.4111859798431396, "learning_rate": 1.2941899029603244e-05, "loss": 0.20926323533058167, "step": 2974 }, { "epoch": 0.360999878655503, "grad_norm": 2.784630298614502, "learning_rate": 1.2939442328952218e-05, "loss": 0.18628396093845367, "step": 2975 }, { "epoch": 0.36112122315253004, "grad_norm": 2.0924742221832275, "learning_rate": 1.2936985628301192e-05, "loss": 0.36132916808128357, "step": 2976 }, { "epoch": 0.3612425676495571, "grad_norm": 1.6369595527648926, "learning_rate": 1.2934528927650168e-05, "loss": 0.20456495881080627, "step": 2977 }, { "epoch": 0.36136391214658414, "grad_norm": 1.6293615102767944, "learning_rate": 1.2932072226999142e-05, "loss": 0.1595347821712494, "step": 2978 }, { "epoch": 0.3614852566436112, "grad_norm": 2.087491989135742, "learning_rate": 1.2929615526348117e-05, "loss": 0.23917625844478607, "step": 2979 }, { "epoch": 0.36160660114063825, "grad_norm": 2.3202195167541504, "learning_rate": 1.2927158825697091e-05, "loss": 0.19291448593139648, "step": 2980 }, { "epoch": 0.3617279456376653, "grad_norm": 2.6504223346710205, "learning_rate": 1.2924702125046065e-05, "loss": 0.35504570603370667, "step": 2981 }, { "epoch": 0.3618492901346924, "grad_norm": 2.5289902687072754, "learning_rate": 1.292224542439504e-05, "loss": 0.30801528692245483, "step": 2982 }, { "epoch": 0.36197063463171947, "grad_norm": 2.0009565353393555, "learning_rate": 1.2919788723744014e-05, "loss": 0.1884746551513672, "step": 2983 }, { "epoch": 0.3620919791287465, "grad_norm": 3.072634696960449, "learning_rate": 1.2917332023092988e-05, "loss": 0.3839377462863922, "step": 2984 }, { "epoch": 0.3622133236257736, "grad_norm": 2.597742795944214, "learning_rate": 1.2914875322441962e-05, "loss": 0.5811505317687988, "step": 2985 }, { "epoch": 0.36233466812280063, "grad_norm": 2.4145267009735107, "learning_rate": 1.2912418621790936e-05, "loss": 0.443876713514328, "step": 2986 }, { "epoch": 0.3624560126198277, "grad_norm": 1.8467440605163574, "learning_rate": 1.290996192113991e-05, "loss": 0.29026928544044495, "step": 2987 }, { "epoch": 0.36257735711685474, "grad_norm": 2.9077274799346924, "learning_rate": 1.2907505220488885e-05, "loss": 0.35525912046432495, "step": 2988 }, { "epoch": 0.3626987016138818, "grad_norm": 0.22295789420604706, "learning_rate": 1.290504851983786e-05, "loss": 0.004607439041137695, "step": 2989 }, { "epoch": 0.36282004611090884, "grad_norm": 2.0454721450805664, "learning_rate": 1.2902591819186833e-05, "loss": 0.3613908588886261, "step": 2990 }, { "epoch": 0.36294139060793595, "grad_norm": 3.7352137565612793, "learning_rate": 1.2900135118535808e-05, "loss": 0.25977185368537903, "step": 2991 }, { "epoch": 0.363062735104963, "grad_norm": 3.1388018131256104, "learning_rate": 1.2897678417884782e-05, "loss": 0.4856216311454773, "step": 2992 }, { "epoch": 0.36318407960199006, "grad_norm": 1.5494506359100342, "learning_rate": 1.2895221717233756e-05, "loss": 0.1923268884420395, "step": 2993 }, { "epoch": 0.3633054240990171, "grad_norm": 3.564504861831665, "learning_rate": 1.289276501658273e-05, "loss": 0.30987757444381714, "step": 2994 }, { "epoch": 0.36342676859604417, "grad_norm": 2.160773515701294, "learning_rate": 1.2890308315931705e-05, "loss": 0.21949148178100586, "step": 2995 }, { "epoch": 0.3635481130930712, "grad_norm": 3.99308443069458, "learning_rate": 1.288785161528068e-05, "loss": 0.4553235173225403, "step": 2996 }, { "epoch": 0.3636694575900983, "grad_norm": 2.348376989364624, "learning_rate": 1.2885394914629655e-05, "loss": 0.22087374329566956, "step": 2997 }, { "epoch": 0.36379080208712533, "grad_norm": 1.88888680934906, "learning_rate": 1.288293821397863e-05, "loss": 0.5941238403320312, "step": 2998 }, { "epoch": 0.3639121465841524, "grad_norm": 1.6137655973434448, "learning_rate": 1.2880481513327603e-05, "loss": 0.06201152503490448, "step": 2999 }, { "epoch": 0.3640334910811795, "grad_norm": 2.1788697242736816, "learning_rate": 1.2878024812676578e-05, "loss": 0.2939947545528412, "step": 3000 }, { "epoch": 0.36415483557820655, "grad_norm": 2.3649392127990723, "learning_rate": 1.2875568112025552e-05, "loss": 0.2894429564476013, "step": 3001 }, { "epoch": 0.3642761800752336, "grad_norm": 2.135638475418091, "learning_rate": 1.2873111411374526e-05, "loss": 0.2235684096813202, "step": 3002 }, { "epoch": 0.36439752457226066, "grad_norm": 3.520004987716675, "learning_rate": 1.28706547107235e-05, "loss": 0.17777599394321442, "step": 3003 }, { "epoch": 0.3645188690692877, "grad_norm": 3.2360692024230957, "learning_rate": 1.2868198010072475e-05, "loss": 0.229665145277977, "step": 3004 }, { "epoch": 0.36464021356631476, "grad_norm": 2.8983798027038574, "learning_rate": 1.2865741309421449e-05, "loss": 0.388833224773407, "step": 3005 }, { "epoch": 0.3647615580633418, "grad_norm": 1.9312299489974976, "learning_rate": 1.2863284608770423e-05, "loss": 0.3245135247707367, "step": 3006 }, { "epoch": 0.36488290256036887, "grad_norm": 2.141505718231201, "learning_rate": 1.2860827908119398e-05, "loss": 0.39916810393333435, "step": 3007 }, { "epoch": 0.3650042470573959, "grad_norm": 2.692316770553589, "learning_rate": 1.2858371207468372e-05, "loss": 0.3534470200538635, "step": 3008 }, { "epoch": 0.36512559155442303, "grad_norm": 1.6258231401443481, "learning_rate": 1.2855914506817346e-05, "loss": 0.1111140251159668, "step": 3009 }, { "epoch": 0.3652469360514501, "grad_norm": 1.6851928234100342, "learning_rate": 1.285345780616632e-05, "loss": 0.04014979302883148, "step": 3010 }, { "epoch": 0.36536828054847714, "grad_norm": 2.299107313156128, "learning_rate": 1.2851001105515295e-05, "loss": 0.07421331852674484, "step": 3011 }, { "epoch": 0.3654896250455042, "grad_norm": 2.7299962043762207, "learning_rate": 1.2848544404864269e-05, "loss": 0.23941995203495026, "step": 3012 }, { "epoch": 0.36561096954253125, "grad_norm": 2.1895580291748047, "learning_rate": 1.2846087704213243e-05, "loss": 0.39537790417671204, "step": 3013 }, { "epoch": 0.3657323140395583, "grad_norm": 2.276113748550415, "learning_rate": 1.2843631003562217e-05, "loss": 0.32509279251098633, "step": 3014 }, { "epoch": 0.36585365853658536, "grad_norm": 1.897255539894104, "learning_rate": 1.2841174302911192e-05, "loss": 0.1867866814136505, "step": 3015 }, { "epoch": 0.3659750030336124, "grad_norm": 2.16688871383667, "learning_rate": 1.2838717602260168e-05, "loss": 0.21447589993476868, "step": 3016 }, { "epoch": 0.36609634753063947, "grad_norm": 1.9278409481048584, "learning_rate": 1.2836260901609142e-05, "loss": 0.07187424600124359, "step": 3017 }, { "epoch": 0.3662176920276665, "grad_norm": 3.1750142574310303, "learning_rate": 1.2833804200958116e-05, "loss": 0.36346420645713806, "step": 3018 }, { "epoch": 0.36633903652469363, "grad_norm": 2.207082509994507, "learning_rate": 1.2831347500307087e-05, "loss": 0.5422084927558899, "step": 3019 }, { "epoch": 0.3664603810217207, "grad_norm": 2.3459904193878174, "learning_rate": 1.2828890799656061e-05, "loss": 0.21172483265399933, "step": 3020 }, { "epoch": 0.36658172551874774, "grad_norm": 4.062553405761719, "learning_rate": 1.2826434099005035e-05, "loss": 0.29971086978912354, "step": 3021 }, { "epoch": 0.3667030700157748, "grad_norm": 2.1893389225006104, "learning_rate": 1.2823977398354011e-05, "loss": 0.28227633237838745, "step": 3022 }, { "epoch": 0.36682441451280184, "grad_norm": 2.9444973468780518, "learning_rate": 1.2821520697702986e-05, "loss": 0.6413102149963379, "step": 3023 }, { "epoch": 0.3669457590098289, "grad_norm": 1.4639172554016113, "learning_rate": 1.281906399705196e-05, "loss": 0.03601495176553726, "step": 3024 }, { "epoch": 0.36706710350685595, "grad_norm": 3.3567471504211426, "learning_rate": 1.2816607296400934e-05, "loss": 0.6441450715065002, "step": 3025 }, { "epoch": 0.367188448003883, "grad_norm": 3.2175350189208984, "learning_rate": 1.2814150595749908e-05, "loss": 0.4079563319683075, "step": 3026 }, { "epoch": 0.36730979250091006, "grad_norm": 2.4198379516601562, "learning_rate": 1.2811693895098883e-05, "loss": 0.3430071473121643, "step": 3027 }, { "epoch": 0.36743113699793717, "grad_norm": 2.058039426803589, "learning_rate": 1.2809237194447857e-05, "loss": 0.29600954055786133, "step": 3028 }, { "epoch": 0.3675524814949642, "grad_norm": 1.824734091758728, "learning_rate": 1.2806780493796831e-05, "loss": 0.1772790551185608, "step": 3029 }, { "epoch": 0.3676738259919913, "grad_norm": 2.377044200897217, "learning_rate": 1.2804323793145805e-05, "loss": 0.24278861284255981, "step": 3030 }, { "epoch": 0.36779517048901833, "grad_norm": 2.844923257827759, "learning_rate": 1.280186709249478e-05, "loss": 0.6934537887573242, "step": 3031 }, { "epoch": 0.3679165149860454, "grad_norm": 1.1115227937698364, "learning_rate": 1.2799410391843754e-05, "loss": 0.037027470767498016, "step": 3032 }, { "epoch": 0.36803785948307244, "grad_norm": 1.604121208190918, "learning_rate": 1.2796953691192728e-05, "loss": 0.06987448036670685, "step": 3033 }, { "epoch": 0.3681592039800995, "grad_norm": 2.811095714569092, "learning_rate": 1.2794496990541703e-05, "loss": 0.5348390936851501, "step": 3034 }, { "epoch": 0.36828054847712655, "grad_norm": 2.9516241550445557, "learning_rate": 1.2792040289890677e-05, "loss": 0.13303931057453156, "step": 3035 }, { "epoch": 0.3684018929741536, "grad_norm": 2.7784767150878906, "learning_rate": 1.2789583589239651e-05, "loss": 0.32491278648376465, "step": 3036 }, { "epoch": 0.3685232374711807, "grad_norm": 3.5733752250671387, "learning_rate": 1.2787126888588625e-05, "loss": 0.49746209383010864, "step": 3037 }, { "epoch": 0.36864458196820776, "grad_norm": 3.677290916442871, "learning_rate": 1.27846701879376e-05, "loss": 0.18780988454818726, "step": 3038 }, { "epoch": 0.3687659264652348, "grad_norm": 2.170703411102295, "learning_rate": 1.2782213487286574e-05, "loss": 0.18865230679512024, "step": 3039 }, { "epoch": 0.36888727096226187, "grad_norm": 2.0496461391448975, "learning_rate": 1.2779756786635548e-05, "loss": 0.12246742844581604, "step": 3040 }, { "epoch": 0.3690086154592889, "grad_norm": 4.869044303894043, "learning_rate": 1.2777300085984522e-05, "loss": 0.2805744409561157, "step": 3041 }, { "epoch": 0.369129959956316, "grad_norm": 3.1038382053375244, "learning_rate": 1.2774843385333498e-05, "loss": 0.4670161306858063, "step": 3042 }, { "epoch": 0.36925130445334303, "grad_norm": 2.6496403217315674, "learning_rate": 1.2772386684682473e-05, "loss": 0.1732165813446045, "step": 3043 }, { "epoch": 0.3693726489503701, "grad_norm": 3.3521132469177246, "learning_rate": 1.2769929984031447e-05, "loss": 0.16238215565681458, "step": 3044 }, { "epoch": 0.36949399344739714, "grad_norm": 1.6630042791366577, "learning_rate": 1.2767473283380421e-05, "loss": 0.22390274703502655, "step": 3045 }, { "epoch": 0.3696153379444242, "grad_norm": 2.7344253063201904, "learning_rate": 1.2765016582729395e-05, "loss": 0.04648754373192787, "step": 3046 }, { "epoch": 0.3697366824414513, "grad_norm": 2.4483656883239746, "learning_rate": 1.276255988207837e-05, "loss": 0.23034049570560455, "step": 3047 }, { "epoch": 0.36985802693847836, "grad_norm": 2.579706907272339, "learning_rate": 1.2760103181427344e-05, "loss": 0.4101458489894867, "step": 3048 }, { "epoch": 0.3699793714355054, "grad_norm": 5.1360931396484375, "learning_rate": 1.2757646480776318e-05, "loss": 0.795612633228302, "step": 3049 }, { "epoch": 0.37010071593253246, "grad_norm": 3.743968963623047, "learning_rate": 1.2755189780125292e-05, "loss": 0.34071826934814453, "step": 3050 }, { "epoch": 0.3702220604295595, "grad_norm": 2.5002975463867188, "learning_rate": 1.2752733079474267e-05, "loss": 0.2797744870185852, "step": 3051 }, { "epoch": 0.3703434049265866, "grad_norm": 3.206812620162964, "learning_rate": 1.275027637882324e-05, "loss": 0.31890809535980225, "step": 3052 }, { "epoch": 0.3704647494236136, "grad_norm": 2.2290642261505127, "learning_rate": 1.2747819678172215e-05, "loss": 0.3868074417114258, "step": 3053 }, { "epoch": 0.3705860939206407, "grad_norm": 1.7649569511413574, "learning_rate": 1.274536297752119e-05, "loss": 0.5119009613990784, "step": 3054 }, { "epoch": 0.37070743841766773, "grad_norm": 2.605090856552124, "learning_rate": 1.2742906276870164e-05, "loss": 0.13250136375427246, "step": 3055 }, { "epoch": 0.37082878291469484, "grad_norm": 2.710275650024414, "learning_rate": 1.2740449576219138e-05, "loss": 0.5559759140014648, "step": 3056 }, { "epoch": 0.3709501274117219, "grad_norm": 3.008519172668457, "learning_rate": 1.2737992875568112e-05, "loss": 0.43797487020492554, "step": 3057 }, { "epoch": 0.37107147190874895, "grad_norm": 2.1876819133758545, "learning_rate": 1.2735536174917086e-05, "loss": 0.13243570923805237, "step": 3058 }, { "epoch": 0.371192816405776, "grad_norm": 1.1735800504684448, "learning_rate": 1.273307947426606e-05, "loss": 0.04852079600095749, "step": 3059 }, { "epoch": 0.37131416090280306, "grad_norm": 2.3674979209899902, "learning_rate": 1.2730622773615035e-05, "loss": 0.21951356530189514, "step": 3060 }, { "epoch": 0.3714355053998301, "grad_norm": 2.802356719970703, "learning_rate": 1.272816607296401e-05, "loss": 0.5340931415557861, "step": 3061 }, { "epoch": 0.37155684989685717, "grad_norm": 2.132953643798828, "learning_rate": 1.2725709372312985e-05, "loss": 0.21803082525730133, "step": 3062 }, { "epoch": 0.3716781943938842, "grad_norm": 2.711453676223755, "learning_rate": 1.272325267166196e-05, "loss": 0.20228137075901031, "step": 3063 }, { "epoch": 0.3717995388909113, "grad_norm": 3.1502573490142822, "learning_rate": 1.2720795971010934e-05, "loss": 0.3638427257537842, "step": 3064 }, { "epoch": 0.3719208833879384, "grad_norm": 1.838789463043213, "learning_rate": 1.2718339270359908e-05, "loss": 0.08531152456998825, "step": 3065 }, { "epoch": 0.37204222788496544, "grad_norm": 4.831695556640625, "learning_rate": 1.2715882569708882e-05, "loss": 0.3012272119522095, "step": 3066 }, { "epoch": 0.3721635723819925, "grad_norm": 2.007178783416748, "learning_rate": 1.2713425869057856e-05, "loss": 0.3394854962825775, "step": 3067 }, { "epoch": 0.37228491687901955, "grad_norm": 2.307107448577881, "learning_rate": 1.271096916840683e-05, "loss": 0.19313615560531616, "step": 3068 }, { "epoch": 0.3724062613760466, "grad_norm": 1.7929253578186035, "learning_rate": 1.2708512467755805e-05, "loss": 0.3770360052585602, "step": 3069 }, { "epoch": 0.37252760587307365, "grad_norm": 3.1486785411834717, "learning_rate": 1.270605576710478e-05, "loss": 0.12018324434757233, "step": 3070 }, { "epoch": 0.3726489503701007, "grad_norm": 1.9934414625167847, "learning_rate": 1.2703599066453753e-05, "loss": 0.22126701474189758, "step": 3071 }, { "epoch": 0.37277029486712776, "grad_norm": 2.5344128608703613, "learning_rate": 1.2701142365802728e-05, "loss": 0.4378224313259125, "step": 3072 }, { "epoch": 0.3728916393641548, "grad_norm": 2.389474868774414, "learning_rate": 1.2698685665151702e-05, "loss": 0.15001432597637177, "step": 3073 }, { "epoch": 0.37301298386118187, "grad_norm": 2.823737382888794, "learning_rate": 1.2696228964500676e-05, "loss": 0.2103637009859085, "step": 3074 }, { "epoch": 0.373134328358209, "grad_norm": 1.848333716392517, "learning_rate": 1.269377226384965e-05, "loss": 0.1431073397397995, "step": 3075 }, { "epoch": 0.37325567285523603, "grad_norm": 1.597927212715149, "learning_rate": 1.2691315563198625e-05, "loss": 0.2240583747625351, "step": 3076 }, { "epoch": 0.3733770173522631, "grad_norm": 2.668405055999756, "learning_rate": 1.2688858862547599e-05, "loss": 0.07994963228702545, "step": 3077 }, { "epoch": 0.37349836184929014, "grad_norm": 3.062838554382324, "learning_rate": 1.2686402161896573e-05, "loss": 0.5268748998641968, "step": 3078 }, { "epoch": 0.3736197063463172, "grad_norm": 1.37921142578125, "learning_rate": 1.2683945461245548e-05, "loss": 0.1220514327287674, "step": 3079 }, { "epoch": 0.37374105084334425, "grad_norm": 1.977890133857727, "learning_rate": 1.2681488760594522e-05, "loss": 0.3197315037250519, "step": 3080 }, { "epoch": 0.3738623953403713, "grad_norm": 2.71738862991333, "learning_rate": 1.2679032059943496e-05, "loss": 0.22290587425231934, "step": 3081 }, { "epoch": 0.37398373983739835, "grad_norm": 1.8566792011260986, "learning_rate": 1.2676575359292472e-05, "loss": 0.20437034964561462, "step": 3082 }, { "epoch": 0.3741050843344254, "grad_norm": 3.2051069736480713, "learning_rate": 1.2674118658641446e-05, "loss": 0.22961628437042236, "step": 3083 }, { "epoch": 0.3742264288314525, "grad_norm": 2.850677251815796, "learning_rate": 1.267166195799042e-05, "loss": 0.40382158756256104, "step": 3084 }, { "epoch": 0.37434777332847957, "grad_norm": 2.2020742893218994, "learning_rate": 1.2669205257339395e-05, "loss": 0.08760152012109756, "step": 3085 }, { "epoch": 0.3744691178255066, "grad_norm": 2.463630199432373, "learning_rate": 1.2666748556688369e-05, "loss": 0.5425976514816284, "step": 3086 }, { "epoch": 0.3745904623225337, "grad_norm": 2.6825172901153564, "learning_rate": 1.2664291856037343e-05, "loss": 0.5442331433296204, "step": 3087 }, { "epoch": 0.37471180681956073, "grad_norm": 2.0428361892700195, "learning_rate": 1.2661835155386318e-05, "loss": 0.1842765361070633, "step": 3088 }, { "epoch": 0.3748331513165878, "grad_norm": 2.6168863773345947, "learning_rate": 1.2659378454735292e-05, "loss": 0.17524027824401855, "step": 3089 }, { "epoch": 0.37495449581361484, "grad_norm": 1.9933257102966309, "learning_rate": 1.2656921754084266e-05, "loss": 0.19788393378257751, "step": 3090 }, { "epoch": 0.3750758403106419, "grad_norm": 1.8031022548675537, "learning_rate": 1.265446505343324e-05, "loss": 0.2875882685184479, "step": 3091 }, { "epoch": 0.37519718480766895, "grad_norm": 3.0728745460510254, "learning_rate": 1.2652008352782215e-05, "loss": 0.34630057215690613, "step": 3092 }, { "epoch": 0.37531852930469606, "grad_norm": 3.2881243228912354, "learning_rate": 1.2649551652131189e-05, "loss": 0.4444838762283325, "step": 3093 }, { "epoch": 0.3754398738017231, "grad_norm": 1.610763669013977, "learning_rate": 1.2647094951480163e-05, "loss": 0.08967998623847961, "step": 3094 }, { "epoch": 0.37556121829875017, "grad_norm": 0.7797218561172485, "learning_rate": 1.2644638250829137e-05, "loss": 0.014695637859404087, "step": 3095 }, { "epoch": 0.3756825627957772, "grad_norm": 4.406348705291748, "learning_rate": 1.2642181550178112e-05, "loss": 0.10652171820402145, "step": 3096 }, { "epoch": 0.3758039072928043, "grad_norm": 5.548430919647217, "learning_rate": 1.2639724849527086e-05, "loss": 0.7209188938140869, "step": 3097 }, { "epoch": 0.3759252517898313, "grad_norm": 3.451643705368042, "learning_rate": 1.263726814887606e-05, "loss": 0.31551826000213623, "step": 3098 }, { "epoch": 0.3760465962868584, "grad_norm": 2.1619338989257812, "learning_rate": 1.2634811448225034e-05, "loss": 0.3468658924102783, "step": 3099 }, { "epoch": 0.37616794078388543, "grad_norm": 0.46372610330581665, "learning_rate": 1.2632354747574009e-05, "loss": 0.007035141810774803, "step": 3100 }, { "epoch": 0.3762892852809125, "grad_norm": 3.1669082641601562, "learning_rate": 1.2629898046922983e-05, "loss": 0.5927947759628296, "step": 3101 }, { "epoch": 0.37641062977793954, "grad_norm": 2.1995599269866943, "learning_rate": 1.2627441346271959e-05, "loss": 0.19552944600582123, "step": 3102 }, { "epoch": 0.37653197427496665, "grad_norm": 1.4851335287094116, "learning_rate": 1.2624984645620933e-05, "loss": 0.02476375550031662, "step": 3103 }, { "epoch": 0.3766533187719937, "grad_norm": 3.12634015083313, "learning_rate": 1.2622527944969907e-05, "loss": 0.2813194990158081, "step": 3104 }, { "epoch": 0.37677466326902076, "grad_norm": 2.6773481369018555, "learning_rate": 1.2620071244318882e-05, "loss": 0.20298711955547333, "step": 3105 }, { "epoch": 0.3768960077660478, "grad_norm": 2.8419148921966553, "learning_rate": 1.2617614543667856e-05, "loss": 0.31778767704963684, "step": 3106 }, { "epoch": 0.37701735226307487, "grad_norm": 3.2999582290649414, "learning_rate": 1.261515784301683e-05, "loss": 0.27695170044898987, "step": 3107 }, { "epoch": 0.3771386967601019, "grad_norm": 2.0065877437591553, "learning_rate": 1.2612701142365804e-05, "loss": 0.3481106758117676, "step": 3108 }, { "epoch": 0.377260041257129, "grad_norm": 1.229748249053955, "learning_rate": 1.2610244441714779e-05, "loss": 0.037845637649297714, "step": 3109 }, { "epoch": 0.37738138575415603, "grad_norm": 2.5632731914520264, "learning_rate": 1.2607787741063753e-05, "loss": 0.3110155165195465, "step": 3110 }, { "epoch": 0.3775027302511831, "grad_norm": 3.8191258907318115, "learning_rate": 1.2605331040412727e-05, "loss": 0.5061408281326294, "step": 3111 }, { "epoch": 0.3776240747482102, "grad_norm": 2.7737317085266113, "learning_rate": 1.2602874339761701e-05, "loss": 0.603361964225769, "step": 3112 }, { "epoch": 0.37774541924523725, "grad_norm": 2.004978656768799, "learning_rate": 1.2600417639110676e-05, "loss": 0.19077220559120178, "step": 3113 }, { "epoch": 0.3778667637422643, "grad_norm": 4.966524124145508, "learning_rate": 1.259796093845965e-05, "loss": 0.32509520649909973, "step": 3114 }, { "epoch": 0.37798810823929135, "grad_norm": 4.069063186645508, "learning_rate": 1.2595504237808624e-05, "loss": 0.4654199481010437, "step": 3115 }, { "epoch": 0.3781094527363184, "grad_norm": 2.699005126953125, "learning_rate": 1.2593047537157598e-05, "loss": 0.20681169629096985, "step": 3116 }, { "epoch": 0.37823079723334546, "grad_norm": 2.2115743160247803, "learning_rate": 1.2590590836506573e-05, "loss": 0.4543258249759674, "step": 3117 }, { "epoch": 0.3783521417303725, "grad_norm": 3.3127706050872803, "learning_rate": 1.2588134135855547e-05, "loss": 0.3135000765323639, "step": 3118 }, { "epoch": 0.37847348622739957, "grad_norm": 2.7028489112854004, "learning_rate": 1.2585677435204521e-05, "loss": 0.49060899019241333, "step": 3119 }, { "epoch": 0.3785948307244266, "grad_norm": 4.582715034484863, "learning_rate": 1.2583220734553495e-05, "loss": 0.5623689293861389, "step": 3120 }, { "epoch": 0.37871617522145373, "grad_norm": 2.5746421813964844, "learning_rate": 1.258076403390247e-05, "loss": 0.2465989589691162, "step": 3121 }, { "epoch": 0.3788375197184808, "grad_norm": 4.443078517913818, "learning_rate": 1.2578307333251446e-05, "loss": 0.4048062264919281, "step": 3122 }, { "epoch": 0.37895886421550784, "grad_norm": 2.589857816696167, "learning_rate": 1.257585063260042e-05, "loss": 0.4509902596473694, "step": 3123 }, { "epoch": 0.3790802087125349, "grad_norm": 1.951952576637268, "learning_rate": 1.2573393931949394e-05, "loss": 0.1623089760541916, "step": 3124 }, { "epoch": 0.37920155320956195, "grad_norm": 3.1534080505371094, "learning_rate": 1.2570937231298368e-05, "loss": 0.46682316064834595, "step": 3125 }, { "epoch": 0.379322897706589, "grad_norm": 3.0295398235321045, "learning_rate": 1.2568480530647343e-05, "loss": 0.3204183280467987, "step": 3126 }, { "epoch": 0.37944424220361606, "grad_norm": 2.0212247371673584, "learning_rate": 1.2566023829996317e-05, "loss": 0.17289389669895172, "step": 3127 }, { "epoch": 0.3795655867006431, "grad_norm": 3.1270949840545654, "learning_rate": 1.2563567129345291e-05, "loss": 0.9947634935379028, "step": 3128 }, { "epoch": 0.37968693119767016, "grad_norm": 2.232048273086548, "learning_rate": 1.2561110428694266e-05, "loss": 0.2776552438735962, "step": 3129 }, { "epoch": 0.3798082756946973, "grad_norm": 2.834043502807617, "learning_rate": 1.255865372804324e-05, "loss": 0.13570261001586914, "step": 3130 }, { "epoch": 0.3799296201917243, "grad_norm": 2.6776390075683594, "learning_rate": 1.2556197027392214e-05, "loss": 0.5355879664421082, "step": 3131 }, { "epoch": 0.3800509646887514, "grad_norm": 2.001067638397217, "learning_rate": 1.2553740326741188e-05, "loss": 0.18915635347366333, "step": 3132 }, { "epoch": 0.38017230918577843, "grad_norm": 2.9914603233337402, "learning_rate": 1.2551283626090163e-05, "loss": 0.6213992834091187, "step": 3133 }, { "epoch": 0.3802936536828055, "grad_norm": 2.629340887069702, "learning_rate": 1.2548826925439137e-05, "loss": 0.29846295714378357, "step": 3134 }, { "epoch": 0.38041499817983254, "grad_norm": 3.0676817893981934, "learning_rate": 1.2546370224788111e-05, "loss": 0.16866937279701233, "step": 3135 }, { "epoch": 0.3805363426768596, "grad_norm": 2.5763516426086426, "learning_rate": 1.2543913524137085e-05, "loss": 0.2580344080924988, "step": 3136 }, { "epoch": 0.38065768717388665, "grad_norm": 3.0476651191711426, "learning_rate": 1.254145682348606e-05, "loss": 0.3251637816429138, "step": 3137 }, { "epoch": 0.3807790316709137, "grad_norm": 2.6623525619506836, "learning_rate": 1.2539000122835034e-05, "loss": 0.3432225286960602, "step": 3138 }, { "epoch": 0.38090037616794076, "grad_norm": 2.9788427352905273, "learning_rate": 1.2536543422184008e-05, "loss": 0.17502693831920624, "step": 3139 }, { "epoch": 0.38102172066496787, "grad_norm": 3.0660781860351562, "learning_rate": 1.2534086721532982e-05, "loss": 0.4956677556037903, "step": 3140 }, { "epoch": 0.3811430651619949, "grad_norm": 1.9002922773361206, "learning_rate": 1.2531630020881958e-05, "loss": 0.17276537418365479, "step": 3141 }, { "epoch": 0.381264409659022, "grad_norm": 3.2068376541137695, "learning_rate": 1.2529173320230933e-05, "loss": 0.19760315120220184, "step": 3142 }, { "epoch": 0.38138575415604903, "grad_norm": 2.6643495559692383, "learning_rate": 1.2526716619579907e-05, "loss": 0.29236993193626404, "step": 3143 }, { "epoch": 0.3815070986530761, "grad_norm": 2.8827171325683594, "learning_rate": 1.2524259918928881e-05, "loss": 0.2830185294151306, "step": 3144 }, { "epoch": 0.38162844315010314, "grad_norm": 3.2341325283050537, "learning_rate": 1.2521803218277855e-05, "loss": 0.20774394273757935, "step": 3145 }, { "epoch": 0.3817497876471302, "grad_norm": 2.291086196899414, "learning_rate": 1.251934651762683e-05, "loss": 0.4800224304199219, "step": 3146 }, { "epoch": 0.38187113214415724, "grad_norm": 3.330087661743164, "learning_rate": 1.2516889816975804e-05, "loss": 0.6222670078277588, "step": 3147 }, { "epoch": 0.3819924766411843, "grad_norm": 2.547192335128784, "learning_rate": 1.2514433116324778e-05, "loss": 0.38417696952819824, "step": 3148 }, { "epoch": 0.3821138211382114, "grad_norm": 2.1112709045410156, "learning_rate": 1.2511976415673752e-05, "loss": 0.25194817781448364, "step": 3149 }, { "epoch": 0.38223516563523846, "grad_norm": 3.026857376098633, "learning_rate": 1.2509519715022727e-05, "loss": 0.3098749816417694, "step": 3150 }, { "epoch": 0.3823565101322655, "grad_norm": 1.1321700811386108, "learning_rate": 1.2507063014371701e-05, "loss": 0.059583015739917755, "step": 3151 }, { "epoch": 0.38247785462929257, "grad_norm": 2.0039877891540527, "learning_rate": 1.2504606313720675e-05, "loss": 0.11419906467199326, "step": 3152 }, { "epoch": 0.3825991991263196, "grad_norm": 2.0959770679473877, "learning_rate": 1.250214961306965e-05, "loss": 0.27986180782318115, "step": 3153 }, { "epoch": 0.3827205436233467, "grad_norm": 2.224518060684204, "learning_rate": 1.2499692912418622e-05, "loss": 0.4512319564819336, "step": 3154 }, { "epoch": 0.38284188812037373, "grad_norm": 3.628472328186035, "learning_rate": 1.2497236211767596e-05, "loss": 0.17027032375335693, "step": 3155 }, { "epoch": 0.3829632326174008, "grad_norm": 2.3958566188812256, "learning_rate": 1.249477951111657e-05, "loss": 0.4874890148639679, "step": 3156 }, { "epoch": 0.38308457711442784, "grad_norm": 3.159470319747925, "learning_rate": 1.2492322810465545e-05, "loss": 0.22992083430290222, "step": 3157 }, { "epoch": 0.38320592161145495, "grad_norm": 2.3461990356445312, "learning_rate": 1.2489866109814519e-05, "loss": 0.17536038160324097, "step": 3158 }, { "epoch": 0.383327266108482, "grad_norm": 2.364351749420166, "learning_rate": 1.2487409409163493e-05, "loss": 0.17691849172115326, "step": 3159 }, { "epoch": 0.38344861060550905, "grad_norm": 2.3514404296875, "learning_rate": 1.2484952708512468e-05, "loss": 0.6298539638519287, "step": 3160 }, { "epoch": 0.3835699551025361, "grad_norm": 2.537447452545166, "learning_rate": 1.2482496007861442e-05, "loss": 0.35013389587402344, "step": 3161 }, { "epoch": 0.38369129959956316, "grad_norm": 2.3254945278167725, "learning_rate": 1.2480039307210416e-05, "loss": 0.3717833161354065, "step": 3162 }, { "epoch": 0.3838126440965902, "grad_norm": 2.0248873233795166, "learning_rate": 1.247758260655939e-05, "loss": 0.46691590547561646, "step": 3163 }, { "epoch": 0.38393398859361727, "grad_norm": 2.308234214782715, "learning_rate": 1.2475125905908365e-05, "loss": 0.2653298079967499, "step": 3164 }, { "epoch": 0.3840553330906443, "grad_norm": 2.288908004760742, "learning_rate": 1.2472669205257339e-05, "loss": 0.33068254590034485, "step": 3165 }, { "epoch": 0.3841766775876714, "grad_norm": 2.5129432678222656, "learning_rate": 1.2470212504606313e-05, "loss": 0.7068184018135071, "step": 3166 }, { "epoch": 0.38429802208469843, "grad_norm": 0.04241632670164108, "learning_rate": 1.2467755803955289e-05, "loss": 0.0002795231994241476, "step": 3167 }, { "epoch": 0.38441936658172554, "grad_norm": 2.9558823108673096, "learning_rate": 1.2465299103304263e-05, "loss": 0.5982683897018433, "step": 3168 }, { "epoch": 0.3845407110787526, "grad_norm": 3.4366278648376465, "learning_rate": 1.2462842402653238e-05, "loss": 0.1740218549966812, "step": 3169 }, { "epoch": 0.38466205557577965, "grad_norm": 1.9050742387771606, "learning_rate": 1.2460385702002212e-05, "loss": 0.3764839172363281, "step": 3170 }, { "epoch": 0.3847834000728067, "grad_norm": 2.1938631534576416, "learning_rate": 1.2457929001351186e-05, "loss": 0.22942158579826355, "step": 3171 }, { "epoch": 0.38490474456983376, "grad_norm": 1.5889396667480469, "learning_rate": 1.245547230070016e-05, "loss": 0.4512004852294922, "step": 3172 }, { "epoch": 0.3850260890668608, "grad_norm": 2.1552133560180664, "learning_rate": 1.2453015600049135e-05, "loss": 0.41254958510398865, "step": 3173 }, { "epoch": 0.38514743356388786, "grad_norm": 2.0876870155334473, "learning_rate": 1.2450558899398109e-05, "loss": 0.18605846166610718, "step": 3174 }, { "epoch": 0.3852687780609149, "grad_norm": 1.8509613275527954, "learning_rate": 1.2448102198747083e-05, "loss": 0.054551247507333755, "step": 3175 }, { "epoch": 0.38539012255794197, "grad_norm": 1.963201880455017, "learning_rate": 1.2445645498096057e-05, "loss": 0.11149666458368301, "step": 3176 }, { "epoch": 0.3855114670549691, "grad_norm": 1.8932554721832275, "learning_rate": 1.2443188797445032e-05, "loss": 0.15254680812358856, "step": 3177 }, { "epoch": 0.38563281155199614, "grad_norm": 1.92061448097229, "learning_rate": 1.2440732096794006e-05, "loss": 0.2103467434644699, "step": 3178 }, { "epoch": 0.3857541560490232, "grad_norm": 3.4142160415649414, "learning_rate": 1.243827539614298e-05, "loss": 0.4288268983364105, "step": 3179 }, { "epoch": 0.38587550054605024, "grad_norm": 1.7066508531570435, "learning_rate": 1.2435818695491954e-05, "loss": 0.23310059309005737, "step": 3180 }, { "epoch": 0.3859968450430773, "grad_norm": 1.9355300664901733, "learning_rate": 1.2433361994840929e-05, "loss": 0.1703137308359146, "step": 3181 }, { "epoch": 0.38611818954010435, "grad_norm": 2.196570634841919, "learning_rate": 1.2430905294189903e-05, "loss": 0.3992791175842285, "step": 3182 }, { "epoch": 0.3862395340371314, "grad_norm": 2.8637094497680664, "learning_rate": 1.2428448593538877e-05, "loss": 0.2217373102903366, "step": 3183 }, { "epoch": 0.38636087853415846, "grad_norm": 2.239389657974243, "learning_rate": 1.2425991892887851e-05, "loss": 0.16824842989444733, "step": 3184 }, { "epoch": 0.3864822230311855, "grad_norm": 1.2548781633377075, "learning_rate": 1.2423535192236826e-05, "loss": 0.3385087847709656, "step": 3185 }, { "epoch": 0.3866035675282126, "grad_norm": 1.8939255475997925, "learning_rate": 1.24210784915858e-05, "loss": 0.17056138813495636, "step": 3186 }, { "epoch": 0.3867249120252397, "grad_norm": 2.307015895843506, "learning_rate": 1.2418621790934776e-05, "loss": 0.26844489574432373, "step": 3187 }, { "epoch": 0.38684625652226673, "grad_norm": 2.7086517810821533, "learning_rate": 1.241616509028375e-05, "loss": 0.46166932582855225, "step": 3188 }, { "epoch": 0.3869676010192938, "grad_norm": 1.6944949626922607, "learning_rate": 1.2413708389632724e-05, "loss": 0.25167518854141235, "step": 3189 }, { "epoch": 0.38708894551632084, "grad_norm": 2.3843600749969482, "learning_rate": 1.2411251688981699e-05, "loss": 0.13508471846580505, "step": 3190 }, { "epoch": 0.3872102900133479, "grad_norm": 2.2334437370300293, "learning_rate": 1.2408794988330673e-05, "loss": 0.4638756215572357, "step": 3191 }, { "epoch": 0.38733163451037494, "grad_norm": 1.9633973836898804, "learning_rate": 1.2406338287679647e-05, "loss": 0.28580886125564575, "step": 3192 }, { "epoch": 0.387452979007402, "grad_norm": 2.670125722885132, "learning_rate": 1.2403881587028621e-05, "loss": 0.3812810480594635, "step": 3193 }, { "epoch": 0.38757432350442905, "grad_norm": 1.94724702835083, "learning_rate": 1.2401424886377596e-05, "loss": 0.060899440199136734, "step": 3194 }, { "epoch": 0.3876956680014561, "grad_norm": 3.4984238147735596, "learning_rate": 1.239896818572657e-05, "loss": 0.2988520562648773, "step": 3195 }, { "epoch": 0.3878170124984832, "grad_norm": 3.7518434524536133, "learning_rate": 1.2396511485075544e-05, "loss": 0.4453827738761902, "step": 3196 }, { "epoch": 0.38793835699551027, "grad_norm": 1.590352177619934, "learning_rate": 1.2394054784424518e-05, "loss": 0.14353054761886597, "step": 3197 }, { "epoch": 0.3880597014925373, "grad_norm": 2.1668214797973633, "learning_rate": 1.2391598083773493e-05, "loss": 0.4555373191833496, "step": 3198 }, { "epoch": 0.3881810459895644, "grad_norm": 2.0084173679351807, "learning_rate": 1.2389141383122467e-05, "loss": 0.4028552770614624, "step": 3199 }, { "epoch": 0.38830239048659143, "grad_norm": 2.393423318862915, "learning_rate": 1.2386684682471441e-05, "loss": 0.5026838779449463, "step": 3200 }, { "epoch": 0.3884237349836185, "grad_norm": 2.0486631393432617, "learning_rate": 1.2384227981820415e-05, "loss": 0.24359053373336792, "step": 3201 }, { "epoch": 0.38854507948064554, "grad_norm": 2.0221498012542725, "learning_rate": 1.238177128116939e-05, "loss": 0.2597520053386688, "step": 3202 }, { "epoch": 0.3886664239776726, "grad_norm": 3.5579679012298584, "learning_rate": 1.2379314580518364e-05, "loss": 0.2755480706691742, "step": 3203 }, { "epoch": 0.38878776847469965, "grad_norm": 2.409950017929077, "learning_rate": 1.2376857879867338e-05, "loss": 0.42971426248550415, "step": 3204 }, { "epoch": 0.38890911297172676, "grad_norm": 1.7714725732803345, "learning_rate": 1.2374401179216313e-05, "loss": 0.36897578835487366, "step": 3205 }, { "epoch": 0.3890304574687538, "grad_norm": 1.6995115280151367, "learning_rate": 1.2371944478565287e-05, "loss": 0.10867939889431, "step": 3206 }, { "epoch": 0.38915180196578086, "grad_norm": 3.117710828781128, "learning_rate": 1.2369487777914263e-05, "loss": 0.233770951628685, "step": 3207 }, { "epoch": 0.3892731464628079, "grad_norm": 1.8280388116836548, "learning_rate": 1.2367031077263237e-05, "loss": 0.15694071352481842, "step": 3208 }, { "epoch": 0.38939449095983497, "grad_norm": 2.1613285541534424, "learning_rate": 1.2364574376612211e-05, "loss": 0.2800590991973877, "step": 3209 }, { "epoch": 0.389515835456862, "grad_norm": 3.0443708896636963, "learning_rate": 1.2362117675961185e-05, "loss": 0.2745928466320038, "step": 3210 }, { "epoch": 0.3896371799538891, "grad_norm": 2.057307243347168, "learning_rate": 1.235966097531016e-05, "loss": 0.2623305022716522, "step": 3211 }, { "epoch": 0.38975852445091613, "grad_norm": 2.3027143478393555, "learning_rate": 1.2357204274659134e-05, "loss": 0.22861507534980774, "step": 3212 }, { "epoch": 0.3898798689479432, "grad_norm": 1.321251392364502, "learning_rate": 1.2354747574008108e-05, "loss": 0.048757895827293396, "step": 3213 }, { "epoch": 0.3900012134449703, "grad_norm": 3.1153645515441895, "learning_rate": 1.2352290873357083e-05, "loss": 0.5409466624259949, "step": 3214 }, { "epoch": 0.39012255794199735, "grad_norm": 2.7181615829467773, "learning_rate": 1.2349834172706057e-05, "loss": 0.2796017825603485, "step": 3215 }, { "epoch": 0.3902439024390244, "grad_norm": 2.5121684074401855, "learning_rate": 1.2347377472055031e-05, "loss": 0.11338968575000763, "step": 3216 }, { "epoch": 0.39036524693605146, "grad_norm": 4.269004821777344, "learning_rate": 1.2344920771404005e-05, "loss": 0.6595829129219055, "step": 3217 }, { "epoch": 0.3904865914330785, "grad_norm": 2.7294209003448486, "learning_rate": 1.234246407075298e-05, "loss": 0.0769912600517273, "step": 3218 }, { "epoch": 0.39060793593010557, "grad_norm": 2.528923749923706, "learning_rate": 1.2340007370101954e-05, "loss": 0.1698744297027588, "step": 3219 }, { "epoch": 0.3907292804271326, "grad_norm": 2.590547800064087, "learning_rate": 1.2337550669450928e-05, "loss": 0.16006270051002502, "step": 3220 }, { "epoch": 0.3908506249241597, "grad_norm": 2.1922099590301514, "learning_rate": 1.2335093968799902e-05, "loss": 0.4186220169067383, "step": 3221 }, { "epoch": 0.3909719694211867, "grad_norm": 3.8413069248199463, "learning_rate": 1.2332637268148877e-05, "loss": 0.3306880593299866, "step": 3222 }, { "epoch": 0.39109331391821384, "grad_norm": 2.2855052947998047, "learning_rate": 1.2330180567497851e-05, "loss": 0.1272583305835724, "step": 3223 }, { "epoch": 0.3912146584152409, "grad_norm": 1.7248187065124512, "learning_rate": 1.2327723866846825e-05, "loss": 0.3310267925262451, "step": 3224 }, { "epoch": 0.39133600291226794, "grad_norm": 3.021890163421631, "learning_rate": 1.23252671661958e-05, "loss": 0.0816137045621872, "step": 3225 }, { "epoch": 0.391457347409295, "grad_norm": 1.641603708267212, "learning_rate": 1.2322810465544774e-05, "loss": 0.15734626352787018, "step": 3226 }, { "epoch": 0.39157869190632205, "grad_norm": 2.941344976425171, "learning_rate": 1.232035376489375e-05, "loss": 0.48225948214530945, "step": 3227 }, { "epoch": 0.3917000364033491, "grad_norm": 1.3670870065689087, "learning_rate": 1.2317897064242724e-05, "loss": 0.07061704248189926, "step": 3228 }, { "epoch": 0.39182138090037616, "grad_norm": 2.085196018218994, "learning_rate": 1.2315440363591698e-05, "loss": 0.02575668878853321, "step": 3229 }, { "epoch": 0.3919427253974032, "grad_norm": 2.4026095867156982, "learning_rate": 1.2312983662940672e-05, "loss": 0.6204736828804016, "step": 3230 }, { "epoch": 0.39206406989443027, "grad_norm": 0.7162059545516968, "learning_rate": 1.2310526962289647e-05, "loss": 0.06766770780086517, "step": 3231 }, { "epoch": 0.3921854143914573, "grad_norm": 2.660524606704712, "learning_rate": 1.2308070261638621e-05, "loss": 0.40516796708106995, "step": 3232 }, { "epoch": 0.39230675888848443, "grad_norm": 2.5455191135406494, "learning_rate": 1.2305613560987595e-05, "loss": 0.6203538179397583, "step": 3233 }, { "epoch": 0.3924281033855115, "grad_norm": 2.572350025177002, "learning_rate": 1.230315686033657e-05, "loss": 0.4282997250556946, "step": 3234 }, { "epoch": 0.39254944788253854, "grad_norm": 1.7551993131637573, "learning_rate": 1.2300700159685544e-05, "loss": 0.11783856898546219, "step": 3235 }, { "epoch": 0.3926707923795656, "grad_norm": 2.145592212677002, "learning_rate": 1.2298243459034518e-05, "loss": 0.38956859707832336, "step": 3236 }, { "epoch": 0.39279213687659265, "grad_norm": 2.457643985748291, "learning_rate": 1.2295786758383492e-05, "loss": 0.4253358244895935, "step": 3237 }, { "epoch": 0.3929134813736197, "grad_norm": 3.5604052543640137, "learning_rate": 1.2293330057732466e-05, "loss": 0.3451758325099945, "step": 3238 }, { "epoch": 0.39303482587064675, "grad_norm": 3.7612414360046387, "learning_rate": 1.229087335708144e-05, "loss": 0.48366186022758484, "step": 3239 }, { "epoch": 0.3931561703676738, "grad_norm": 2.9638922214508057, "learning_rate": 1.2288416656430415e-05, "loss": 0.2552471458911896, "step": 3240 }, { "epoch": 0.39327751486470086, "grad_norm": 1.779090404510498, "learning_rate": 1.228595995577939e-05, "loss": 0.22641043365001678, "step": 3241 }, { "epoch": 0.39339885936172797, "grad_norm": 3.6208078861236572, "learning_rate": 1.2283503255128363e-05, "loss": 0.6202171444892883, "step": 3242 }, { "epoch": 0.393520203858755, "grad_norm": 2.2945821285247803, "learning_rate": 1.2281046554477338e-05, "loss": 0.23252303898334503, "step": 3243 }, { "epoch": 0.3936415483557821, "grad_norm": 3.1645302772521973, "learning_rate": 1.2278589853826312e-05, "loss": 0.3423767685890198, "step": 3244 }, { "epoch": 0.39376289285280913, "grad_norm": 2.032697916030884, "learning_rate": 1.2276133153175286e-05, "loss": 0.21341218054294586, "step": 3245 }, { "epoch": 0.3938842373498362, "grad_norm": 2.588686227798462, "learning_rate": 1.227367645252426e-05, "loss": 0.3370200991630554, "step": 3246 }, { "epoch": 0.39400558184686324, "grad_norm": 2.5420618057250977, "learning_rate": 1.2271219751873236e-05, "loss": 0.24502411484718323, "step": 3247 }, { "epoch": 0.3941269263438903, "grad_norm": 1.9366720914840698, "learning_rate": 1.226876305122221e-05, "loss": 0.33743882179260254, "step": 3248 }, { "epoch": 0.39424827084091735, "grad_norm": 1.910409927368164, "learning_rate": 1.2266306350571185e-05, "loss": 0.16318891942501068, "step": 3249 }, { "epoch": 0.3943696153379444, "grad_norm": 4.104035377502441, "learning_rate": 1.226384964992016e-05, "loss": 0.16479748487472534, "step": 3250 }, { "epoch": 0.3944909598349715, "grad_norm": 2.1861987113952637, "learning_rate": 1.2261392949269133e-05, "loss": 0.19104893505573273, "step": 3251 }, { "epoch": 0.39461230433199856, "grad_norm": 2.10404109954834, "learning_rate": 1.2258936248618108e-05, "loss": 0.3565627336502075, "step": 3252 }, { "epoch": 0.3947336488290256, "grad_norm": 1.9464977979660034, "learning_rate": 1.2256479547967082e-05, "loss": 0.18876899778842926, "step": 3253 }, { "epoch": 0.3948549933260527, "grad_norm": 2.625919818878174, "learning_rate": 1.2254022847316056e-05, "loss": 0.2997460663318634, "step": 3254 }, { "epoch": 0.3949763378230797, "grad_norm": 1.9485828876495361, "learning_rate": 1.225156614666503e-05, "loss": 0.15886113047599792, "step": 3255 }, { "epoch": 0.3950976823201068, "grad_norm": 2.5636370182037354, "learning_rate": 1.2249109446014005e-05, "loss": 0.13460613787174225, "step": 3256 }, { "epoch": 0.39521902681713383, "grad_norm": 2.213409662246704, "learning_rate": 1.2246652745362979e-05, "loss": 0.2532368302345276, "step": 3257 }, { "epoch": 0.3953403713141609, "grad_norm": 2.0923726558685303, "learning_rate": 1.2244196044711953e-05, "loss": 0.3443406820297241, "step": 3258 }, { "epoch": 0.39546171581118794, "grad_norm": 2.9076550006866455, "learning_rate": 1.2241739344060928e-05, "loss": 0.3790132999420166, "step": 3259 }, { "epoch": 0.395583060308215, "grad_norm": 3.352142572402954, "learning_rate": 1.2239282643409902e-05, "loss": 0.27705270051956177, "step": 3260 }, { "epoch": 0.3957044048052421, "grad_norm": 2.4780049324035645, "learning_rate": 1.2236825942758876e-05, "loss": 0.22344955801963806, "step": 3261 }, { "epoch": 0.39582574930226916, "grad_norm": 0.6083203554153442, "learning_rate": 1.223436924210785e-05, "loss": 0.008890108205378056, "step": 3262 }, { "epoch": 0.3959470937992962, "grad_norm": 3.4495291709899902, "learning_rate": 1.2231912541456825e-05, "loss": 0.3349066376686096, "step": 3263 }, { "epoch": 0.39606843829632327, "grad_norm": 4.21277379989624, "learning_rate": 1.2229455840805799e-05, "loss": 0.2736901044845581, "step": 3264 }, { "epoch": 0.3961897827933503, "grad_norm": 3.274369239807129, "learning_rate": 1.2226999140154773e-05, "loss": 0.332505464553833, "step": 3265 }, { "epoch": 0.3963111272903774, "grad_norm": 3.3138391971588135, "learning_rate": 1.2224542439503749e-05, "loss": 0.3367203176021576, "step": 3266 }, { "epoch": 0.39643247178740443, "grad_norm": 1.1294124126434326, "learning_rate": 1.2222085738852723e-05, "loss": 0.14279571175575256, "step": 3267 }, { "epoch": 0.3965538162844315, "grad_norm": 2.2251570224761963, "learning_rate": 1.2219629038201698e-05, "loss": 0.20248806476593018, "step": 3268 }, { "epoch": 0.39667516078145854, "grad_norm": 3.620305061340332, "learning_rate": 1.2217172337550672e-05, "loss": 0.2944931387901306, "step": 3269 }, { "epoch": 0.39679650527848565, "grad_norm": 2.076772689819336, "learning_rate": 1.2214715636899646e-05, "loss": 0.2450747936964035, "step": 3270 }, { "epoch": 0.3969178497755127, "grad_norm": 2.4754016399383545, "learning_rate": 1.221225893624862e-05, "loss": 0.329637348651886, "step": 3271 }, { "epoch": 0.39703919427253975, "grad_norm": 3.68548583984375, "learning_rate": 1.2209802235597595e-05, "loss": 0.36103370785713196, "step": 3272 }, { "epoch": 0.3971605387695668, "grad_norm": 3.922680616378784, "learning_rate": 1.2207345534946569e-05, "loss": 0.35007330775260925, "step": 3273 }, { "epoch": 0.39728188326659386, "grad_norm": 3.0613858699798584, "learning_rate": 1.2204888834295543e-05, "loss": 0.5026612877845764, "step": 3274 }, { "epoch": 0.3974032277636209, "grad_norm": 3.63730525970459, "learning_rate": 1.2202432133644517e-05, "loss": 0.5656598806381226, "step": 3275 }, { "epoch": 0.39752457226064797, "grad_norm": 2.1219875812530518, "learning_rate": 1.2199975432993492e-05, "loss": 0.15861688554286957, "step": 3276 }, { "epoch": 0.397645916757675, "grad_norm": 2.8872973918914795, "learning_rate": 1.2197518732342466e-05, "loss": 0.13566479086875916, "step": 3277 }, { "epoch": 0.3977672612547021, "grad_norm": 2.136732816696167, "learning_rate": 1.219506203169144e-05, "loss": 0.14007946848869324, "step": 3278 }, { "epoch": 0.3978886057517292, "grad_norm": 1.4482614994049072, "learning_rate": 1.2192605331040414e-05, "loss": 0.4385533332824707, "step": 3279 }, { "epoch": 0.39800995024875624, "grad_norm": 1.9478120803833008, "learning_rate": 1.2190148630389389e-05, "loss": 0.14154918491840363, "step": 3280 }, { "epoch": 0.3981312947457833, "grad_norm": 2.286846876144409, "learning_rate": 1.2187691929738363e-05, "loss": 0.12173419445753098, "step": 3281 }, { "epoch": 0.39825263924281035, "grad_norm": 1.8455288410186768, "learning_rate": 1.2185235229087337e-05, "loss": 0.08757827430963516, "step": 3282 }, { "epoch": 0.3983739837398374, "grad_norm": 2.537466287612915, "learning_rate": 1.2182778528436311e-05, "loss": 0.32250505685806274, "step": 3283 }, { "epoch": 0.39849532823686445, "grad_norm": 0.4925089478492737, "learning_rate": 1.2180321827785286e-05, "loss": 0.004798689857125282, "step": 3284 }, { "epoch": 0.3986166727338915, "grad_norm": 1.9180618524551392, "learning_rate": 1.217786512713426e-05, "loss": 0.1593484878540039, "step": 3285 }, { "epoch": 0.39873801723091856, "grad_norm": 2.7618894577026367, "learning_rate": 1.2175408426483236e-05, "loss": 0.2754642069339752, "step": 3286 }, { "epoch": 0.3988593617279456, "grad_norm": 3.483588933944702, "learning_rate": 1.217295172583221e-05, "loss": 0.1414378434419632, "step": 3287 }, { "epoch": 0.39898070622497267, "grad_norm": 1.5736106634140015, "learning_rate": 1.2170495025181184e-05, "loss": 0.07543988525867462, "step": 3288 }, { "epoch": 0.3991020507219998, "grad_norm": 3.2746686935424805, "learning_rate": 1.2168038324530159e-05, "loss": 0.3163296580314636, "step": 3289 }, { "epoch": 0.39922339521902683, "grad_norm": 1.879117727279663, "learning_rate": 1.216558162387913e-05, "loss": 0.06840763986110687, "step": 3290 }, { "epoch": 0.3993447397160539, "grad_norm": 2.6469168663024902, "learning_rate": 1.2163124923228104e-05, "loss": 0.283497154712677, "step": 3291 }, { "epoch": 0.39946608421308094, "grad_norm": 2.1057522296905518, "learning_rate": 1.216066822257708e-05, "loss": 0.4513644278049469, "step": 3292 }, { "epoch": 0.399587428710108, "grad_norm": 1.2667713165283203, "learning_rate": 1.2158211521926054e-05, "loss": 0.06725046038627625, "step": 3293 }, { "epoch": 0.39970877320713505, "grad_norm": 1.7996500730514526, "learning_rate": 1.2155754821275028e-05, "loss": 0.0848223939538002, "step": 3294 }, { "epoch": 0.3998301177041621, "grad_norm": 2.1585934162139893, "learning_rate": 1.2153298120624003e-05, "loss": 0.14250634610652924, "step": 3295 }, { "epoch": 0.39995146220118916, "grad_norm": 2.8726449012756348, "learning_rate": 1.2150841419972977e-05, "loss": 0.4488234519958496, "step": 3296 }, { "epoch": 0.4000728066982162, "grad_norm": 3.3992249965667725, "learning_rate": 1.2148384719321951e-05, "loss": 0.06597807258367538, "step": 3297 }, { "epoch": 0.4001941511952433, "grad_norm": 2.812727689743042, "learning_rate": 1.2145928018670925e-05, "loss": 0.34727030992507935, "step": 3298 }, { "epoch": 0.4003154956922704, "grad_norm": 2.010152578353882, "learning_rate": 1.21434713180199e-05, "loss": 0.5472154021263123, "step": 3299 }, { "epoch": 0.4004368401892974, "grad_norm": 2.1349987983703613, "learning_rate": 1.2141014617368874e-05, "loss": 0.34844332933425903, "step": 3300 }, { "epoch": 0.4005581846863245, "grad_norm": 4.817236423492432, "learning_rate": 1.2138557916717848e-05, "loss": 0.3785272240638733, "step": 3301 }, { "epoch": 0.40067952918335153, "grad_norm": 1.7386378049850464, "learning_rate": 1.2136101216066822e-05, "loss": 0.1248244047164917, "step": 3302 }, { "epoch": 0.4008008736803786, "grad_norm": 1.653026819229126, "learning_rate": 1.2133644515415797e-05, "loss": 0.19100800156593323, "step": 3303 }, { "epoch": 0.40092221817740564, "grad_norm": 2.473686695098877, "learning_rate": 1.2131187814764771e-05, "loss": 0.32151561975479126, "step": 3304 }, { "epoch": 0.4010435626744327, "grad_norm": 4.7555670738220215, "learning_rate": 1.2128731114113745e-05, "loss": 0.19386518001556396, "step": 3305 }, { "epoch": 0.40116490717145975, "grad_norm": 2.2356576919555664, "learning_rate": 1.212627441346272e-05, "loss": 0.2218162715435028, "step": 3306 }, { "epoch": 0.40128625166848686, "grad_norm": 4.039072513580322, "learning_rate": 1.2123817712811694e-05, "loss": 0.18677039444446564, "step": 3307 }, { "epoch": 0.4014075961655139, "grad_norm": 3.065251588821411, "learning_rate": 1.2121361012160668e-05, "loss": 0.21303556859493256, "step": 3308 }, { "epoch": 0.40152894066254097, "grad_norm": 2.333803653717041, "learning_rate": 1.2118904311509642e-05, "loss": 0.23278328776359558, "step": 3309 }, { "epoch": 0.401650285159568, "grad_norm": 1.8717297315597534, "learning_rate": 1.2116447610858616e-05, "loss": 0.5052412748336792, "step": 3310 }, { "epoch": 0.4017716296565951, "grad_norm": 2.5327258110046387, "learning_rate": 1.211399091020759e-05, "loss": 0.2127455770969391, "step": 3311 }, { "epoch": 0.40189297415362213, "grad_norm": 3.4019956588745117, "learning_rate": 1.2111534209556567e-05, "loss": 0.25699320435523987, "step": 3312 }, { "epoch": 0.4020143186506492, "grad_norm": 3.1442573070526123, "learning_rate": 1.2109077508905541e-05, "loss": 0.08312302827835083, "step": 3313 }, { "epoch": 0.40213566314767624, "grad_norm": 2.6303045749664307, "learning_rate": 1.2106620808254515e-05, "loss": 0.28288233280181885, "step": 3314 }, { "epoch": 0.4022570076447033, "grad_norm": 2.8890466690063477, "learning_rate": 1.210416410760349e-05, "loss": 0.23453925549983978, "step": 3315 }, { "epoch": 0.4023783521417304, "grad_norm": 2.136404514312744, "learning_rate": 1.2101707406952464e-05, "loss": 0.2528800964355469, "step": 3316 }, { "epoch": 0.40249969663875745, "grad_norm": 3.9638454914093018, "learning_rate": 1.2099250706301438e-05, "loss": 0.25377410650253296, "step": 3317 }, { "epoch": 0.4026210411357845, "grad_norm": 3.9589438438415527, "learning_rate": 1.2096794005650412e-05, "loss": 0.3660302758216858, "step": 3318 }, { "epoch": 0.40274238563281156, "grad_norm": 1.771589994430542, "learning_rate": 1.2094337304999386e-05, "loss": 0.08093957602977753, "step": 3319 }, { "epoch": 0.4028637301298386, "grad_norm": 1.7189016342163086, "learning_rate": 1.209188060434836e-05, "loss": 0.07831019163131714, "step": 3320 }, { "epoch": 0.40298507462686567, "grad_norm": 3.500816583633423, "learning_rate": 1.2089423903697335e-05, "loss": 0.24039025604724884, "step": 3321 }, { "epoch": 0.4031064191238927, "grad_norm": 1.0927573442459106, "learning_rate": 1.208696720304631e-05, "loss": 0.03208920359611511, "step": 3322 }, { "epoch": 0.4032277636209198, "grad_norm": 2.8350446224212646, "learning_rate": 1.2084510502395283e-05, "loss": 0.10441645979881287, "step": 3323 }, { "epoch": 0.40334910811794683, "grad_norm": 2.9019887447357178, "learning_rate": 1.2082053801744258e-05, "loss": 0.3544785976409912, "step": 3324 }, { "epoch": 0.4034704526149739, "grad_norm": 3.5630905628204346, "learning_rate": 1.2079597101093232e-05, "loss": 0.48876744508743286, "step": 3325 }, { "epoch": 0.403591797112001, "grad_norm": 2.5119359493255615, "learning_rate": 1.2077140400442206e-05, "loss": 0.37746191024780273, "step": 3326 }, { "epoch": 0.40371314160902805, "grad_norm": 3.674171209335327, "learning_rate": 1.207468369979118e-05, "loss": 0.13165758550167084, "step": 3327 }, { "epoch": 0.4038344861060551, "grad_norm": 2.2177445888519287, "learning_rate": 1.2072226999140155e-05, "loss": 0.2323538362979889, "step": 3328 }, { "epoch": 0.40395583060308216, "grad_norm": 2.0879428386688232, "learning_rate": 1.2069770298489129e-05, "loss": 0.37489527463912964, "step": 3329 }, { "epoch": 0.4040771751001092, "grad_norm": 3.3663511276245117, "learning_rate": 1.2067313597838103e-05, "loss": 0.28204816579818726, "step": 3330 }, { "epoch": 0.40419851959713626, "grad_norm": 2.811638355255127, "learning_rate": 1.2064856897187078e-05, "loss": 0.2594698965549469, "step": 3331 }, { "epoch": 0.4043198640941633, "grad_norm": 4.1061882972717285, "learning_rate": 1.2062400196536053e-05, "loss": 0.5023982524871826, "step": 3332 }, { "epoch": 0.40444120859119037, "grad_norm": 1.7032650709152222, "learning_rate": 1.2059943495885028e-05, "loss": 0.1312413364648819, "step": 3333 }, { "epoch": 0.4045625530882174, "grad_norm": 2.882871150970459, "learning_rate": 1.2057486795234002e-05, "loss": 0.13748528063297272, "step": 3334 }, { "epoch": 0.40468389758524453, "grad_norm": 2.590442180633545, "learning_rate": 1.2055030094582976e-05, "loss": 0.2229790985584259, "step": 3335 }, { "epoch": 0.4048052420822716, "grad_norm": 2.217602491378784, "learning_rate": 1.205257339393195e-05, "loss": 0.3741094470024109, "step": 3336 }, { "epoch": 0.40492658657929864, "grad_norm": 5.3910322189331055, "learning_rate": 1.2050116693280925e-05, "loss": 0.6367065906524658, "step": 3337 }, { "epoch": 0.4050479310763257, "grad_norm": 2.7664647102355957, "learning_rate": 1.2047659992629899e-05, "loss": 0.21413977444171906, "step": 3338 }, { "epoch": 0.40516927557335275, "grad_norm": 2.5352351665496826, "learning_rate": 1.2045203291978873e-05, "loss": 0.41863587498664856, "step": 3339 }, { "epoch": 0.4052906200703798, "grad_norm": 1.850381851196289, "learning_rate": 1.2042746591327848e-05, "loss": 0.08942709118127823, "step": 3340 }, { "epoch": 0.40541196456740686, "grad_norm": 1.6138914823532104, "learning_rate": 1.2040289890676822e-05, "loss": 0.0864180326461792, "step": 3341 }, { "epoch": 0.4055333090644339, "grad_norm": 2.189840316772461, "learning_rate": 1.2037833190025796e-05, "loss": 0.27191150188446045, "step": 3342 }, { "epoch": 0.40565465356146096, "grad_norm": 2.550299882888794, "learning_rate": 1.203537648937477e-05, "loss": 0.5286152362823486, "step": 3343 }, { "epoch": 0.4057759980584881, "grad_norm": 1.8738741874694824, "learning_rate": 1.2032919788723745e-05, "loss": 0.183305025100708, "step": 3344 }, { "epoch": 0.40589734255551513, "grad_norm": 2.3162126541137695, "learning_rate": 1.2030463088072719e-05, "loss": 0.48604118824005127, "step": 3345 }, { "epoch": 0.4060186870525422, "grad_norm": 1.9461023807525635, "learning_rate": 1.2028006387421693e-05, "loss": 0.1616666615009308, "step": 3346 }, { "epoch": 0.40614003154956924, "grad_norm": 4.0628743171691895, "learning_rate": 1.2025549686770667e-05, "loss": 0.3301737904548645, "step": 3347 }, { "epoch": 0.4062613760465963, "grad_norm": 2.348146438598633, "learning_rate": 1.2023092986119642e-05, "loss": 0.16470575332641602, "step": 3348 }, { "epoch": 0.40638272054362334, "grad_norm": 2.3404624462127686, "learning_rate": 1.2020636285468616e-05, "loss": 0.3218824863433838, "step": 3349 }, { "epoch": 0.4065040650406504, "grad_norm": 1.9611907005310059, "learning_rate": 1.201817958481759e-05, "loss": 0.12972447276115417, "step": 3350 }, { "epoch": 0.40662540953767745, "grad_norm": 2.2058868408203125, "learning_rate": 1.2015722884166564e-05, "loss": 0.10044681280851364, "step": 3351 }, { "epoch": 0.4067467540347045, "grad_norm": 2.242136240005493, "learning_rate": 1.201326618351554e-05, "loss": 0.11007091403007507, "step": 3352 }, { "epoch": 0.40686809853173156, "grad_norm": 0.8104648590087891, "learning_rate": 1.2010809482864515e-05, "loss": 0.01841391623020172, "step": 3353 }, { "epoch": 0.40698944302875867, "grad_norm": 3.7218685150146484, "learning_rate": 1.2008352782213489e-05, "loss": 0.270338237285614, "step": 3354 }, { "epoch": 0.4071107875257857, "grad_norm": 1.942610502243042, "learning_rate": 1.2005896081562463e-05, "loss": 0.22724571824073792, "step": 3355 }, { "epoch": 0.4072321320228128, "grad_norm": 2.6146812438964844, "learning_rate": 1.2003439380911437e-05, "loss": 0.23565348982810974, "step": 3356 }, { "epoch": 0.40735347651983983, "grad_norm": 2.875767707824707, "learning_rate": 1.2000982680260412e-05, "loss": 0.2510961592197418, "step": 3357 }, { "epoch": 0.4074748210168669, "grad_norm": 3.8768150806427, "learning_rate": 1.1998525979609386e-05, "loss": 0.32638803124427795, "step": 3358 }, { "epoch": 0.40759616551389394, "grad_norm": 2.081332206726074, "learning_rate": 1.199606927895836e-05, "loss": 0.1831972599029541, "step": 3359 }, { "epoch": 0.407717510010921, "grad_norm": 2.9332032203674316, "learning_rate": 1.1993612578307334e-05, "loss": 0.22655761241912842, "step": 3360 }, { "epoch": 0.40783885450794805, "grad_norm": 2.714696168899536, "learning_rate": 1.1991155877656309e-05, "loss": 0.22243930399417877, "step": 3361 }, { "epoch": 0.4079601990049751, "grad_norm": 3.028733253479004, "learning_rate": 1.1988699177005283e-05, "loss": 0.10828859359025955, "step": 3362 }, { "epoch": 0.4080815435020022, "grad_norm": 2.400108814239502, "learning_rate": 1.1986242476354257e-05, "loss": 0.4200745224952698, "step": 3363 }, { "epoch": 0.40820288799902926, "grad_norm": 2.8346779346466064, "learning_rate": 1.1983785775703231e-05, "loss": 0.30214208364486694, "step": 3364 }, { "epoch": 0.4083242324960563, "grad_norm": 3.227933883666992, "learning_rate": 1.1981329075052206e-05, "loss": 0.4001484513282776, "step": 3365 }, { "epoch": 0.40844557699308337, "grad_norm": 2.5522067546844482, "learning_rate": 1.197887237440118e-05, "loss": 0.10643468797206879, "step": 3366 }, { "epoch": 0.4085669214901104, "grad_norm": 2.336789608001709, "learning_rate": 1.1976415673750154e-05, "loss": 0.25655195116996765, "step": 3367 }, { "epoch": 0.4086882659871375, "grad_norm": 1.6107679605484009, "learning_rate": 1.1973958973099128e-05, "loss": 0.05966953933238983, "step": 3368 }, { "epoch": 0.40880961048416453, "grad_norm": 2.949903726577759, "learning_rate": 1.1971502272448103e-05, "loss": 0.21616233885288239, "step": 3369 }, { "epoch": 0.4089309549811916, "grad_norm": 0.012700640596449375, "learning_rate": 1.1969045571797077e-05, "loss": 0.00018036008987110108, "step": 3370 }, { "epoch": 0.40905229947821864, "grad_norm": 2.55505108833313, "learning_rate": 1.1966588871146051e-05, "loss": 0.18714764714241028, "step": 3371 }, { "epoch": 0.40917364397524575, "grad_norm": 2.1479029655456543, "learning_rate": 1.1964132170495027e-05, "loss": 0.4700539708137512, "step": 3372 }, { "epoch": 0.4092949884722728, "grad_norm": 3.7351419925689697, "learning_rate": 1.1961675469844001e-05, "loss": 0.051050830632448196, "step": 3373 }, { "epoch": 0.40941633296929986, "grad_norm": 0.914373517036438, "learning_rate": 1.1959218769192976e-05, "loss": 0.026432767510414124, "step": 3374 }, { "epoch": 0.4095376774663269, "grad_norm": 2.449012041091919, "learning_rate": 1.195676206854195e-05, "loss": 0.22211278975009918, "step": 3375 }, { "epoch": 0.40965902196335396, "grad_norm": 2.449822187423706, "learning_rate": 1.1954305367890924e-05, "loss": 0.1475517302751541, "step": 3376 }, { "epoch": 0.409780366460381, "grad_norm": 2.3760457038879395, "learning_rate": 1.1951848667239898e-05, "loss": 0.2158413827419281, "step": 3377 }, { "epoch": 0.40990171095740807, "grad_norm": 3.990077257156372, "learning_rate": 1.1949391966588873e-05, "loss": 0.2949408292770386, "step": 3378 }, { "epoch": 0.4100230554544351, "grad_norm": 2.18121600151062, "learning_rate": 1.1946935265937847e-05, "loss": 0.3253840208053589, "step": 3379 }, { "epoch": 0.4101443999514622, "grad_norm": 4.510829448699951, "learning_rate": 1.1944478565286821e-05, "loss": 0.25793325901031494, "step": 3380 }, { "epoch": 0.41026574444848923, "grad_norm": 1.6633398532867432, "learning_rate": 1.1942021864635796e-05, "loss": 0.23681926727294922, "step": 3381 }, { "epoch": 0.41038708894551634, "grad_norm": 3.3275249004364014, "learning_rate": 1.193956516398477e-05, "loss": 0.314714252948761, "step": 3382 }, { "epoch": 0.4105084334425434, "grad_norm": 1.8083856105804443, "learning_rate": 1.1937108463333744e-05, "loss": 0.1765631139278412, "step": 3383 }, { "epoch": 0.41062977793957045, "grad_norm": 2.6071438789367676, "learning_rate": 1.1934651762682718e-05, "loss": 0.3219744563102722, "step": 3384 }, { "epoch": 0.4107511224365975, "grad_norm": 2.4086461067199707, "learning_rate": 1.1932195062031693e-05, "loss": 0.09224195033311844, "step": 3385 }, { "epoch": 0.41087246693362456, "grad_norm": 0.7400537729263306, "learning_rate": 1.1929738361380667e-05, "loss": 0.009996118023991585, "step": 3386 }, { "epoch": 0.4109938114306516, "grad_norm": 1.6936898231506348, "learning_rate": 1.1927281660729641e-05, "loss": 0.013633204624056816, "step": 3387 }, { "epoch": 0.41111515592767867, "grad_norm": 1.4746376276016235, "learning_rate": 1.1924824960078615e-05, "loss": 0.07111170887947083, "step": 3388 }, { "epoch": 0.4112365004247057, "grad_norm": 1.5193908214569092, "learning_rate": 1.192236825942759e-05, "loss": 0.18472343683242798, "step": 3389 }, { "epoch": 0.4113578449217328, "grad_norm": 2.5291969776153564, "learning_rate": 1.1919911558776564e-05, "loss": 0.6256142854690552, "step": 3390 }, { "epoch": 0.4114791894187599, "grad_norm": 1.5568585395812988, "learning_rate": 1.1917454858125538e-05, "loss": 0.17391648888587952, "step": 3391 }, { "epoch": 0.41160053391578694, "grad_norm": 1.5837876796722412, "learning_rate": 1.1914998157474514e-05, "loss": 0.16797544062137604, "step": 3392 }, { "epoch": 0.411721878412814, "grad_norm": 2.7582831382751465, "learning_rate": 1.1912541456823488e-05, "loss": 0.5750889778137207, "step": 3393 }, { "epoch": 0.41184322290984104, "grad_norm": 2.240612745285034, "learning_rate": 1.1910084756172463e-05, "loss": 0.4195220470428467, "step": 3394 }, { "epoch": 0.4119645674068681, "grad_norm": 3.0088918209075928, "learning_rate": 1.1907628055521437e-05, "loss": 0.2987426519393921, "step": 3395 }, { "epoch": 0.41208591190389515, "grad_norm": 1.6480534076690674, "learning_rate": 1.1905171354870411e-05, "loss": 0.07369241118431091, "step": 3396 }, { "epoch": 0.4122072564009222, "grad_norm": 3.518282175064087, "learning_rate": 1.1902714654219385e-05, "loss": 0.22647514939308167, "step": 3397 }, { "epoch": 0.41232860089794926, "grad_norm": 2.50191068649292, "learning_rate": 1.190025795356836e-05, "loss": 0.5125393867492676, "step": 3398 }, { "epoch": 0.4124499453949763, "grad_norm": 1.9100526571273804, "learning_rate": 1.1897801252917334e-05, "loss": 0.32923686504364014, "step": 3399 }, { "epoch": 0.4125712898920034, "grad_norm": 1.7821210622787476, "learning_rate": 1.1895344552266308e-05, "loss": 0.25902876257896423, "step": 3400 }, { "epoch": 0.4126926343890305, "grad_norm": 1.7797950506210327, "learning_rate": 1.1892887851615282e-05, "loss": 0.14268380403518677, "step": 3401 }, { "epoch": 0.41281397888605753, "grad_norm": 2.540289878845215, "learning_rate": 1.1890431150964257e-05, "loss": 0.24589216709136963, "step": 3402 }, { "epoch": 0.4129353233830846, "grad_norm": 2.3687174320220947, "learning_rate": 1.1887974450313231e-05, "loss": 0.2886613607406616, "step": 3403 }, { "epoch": 0.41305666788011164, "grad_norm": 3.2549076080322266, "learning_rate": 1.1885517749662205e-05, "loss": 0.3263327479362488, "step": 3404 }, { "epoch": 0.4131780123771387, "grad_norm": 3.080291509628296, "learning_rate": 1.188306104901118e-05, "loss": 0.5150718092918396, "step": 3405 }, { "epoch": 0.41329935687416575, "grad_norm": 2.8416271209716797, "learning_rate": 1.1880604348360154e-05, "loss": 0.41582363843917847, "step": 3406 }, { "epoch": 0.4134207013711928, "grad_norm": 1.054196834564209, "learning_rate": 1.1878147647709128e-05, "loss": 0.013622181490063667, "step": 3407 }, { "epoch": 0.41354204586821985, "grad_norm": 2.1204440593719482, "learning_rate": 1.1875690947058102e-05, "loss": 0.11492344737052917, "step": 3408 }, { "epoch": 0.41366339036524696, "grad_norm": 3.895838975906372, "learning_rate": 1.1873234246407076e-05, "loss": 0.3368130624294281, "step": 3409 }, { "epoch": 0.413784734862274, "grad_norm": 1.3537365198135376, "learning_rate": 1.187077754575605e-05, "loss": 0.12048971652984619, "step": 3410 }, { "epoch": 0.41390607935930107, "grad_norm": 1.7345317602157593, "learning_rate": 1.1868320845105027e-05, "loss": 0.23148749768733978, "step": 3411 }, { "epoch": 0.4140274238563281, "grad_norm": 0.9605816602706909, "learning_rate": 1.1865864144454001e-05, "loss": 0.03351011127233505, "step": 3412 }, { "epoch": 0.4141487683533552, "grad_norm": 3.424372673034668, "learning_rate": 1.1863407443802975e-05, "loss": 0.14960801601409912, "step": 3413 }, { "epoch": 0.41427011285038223, "grad_norm": 2.743013381958008, "learning_rate": 1.186095074315195e-05, "loss": 0.47832292318344116, "step": 3414 }, { "epoch": 0.4143914573474093, "grad_norm": 2.6609420776367188, "learning_rate": 1.1858494042500924e-05, "loss": 0.16290368139743805, "step": 3415 }, { "epoch": 0.41451280184443634, "grad_norm": 3.455261707305908, "learning_rate": 1.1856037341849898e-05, "loss": 0.39955049753189087, "step": 3416 }, { "epoch": 0.4146341463414634, "grad_norm": 1.9961707592010498, "learning_rate": 1.1853580641198872e-05, "loss": 0.15304440259933472, "step": 3417 }, { "epoch": 0.41475549083849045, "grad_norm": 5.198159217834473, "learning_rate": 1.1851123940547846e-05, "loss": 0.1540556102991104, "step": 3418 }, { "epoch": 0.41487683533551756, "grad_norm": 2.2172584533691406, "learning_rate": 1.184866723989682e-05, "loss": 0.1485886126756668, "step": 3419 }, { "epoch": 0.4149981798325446, "grad_norm": 2.6809258460998535, "learning_rate": 1.1846210539245795e-05, "loss": 0.17108909785747528, "step": 3420 }, { "epoch": 0.41511952432957167, "grad_norm": 2.615473985671997, "learning_rate": 1.184375383859477e-05, "loss": 0.3039502501487732, "step": 3421 }, { "epoch": 0.4152408688265987, "grad_norm": 0.0017497573280707002, "learning_rate": 1.1841297137943743e-05, "loss": 4.755393456434831e-05, "step": 3422 }, { "epoch": 0.4153622133236258, "grad_norm": 2.4875738620758057, "learning_rate": 1.1838840437292718e-05, "loss": 0.17304940521717072, "step": 3423 }, { "epoch": 0.4154835578206528, "grad_norm": 3.6963891983032227, "learning_rate": 1.1836383736641692e-05, "loss": 0.22823911905288696, "step": 3424 }, { "epoch": 0.4156049023176799, "grad_norm": 2.8694639205932617, "learning_rate": 1.1833927035990666e-05, "loss": 0.2963635325431824, "step": 3425 }, { "epoch": 0.41572624681470693, "grad_norm": 2.4273478984832764, "learning_rate": 1.1831470335339639e-05, "loss": 0.10278123617172241, "step": 3426 }, { "epoch": 0.415847591311734, "grad_norm": 2.7320337295532227, "learning_rate": 1.1829013634688613e-05, "loss": 0.10359422862529755, "step": 3427 }, { "epoch": 0.4159689358087611, "grad_norm": 3.307586193084717, "learning_rate": 1.1826556934037587e-05, "loss": 0.24914634227752686, "step": 3428 }, { "epoch": 0.41609028030578815, "grad_norm": 2.7045950889587402, "learning_rate": 1.1824100233386562e-05, "loss": 0.1354033648967743, "step": 3429 }, { "epoch": 0.4162116248028152, "grad_norm": 3.0522232055664062, "learning_rate": 1.1821643532735536e-05, "loss": 0.24849481880664825, "step": 3430 }, { "epoch": 0.41633296929984226, "grad_norm": 3.298497438430786, "learning_rate": 1.181918683208451e-05, "loss": 0.12789031863212585, "step": 3431 }, { "epoch": 0.4164543137968693, "grad_norm": 3.9013428688049316, "learning_rate": 1.1816730131433484e-05, "loss": 0.49709630012512207, "step": 3432 }, { "epoch": 0.41657565829389637, "grad_norm": 1.721135139465332, "learning_rate": 1.1814273430782459e-05, "loss": 0.42568439245224, "step": 3433 }, { "epoch": 0.4166970027909234, "grad_norm": 1.8467674255371094, "learning_rate": 1.1811816730131433e-05, "loss": 0.43003901839256287, "step": 3434 }, { "epoch": 0.4168183472879505, "grad_norm": 2.3580636978149414, "learning_rate": 1.1809360029480407e-05, "loss": 0.25825563073158264, "step": 3435 }, { "epoch": 0.41693969178497753, "grad_norm": 2.2304062843322754, "learning_rate": 1.1806903328829381e-05, "loss": 0.3182814121246338, "step": 3436 }, { "epoch": 0.41706103628200464, "grad_norm": 3.9392285346984863, "learning_rate": 1.1804446628178357e-05, "loss": 0.07946665585041046, "step": 3437 }, { "epoch": 0.4171823807790317, "grad_norm": 3.439152240753174, "learning_rate": 1.1801989927527332e-05, "loss": 0.4690271019935608, "step": 3438 }, { "epoch": 0.41730372527605875, "grad_norm": 1.1062110662460327, "learning_rate": 1.1799533226876306e-05, "loss": 0.023044677451252937, "step": 3439 }, { "epoch": 0.4174250697730858, "grad_norm": 2.055751085281372, "learning_rate": 1.179707652622528e-05, "loss": 0.3243967890739441, "step": 3440 }, { "epoch": 0.41754641427011285, "grad_norm": 2.848154306411743, "learning_rate": 1.1794619825574254e-05, "loss": 0.411385715007782, "step": 3441 }, { "epoch": 0.4176677587671399, "grad_norm": 2.5012378692626953, "learning_rate": 1.1792163124923229e-05, "loss": 0.1882108449935913, "step": 3442 }, { "epoch": 0.41778910326416696, "grad_norm": 3.05448579788208, "learning_rate": 1.1789706424272203e-05, "loss": 0.3536946773529053, "step": 3443 }, { "epoch": 0.417910447761194, "grad_norm": 3.1110012531280518, "learning_rate": 1.1787249723621177e-05, "loss": 0.18979328870773315, "step": 3444 }, { "epoch": 0.41803179225822107, "grad_norm": 2.1430108547210693, "learning_rate": 1.1784793022970151e-05, "loss": 0.06423166394233704, "step": 3445 }, { "epoch": 0.4181531367552481, "grad_norm": 2.6607182025909424, "learning_rate": 1.1782336322319126e-05, "loss": 0.3381607234477997, "step": 3446 }, { "epoch": 0.41827448125227523, "grad_norm": 4.065653324127197, "learning_rate": 1.17798796216681e-05, "loss": 0.3323563039302826, "step": 3447 }, { "epoch": 0.4183958257493023, "grad_norm": 2.4826183319091797, "learning_rate": 1.1777422921017074e-05, "loss": 0.35323530435562134, "step": 3448 }, { "epoch": 0.41851717024632934, "grad_norm": 1.811853051185608, "learning_rate": 1.1774966220366048e-05, "loss": 0.20018243789672852, "step": 3449 }, { "epoch": 0.4186385147433564, "grad_norm": 3.218059539794922, "learning_rate": 1.1772509519715023e-05, "loss": 0.40599000453948975, "step": 3450 }, { "epoch": 0.41875985924038345, "grad_norm": 2.8796212673187256, "learning_rate": 1.1770052819063997e-05, "loss": 0.3082905411720276, "step": 3451 }, { "epoch": 0.4188812037374105, "grad_norm": 1.919619083404541, "learning_rate": 1.1767596118412971e-05, "loss": 0.12047933042049408, "step": 3452 }, { "epoch": 0.41900254823443756, "grad_norm": 1.6094869375228882, "learning_rate": 1.1765139417761945e-05, "loss": 0.13965997099876404, "step": 3453 }, { "epoch": 0.4191238927314646, "grad_norm": 2.9742469787597656, "learning_rate": 1.176268271711092e-05, "loss": 0.3110111355781555, "step": 3454 }, { "epoch": 0.41924523722849166, "grad_norm": 3.066451072692871, "learning_rate": 1.1760226016459894e-05, "loss": 0.248049795627594, "step": 3455 }, { "epoch": 0.41936658172551877, "grad_norm": 2.063441753387451, "learning_rate": 1.1757769315808868e-05, "loss": 0.19167058169841766, "step": 3456 }, { "epoch": 0.4194879262225458, "grad_norm": 1.94533371925354, "learning_rate": 1.1755312615157844e-05, "loss": 0.12360180914402008, "step": 3457 }, { "epoch": 0.4196092707195729, "grad_norm": 1.6641358137130737, "learning_rate": 1.1752855914506818e-05, "loss": 0.15361237525939941, "step": 3458 }, { "epoch": 0.41973061521659993, "grad_norm": 3.931972026824951, "learning_rate": 1.1750399213855793e-05, "loss": 0.4843231439590454, "step": 3459 }, { "epoch": 0.419851959713627, "grad_norm": 1.9616279602050781, "learning_rate": 1.1747942513204767e-05, "loss": 0.25367042422294617, "step": 3460 }, { "epoch": 0.41997330421065404, "grad_norm": 3.106501340866089, "learning_rate": 1.1745485812553741e-05, "loss": 0.3402540683746338, "step": 3461 }, { "epoch": 0.4200946487076811, "grad_norm": 2.1331770420074463, "learning_rate": 1.1743029111902716e-05, "loss": 0.3224339485168457, "step": 3462 }, { "epoch": 0.42021599320470815, "grad_norm": 2.8695995807647705, "learning_rate": 1.174057241125169e-05, "loss": 0.05880775675177574, "step": 3463 }, { "epoch": 0.4203373377017352, "grad_norm": 2.60664963722229, "learning_rate": 1.1738115710600664e-05, "loss": 0.2266593724489212, "step": 3464 }, { "epoch": 0.4204586821987623, "grad_norm": 2.247929573059082, "learning_rate": 1.1735659009949638e-05, "loss": 0.20880204439163208, "step": 3465 }, { "epoch": 0.42058002669578937, "grad_norm": 1.9216110706329346, "learning_rate": 1.1733202309298613e-05, "loss": 0.2999063730239868, "step": 3466 }, { "epoch": 0.4207013711928164, "grad_norm": 2.366934061050415, "learning_rate": 1.1730745608647587e-05, "loss": 0.14423035085201263, "step": 3467 }, { "epoch": 0.4208227156898435, "grad_norm": 1.9067960977554321, "learning_rate": 1.1728288907996561e-05, "loss": 0.1828007698059082, "step": 3468 }, { "epoch": 0.42094406018687053, "grad_norm": 2.340365409851074, "learning_rate": 1.1725832207345535e-05, "loss": 0.40259379148483276, "step": 3469 }, { "epoch": 0.4210654046838976, "grad_norm": 1.356203317642212, "learning_rate": 1.172337550669451e-05, "loss": 0.04147522151470184, "step": 3470 }, { "epoch": 0.42118674918092464, "grad_norm": 3.8081276416778564, "learning_rate": 1.1720918806043484e-05, "loss": 0.20986762642860413, "step": 3471 }, { "epoch": 0.4213080936779517, "grad_norm": 3.0089476108551025, "learning_rate": 1.1718462105392458e-05, "loss": 0.3639543354511261, "step": 3472 }, { "epoch": 0.42142943817497874, "grad_norm": 2.3719913959503174, "learning_rate": 1.1716005404741432e-05, "loss": 0.10580010712146759, "step": 3473 }, { "epoch": 0.4215507826720058, "grad_norm": 2.1673812866210938, "learning_rate": 1.1713548704090407e-05, "loss": 0.44383132457733154, "step": 3474 }, { "epoch": 0.4216721271690329, "grad_norm": 1.9117655754089355, "learning_rate": 1.1711092003439381e-05, "loss": 0.1181115061044693, "step": 3475 }, { "epoch": 0.42179347166605996, "grad_norm": 2.113288164138794, "learning_rate": 1.1708635302788355e-05, "loss": 0.10157492756843567, "step": 3476 }, { "epoch": 0.421914816163087, "grad_norm": 4.154051303863525, "learning_rate": 1.1706178602137331e-05, "loss": 0.5281915664672852, "step": 3477 }, { "epoch": 0.42203616066011407, "grad_norm": 3.4318158626556396, "learning_rate": 1.1703721901486305e-05, "loss": 0.4258676767349243, "step": 3478 }, { "epoch": 0.4221575051571411, "grad_norm": 2.2328593730926514, "learning_rate": 1.170126520083528e-05, "loss": 0.13055363297462463, "step": 3479 }, { "epoch": 0.4222788496541682, "grad_norm": 3.7509846687316895, "learning_rate": 1.1698808500184254e-05, "loss": 0.24521100521087646, "step": 3480 }, { "epoch": 0.42240019415119523, "grad_norm": 3.5181307792663574, "learning_rate": 1.1696351799533228e-05, "loss": 0.15635553002357483, "step": 3481 }, { "epoch": 0.4225215386482223, "grad_norm": 2.506375789642334, "learning_rate": 1.1693895098882202e-05, "loss": 0.6314314603805542, "step": 3482 }, { "epoch": 0.42264288314524934, "grad_norm": 0.2738441228866577, "learning_rate": 1.1691438398231177e-05, "loss": 0.006706348154693842, "step": 3483 }, { "epoch": 0.42276422764227645, "grad_norm": 1.785003900527954, "learning_rate": 1.1688981697580151e-05, "loss": 0.17614462971687317, "step": 3484 }, { "epoch": 0.4228855721393035, "grad_norm": 2.6342015266418457, "learning_rate": 1.1686524996929125e-05, "loss": 0.2105119228363037, "step": 3485 }, { "epoch": 0.42300691663633055, "grad_norm": 1.8216427564620972, "learning_rate": 1.16840682962781e-05, "loss": 0.260210782289505, "step": 3486 }, { "epoch": 0.4231282611333576, "grad_norm": 4.7217302322387695, "learning_rate": 1.1681611595627074e-05, "loss": 0.3383857011795044, "step": 3487 }, { "epoch": 0.42324960563038466, "grad_norm": 2.028871536254883, "learning_rate": 1.1679154894976048e-05, "loss": 0.25453463196754456, "step": 3488 }, { "epoch": 0.4233709501274117, "grad_norm": 1.85947847366333, "learning_rate": 1.1676698194325022e-05, "loss": 0.22250008583068848, "step": 3489 }, { "epoch": 0.42349229462443877, "grad_norm": 2.306297540664673, "learning_rate": 1.1674241493673996e-05, "loss": 0.28463929891586304, "step": 3490 }, { "epoch": 0.4236136391214658, "grad_norm": 3.397921323776245, "learning_rate": 1.167178479302297e-05, "loss": 0.4102676212787628, "step": 3491 }, { "epoch": 0.4237349836184929, "grad_norm": 2.7147834300994873, "learning_rate": 1.1669328092371945e-05, "loss": 0.37689846754074097, "step": 3492 }, { "epoch": 0.42385632811552, "grad_norm": 2.865588426589966, "learning_rate": 1.166687139172092e-05, "loss": 0.480543315410614, "step": 3493 }, { "epoch": 0.42397767261254704, "grad_norm": 1.8033530712127686, "learning_rate": 1.1664414691069893e-05, "loss": 0.1970994621515274, "step": 3494 }, { "epoch": 0.4240990171095741, "grad_norm": 1.495980143547058, "learning_rate": 1.1661957990418868e-05, "loss": 0.10199486464262009, "step": 3495 }, { "epoch": 0.42422036160660115, "grad_norm": 3.647043466567993, "learning_rate": 1.1659501289767842e-05, "loss": 0.4993443191051483, "step": 3496 }, { "epoch": 0.4243417061036282, "grad_norm": 4.067138195037842, "learning_rate": 1.1657044589116818e-05, "loss": 0.16017641127109528, "step": 3497 }, { "epoch": 0.42446305060065526, "grad_norm": 2.5167953968048096, "learning_rate": 1.1654587888465792e-05, "loss": 0.5309091806411743, "step": 3498 }, { "epoch": 0.4245843950976823, "grad_norm": 3.4322762489318848, "learning_rate": 1.1652131187814766e-05, "loss": 0.34487950801849365, "step": 3499 }, { "epoch": 0.42470573959470936, "grad_norm": 3.0887749195098877, "learning_rate": 1.164967448716374e-05, "loss": 0.4641647934913635, "step": 3500 }, { "epoch": 0.4248270840917364, "grad_norm": 2.39925217628479, "learning_rate": 1.1647217786512715e-05, "loss": 0.19832506775856018, "step": 3501 }, { "epoch": 0.4249484285887635, "grad_norm": 0.8822919726371765, "learning_rate": 1.164476108586169e-05, "loss": 0.024732433259487152, "step": 3502 }, { "epoch": 0.4250697730857906, "grad_norm": 2.3826348781585693, "learning_rate": 1.1642304385210663e-05, "loss": 0.14591310918331146, "step": 3503 }, { "epoch": 0.42519111758281763, "grad_norm": 2.1904237270355225, "learning_rate": 1.1639847684559638e-05, "loss": 0.28078460693359375, "step": 3504 }, { "epoch": 0.4253124620798447, "grad_norm": 2.4217185974121094, "learning_rate": 1.1637390983908612e-05, "loss": 0.1772153079509735, "step": 3505 }, { "epoch": 0.42543380657687174, "grad_norm": 3.663706064224243, "learning_rate": 1.1634934283257586e-05, "loss": 0.3164679706096649, "step": 3506 }, { "epoch": 0.4255551510738988, "grad_norm": 3.5393636226654053, "learning_rate": 1.163247758260656e-05, "loss": 0.36650732159614563, "step": 3507 }, { "epoch": 0.42567649557092585, "grad_norm": 2.4501283168792725, "learning_rate": 1.1630020881955535e-05, "loss": 0.301665723323822, "step": 3508 }, { "epoch": 0.4257978400679529, "grad_norm": 2.496389627456665, "learning_rate": 1.1627564181304509e-05, "loss": 0.4281569719314575, "step": 3509 }, { "epoch": 0.42591918456497996, "grad_norm": 2.8083412647247314, "learning_rate": 1.1625107480653483e-05, "loss": 0.27063703536987305, "step": 3510 }, { "epoch": 0.426040529062007, "grad_norm": 2.149444580078125, "learning_rate": 1.1622650780002458e-05, "loss": 0.19885718822479248, "step": 3511 }, { "epoch": 0.4261618735590341, "grad_norm": 2.8739559650421143, "learning_rate": 1.1620194079351432e-05, "loss": 0.4075099229812622, "step": 3512 }, { "epoch": 0.4262832180560612, "grad_norm": 2.3116772174835205, "learning_rate": 1.1617737378700406e-05, "loss": 0.163457989692688, "step": 3513 }, { "epoch": 0.42640456255308823, "grad_norm": 2.594003438949585, "learning_rate": 1.161528067804938e-05, "loss": 0.48457199335098267, "step": 3514 }, { "epoch": 0.4265259070501153, "grad_norm": 1.6292730569839478, "learning_rate": 1.1612823977398355e-05, "loss": 0.2179834097623825, "step": 3515 }, { "epoch": 0.42664725154714234, "grad_norm": 2.3443987369537354, "learning_rate": 1.1610367276747329e-05, "loss": 0.48786991834640503, "step": 3516 }, { "epoch": 0.4267685960441694, "grad_norm": 2.87321400642395, "learning_rate": 1.1607910576096305e-05, "loss": 0.20168547332286835, "step": 3517 }, { "epoch": 0.42688994054119644, "grad_norm": 3.7508914470672607, "learning_rate": 1.1605453875445279e-05, "loss": 0.18805919587612152, "step": 3518 }, { "epoch": 0.4270112850382235, "grad_norm": 3.1636602878570557, "learning_rate": 1.1602997174794253e-05, "loss": 0.7994831204414368, "step": 3519 }, { "epoch": 0.42713262953525055, "grad_norm": 1.8529301881790161, "learning_rate": 1.1600540474143228e-05, "loss": 0.19495491683483124, "step": 3520 }, { "epoch": 0.42725397403227766, "grad_norm": 3.220113515853882, "learning_rate": 1.1598083773492202e-05, "loss": 0.23272007703781128, "step": 3521 }, { "epoch": 0.4273753185293047, "grad_norm": 2.2415926456451416, "learning_rate": 1.1595627072841176e-05, "loss": 0.18578383326530457, "step": 3522 }, { "epoch": 0.42749666302633177, "grad_norm": 3.9811363220214844, "learning_rate": 1.159317037219015e-05, "loss": 0.5883661508560181, "step": 3523 }, { "epoch": 0.4276180075233588, "grad_norm": 3.611394166946411, "learning_rate": 1.1590713671539125e-05, "loss": 0.29106664657592773, "step": 3524 }, { "epoch": 0.4277393520203859, "grad_norm": 2.861515760421753, "learning_rate": 1.1588256970888099e-05, "loss": 0.13153845071792603, "step": 3525 }, { "epoch": 0.42786069651741293, "grad_norm": 1.6934245824813843, "learning_rate": 1.1585800270237073e-05, "loss": 0.059216201305389404, "step": 3526 }, { "epoch": 0.42798204101444, "grad_norm": 1.712193250656128, "learning_rate": 1.1583343569586047e-05, "loss": 0.0886315107345581, "step": 3527 }, { "epoch": 0.42810338551146704, "grad_norm": 2.9867546558380127, "learning_rate": 1.1580886868935022e-05, "loss": 0.729335606098175, "step": 3528 }, { "epoch": 0.4282247300084941, "grad_norm": 1.6713956594467163, "learning_rate": 1.1578430168283996e-05, "loss": 0.09660333395004272, "step": 3529 }, { "epoch": 0.4283460745055212, "grad_norm": 1.2232511043548584, "learning_rate": 1.157597346763297e-05, "loss": 0.27441084384918213, "step": 3530 }, { "epoch": 0.42846741900254826, "grad_norm": 2.7496514320373535, "learning_rate": 1.1573516766981944e-05, "loss": 0.3395706117153168, "step": 3531 }, { "epoch": 0.4285887634995753, "grad_norm": 2.7180612087249756, "learning_rate": 1.1571060066330919e-05, "loss": 0.2094610333442688, "step": 3532 }, { "epoch": 0.42871010799660236, "grad_norm": 2.590651750564575, "learning_rate": 1.1568603365679893e-05, "loss": 0.3029654026031494, "step": 3533 }, { "epoch": 0.4288314524936294, "grad_norm": 3.976020574569702, "learning_rate": 1.1566146665028867e-05, "loss": 0.51016765832901, "step": 3534 }, { "epoch": 0.42895279699065647, "grad_norm": 1.4675898551940918, "learning_rate": 1.1563689964377841e-05, "loss": 0.11305573582649231, "step": 3535 }, { "epoch": 0.4290741414876835, "grad_norm": 3.136228084564209, "learning_rate": 1.1561233263726816e-05, "loss": 0.5877640843391418, "step": 3536 }, { "epoch": 0.4291954859847106, "grad_norm": 3.057117462158203, "learning_rate": 1.1558776563075792e-05, "loss": 0.32149437069892883, "step": 3537 }, { "epoch": 0.42931683048173763, "grad_norm": 3.842179775238037, "learning_rate": 1.1556319862424766e-05, "loss": 0.2886391282081604, "step": 3538 }, { "epoch": 0.4294381749787647, "grad_norm": 3.0417163372039795, "learning_rate": 1.155386316177374e-05, "loss": 0.30413177609443665, "step": 3539 }, { "epoch": 0.4295595194757918, "grad_norm": 3.1643025875091553, "learning_rate": 1.1551406461122714e-05, "loss": 0.2993037700653076, "step": 3540 }, { "epoch": 0.42968086397281885, "grad_norm": 3.737643003463745, "learning_rate": 1.1548949760471689e-05, "loss": 0.3740388751029968, "step": 3541 }, { "epoch": 0.4298022084698459, "grad_norm": 2.914933204650879, "learning_rate": 1.1546493059820663e-05, "loss": 0.15409384667873383, "step": 3542 }, { "epoch": 0.42992355296687296, "grad_norm": 2.2203798294067383, "learning_rate": 1.1544036359169637e-05, "loss": 0.14025011658668518, "step": 3543 }, { "epoch": 0.4300448974639, "grad_norm": 2.3739166259765625, "learning_rate": 1.1541579658518611e-05, "loss": 0.25144433975219727, "step": 3544 }, { "epoch": 0.43016624196092706, "grad_norm": 2.491156578063965, "learning_rate": 1.1539122957867586e-05, "loss": 0.3260461091995239, "step": 3545 }, { "epoch": 0.4302875864579541, "grad_norm": 2.44917368888855, "learning_rate": 1.153666625721656e-05, "loss": 0.15014734864234924, "step": 3546 }, { "epoch": 0.4304089309549812, "grad_norm": 2.5014209747314453, "learning_rate": 1.1534209556565534e-05, "loss": 0.3493393659591675, "step": 3547 }, { "epoch": 0.4305302754520082, "grad_norm": 2.060030460357666, "learning_rate": 1.1531752855914508e-05, "loss": 0.10436143726110458, "step": 3548 }, { "epoch": 0.43065161994903534, "grad_norm": 3.732374906539917, "learning_rate": 1.1529296155263483e-05, "loss": 0.2870655059814453, "step": 3549 }, { "epoch": 0.4307729644460624, "grad_norm": 2.585165023803711, "learning_rate": 1.1526839454612457e-05, "loss": 0.3311839699745178, "step": 3550 }, { "epoch": 0.43089430894308944, "grad_norm": 1.9173551797866821, "learning_rate": 1.1524382753961431e-05, "loss": 0.08459767699241638, "step": 3551 }, { "epoch": 0.4310156534401165, "grad_norm": 3.1529128551483154, "learning_rate": 1.1521926053310406e-05, "loss": 0.7174496650695801, "step": 3552 }, { "epoch": 0.43113699793714355, "grad_norm": 2.7838709354400635, "learning_rate": 1.151946935265938e-05, "loss": 0.30458956956863403, "step": 3553 }, { "epoch": 0.4312583424341706, "grad_norm": 2.698802947998047, "learning_rate": 1.1517012652008354e-05, "loss": 0.38087013363838196, "step": 3554 }, { "epoch": 0.43137968693119766, "grad_norm": 1.689888834953308, "learning_rate": 1.1514555951357328e-05, "loss": 0.055040039122104645, "step": 3555 }, { "epoch": 0.4315010314282247, "grad_norm": 2.4405455589294434, "learning_rate": 1.1512099250706304e-05, "loss": 0.377621054649353, "step": 3556 }, { "epoch": 0.43162237592525177, "grad_norm": 0.798610508441925, "learning_rate": 1.1509642550055279e-05, "loss": 0.01458574179559946, "step": 3557 }, { "epoch": 0.4317437204222789, "grad_norm": 2.441983222961426, "learning_rate": 1.1507185849404253e-05, "loss": 0.4220641255378723, "step": 3558 }, { "epoch": 0.43186506491930593, "grad_norm": 2.1550843715667725, "learning_rate": 1.1504729148753227e-05, "loss": 0.3988901376724243, "step": 3559 }, { "epoch": 0.431986409416333, "grad_norm": 1.9146445989608765, "learning_rate": 1.1502272448102201e-05, "loss": 0.07315497100353241, "step": 3560 }, { "epoch": 0.43210775391336004, "grad_norm": 2.2415459156036377, "learning_rate": 1.1499815747451172e-05, "loss": 0.3059389889240265, "step": 3561 }, { "epoch": 0.4322290984103871, "grad_norm": 2.4857776165008545, "learning_rate": 1.1497359046800146e-05, "loss": 0.102960005402565, "step": 3562 }, { "epoch": 0.43235044290741415, "grad_norm": 3.687988042831421, "learning_rate": 1.1494902346149122e-05, "loss": 0.08040996640920639, "step": 3563 }, { "epoch": 0.4324717874044412, "grad_norm": 1.5955703258514404, "learning_rate": 1.1492445645498097e-05, "loss": 0.051232174038887024, "step": 3564 }, { "epoch": 0.43259313190146825, "grad_norm": 3.303870439529419, "learning_rate": 1.1489988944847071e-05, "loss": 0.7577817440032959, "step": 3565 }, { "epoch": 0.4327144763984953, "grad_norm": 3.2710652351379395, "learning_rate": 1.1487532244196045e-05, "loss": 0.25319910049438477, "step": 3566 }, { "epoch": 0.43283582089552236, "grad_norm": 2.376044988632202, "learning_rate": 1.148507554354502e-05, "loss": 0.06742098927497864, "step": 3567 }, { "epoch": 0.43295716539254947, "grad_norm": 1.9534738063812256, "learning_rate": 1.1482618842893994e-05, "loss": 0.22318729758262634, "step": 3568 }, { "epoch": 0.4330785098895765, "grad_norm": 2.6536521911621094, "learning_rate": 1.1480162142242968e-05, "loss": 0.29003578424453735, "step": 3569 }, { "epoch": 0.4331998543866036, "grad_norm": 3.3636391162872314, "learning_rate": 1.1477705441591942e-05, "loss": 0.2415037751197815, "step": 3570 }, { "epoch": 0.43332119888363063, "grad_norm": 2.113816261291504, "learning_rate": 1.1475248740940916e-05, "loss": 0.34754252433776855, "step": 3571 }, { "epoch": 0.4334425433806577, "grad_norm": 3.4806878566741943, "learning_rate": 1.147279204028989e-05, "loss": 0.5855320692062378, "step": 3572 }, { "epoch": 0.43356388787768474, "grad_norm": 2.993398427963257, "learning_rate": 1.1470335339638865e-05, "loss": 0.4003051519393921, "step": 3573 }, { "epoch": 0.4336852323747118, "grad_norm": 3.324693441390991, "learning_rate": 1.146787863898784e-05, "loss": 0.16323281824588776, "step": 3574 }, { "epoch": 0.43380657687173885, "grad_norm": 2.242252826690674, "learning_rate": 1.1465421938336813e-05, "loss": 0.07665936648845673, "step": 3575 }, { "epoch": 0.4339279213687659, "grad_norm": 1.906256079673767, "learning_rate": 1.1462965237685788e-05, "loss": 0.17530910670757294, "step": 3576 }, { "epoch": 0.434049265865793, "grad_norm": 1.4897524118423462, "learning_rate": 1.1460508537034762e-05, "loss": 0.0955578088760376, "step": 3577 }, { "epoch": 0.43417061036282006, "grad_norm": 2.944699287414551, "learning_rate": 1.1458051836383736e-05, "loss": 0.2497621476650238, "step": 3578 }, { "epoch": 0.4342919548598471, "grad_norm": 2.2499160766601562, "learning_rate": 1.145559513573271e-05, "loss": 0.5359215140342712, "step": 3579 }, { "epoch": 0.43441329935687417, "grad_norm": 4.141959190368652, "learning_rate": 1.1453138435081685e-05, "loss": 0.7478635311126709, "step": 3580 }, { "epoch": 0.4345346438539012, "grad_norm": 2.1418049335479736, "learning_rate": 1.1450681734430659e-05, "loss": 0.6666457056999207, "step": 3581 }, { "epoch": 0.4346559883509283, "grad_norm": 2.682008981704712, "learning_rate": 1.1448225033779635e-05, "loss": 0.2484360635280609, "step": 3582 }, { "epoch": 0.43477733284795533, "grad_norm": 1.3566113710403442, "learning_rate": 1.144576833312861e-05, "loss": 0.11822110414505005, "step": 3583 }, { "epoch": 0.4348986773449824, "grad_norm": 3.0834574699401855, "learning_rate": 1.1443311632477583e-05, "loss": 0.25057023763656616, "step": 3584 }, { "epoch": 0.43502002184200944, "grad_norm": 1.1841784715652466, "learning_rate": 1.1440854931826558e-05, "loss": 0.0327373705804348, "step": 3585 }, { "epoch": 0.43514136633903655, "grad_norm": 2.461822748184204, "learning_rate": 1.1438398231175532e-05, "loss": 0.19103094935417175, "step": 3586 }, { "epoch": 0.4352627108360636, "grad_norm": 2.851731538772583, "learning_rate": 1.1435941530524506e-05, "loss": 0.37302541732788086, "step": 3587 }, { "epoch": 0.43538405533309066, "grad_norm": 2.4243295192718506, "learning_rate": 1.143348482987348e-05, "loss": 0.32346072793006897, "step": 3588 }, { "epoch": 0.4355053998301177, "grad_norm": 2.8613784313201904, "learning_rate": 1.1431028129222455e-05, "loss": 0.13731716573238373, "step": 3589 }, { "epoch": 0.43562674432714477, "grad_norm": 2.957123279571533, "learning_rate": 1.1428571428571429e-05, "loss": 0.33699917793273926, "step": 3590 }, { "epoch": 0.4357480888241718, "grad_norm": 2.904517412185669, "learning_rate": 1.1426114727920403e-05, "loss": 0.16977430880069733, "step": 3591 }, { "epoch": 0.4358694333211989, "grad_norm": 3.907168388366699, "learning_rate": 1.1423658027269378e-05, "loss": 0.1890479475259781, "step": 3592 }, { "epoch": 0.4359907778182259, "grad_norm": 1.818791389465332, "learning_rate": 1.1421201326618352e-05, "loss": 0.1662425696849823, "step": 3593 }, { "epoch": 0.436112122315253, "grad_norm": 2.139403820037842, "learning_rate": 1.1418744625967326e-05, "loss": 0.3009265959262848, "step": 3594 }, { "epoch": 0.4362334668122801, "grad_norm": 3.330634117126465, "learning_rate": 1.14162879253163e-05, "loss": 0.30554115772247314, "step": 3595 }, { "epoch": 0.43635481130930714, "grad_norm": 2.787034034729004, "learning_rate": 1.1413831224665275e-05, "loss": 0.1728060245513916, "step": 3596 }, { "epoch": 0.4364761558063342, "grad_norm": 3.3264801502227783, "learning_rate": 1.1411374524014249e-05, "loss": 0.27438050508499146, "step": 3597 }, { "epoch": 0.43659750030336125, "grad_norm": 2.9363393783569336, "learning_rate": 1.1408917823363223e-05, "loss": 0.5970642566680908, "step": 3598 }, { "epoch": 0.4367188448003883, "grad_norm": 2.2291409969329834, "learning_rate": 1.1406461122712197e-05, "loss": 0.22181521356105804, "step": 3599 }, { "epoch": 0.43684018929741536, "grad_norm": 2.859792947769165, "learning_rate": 1.1404004422061172e-05, "loss": 0.19904783368110657, "step": 3600 }, { "epoch": 0.4369615337944424, "grad_norm": 3.4355077743530273, "learning_rate": 1.1401547721410146e-05, "loss": 0.7270107269287109, "step": 3601 }, { "epoch": 0.43708287829146947, "grad_norm": 2.798133611679077, "learning_rate": 1.1399091020759122e-05, "loss": 0.2644732594490051, "step": 3602 }, { "epoch": 0.4372042227884965, "grad_norm": 2.2718589305877686, "learning_rate": 1.1396634320108096e-05, "loss": 0.1575520932674408, "step": 3603 }, { "epoch": 0.4373255672855236, "grad_norm": 1.9757939577102661, "learning_rate": 1.139417761945707e-05, "loss": 0.14380985498428345, "step": 3604 }, { "epoch": 0.4374469117825507, "grad_norm": 2.0162603855133057, "learning_rate": 1.1391720918806045e-05, "loss": 0.2779490649700165, "step": 3605 }, { "epoch": 0.43756825627957774, "grad_norm": 3.3149561882019043, "learning_rate": 1.1389264218155019e-05, "loss": 0.1290723830461502, "step": 3606 }, { "epoch": 0.4376896007766048, "grad_norm": 1.8297659158706665, "learning_rate": 1.1386807517503993e-05, "loss": 0.34521085023880005, "step": 3607 }, { "epoch": 0.43781094527363185, "grad_norm": 2.085313320159912, "learning_rate": 1.1384350816852967e-05, "loss": 0.1551712602376938, "step": 3608 }, { "epoch": 0.4379322897706589, "grad_norm": 3.8662731647491455, "learning_rate": 1.1381894116201942e-05, "loss": 0.25557762384414673, "step": 3609 }, { "epoch": 0.43805363426768595, "grad_norm": 2.7187681198120117, "learning_rate": 1.1379437415550916e-05, "loss": 0.24853914976119995, "step": 3610 }, { "epoch": 0.438174978764713, "grad_norm": 2.3490939140319824, "learning_rate": 1.137698071489989e-05, "loss": 0.32234546542167664, "step": 3611 }, { "epoch": 0.43829632326174006, "grad_norm": 2.499971866607666, "learning_rate": 1.1374524014248864e-05, "loss": 0.27525416016578674, "step": 3612 }, { "epoch": 0.4384176677587671, "grad_norm": 1.7944048643112183, "learning_rate": 1.1372067313597839e-05, "loss": 0.10448901355266571, "step": 3613 }, { "epoch": 0.4385390122557942, "grad_norm": 2.5525612831115723, "learning_rate": 1.1369610612946813e-05, "loss": 0.1685064136981964, "step": 3614 }, { "epoch": 0.4386603567528213, "grad_norm": 2.662881851196289, "learning_rate": 1.1367153912295787e-05, "loss": 0.3491094708442688, "step": 3615 }, { "epoch": 0.43878170124984833, "grad_norm": 2.6570043563842773, "learning_rate": 1.1364697211644761e-05, "loss": 0.2924567461013794, "step": 3616 }, { "epoch": 0.4389030457468754, "grad_norm": 2.361314058303833, "learning_rate": 1.1362240510993736e-05, "loss": 0.5655845403671265, "step": 3617 }, { "epoch": 0.43902439024390244, "grad_norm": 3.0596911907196045, "learning_rate": 1.135978381034271e-05, "loss": 0.3180844783782959, "step": 3618 }, { "epoch": 0.4391457347409295, "grad_norm": 3.2376537322998047, "learning_rate": 1.1357327109691684e-05, "loss": 0.48498889803886414, "step": 3619 }, { "epoch": 0.43926707923795655, "grad_norm": 2.4582009315490723, "learning_rate": 1.1354870409040658e-05, "loss": 0.14106501638889313, "step": 3620 }, { "epoch": 0.4393884237349836, "grad_norm": 1.0312790870666504, "learning_rate": 1.1352413708389633e-05, "loss": 0.009895110502839088, "step": 3621 }, { "epoch": 0.43950976823201066, "grad_norm": 2.2700610160827637, "learning_rate": 1.1349957007738609e-05, "loss": 0.1510792374610901, "step": 3622 }, { "epoch": 0.43963111272903777, "grad_norm": 3.85322904586792, "learning_rate": 1.1347500307087583e-05, "loss": 0.6400561928749084, "step": 3623 }, { "epoch": 0.4397524572260648, "grad_norm": 2.530968427658081, "learning_rate": 1.1345043606436557e-05, "loss": 0.3659777343273163, "step": 3624 }, { "epoch": 0.4398738017230919, "grad_norm": 2.400381088256836, "learning_rate": 1.1342586905785531e-05, "loss": 0.09772318601608276, "step": 3625 }, { "epoch": 0.4399951462201189, "grad_norm": 3.9213593006134033, "learning_rate": 1.1340130205134506e-05, "loss": 0.7347447872161865, "step": 3626 }, { "epoch": 0.440116490717146, "grad_norm": 1.5514545440673828, "learning_rate": 1.133767350448348e-05, "loss": 0.09241119772195816, "step": 3627 }, { "epoch": 0.44023783521417303, "grad_norm": 1.6501867771148682, "learning_rate": 1.1335216803832454e-05, "loss": 0.12137887626886368, "step": 3628 }, { "epoch": 0.4403591797112001, "grad_norm": 2.3960394859313965, "learning_rate": 1.1332760103181428e-05, "loss": 0.5893489122390747, "step": 3629 }, { "epoch": 0.44048052420822714, "grad_norm": 0.7135860919952393, "learning_rate": 1.1330303402530403e-05, "loss": 0.019609341397881508, "step": 3630 }, { "epoch": 0.4406018687052542, "grad_norm": 1.8278428316116333, "learning_rate": 1.1327846701879377e-05, "loss": 0.10106104612350464, "step": 3631 }, { "epoch": 0.44072321320228125, "grad_norm": 2.0153684616088867, "learning_rate": 1.1325390001228351e-05, "loss": 0.27083125710487366, "step": 3632 }, { "epoch": 0.44084455769930836, "grad_norm": 3.3400328159332275, "learning_rate": 1.1322933300577326e-05, "loss": 0.11951978504657745, "step": 3633 }, { "epoch": 0.4409659021963354, "grad_norm": 3.1101183891296387, "learning_rate": 1.13204765999263e-05, "loss": 0.3573867082595825, "step": 3634 }, { "epoch": 0.44108724669336247, "grad_norm": 2.616203784942627, "learning_rate": 1.1318019899275274e-05, "loss": 0.17786771059036255, "step": 3635 }, { "epoch": 0.4412085911903895, "grad_norm": 2.3295364379882812, "learning_rate": 1.1315563198624248e-05, "loss": 0.13881252706050873, "step": 3636 }, { "epoch": 0.4413299356874166, "grad_norm": 2.7589056491851807, "learning_rate": 1.1313106497973223e-05, "loss": 0.24190297722816467, "step": 3637 }, { "epoch": 0.44145128018444363, "grad_norm": 5.638766765594482, "learning_rate": 1.1310649797322197e-05, "loss": 0.463576078414917, "step": 3638 }, { "epoch": 0.4415726246814707, "grad_norm": 2.3034234046936035, "learning_rate": 1.1308193096671171e-05, "loss": 0.2793579399585724, "step": 3639 }, { "epoch": 0.44169396917849774, "grad_norm": 3.2416858673095703, "learning_rate": 1.1305736396020145e-05, "loss": 0.4077681005001068, "step": 3640 }, { "epoch": 0.4418153136755248, "grad_norm": 2.0680837631225586, "learning_rate": 1.130327969536912e-05, "loss": 0.2055528461933136, "step": 3641 }, { "epoch": 0.4419366581725519, "grad_norm": 2.0137829780578613, "learning_rate": 1.1300822994718096e-05, "loss": 0.44573357701301575, "step": 3642 }, { "epoch": 0.44205800266957895, "grad_norm": 1.356884479522705, "learning_rate": 1.129836629406707e-05, "loss": 0.10731276869773865, "step": 3643 }, { "epoch": 0.442179347166606, "grad_norm": 1.970157504081726, "learning_rate": 1.1295909593416044e-05, "loss": 0.416285902261734, "step": 3644 }, { "epoch": 0.44230069166363306, "grad_norm": 2.973112106323242, "learning_rate": 1.1293452892765018e-05, "loss": 0.108841173350811, "step": 3645 }, { "epoch": 0.4424220361606601, "grad_norm": 3.15687894821167, "learning_rate": 1.1290996192113993e-05, "loss": 0.5777620077133179, "step": 3646 }, { "epoch": 0.44254338065768717, "grad_norm": 2.8096749782562256, "learning_rate": 1.1288539491462967e-05, "loss": 0.1413717120885849, "step": 3647 }, { "epoch": 0.4426647251547142, "grad_norm": 2.755324125289917, "learning_rate": 1.1286082790811941e-05, "loss": 0.24966059625148773, "step": 3648 }, { "epoch": 0.4427860696517413, "grad_norm": 2.03189754486084, "learning_rate": 1.1283626090160915e-05, "loss": 0.2302841991186142, "step": 3649 }, { "epoch": 0.44290741414876833, "grad_norm": 3.2773029804229736, "learning_rate": 1.128116938950989e-05, "loss": 0.2901148200035095, "step": 3650 }, { "epoch": 0.44302875864579544, "grad_norm": 1.733625054359436, "learning_rate": 1.1278712688858864e-05, "loss": 0.10937804728746414, "step": 3651 }, { "epoch": 0.4431501031428225, "grad_norm": 2.043578624725342, "learning_rate": 1.1276255988207838e-05, "loss": 0.2226874828338623, "step": 3652 }, { "epoch": 0.44327144763984955, "grad_norm": 1.561728596687317, "learning_rate": 1.1273799287556812e-05, "loss": 0.1764751374721527, "step": 3653 }, { "epoch": 0.4433927921368766, "grad_norm": 2.3204519748687744, "learning_rate": 1.1271342586905787e-05, "loss": 0.6837862730026245, "step": 3654 }, { "epoch": 0.44351413663390366, "grad_norm": 2.4015603065490723, "learning_rate": 1.1268885886254761e-05, "loss": 0.5877541303634644, "step": 3655 }, { "epoch": 0.4436354811309307, "grad_norm": 2.2663328647613525, "learning_rate": 1.1266429185603735e-05, "loss": 0.17803741991519928, "step": 3656 }, { "epoch": 0.44375682562795776, "grad_norm": 1.9317059516906738, "learning_rate": 1.126397248495271e-05, "loss": 0.3878974914550781, "step": 3657 }, { "epoch": 0.4438781701249848, "grad_norm": 2.765651226043701, "learning_rate": 1.1261515784301684e-05, "loss": 0.22609782218933105, "step": 3658 }, { "epoch": 0.44399951462201187, "grad_norm": 2.527340888977051, "learning_rate": 1.1259059083650658e-05, "loss": 0.22068405151367188, "step": 3659 }, { "epoch": 0.4441208591190389, "grad_norm": 2.0030107498168945, "learning_rate": 1.1256602382999632e-05, "loss": 0.15071557462215424, "step": 3660 }, { "epoch": 0.44424220361606603, "grad_norm": 1.7133005857467651, "learning_rate": 1.1254145682348606e-05, "loss": 0.060266535729169846, "step": 3661 }, { "epoch": 0.4443635481130931, "grad_norm": 4.328049182891846, "learning_rate": 1.1251688981697582e-05, "loss": 0.27519306540489197, "step": 3662 }, { "epoch": 0.44448489261012014, "grad_norm": 4.070196628570557, "learning_rate": 1.1249232281046557e-05, "loss": 0.19711193442344666, "step": 3663 }, { "epoch": 0.4446062371071472, "grad_norm": 1.8636376857757568, "learning_rate": 1.1246775580395531e-05, "loss": 0.06845342367887497, "step": 3664 }, { "epoch": 0.44472758160417425, "grad_norm": 1.4219677448272705, "learning_rate": 1.1244318879744505e-05, "loss": 0.06279438734054565, "step": 3665 }, { "epoch": 0.4448489261012013, "grad_norm": 2.331526756286621, "learning_rate": 1.124186217909348e-05, "loss": 0.19652298092842102, "step": 3666 }, { "epoch": 0.44497027059822836, "grad_norm": 2.0246334075927734, "learning_rate": 1.1239405478442454e-05, "loss": 0.3149532675743103, "step": 3667 }, { "epoch": 0.4450916150952554, "grad_norm": 1.9124372005462646, "learning_rate": 1.1236948777791428e-05, "loss": 0.25972670316696167, "step": 3668 }, { "epoch": 0.44521295959228246, "grad_norm": 2.798413038253784, "learning_rate": 1.1234492077140402e-05, "loss": 0.2329055815935135, "step": 3669 }, { "epoch": 0.4453343040893096, "grad_norm": 2.784217119216919, "learning_rate": 1.1232035376489376e-05, "loss": 0.13828489184379578, "step": 3670 }, { "epoch": 0.4454556485863366, "grad_norm": 2.5673763751983643, "learning_rate": 1.122957867583835e-05, "loss": 0.5141546726226807, "step": 3671 }, { "epoch": 0.4455769930833637, "grad_norm": 3.2268829345703125, "learning_rate": 1.1227121975187325e-05, "loss": 0.3171578347682953, "step": 3672 }, { "epoch": 0.44569833758039074, "grad_norm": 2.244013786315918, "learning_rate": 1.12246652745363e-05, "loss": 0.20982038974761963, "step": 3673 }, { "epoch": 0.4458196820774178, "grad_norm": 4.174572944641113, "learning_rate": 1.1222208573885273e-05, "loss": 0.5640068054199219, "step": 3674 }, { "epoch": 0.44594102657444484, "grad_norm": 2.7843480110168457, "learning_rate": 1.1219751873234248e-05, "loss": 0.2926955223083496, "step": 3675 }, { "epoch": 0.4460623710714719, "grad_norm": 1.712131142616272, "learning_rate": 1.1217295172583222e-05, "loss": 0.14825302362442017, "step": 3676 }, { "epoch": 0.44618371556849895, "grad_norm": 3.0750234127044678, "learning_rate": 1.1214838471932196e-05, "loss": 0.20352722704410553, "step": 3677 }, { "epoch": 0.446305060065526, "grad_norm": 2.5182766914367676, "learning_rate": 1.121238177128117e-05, "loss": 0.21560010313987732, "step": 3678 }, { "epoch": 0.4464264045625531, "grad_norm": 1.4503228664398193, "learning_rate": 1.1209925070630145e-05, "loss": 0.08557716012001038, "step": 3679 }, { "epoch": 0.44654774905958017, "grad_norm": 2.568263053894043, "learning_rate": 1.1207468369979119e-05, "loss": 0.2553696930408478, "step": 3680 }, { "epoch": 0.4466690935566072, "grad_norm": 3.5506436824798584, "learning_rate": 1.1205011669328095e-05, "loss": 0.5497809648513794, "step": 3681 }, { "epoch": 0.4467904380536343, "grad_norm": 2.676406145095825, "learning_rate": 1.120255496867707e-05, "loss": 0.23912879824638367, "step": 3682 }, { "epoch": 0.44691178255066133, "grad_norm": 2.9485552310943604, "learning_rate": 1.1200098268026044e-05, "loss": 0.2600053548812866, "step": 3683 }, { "epoch": 0.4470331270476884, "grad_norm": 1.826737403869629, "learning_rate": 1.1197641567375018e-05, "loss": 0.22873912751674652, "step": 3684 }, { "epoch": 0.44715447154471544, "grad_norm": 1.9971508979797363, "learning_rate": 1.1195184866723992e-05, "loss": 0.1476428210735321, "step": 3685 }, { "epoch": 0.4472758160417425, "grad_norm": 1.1586400270462036, "learning_rate": 1.1192728166072966e-05, "loss": 0.10401519387960434, "step": 3686 }, { "epoch": 0.44739716053876954, "grad_norm": 3.4623281955718994, "learning_rate": 1.119027146542194e-05, "loss": 0.24992474913597107, "step": 3687 }, { "epoch": 0.44751850503579665, "grad_norm": 3.845836639404297, "learning_rate": 1.1187814764770915e-05, "loss": 0.7359163761138916, "step": 3688 }, { "epoch": 0.4476398495328237, "grad_norm": 2.8226850032806396, "learning_rate": 1.1185358064119889e-05, "loss": 0.24005986750125885, "step": 3689 }, { "epoch": 0.44776119402985076, "grad_norm": 2.2125051021575928, "learning_rate": 1.1182901363468863e-05, "loss": 0.3258327841758728, "step": 3690 }, { "epoch": 0.4478825385268778, "grad_norm": 2.472573757171631, "learning_rate": 1.1180444662817838e-05, "loss": 0.10994560271501541, "step": 3691 }, { "epoch": 0.44800388302390487, "grad_norm": 2.341024398803711, "learning_rate": 1.1177987962166812e-05, "loss": 0.14841987192630768, "step": 3692 }, { "epoch": 0.4481252275209319, "grad_norm": 1.2704229354858398, "learning_rate": 1.1175531261515786e-05, "loss": 0.06925120204687119, "step": 3693 }, { "epoch": 0.448246572017959, "grad_norm": 1.6538794040679932, "learning_rate": 1.117307456086476e-05, "loss": 0.11470181494951248, "step": 3694 }, { "epoch": 0.44836791651498603, "grad_norm": 1.6831903457641602, "learning_rate": 1.1170617860213735e-05, "loss": 0.0720558613538742, "step": 3695 }, { "epoch": 0.4484892610120131, "grad_norm": 1.8351422548294067, "learning_rate": 1.1168161159562709e-05, "loss": 0.17231544852256775, "step": 3696 }, { "epoch": 0.44861060550904014, "grad_norm": 1.7946488857269287, "learning_rate": 1.1165704458911681e-05, "loss": 0.10063959658145905, "step": 3697 }, { "epoch": 0.44873195000606725, "grad_norm": 2.6582419872283936, "learning_rate": 1.1163247758260656e-05, "loss": 0.12853577733039856, "step": 3698 }, { "epoch": 0.4488532945030943, "grad_norm": 3.3695971965789795, "learning_rate": 1.116079105760963e-05, "loss": 0.4992516040802002, "step": 3699 }, { "epoch": 0.44897463900012136, "grad_norm": 3.701185941696167, "learning_rate": 1.1158334356958604e-05, "loss": 0.24211734533309937, "step": 3700 }, { "epoch": 0.4490959834971484, "grad_norm": 4.2144904136657715, "learning_rate": 1.1155877656307578e-05, "loss": 0.49937915802001953, "step": 3701 }, { "epoch": 0.44921732799417546, "grad_norm": 3.507452964782715, "learning_rate": 1.1153420955656553e-05, "loss": 0.4374186396598816, "step": 3702 }, { "epoch": 0.4493386724912025, "grad_norm": 2.2759249210357666, "learning_rate": 1.1150964255005527e-05, "loss": 0.07882027328014374, "step": 3703 }, { "epoch": 0.44946001698822957, "grad_norm": 1.028842568397522, "learning_rate": 1.1148507554354501e-05, "loss": 0.04556459188461304, "step": 3704 }, { "epoch": 0.4495813614852566, "grad_norm": 2.8861923217773438, "learning_rate": 1.1146050853703475e-05, "loss": 0.26581016182899475, "step": 3705 }, { "epoch": 0.4497027059822837, "grad_norm": 3.1814310550689697, "learning_rate": 1.114359415305245e-05, "loss": 0.34447595477104187, "step": 3706 }, { "epoch": 0.4498240504793108, "grad_norm": 2.059791326522827, "learning_rate": 1.1141137452401426e-05, "loss": 0.1574913114309311, "step": 3707 }, { "epoch": 0.44994539497633784, "grad_norm": 1.8999013900756836, "learning_rate": 1.11386807517504e-05, "loss": 0.08208724856376648, "step": 3708 }, { "epoch": 0.4500667394733649, "grad_norm": 2.4744856357574463, "learning_rate": 1.1136224051099374e-05, "loss": 0.21813160181045532, "step": 3709 }, { "epoch": 0.45018808397039195, "grad_norm": 2.708770513534546, "learning_rate": 1.1133767350448348e-05, "loss": 0.6522781252861023, "step": 3710 }, { "epoch": 0.450309428467419, "grad_norm": 2.580425977706909, "learning_rate": 1.1131310649797323e-05, "loss": 0.25760287046432495, "step": 3711 }, { "epoch": 0.45043077296444606, "grad_norm": 1.9656243324279785, "learning_rate": 1.1128853949146297e-05, "loss": 0.1759759485721588, "step": 3712 }, { "epoch": 0.4505521174614731, "grad_norm": 2.028416156768799, "learning_rate": 1.1126397248495271e-05, "loss": 0.12356072664260864, "step": 3713 }, { "epoch": 0.45067346195850017, "grad_norm": 2.8768270015716553, "learning_rate": 1.1123940547844246e-05, "loss": 0.30887824296951294, "step": 3714 }, { "epoch": 0.4507948064555272, "grad_norm": 3.1012911796569824, "learning_rate": 1.112148384719322e-05, "loss": 0.6460905075073242, "step": 3715 }, { "epoch": 0.45091615095255433, "grad_norm": 2.7501220703125, "learning_rate": 1.1119027146542194e-05, "loss": 0.4956667721271515, "step": 3716 }, { "epoch": 0.4510374954495814, "grad_norm": 3.2978079319000244, "learning_rate": 1.1116570445891168e-05, "loss": 0.30553939938545227, "step": 3717 }, { "epoch": 0.45115883994660844, "grad_norm": 1.735008716583252, "learning_rate": 1.1114113745240143e-05, "loss": 0.11469829082489014, "step": 3718 }, { "epoch": 0.4512801844436355, "grad_norm": 1.777876377105713, "learning_rate": 1.1111657044589117e-05, "loss": 0.10907553136348724, "step": 3719 }, { "epoch": 0.45140152894066254, "grad_norm": 2.542064666748047, "learning_rate": 1.1109200343938091e-05, "loss": 0.17916345596313477, "step": 3720 }, { "epoch": 0.4515228734376896, "grad_norm": 1.6052556037902832, "learning_rate": 1.1106743643287065e-05, "loss": 0.029923617839813232, "step": 3721 }, { "epoch": 0.45164421793471665, "grad_norm": 2.0250751972198486, "learning_rate": 1.110428694263604e-05, "loss": 0.07364283502101898, "step": 3722 }, { "epoch": 0.4517655624317437, "grad_norm": 2.5152509212493896, "learning_rate": 1.1101830241985014e-05, "loss": 0.21491853892803192, "step": 3723 }, { "epoch": 0.45188690692877076, "grad_norm": 2.6573703289031982, "learning_rate": 1.1099373541333988e-05, "loss": 0.2361636459827423, "step": 3724 }, { "epoch": 0.4520082514257978, "grad_norm": 2.5982563495635986, "learning_rate": 1.1096916840682962e-05, "loss": 0.2589453458786011, "step": 3725 }, { "epoch": 0.4521295959228249, "grad_norm": 3.372690439224243, "learning_rate": 1.1094460140031937e-05, "loss": 0.3643377721309662, "step": 3726 }, { "epoch": 0.452250940419852, "grad_norm": 2.9856884479522705, "learning_rate": 1.1092003439380913e-05, "loss": 0.4071636497974396, "step": 3727 }, { "epoch": 0.45237228491687903, "grad_norm": 3.358541965484619, "learning_rate": 1.1089546738729887e-05, "loss": 0.33494922518730164, "step": 3728 }, { "epoch": 0.4524936294139061, "grad_norm": 1.913957953453064, "learning_rate": 1.1087090038078861e-05, "loss": 0.0629691556096077, "step": 3729 }, { "epoch": 0.45261497391093314, "grad_norm": 2.1571216583251953, "learning_rate": 1.1084633337427835e-05, "loss": 0.26977357268333435, "step": 3730 }, { "epoch": 0.4527363184079602, "grad_norm": 2.607900381088257, "learning_rate": 1.108217663677681e-05, "loss": 0.10467074811458588, "step": 3731 }, { "epoch": 0.45285766290498725, "grad_norm": 1.5066871643066406, "learning_rate": 1.1079719936125784e-05, "loss": 0.12838679552078247, "step": 3732 }, { "epoch": 0.4529790074020143, "grad_norm": 2.9027323722839355, "learning_rate": 1.1077263235474758e-05, "loss": 0.5089572072029114, "step": 3733 }, { "epoch": 0.45310035189904135, "grad_norm": 2.9095492362976074, "learning_rate": 1.1074806534823732e-05, "loss": 0.20720970630645752, "step": 3734 }, { "epoch": 0.45322169639606846, "grad_norm": 1.759684681892395, "learning_rate": 1.1072349834172707e-05, "loss": 0.4124320447444916, "step": 3735 }, { "epoch": 0.4533430408930955, "grad_norm": 4.119267463684082, "learning_rate": 1.1069893133521681e-05, "loss": 0.26363465189933777, "step": 3736 }, { "epoch": 0.45346438539012257, "grad_norm": 2.257187604904175, "learning_rate": 1.1067436432870655e-05, "loss": 0.2994039058685303, "step": 3737 }, { "epoch": 0.4535857298871496, "grad_norm": 1.8574036359786987, "learning_rate": 1.106497973221963e-05, "loss": 0.1107100248336792, "step": 3738 }, { "epoch": 0.4537070743841767, "grad_norm": 1.9110321998596191, "learning_rate": 1.1062523031568604e-05, "loss": 0.18646883964538574, "step": 3739 }, { "epoch": 0.45382841888120373, "grad_norm": 2.2259957790374756, "learning_rate": 1.1060066330917578e-05, "loss": 0.12754613161087036, "step": 3740 }, { "epoch": 0.4539497633782308, "grad_norm": 2.3074288368225098, "learning_rate": 1.1057609630266552e-05, "loss": 0.11077870428562164, "step": 3741 }, { "epoch": 0.45407110787525784, "grad_norm": 2.2520663738250732, "learning_rate": 1.1055152929615526e-05, "loss": 0.16683274507522583, "step": 3742 }, { "epoch": 0.4541924523722849, "grad_norm": 2.3819658756256104, "learning_rate": 1.10526962289645e-05, "loss": 0.17191553115844727, "step": 3743 }, { "epoch": 0.454313796869312, "grad_norm": 2.6728708744049072, "learning_rate": 1.1050239528313475e-05, "loss": 0.27221840620040894, "step": 3744 }, { "epoch": 0.45443514136633906, "grad_norm": 1.5359383821487427, "learning_rate": 1.104778282766245e-05, "loss": 0.0838262066245079, "step": 3745 }, { "epoch": 0.4545564858633661, "grad_norm": 2.8321125507354736, "learning_rate": 1.1045326127011423e-05, "loss": 0.3541053533554077, "step": 3746 }, { "epoch": 0.45467783036039316, "grad_norm": 2.7794976234436035, "learning_rate": 1.10428694263604e-05, "loss": 0.31279149651527405, "step": 3747 }, { "epoch": 0.4547991748574202, "grad_norm": 1.9179726839065552, "learning_rate": 1.1040412725709374e-05, "loss": 0.06787727028131485, "step": 3748 }, { "epoch": 0.4549205193544473, "grad_norm": 2.6522345542907715, "learning_rate": 1.1037956025058348e-05, "loss": 0.14771534502506256, "step": 3749 }, { "epoch": 0.4550418638514743, "grad_norm": 1.329545497894287, "learning_rate": 1.1035499324407322e-05, "loss": 0.0695379301905632, "step": 3750 }, { "epoch": 0.4551632083485014, "grad_norm": 2.4873099327087402, "learning_rate": 1.1033042623756296e-05, "loss": 0.10876917839050293, "step": 3751 }, { "epoch": 0.45528455284552843, "grad_norm": 1.5930664539337158, "learning_rate": 1.103058592310527e-05, "loss": 0.08490962535142899, "step": 3752 }, { "epoch": 0.4554058973425555, "grad_norm": 1.8572665452957153, "learning_rate": 1.1028129222454245e-05, "loss": 0.4095427989959717, "step": 3753 }, { "epoch": 0.4555272418395826, "grad_norm": 0.954850435256958, "learning_rate": 1.102567252180322e-05, "loss": 0.02159183658659458, "step": 3754 }, { "epoch": 0.45564858633660965, "grad_norm": 2.3393404483795166, "learning_rate": 1.1023215821152193e-05, "loss": 0.49459564685821533, "step": 3755 }, { "epoch": 0.4557699308336367, "grad_norm": 3.7105650901794434, "learning_rate": 1.1020759120501168e-05, "loss": 0.504112720489502, "step": 3756 }, { "epoch": 0.45589127533066376, "grad_norm": 1.7900317907333374, "learning_rate": 1.1018302419850142e-05, "loss": 0.04191075637936592, "step": 3757 }, { "epoch": 0.4560126198276908, "grad_norm": 2.862607717514038, "learning_rate": 1.1015845719199116e-05, "loss": 0.2582750618457794, "step": 3758 }, { "epoch": 0.45613396432471787, "grad_norm": 2.3275985717773438, "learning_rate": 1.101338901854809e-05, "loss": 0.29841142892837524, "step": 3759 }, { "epoch": 0.4562553088217449, "grad_norm": 3.050267219543457, "learning_rate": 1.1010932317897065e-05, "loss": 0.12717841565608978, "step": 3760 }, { "epoch": 0.456376653318772, "grad_norm": 3.191488265991211, "learning_rate": 1.1008475617246039e-05, "loss": 0.16273337602615356, "step": 3761 }, { "epoch": 0.45649799781579903, "grad_norm": 1.3571722507476807, "learning_rate": 1.1006018916595013e-05, "loss": 0.19462181627750397, "step": 3762 }, { "epoch": 0.45661934231282614, "grad_norm": 4.811699390411377, "learning_rate": 1.1003562215943988e-05, "loss": 0.24823786318302155, "step": 3763 }, { "epoch": 0.4567406868098532, "grad_norm": 3.2369728088378906, "learning_rate": 1.1001105515292962e-05, "loss": 0.2375682145357132, "step": 3764 }, { "epoch": 0.45686203130688025, "grad_norm": 2.953446388244629, "learning_rate": 1.0998648814641936e-05, "loss": 0.2989498972892761, "step": 3765 }, { "epoch": 0.4569833758039073, "grad_norm": 2.2972118854522705, "learning_rate": 1.099619211399091e-05, "loss": 0.23729734122753143, "step": 3766 }, { "epoch": 0.45710472030093435, "grad_norm": 3.0327601432800293, "learning_rate": 1.0993735413339886e-05, "loss": 0.1904260814189911, "step": 3767 }, { "epoch": 0.4572260647979614, "grad_norm": 3.116729497909546, "learning_rate": 1.099127871268886e-05, "loss": 0.2167295217514038, "step": 3768 }, { "epoch": 0.45734740929498846, "grad_norm": 2.1636672019958496, "learning_rate": 1.0988822012037835e-05, "loss": 0.1741916388273239, "step": 3769 }, { "epoch": 0.4574687537920155, "grad_norm": 3.252021551132202, "learning_rate": 1.0986365311386809e-05, "loss": 0.2020607441663742, "step": 3770 }, { "epoch": 0.45759009828904257, "grad_norm": 3.535862922668457, "learning_rate": 1.0983908610735783e-05, "loss": 0.17055633664131165, "step": 3771 }, { "epoch": 0.4577114427860697, "grad_norm": 3.0886454582214355, "learning_rate": 1.0981451910084758e-05, "loss": 0.23023851215839386, "step": 3772 }, { "epoch": 0.45783278728309673, "grad_norm": 2.175285577774048, "learning_rate": 1.0978995209433732e-05, "loss": 0.151248961687088, "step": 3773 }, { "epoch": 0.4579541317801238, "grad_norm": 1.763638973236084, "learning_rate": 1.0976538508782706e-05, "loss": 0.08291637152433395, "step": 3774 }, { "epoch": 0.45807547627715084, "grad_norm": 1.2859675884246826, "learning_rate": 1.097408180813168e-05, "loss": 0.04810434579849243, "step": 3775 }, { "epoch": 0.4581968207741779, "grad_norm": 3.772589683532715, "learning_rate": 1.0971625107480655e-05, "loss": 0.35038936138153076, "step": 3776 }, { "epoch": 0.45831816527120495, "grad_norm": 3.2569420337677, "learning_rate": 1.0969168406829629e-05, "loss": 0.41661763191223145, "step": 3777 }, { "epoch": 0.458439509768232, "grad_norm": 3.516561269760132, "learning_rate": 1.0966711706178603e-05, "loss": 0.2825745940208435, "step": 3778 }, { "epoch": 0.45856085426525905, "grad_norm": 4.072770118713379, "learning_rate": 1.0964255005527577e-05, "loss": 0.6777939200401306, "step": 3779 }, { "epoch": 0.4586821987622861, "grad_norm": 2.922475576400757, "learning_rate": 1.0961798304876552e-05, "loss": 0.07564674317836761, "step": 3780 }, { "epoch": 0.45880354325931316, "grad_norm": 2.678272008895874, "learning_rate": 1.0959341604225526e-05, "loss": 0.2217925786972046, "step": 3781 }, { "epoch": 0.45892488775634027, "grad_norm": 2.3827056884765625, "learning_rate": 1.09568849035745e-05, "loss": 0.3195100426673889, "step": 3782 }, { "epoch": 0.4590462322533673, "grad_norm": 2.139965057373047, "learning_rate": 1.0954428202923474e-05, "loss": 0.33632519841194153, "step": 3783 }, { "epoch": 0.4591675767503944, "grad_norm": 3.4572787284851074, "learning_rate": 1.0951971502272449e-05, "loss": 0.5048207640647888, "step": 3784 }, { "epoch": 0.45928892124742143, "grad_norm": 2.216235876083374, "learning_rate": 1.0949514801621423e-05, "loss": 0.18083621561527252, "step": 3785 }, { "epoch": 0.4594102657444485, "grad_norm": 2.518138885498047, "learning_rate": 1.0947058100970397e-05, "loss": 0.40386083722114563, "step": 3786 }, { "epoch": 0.45953161024147554, "grad_norm": 1.9466145038604736, "learning_rate": 1.0944601400319373e-05, "loss": 0.15552747249603271, "step": 3787 }, { "epoch": 0.4596529547385026, "grad_norm": 1.848572850227356, "learning_rate": 1.0942144699668347e-05, "loss": 0.10546831786632538, "step": 3788 }, { "epoch": 0.45977429923552965, "grad_norm": 2.860260248184204, "learning_rate": 1.0939687999017322e-05, "loss": 0.34014883637428284, "step": 3789 }, { "epoch": 0.4598956437325567, "grad_norm": 3.1662676334381104, "learning_rate": 1.0937231298366296e-05, "loss": 0.45574745535850525, "step": 3790 }, { "epoch": 0.4600169882295838, "grad_norm": 3.0280942916870117, "learning_rate": 1.093477459771527e-05, "loss": 0.6346323490142822, "step": 3791 }, { "epoch": 0.46013833272661087, "grad_norm": 2.906144380569458, "learning_rate": 1.0932317897064244e-05, "loss": 0.4477047324180603, "step": 3792 }, { "epoch": 0.4602596772236379, "grad_norm": 3.9944026470184326, "learning_rate": 1.0929861196413219e-05, "loss": 0.4693850576877594, "step": 3793 }, { "epoch": 0.460381021720665, "grad_norm": 1.2766904830932617, "learning_rate": 1.0927404495762193e-05, "loss": 0.013830102048814297, "step": 3794 }, { "epoch": 0.460502366217692, "grad_norm": 3.3384554386138916, "learning_rate": 1.0924947795111167e-05, "loss": 0.4867768883705139, "step": 3795 }, { "epoch": 0.4606237107147191, "grad_norm": 2.780524969100952, "learning_rate": 1.0922491094460141e-05, "loss": 0.5420291423797607, "step": 3796 }, { "epoch": 0.46074505521174614, "grad_norm": 1.9150787591934204, "learning_rate": 1.0920034393809116e-05, "loss": 0.10740383714437485, "step": 3797 }, { "epoch": 0.4608663997087732, "grad_norm": 2.6779415607452393, "learning_rate": 1.091757769315809e-05, "loss": 0.11736059933900833, "step": 3798 }, { "epoch": 0.46098774420580024, "grad_norm": 2.689134359359741, "learning_rate": 1.0915120992507064e-05, "loss": 0.5773141980171204, "step": 3799 }, { "epoch": 0.46110908870282735, "grad_norm": 2.030559539794922, "learning_rate": 1.0912664291856038e-05, "loss": 0.1807558387517929, "step": 3800 }, { "epoch": 0.4612304331998544, "grad_norm": 2.132622718811035, "learning_rate": 1.0910207591205013e-05, "loss": 0.3821357488632202, "step": 3801 }, { "epoch": 0.46135177769688146, "grad_norm": 3.906198501586914, "learning_rate": 1.0907750890553987e-05, "loss": 0.34439414739608765, "step": 3802 }, { "epoch": 0.4614731221939085, "grad_norm": 3.38917875289917, "learning_rate": 1.0905294189902961e-05, "loss": 0.2816096246242523, "step": 3803 }, { "epoch": 0.46159446669093557, "grad_norm": 2.7078585624694824, "learning_rate": 1.0902837489251936e-05, "loss": 0.2726061940193176, "step": 3804 }, { "epoch": 0.4617158111879626, "grad_norm": 2.956815242767334, "learning_rate": 1.090038078860091e-05, "loss": 0.19974419474601746, "step": 3805 }, { "epoch": 0.4618371556849897, "grad_norm": 2.5726802349090576, "learning_rate": 1.0897924087949884e-05, "loss": 0.1582794338464737, "step": 3806 }, { "epoch": 0.46195850018201673, "grad_norm": 2.217891216278076, "learning_rate": 1.089546738729886e-05, "loss": 0.4006637632846832, "step": 3807 }, { "epoch": 0.4620798446790438, "grad_norm": 2.2818827629089355, "learning_rate": 1.0893010686647834e-05, "loss": 0.38829168677330017, "step": 3808 }, { "epoch": 0.4622011891760709, "grad_norm": 3.294948101043701, "learning_rate": 1.0890553985996809e-05, "loss": 0.42471641302108765, "step": 3809 }, { "epoch": 0.46232253367309795, "grad_norm": 1.1976569890975952, "learning_rate": 1.0888097285345783e-05, "loss": 0.08018773794174194, "step": 3810 }, { "epoch": 0.462443878170125, "grad_norm": 1.8881957530975342, "learning_rate": 1.0885640584694757e-05, "loss": 0.09716351330280304, "step": 3811 }, { "epoch": 0.46256522266715205, "grad_norm": 1.2837885618209839, "learning_rate": 1.0883183884043731e-05, "loss": 0.04680376499891281, "step": 3812 }, { "epoch": 0.4626865671641791, "grad_norm": 2.924452304840088, "learning_rate": 1.0880727183392706e-05, "loss": 0.36568957567214966, "step": 3813 }, { "epoch": 0.46280791166120616, "grad_norm": 2.9263999462127686, "learning_rate": 1.087827048274168e-05, "loss": 0.31210649013519287, "step": 3814 }, { "epoch": 0.4629292561582332, "grad_norm": 1.9239130020141602, "learning_rate": 1.0875813782090654e-05, "loss": 0.2763766050338745, "step": 3815 }, { "epoch": 0.46305060065526027, "grad_norm": 2.8812992572784424, "learning_rate": 1.0873357081439628e-05, "loss": 0.1720142662525177, "step": 3816 }, { "epoch": 0.4631719451522873, "grad_norm": 2.3239588737487793, "learning_rate": 1.0870900380788603e-05, "loss": 0.3543533980846405, "step": 3817 }, { "epoch": 0.4632932896493144, "grad_norm": 1.8847872018814087, "learning_rate": 1.0868443680137577e-05, "loss": 0.09396478533744812, "step": 3818 }, { "epoch": 0.4634146341463415, "grad_norm": 3.816448450088501, "learning_rate": 1.0865986979486551e-05, "loss": 0.08851034939289093, "step": 3819 }, { "epoch": 0.46353597864336854, "grad_norm": 2.910249710083008, "learning_rate": 1.0863530278835525e-05, "loss": 0.4211457669734955, "step": 3820 }, { "epoch": 0.4636573231403956, "grad_norm": 2.403411388397217, "learning_rate": 1.08610735781845e-05, "loss": 0.21749405562877655, "step": 3821 }, { "epoch": 0.46377866763742265, "grad_norm": 2.843613862991333, "learning_rate": 1.0858616877533474e-05, "loss": 0.43604692816734314, "step": 3822 }, { "epoch": 0.4639000121344497, "grad_norm": 2.7551069259643555, "learning_rate": 1.0856160176882448e-05, "loss": 0.3502194583415985, "step": 3823 }, { "epoch": 0.46402135663147676, "grad_norm": 3.504392385482788, "learning_rate": 1.0853703476231422e-05, "loss": 0.2447194755077362, "step": 3824 }, { "epoch": 0.4641427011285038, "grad_norm": 1.3589024543762207, "learning_rate": 1.0851246775580397e-05, "loss": 0.06629391759634018, "step": 3825 }, { "epoch": 0.46426404562553086, "grad_norm": 2.4835751056671143, "learning_rate": 1.0848790074929373e-05, "loss": 0.3067898452281952, "step": 3826 }, { "epoch": 0.4643853901225579, "grad_norm": 0.09892398864030838, "learning_rate": 1.0846333374278347e-05, "loss": 0.0005673590349033475, "step": 3827 }, { "epoch": 0.464506734619585, "grad_norm": 0.376726359128952, "learning_rate": 1.0843876673627321e-05, "loss": 0.0030884118750691414, "step": 3828 }, { "epoch": 0.4646280791166121, "grad_norm": 3.054842710494995, "learning_rate": 1.0841419972976295e-05, "loss": 0.3230999708175659, "step": 3829 }, { "epoch": 0.46474942361363913, "grad_norm": 2.944398880004883, "learning_rate": 1.083896327232527e-05, "loss": 0.26404961943626404, "step": 3830 }, { "epoch": 0.4648707681106662, "grad_norm": 2.2208971977233887, "learning_rate": 1.0836506571674244e-05, "loss": 0.12686988711357117, "step": 3831 }, { "epoch": 0.46499211260769324, "grad_norm": 1.7423559427261353, "learning_rate": 1.0834049871023218e-05, "loss": 0.11456757038831711, "step": 3832 }, { "epoch": 0.4651134571047203, "grad_norm": 1.8023579120635986, "learning_rate": 1.083159317037219e-05, "loss": 0.03461475670337677, "step": 3833 }, { "epoch": 0.46523480160174735, "grad_norm": 4.159533500671387, "learning_rate": 1.0829136469721165e-05, "loss": 0.6996920704841614, "step": 3834 }, { "epoch": 0.4653561460987744, "grad_norm": 0.8902222514152527, "learning_rate": 1.082667976907014e-05, "loss": 0.015484759584069252, "step": 3835 }, { "epoch": 0.46547749059580146, "grad_norm": 3.078047275543213, "learning_rate": 1.0824223068419113e-05, "loss": 0.031939297914505005, "step": 3836 }, { "epoch": 0.46559883509282857, "grad_norm": 3.063530683517456, "learning_rate": 1.0821766367768088e-05, "loss": 0.6199505925178528, "step": 3837 }, { "epoch": 0.4657201795898556, "grad_norm": 2.463263988494873, "learning_rate": 1.0819309667117062e-05, "loss": 0.2926793098449707, "step": 3838 }, { "epoch": 0.4658415240868827, "grad_norm": 2.8171894550323486, "learning_rate": 1.0816852966466036e-05, "loss": 0.3186990022659302, "step": 3839 }, { "epoch": 0.46596286858390973, "grad_norm": 2.6074156761169434, "learning_rate": 1.081439626581501e-05, "loss": 0.1860102415084839, "step": 3840 }, { "epoch": 0.4660842130809368, "grad_norm": 2.959845542907715, "learning_rate": 1.0811939565163985e-05, "loss": 0.20030447840690613, "step": 3841 }, { "epoch": 0.46620555757796384, "grad_norm": 2.546820878982544, "learning_rate": 1.0809482864512959e-05, "loss": 0.2492934912443161, "step": 3842 }, { "epoch": 0.4663269020749909, "grad_norm": 2.02109432220459, "learning_rate": 1.0807026163861933e-05, "loss": 0.080159492790699, "step": 3843 }, { "epoch": 0.46644824657201794, "grad_norm": 7.214606761932373, "learning_rate": 1.0804569463210908e-05, "loss": 0.31246402859687805, "step": 3844 }, { "epoch": 0.466569591069045, "grad_norm": 3.7370619773864746, "learning_rate": 1.0802112762559882e-05, "loss": 0.292080283164978, "step": 3845 }, { "epoch": 0.46669093556607205, "grad_norm": 4.298639297485352, "learning_rate": 1.0799656061908856e-05, "loss": 0.2018800675868988, "step": 3846 }, { "epoch": 0.46681228006309916, "grad_norm": 2.9732825756073, "learning_rate": 1.079719936125783e-05, "loss": 0.12097842991352081, "step": 3847 }, { "epoch": 0.4669336245601262, "grad_norm": 3.72353196144104, "learning_rate": 1.0794742660606805e-05, "loss": 0.739285409450531, "step": 3848 }, { "epoch": 0.46705496905715327, "grad_norm": 4.16900110244751, "learning_rate": 1.0792285959955779e-05, "loss": 0.27454543113708496, "step": 3849 }, { "epoch": 0.4671763135541803, "grad_norm": 3.276912212371826, "learning_rate": 1.0789829259304753e-05, "loss": 0.1591143012046814, "step": 3850 }, { "epoch": 0.4672976580512074, "grad_norm": 3.6436712741851807, "learning_rate": 1.0787372558653727e-05, "loss": 0.5543659925460815, "step": 3851 }, { "epoch": 0.46741900254823443, "grad_norm": 2.5346391201019287, "learning_rate": 1.0784915858002703e-05, "loss": 0.10443320870399475, "step": 3852 }, { "epoch": 0.4675403470452615, "grad_norm": 2.5267155170440674, "learning_rate": 1.0782459157351678e-05, "loss": 0.18570610880851746, "step": 3853 }, { "epoch": 0.46766169154228854, "grad_norm": 2.378736972808838, "learning_rate": 1.0780002456700652e-05, "loss": 0.1752547025680542, "step": 3854 }, { "epoch": 0.4677830360393156, "grad_norm": 2.7192838191986084, "learning_rate": 1.0777545756049626e-05, "loss": 0.07624950259923935, "step": 3855 }, { "epoch": 0.4679043805363427, "grad_norm": 2.262993097305298, "learning_rate": 1.07750890553986e-05, "loss": 0.16422806680202484, "step": 3856 }, { "epoch": 0.46802572503336975, "grad_norm": 1.9306615591049194, "learning_rate": 1.0772632354747575e-05, "loss": 0.285910427570343, "step": 3857 }, { "epoch": 0.4681470695303968, "grad_norm": 3.803337812423706, "learning_rate": 1.0770175654096549e-05, "loss": 0.3446381688117981, "step": 3858 }, { "epoch": 0.46826841402742386, "grad_norm": 2.9336376190185547, "learning_rate": 1.0767718953445523e-05, "loss": 0.25159865617752075, "step": 3859 }, { "epoch": 0.4683897585244509, "grad_norm": 2.7131705284118652, "learning_rate": 1.0765262252794497e-05, "loss": 0.22185799479484558, "step": 3860 }, { "epoch": 0.46851110302147797, "grad_norm": 3.0690999031066895, "learning_rate": 1.0762805552143472e-05, "loss": 0.3870882987976074, "step": 3861 }, { "epoch": 0.468632447518505, "grad_norm": 2.561528205871582, "learning_rate": 1.0760348851492446e-05, "loss": 0.6698027849197388, "step": 3862 }, { "epoch": 0.4687537920155321, "grad_norm": 3.0866506099700928, "learning_rate": 1.075789215084142e-05, "loss": 0.3074035346508026, "step": 3863 }, { "epoch": 0.46887513651255913, "grad_norm": 1.8056470155715942, "learning_rate": 1.0755435450190394e-05, "loss": 0.06750544160604477, "step": 3864 }, { "epoch": 0.46899648100958624, "grad_norm": 3.010787010192871, "learning_rate": 1.0752978749539369e-05, "loss": 0.5204322338104248, "step": 3865 }, { "epoch": 0.4691178255066133, "grad_norm": 2.996385097503662, "learning_rate": 1.0750522048888343e-05, "loss": 0.24821801483631134, "step": 3866 }, { "epoch": 0.46923917000364035, "grad_norm": 2.704739809036255, "learning_rate": 1.0748065348237317e-05, "loss": 0.29266121983528137, "step": 3867 }, { "epoch": 0.4693605145006674, "grad_norm": 2.354991912841797, "learning_rate": 1.0745608647586291e-05, "loss": 0.12653499841690063, "step": 3868 }, { "epoch": 0.46948185899769446, "grad_norm": 3.1913106441497803, "learning_rate": 1.0743151946935266e-05, "loss": 0.3769054114818573, "step": 3869 }, { "epoch": 0.4696032034947215, "grad_norm": 2.2342827320098877, "learning_rate": 1.074069524628424e-05, "loss": 0.14861273765563965, "step": 3870 }, { "epoch": 0.46972454799174856, "grad_norm": 2.3855037689208984, "learning_rate": 1.0738238545633214e-05, "loss": 0.16335630416870117, "step": 3871 }, { "epoch": 0.4698458924887756, "grad_norm": 3.1305956840515137, "learning_rate": 1.073578184498219e-05, "loss": 0.3295513987541199, "step": 3872 }, { "epoch": 0.46996723698580267, "grad_norm": 1.3403748273849487, "learning_rate": 1.0733325144331164e-05, "loss": 0.048968229442834854, "step": 3873 }, { "epoch": 0.4700885814828297, "grad_norm": 3.024962902069092, "learning_rate": 1.0730868443680139e-05, "loss": 0.6027660369873047, "step": 3874 }, { "epoch": 0.47020992597985684, "grad_norm": 2.7007224559783936, "learning_rate": 1.0728411743029113e-05, "loss": 0.40500205755233765, "step": 3875 }, { "epoch": 0.4703312704768839, "grad_norm": 2.693378210067749, "learning_rate": 1.0725955042378087e-05, "loss": 0.4564482271671295, "step": 3876 }, { "epoch": 0.47045261497391094, "grad_norm": 1.9114058017730713, "learning_rate": 1.0723498341727061e-05, "loss": 0.2021862268447876, "step": 3877 }, { "epoch": 0.470573959470938, "grad_norm": 1.7047556638717651, "learning_rate": 1.0721041641076036e-05, "loss": 0.2426297515630722, "step": 3878 }, { "epoch": 0.47069530396796505, "grad_norm": 2.2682855129241943, "learning_rate": 1.071858494042501e-05, "loss": 0.1504073441028595, "step": 3879 }, { "epoch": 0.4708166484649921, "grad_norm": 3.064701557159424, "learning_rate": 1.0716128239773984e-05, "loss": 0.40392714738845825, "step": 3880 }, { "epoch": 0.47093799296201916, "grad_norm": 2.480076789855957, "learning_rate": 1.0713671539122958e-05, "loss": 0.28264525532722473, "step": 3881 }, { "epoch": 0.4710593374590462, "grad_norm": 0.01469326764345169, "learning_rate": 1.0711214838471933e-05, "loss": 0.0003070329548791051, "step": 3882 }, { "epoch": 0.47118068195607327, "grad_norm": 2.8371729850769043, "learning_rate": 1.0708758137820907e-05, "loss": 0.35440123081207275, "step": 3883 }, { "epoch": 0.4713020264531004, "grad_norm": 3.269803285598755, "learning_rate": 1.0706301437169881e-05, "loss": 0.41059374809265137, "step": 3884 }, { "epoch": 0.47142337095012743, "grad_norm": 2.0703015327453613, "learning_rate": 1.0703844736518856e-05, "loss": 0.07132905721664429, "step": 3885 }, { "epoch": 0.4715447154471545, "grad_norm": 2.3490655422210693, "learning_rate": 1.070138803586783e-05, "loss": 0.49852243065834045, "step": 3886 }, { "epoch": 0.47166605994418154, "grad_norm": 1.5123106241226196, "learning_rate": 1.0698931335216804e-05, "loss": 0.1113678365945816, "step": 3887 }, { "epoch": 0.4717874044412086, "grad_norm": 3.1069955825805664, "learning_rate": 1.0696474634565778e-05, "loss": 0.3869853615760803, "step": 3888 }, { "epoch": 0.47190874893823564, "grad_norm": 2.830869674682617, "learning_rate": 1.0694017933914753e-05, "loss": 0.48214244842529297, "step": 3889 }, { "epoch": 0.4720300934352627, "grad_norm": 1.855278730392456, "learning_rate": 1.0691561233263727e-05, "loss": 0.056116193532943726, "step": 3890 }, { "epoch": 0.47215143793228975, "grad_norm": 4.9797749519348145, "learning_rate": 1.0689104532612701e-05, "loss": 0.41455674171447754, "step": 3891 }, { "epoch": 0.4722727824293168, "grad_norm": 2.3777003288269043, "learning_rate": 1.0686647831961677e-05, "loss": 0.2299240231513977, "step": 3892 }, { "epoch": 0.4723941269263439, "grad_norm": 0.6550430655479431, "learning_rate": 1.0684191131310651e-05, "loss": 0.002911692252382636, "step": 3893 }, { "epoch": 0.47251547142337097, "grad_norm": 2.060988187789917, "learning_rate": 1.0681734430659626e-05, "loss": 0.13176614046096802, "step": 3894 }, { "epoch": 0.472636815920398, "grad_norm": 3.4118447303771973, "learning_rate": 1.06792777300086e-05, "loss": 0.23221538960933685, "step": 3895 }, { "epoch": 0.4727581604174251, "grad_norm": 2.803123950958252, "learning_rate": 1.0676821029357574e-05, "loss": 0.16176444292068481, "step": 3896 }, { "epoch": 0.47287950491445213, "grad_norm": 1.795917272567749, "learning_rate": 1.0674364328706548e-05, "loss": 0.4585912823677063, "step": 3897 }, { "epoch": 0.4730008494114792, "grad_norm": 2.3427181243896484, "learning_rate": 1.0671907628055523e-05, "loss": 0.34860968589782715, "step": 3898 }, { "epoch": 0.47312219390850624, "grad_norm": 1.4768927097320557, "learning_rate": 1.0669450927404497e-05, "loss": 0.09793134033679962, "step": 3899 }, { "epoch": 0.4732435384055333, "grad_norm": 2.1842041015625, "learning_rate": 1.0666994226753471e-05, "loss": 0.4446258544921875, "step": 3900 }, { "epoch": 0.47336488290256035, "grad_norm": 2.9729180335998535, "learning_rate": 1.0664537526102445e-05, "loss": 0.3867642283439636, "step": 3901 }, { "epoch": 0.47348622739958746, "grad_norm": 3.0708909034729004, "learning_rate": 1.066208082545142e-05, "loss": 0.2150513231754303, "step": 3902 }, { "epoch": 0.4736075718966145, "grad_norm": 1.3335891962051392, "learning_rate": 1.0659624124800394e-05, "loss": 0.021012621000409126, "step": 3903 }, { "epoch": 0.47372891639364156, "grad_norm": 2.9141807556152344, "learning_rate": 1.0657167424149368e-05, "loss": 0.6602778434753418, "step": 3904 }, { "epoch": 0.4738502608906686, "grad_norm": 2.237032890319824, "learning_rate": 1.0654710723498342e-05, "loss": 0.26512467861175537, "step": 3905 }, { "epoch": 0.47397160538769567, "grad_norm": 2.4029390811920166, "learning_rate": 1.0652254022847317e-05, "loss": 0.2347608208656311, "step": 3906 }, { "epoch": 0.4740929498847227, "grad_norm": 2.534771680831909, "learning_rate": 1.0649797322196291e-05, "loss": 0.0783708393573761, "step": 3907 }, { "epoch": 0.4742142943817498, "grad_norm": 2.047633647918701, "learning_rate": 1.0647340621545265e-05, "loss": 0.1865750551223755, "step": 3908 }, { "epoch": 0.47433563887877683, "grad_norm": 1.6869794130325317, "learning_rate": 1.064488392089424e-05, "loss": 0.12035293877124786, "step": 3909 }, { "epoch": 0.4744569833758039, "grad_norm": 3.8962488174438477, "learning_rate": 1.0642427220243214e-05, "loss": 0.4050816595554352, "step": 3910 }, { "epoch": 0.47457832787283094, "grad_norm": 1.9148086309432983, "learning_rate": 1.0639970519592188e-05, "loss": 0.10218142718076706, "step": 3911 }, { "epoch": 0.47469967236985805, "grad_norm": 2.1516618728637695, "learning_rate": 1.0637513818941164e-05, "loss": 0.23401297628879547, "step": 3912 }, { "epoch": 0.4748210168668851, "grad_norm": 2.0974016189575195, "learning_rate": 1.0635057118290138e-05, "loss": 0.4570923447608948, "step": 3913 }, { "epoch": 0.47494236136391216, "grad_norm": 3.9381473064422607, "learning_rate": 1.0632600417639112e-05, "loss": 0.4090957045555115, "step": 3914 }, { "epoch": 0.4750637058609392, "grad_norm": 3.0685372352600098, "learning_rate": 1.0630143716988087e-05, "loss": 0.3796621859073639, "step": 3915 }, { "epoch": 0.47518505035796627, "grad_norm": 1.3861891031265259, "learning_rate": 1.0627687016337061e-05, "loss": 0.14838837087154388, "step": 3916 }, { "epoch": 0.4753063948549933, "grad_norm": 2.1783580780029297, "learning_rate": 1.0625230315686035e-05, "loss": 0.4516194462776184, "step": 3917 }, { "epoch": 0.4754277393520204, "grad_norm": 2.5179131031036377, "learning_rate": 1.062277361503501e-05, "loss": 0.36951062083244324, "step": 3918 }, { "epoch": 0.4755490838490474, "grad_norm": 6.605739593505859, "learning_rate": 1.0620316914383984e-05, "loss": 0.3154905438423157, "step": 3919 }, { "epoch": 0.4756704283460745, "grad_norm": 0.9335907697677612, "learning_rate": 1.0617860213732958e-05, "loss": 0.049954358488321304, "step": 3920 }, { "epoch": 0.4757917728431016, "grad_norm": 2.6936075687408447, "learning_rate": 1.0615403513081932e-05, "loss": 0.16908985376358032, "step": 3921 }, { "epoch": 0.47591311734012864, "grad_norm": 1.6764365434646606, "learning_rate": 1.0612946812430906e-05, "loss": 0.26403748989105225, "step": 3922 }, { "epoch": 0.4760344618371557, "grad_norm": 5.9298577308654785, "learning_rate": 1.061049011177988e-05, "loss": 0.08351560682058334, "step": 3923 }, { "epoch": 0.47615580633418275, "grad_norm": 2.9468815326690674, "learning_rate": 1.0608033411128855e-05, "loss": 0.360771119594574, "step": 3924 }, { "epoch": 0.4762771508312098, "grad_norm": 1.9476665258407593, "learning_rate": 1.060557671047783e-05, "loss": 0.3027259409427643, "step": 3925 }, { "epoch": 0.47639849532823686, "grad_norm": 4.023041725158691, "learning_rate": 1.0603120009826803e-05, "loss": 0.4723225235939026, "step": 3926 }, { "epoch": 0.4765198398252639, "grad_norm": 1.9701441526412964, "learning_rate": 1.0600663309175778e-05, "loss": 0.29517021775245667, "step": 3927 }, { "epoch": 0.47664118432229097, "grad_norm": 1.5217077732086182, "learning_rate": 1.0598206608524752e-05, "loss": 0.10690990835428238, "step": 3928 }, { "epoch": 0.476762528819318, "grad_norm": 3.051661252975464, "learning_rate": 1.0595749907873726e-05, "loss": 0.3336365520954132, "step": 3929 }, { "epoch": 0.47688387331634513, "grad_norm": 2.147693634033203, "learning_rate": 1.05932932072227e-05, "loss": 0.5873215198516846, "step": 3930 }, { "epoch": 0.4770052178133722, "grad_norm": 2.739251136779785, "learning_rate": 1.0590836506571675e-05, "loss": 0.34239891171455383, "step": 3931 }, { "epoch": 0.47712656231039924, "grad_norm": 4.637502670288086, "learning_rate": 1.058837980592065e-05, "loss": 0.11938660591840744, "step": 3932 }, { "epoch": 0.4772479068074263, "grad_norm": 3.1877243518829346, "learning_rate": 1.0585923105269625e-05, "loss": 0.7206730842590332, "step": 3933 }, { "epoch": 0.47736925130445335, "grad_norm": 2.642648935317993, "learning_rate": 1.05834664046186e-05, "loss": 0.24321579933166504, "step": 3934 }, { "epoch": 0.4774905958014804, "grad_norm": 2.448634624481201, "learning_rate": 1.0581009703967574e-05, "loss": 0.22593624889850616, "step": 3935 }, { "epoch": 0.47761194029850745, "grad_norm": 2.391491174697876, "learning_rate": 1.0578553003316548e-05, "loss": 0.33686891198158264, "step": 3936 }, { "epoch": 0.4777332847955345, "grad_norm": 3.104855537414551, "learning_rate": 1.0576096302665522e-05, "loss": 0.4050844609737396, "step": 3937 }, { "epoch": 0.47785462929256156, "grad_norm": 4.130711555480957, "learning_rate": 1.0573639602014496e-05, "loss": 0.2961690127849579, "step": 3938 }, { "epoch": 0.4779759737895886, "grad_norm": 3.7535223960876465, "learning_rate": 1.057118290136347e-05, "loss": 0.1767973005771637, "step": 3939 }, { "epoch": 0.4780973182866157, "grad_norm": 1.657116413116455, "learning_rate": 1.0568726200712445e-05, "loss": 0.06132087856531143, "step": 3940 }, { "epoch": 0.4782186627836428, "grad_norm": 1.1180098056793213, "learning_rate": 1.0566269500061419e-05, "loss": 0.07620692998170853, "step": 3941 }, { "epoch": 0.47834000728066983, "grad_norm": 2.728780746459961, "learning_rate": 1.0563812799410393e-05, "loss": 0.299428254365921, "step": 3942 }, { "epoch": 0.4784613517776969, "grad_norm": 2.388319969177246, "learning_rate": 1.0561356098759368e-05, "loss": 0.147850900888443, "step": 3943 }, { "epoch": 0.47858269627472394, "grad_norm": 2.5944557189941406, "learning_rate": 1.0558899398108342e-05, "loss": 0.2355179488658905, "step": 3944 }, { "epoch": 0.478704040771751, "grad_norm": 2.288933753967285, "learning_rate": 1.0556442697457316e-05, "loss": 0.33115917444229126, "step": 3945 }, { "epoch": 0.47882538526877805, "grad_norm": 3.3465325832366943, "learning_rate": 1.055398599680629e-05, "loss": 0.5318613052368164, "step": 3946 }, { "epoch": 0.4789467297658051, "grad_norm": 1.7699624300003052, "learning_rate": 1.0551529296155265e-05, "loss": 0.13044926524162292, "step": 3947 }, { "epoch": 0.47906807426283216, "grad_norm": 2.82124924659729, "learning_rate": 1.0549072595504239e-05, "loss": 0.47051340341567993, "step": 3948 }, { "epoch": 0.47918941875985926, "grad_norm": 2.446516990661621, "learning_rate": 1.0546615894853213e-05, "loss": 0.526097297668457, "step": 3949 }, { "epoch": 0.4793107632568863, "grad_norm": 2.2702369689941406, "learning_rate": 1.0544159194202187e-05, "loss": 0.4103502631187439, "step": 3950 }, { "epoch": 0.4794321077539134, "grad_norm": 2.536848783493042, "learning_rate": 1.0541702493551162e-05, "loss": 0.2067994773387909, "step": 3951 }, { "epoch": 0.4795534522509404, "grad_norm": 3.593503952026367, "learning_rate": 1.0539245792900138e-05, "loss": 0.181950181722641, "step": 3952 }, { "epoch": 0.4796747967479675, "grad_norm": 3.185767889022827, "learning_rate": 1.0536789092249112e-05, "loss": 0.18063871562480927, "step": 3953 }, { "epoch": 0.47979614124499453, "grad_norm": 1.8873003721237183, "learning_rate": 1.0534332391598086e-05, "loss": 0.18147023022174835, "step": 3954 }, { "epoch": 0.4799174857420216, "grad_norm": 2.7884440422058105, "learning_rate": 1.053187569094706e-05, "loss": 0.22004428505897522, "step": 3955 }, { "epoch": 0.48003883023904864, "grad_norm": 3.063300848007202, "learning_rate": 1.0529418990296035e-05, "loss": 0.3766789436340332, "step": 3956 }, { "epoch": 0.4801601747360757, "grad_norm": 2.764601707458496, "learning_rate": 1.0526962289645009e-05, "loss": 0.26879075169563293, "step": 3957 }, { "epoch": 0.4802815192331028, "grad_norm": 1.9297682046890259, "learning_rate": 1.0524505588993983e-05, "loss": 0.14930634200572968, "step": 3958 }, { "epoch": 0.48040286373012986, "grad_norm": 1.9681339263916016, "learning_rate": 1.0522048888342957e-05, "loss": 0.397515207529068, "step": 3959 }, { "epoch": 0.4805242082271569, "grad_norm": 1.9679991006851196, "learning_rate": 1.0519592187691932e-05, "loss": 0.1813061386346817, "step": 3960 }, { "epoch": 0.48064555272418397, "grad_norm": 2.6101455688476562, "learning_rate": 1.0517135487040906e-05, "loss": 0.2768298089504242, "step": 3961 }, { "epoch": 0.480766897221211, "grad_norm": 2.4954378604888916, "learning_rate": 1.051467878638988e-05, "loss": 0.3034391403198242, "step": 3962 }, { "epoch": 0.4808882417182381, "grad_norm": 4.449526786804199, "learning_rate": 1.0512222085738854e-05, "loss": 0.38359954953193665, "step": 3963 }, { "epoch": 0.48100958621526513, "grad_norm": 2.071256160736084, "learning_rate": 1.0509765385087829e-05, "loss": 0.3157460391521454, "step": 3964 }, { "epoch": 0.4811309307122922, "grad_norm": 2.6139607429504395, "learning_rate": 1.0507308684436803e-05, "loss": 0.30131807923316956, "step": 3965 }, { "epoch": 0.48125227520931924, "grad_norm": 0.8780409097671509, "learning_rate": 1.0504851983785777e-05, "loss": 0.01298319548368454, "step": 3966 }, { "epoch": 0.4813736197063463, "grad_norm": 1.8879942893981934, "learning_rate": 1.0502395283134751e-05, "loss": 0.08332539349794388, "step": 3967 }, { "epoch": 0.4814949642033734, "grad_norm": 3.2966036796569824, "learning_rate": 1.0499938582483724e-05, "loss": 0.34805336594581604, "step": 3968 }, { "epoch": 0.48161630870040045, "grad_norm": 2.2880709171295166, "learning_rate": 1.0497481881832698e-05, "loss": 0.18685874342918396, "step": 3969 }, { "epoch": 0.4817376531974275, "grad_norm": 4.809329986572266, "learning_rate": 1.0495025181181673e-05, "loss": 0.2955242991447449, "step": 3970 }, { "epoch": 0.48185899769445456, "grad_norm": 1.616361379623413, "learning_rate": 1.0492568480530647e-05, "loss": 0.2602810859680176, "step": 3971 }, { "epoch": 0.4819803421914816, "grad_norm": 1.6564544439315796, "learning_rate": 1.0490111779879621e-05, "loss": 0.09533637762069702, "step": 3972 }, { "epoch": 0.48210168668850867, "grad_norm": 1.8999651670455933, "learning_rate": 1.0487655079228595e-05, "loss": 0.21959394216537476, "step": 3973 }, { "epoch": 0.4822230311855357, "grad_norm": 1.5243175029754639, "learning_rate": 1.048519837857757e-05, "loss": 0.07984542846679688, "step": 3974 }, { "epoch": 0.4823443756825628, "grad_norm": 3.108914613723755, "learning_rate": 1.0482741677926544e-05, "loss": 0.5572518110275269, "step": 3975 }, { "epoch": 0.48246572017958983, "grad_norm": 2.040226697921753, "learning_rate": 1.0480284977275518e-05, "loss": 0.33752697706222534, "step": 3976 }, { "epoch": 0.48258706467661694, "grad_norm": 3.3626208305358887, "learning_rate": 1.0477828276624492e-05, "loss": 0.24655762314796448, "step": 3977 }, { "epoch": 0.482708409173644, "grad_norm": 4.025614261627197, "learning_rate": 1.0475371575973468e-05, "loss": 0.4968215525150299, "step": 3978 }, { "epoch": 0.48282975367067105, "grad_norm": 3.119117021560669, "learning_rate": 1.0472914875322443e-05, "loss": 0.6192764639854431, "step": 3979 }, { "epoch": 0.4829510981676981, "grad_norm": 3.2061896324157715, "learning_rate": 1.0470458174671417e-05, "loss": 0.24846620857715607, "step": 3980 }, { "epoch": 0.48307244266472515, "grad_norm": 1.9638543128967285, "learning_rate": 1.0468001474020391e-05, "loss": 0.35574042797088623, "step": 3981 }, { "epoch": 0.4831937871617522, "grad_norm": 3.0407400131225586, "learning_rate": 1.0465544773369365e-05, "loss": 0.2249983549118042, "step": 3982 }, { "epoch": 0.48331513165877926, "grad_norm": 2.090866804122925, "learning_rate": 1.046308807271834e-05, "loss": 0.2959258556365967, "step": 3983 }, { "epoch": 0.4834364761558063, "grad_norm": 2.555753707885742, "learning_rate": 1.0460631372067314e-05, "loss": 0.15021148324012756, "step": 3984 }, { "epoch": 0.48355782065283337, "grad_norm": 2.147218704223633, "learning_rate": 1.0458174671416288e-05, "loss": 0.2335168421268463, "step": 3985 }, { "epoch": 0.4836791651498605, "grad_norm": 2.133526086807251, "learning_rate": 1.0455717970765262e-05, "loss": 0.2494381070137024, "step": 3986 }, { "epoch": 0.48380050964688753, "grad_norm": 2.633037805557251, "learning_rate": 1.0453261270114237e-05, "loss": 0.4223847985267639, "step": 3987 }, { "epoch": 0.4839218541439146, "grad_norm": 1.964646577835083, "learning_rate": 1.0450804569463211e-05, "loss": 0.015639489516615868, "step": 3988 }, { "epoch": 0.48404319864094164, "grad_norm": 2.0672714710235596, "learning_rate": 1.0448347868812185e-05, "loss": 0.35201549530029297, "step": 3989 }, { "epoch": 0.4841645431379687, "grad_norm": 2.8944873809814453, "learning_rate": 1.044589116816116e-05, "loss": 0.36844390630722046, "step": 3990 }, { "epoch": 0.48428588763499575, "grad_norm": 2.2492825984954834, "learning_rate": 1.0443434467510134e-05, "loss": 0.159017875790596, "step": 3991 }, { "epoch": 0.4844072321320228, "grad_norm": 2.8350884914398193, "learning_rate": 1.0440977766859108e-05, "loss": 0.20941796898841858, "step": 3992 }, { "epoch": 0.48452857662904986, "grad_norm": 2.2333664894104004, "learning_rate": 1.0438521066208082e-05, "loss": 0.17051367461681366, "step": 3993 }, { "epoch": 0.4846499211260769, "grad_norm": 3.7527835369110107, "learning_rate": 1.0436064365557056e-05, "loss": 0.34921470284461975, "step": 3994 }, { "epoch": 0.484771265623104, "grad_norm": 0.799025297164917, "learning_rate": 1.043360766490603e-05, "loss": 0.02540355548262596, "step": 3995 }, { "epoch": 0.4848926101201311, "grad_norm": 2.9625139236450195, "learning_rate": 1.0431150964255005e-05, "loss": 0.31718266010284424, "step": 3996 }, { "epoch": 0.4850139546171581, "grad_norm": 6.872302532196045, "learning_rate": 1.0428694263603981e-05, "loss": 0.2765273451805115, "step": 3997 }, { "epoch": 0.4851352991141852, "grad_norm": 1.9910902976989746, "learning_rate": 1.0426237562952955e-05, "loss": 0.06150764226913452, "step": 3998 }, { "epoch": 0.48525664361121224, "grad_norm": 2.3983571529388428, "learning_rate": 1.042378086230193e-05, "loss": 0.46947625279426575, "step": 3999 }, { "epoch": 0.4853779881082393, "grad_norm": 2.5228049755096436, "learning_rate": 1.0421324161650904e-05, "loss": 0.29351717233657837, "step": 4000 } ], "logging_steps": 1, "max_steps": 8241, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.4044208466290734e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }