misterJB committed on
Commit
5215cda
·
verified ·
1 Parent(s): e751160

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:526cd145d38b5d607d81e63c85c7c52896c874f2136eda50a3ad728915bc0888
3
  size 5228717512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7e85d31a9400a3e476e57eb02f66875fe02d94a2e7f0498f6daf9772ea3cabf
3
  size 5228717512
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea3610d93c47c718418200139666d3cd598641c899018a9797d1f7478a99919c
3
  size 10457622711
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:795383d344f81ffeed9736876921ab7c34ee1e1e8f7ae853b548fbe86125a4d4
3
  size 10457622711
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8a0161fb643893b4bd0a9724aa51736729cc07ff0a3f386f1ba978002596386
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7949a82b4956d6dc1683588213cba80de08632b440866c7377566d6b5ddef0f6
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.171875,
6
  "eval_steps": 500,
7
- "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -308,6 +308,106 @@
308
  "mean_token_accuracy": 0.9931734573841094,
309
  "num_tokens": 8601406.0,
310
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  }
312
  ],
313
  "logging_steps": 50,
@@ -327,7 +427,7 @@
327
  "attributes": {}
328
  }
329
  },
330
- "total_flos": 1.167079738487593e+17,
331
  "train_batch_size": 2,
332
  "trial_name": null,
333
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.5625,
6
  "eval_steps": 500,
7
+ "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
308
  "mean_token_accuracy": 0.9931734573841094,
309
  "num_tokens": 8601406.0,
310
  "step": 1500
311
+ },
312
+ {
313
+ "entropy": 0.01845017326530069,
314
+ "epoch": 1.2109375,
315
+ "grad_norm": 0.087890625,
316
+ "learning_rate": 8.776041666666667e-06,
317
+ "loss": 0.017589352130889892,
318
+ "mean_token_accuracy": 0.9928986424207688,
319
+ "num_tokens": 8887025.0,
320
+ "step": 1550
321
+ },
322
+ {
323
+ "entropy": 0.016624693870544435,
324
+ "epoch": 1.25,
325
+ "grad_norm": 0.138671875,
326
+ "learning_rate": 8.342013888888889e-06,
327
+ "loss": 0.015853718519210816,
328
+ "mean_token_accuracy": 0.9933998480439186,
329
+ "num_tokens": 9173407.0,
330
+ "step": 1600
331
+ },
332
+ {
333
+ "entropy": 0.018500901735387744,
334
+ "epoch": 1.2890625,
335
+ "grad_norm": 0.1142578125,
336
+ "learning_rate": 7.907986111111112e-06,
337
+ "loss": 0.017626932859420776,
338
+ "mean_token_accuracy": 0.9930863061547279,
339
+ "num_tokens": 9454970.0,
340
+ "step": 1650
341
+ },
342
+ {
343
+ "entropy": 0.01800002105999738,
344
+ "epoch": 1.328125,
345
+ "grad_norm": 0.0986328125,
346
+ "learning_rate": 7.473958333333334e-06,
347
+ "loss": 0.017116209268569948,
348
+ "mean_token_accuracy": 0.9930115470290184,
349
+ "num_tokens": 9739610.0,
350
+ "step": 1700
351
+ },
352
+ {
353
+ "entropy": 0.017908857897855342,
354
+ "epoch": 1.3671875,
355
+ "grad_norm": 0.1064453125,
356
+ "learning_rate": 7.039930555555556e-06,
357
+ "loss": 0.017099602222442625,
358
+ "mean_token_accuracy": 0.9931422612071037,
359
+ "num_tokens": 10028723.0,
360
+ "step": 1750
361
+ },
362
+ {
363
+ "entropy": 0.017319361912086606,
364
+ "epoch": 1.40625,
365
+ "grad_norm": 0.14453125,
366
+ "learning_rate": 6.605902777777779e-06,
367
+ "loss": 0.016603636741638183,
368
+ "mean_token_accuracy": 0.9933136883378029,
369
+ "num_tokens": 10313957.0,
370
+ "step": 1800
371
+ },
372
+ {
373
+ "entropy": 0.017652450683526694,
374
+ "epoch": 1.4453125,
375
+ "grad_norm": 0.11572265625,
376
+ "learning_rate": 6.171875e-06,
377
+ "loss": 0.01667865037918091,
378
+ "mean_token_accuracy": 0.9932633358240127,
379
+ "num_tokens": 10597623.0,
380
+ "step": 1850
381
+ },
382
+ {
383
+ "entropy": 0.01626451033167541,
384
+ "epoch": 1.484375,
385
+ "grad_norm": 0.119140625,
386
+ "learning_rate": 5.737847222222222e-06,
387
+ "loss": 0.015588784217834472,
388
+ "mean_token_accuracy": 0.993659851551056,
389
+ "num_tokens": 10887482.0,
390
+ "step": 1900
391
+ },
392
+ {
393
+ "entropy": 0.016533851716667415,
394
+ "epoch": 1.5234375,
395
+ "grad_norm": 0.1630859375,
396
+ "learning_rate": 5.303819444444445e-06,
397
+ "loss": 0.01556604504585266,
398
+ "mean_token_accuracy": 0.9936296039819718,
399
+ "num_tokens": 11176455.0,
400
+ "step": 1950
401
+ },
402
+ {
403
+ "entropy": 0.017771934717893602,
404
+ "epoch": 1.5625,
405
+ "grad_norm": 0.11572265625,
406
+ "learning_rate": 4.869791666666667e-06,
407
+ "loss": 0.017040348052978514,
408
+ "mean_token_accuracy": 0.9932206523418426,
409
+ "num_tokens": 11462052.0,
410
+ "step": 2000
411
  }
412
  ],
413
  "logging_steps": 50,
 
427
  "attributes": {}
428
  }
429
  },
430
+ "total_flos": 1.5546599214235546e+17,
431
  "train_batch_size": 2,
432
  "trial_name": null,
433
  "trial_params": null