mirsazzathossain commited on
Commit
4cd12fd
·
verified ·
1 Parent(s): 0914328

Training in progress, step 742, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:234e9faba9167eb8026311e501ed98ddda9c7b01d85dc697e157b3825c672f19
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a4212534360d91c4750eb0488f33c9bbf9c2aa016775d2ebcf0907d221c856f
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d984355f4d14d29e5f0c33d7322eaf76ca629af03d42896d42a0709d5c62e9a
3
  size 335922386
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0077ed6560805092ce34c46b5a0748dc789084a6d0c9b77d75383b2617145658
3
  size 335922386
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93dfec376fcae514cc1172061dc894c88fb64c05da47049b55b9c420e46c5b6e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8ace2e53e972d164c5bd1d27983df1e697839e80dc6b8cb8c6d7c973b8f5b1f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc2d32c8942649c1bcec036a7c90f4e993d8936218dbf9a4b3743ca60e925e0b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c10f13d9ed8b535eb36de3bf00ca4405c444905a0f35975465b1e49e824c6a21
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.6173546756529065,
6
  "eval_steps": 200,
7
- "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -452,6 +452,104 @@
452
  "eval_samples_per_second": 1.825,
453
  "eval_steps_per_second": 1.825,
454
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
  }
456
  ],
457
  "logging_steps": 10,
@@ -466,12 +564,12 @@
466
  "should_evaluate": false,
467
  "should_log": false,
468
  "should_save": true,
469
- "should_training_stop": false
470
  },
471
  "attributes": {}
472
  }
473
  },
474
- "total_flos": 4.903142754091008e+17,
475
  "train_batch_size": 1,
476
  "trial_name": null,
477
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
  "eval_steps": 200,
7
+ "global_step": 742,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
452
  "eval_samples_per_second": 1.825,
453
  "eval_steps_per_second": 1.825,
454
  "step": 600
455
+ },
456
+ {
457
+ "epoch": 1.644313395113732,
458
+ "grad_norm": 0.3200387954711914,
459
+ "learning_rate": 9.49385233424856e-07,
460
+ "loss": 0.9581,
461
+ "step": 610
462
+ },
463
+ {
464
+ "epoch": 1.6712721145745577,
465
+ "grad_norm": 0.3195631504058838,
466
+ "learning_rate": 8.158632442673603e-07,
467
+ "loss": 0.9446,
468
+ "step": 620
469
+ },
470
+ {
471
+ "epoch": 1.6982308340353833,
472
+ "grad_norm": 0.26112768054008484,
473
+ "learning_rate": 6.916218039089961e-07,
474
+ "loss": 0.9549,
475
+ "step": 630
476
+ },
477
+ {
478
+ "epoch": 1.725189553496209,
479
+ "grad_norm": 0.2657977044582367,
480
+ "learning_rate": 5.769364838278063e-07,
481
+ "loss": 0.995,
482
+ "step": 640
483
+ },
484
+ {
485
+ "epoch": 1.7521482729570346,
486
+ "grad_norm": 0.3035813868045807,
487
+ "learning_rate": 4.720616597222205e-07,
488
+ "loss": 0.9935,
489
+ "step": 650
490
+ },
491
+ {
492
+ "epoch": 1.7791069924178602,
493
+ "grad_norm": 0.29046186804771423,
494
+ "learning_rate": 3.7722994729763427e-07,
495
+ "loss": 1.0067,
496
+ "step": 660
497
+ },
498
+ {
499
+ "epoch": 1.8060657118786858,
500
+ "grad_norm": 0.27917370200157166,
501
+ "learning_rate": 2.9265168631736005e-07,
502
+ "loss": 1.0086,
503
+ "step": 670
504
+ },
505
+ {
506
+ "epoch": 1.8330244313395114,
507
+ "grad_norm": 0.26591578125953674,
508
+ "learning_rate": 2.1851447406231573e-07,
509
+ "loss": 0.9478,
510
+ "step": 680
511
+ },
512
+ {
513
+ "epoch": 1.8599831508003368,
514
+ "grad_norm": 0.30971062183380127,
515
+ "learning_rate": 1.5498274923427925e-07,
516
+ "loss": 0.9364,
517
+ "step": 690
518
+ },
519
+ {
520
+ "epoch": 1.8869418702611624,
521
+ "grad_norm": 0.24184127151966095,
522
+ "learning_rate": 1.0219742722559433e-07,
523
+ "loss": 1.0085,
524
+ "step": 700
525
+ },
526
+ {
527
+ "epoch": 1.913900589721988,
528
+ "grad_norm": 0.32377859950065613,
529
+ "learning_rate": 6.027558756434015e-08,
530
+ "loss": 1.0718,
531
+ "step": 710
532
+ },
533
+ {
534
+ "epoch": 1.9408593091828137,
535
+ "grad_norm": 0.24339060485363007,
536
+ "learning_rate": 2.9310214228202016e-08,
537
+ "loss": 0.9437,
538
+ "step": 720
539
+ },
540
+ {
541
+ "epoch": 1.9678180286436393,
542
+ "grad_norm": 0.2631000876426697,
543
+ "learning_rate": 9.369989403041347e-09,
544
+ "loss": 0.9958,
545
+ "step": 730
546
+ },
547
+ {
548
+ "epoch": 1.994776748104465,
549
+ "grad_norm": 0.24943451583385468,
550
+ "learning_rate": 4.991411436189308e-10,
551
+ "loss": 0.9993,
552
+ "step": 740
553
  }
554
  ],
555
  "logging_steps": 10,
 
564
  "should_evaluate": false,
565
  "should_log": false,
566
  "should_save": true,
567
+ "should_training_stop": true
568
  },
569
  "attributes": {}
570
  }
571
  },
572
+ "total_flos": 6.06897586520064e+17,
573
  "train_batch_size": 1,
574
  "trial_name": null,
575
  "trial_params": null