apriasmoro commited on
Commit
74ca747
·
verified ·
1 Parent(s): 9dac0c6

Training in progress, step 1150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bfa76b770f1256ad8dc526ec7699bb5a46e7d2ef36022e1d8593e3a00c7582b
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a929d3820cc1c1b87aaa3a19c3151498709177752334961b9baea187ddadb3d2
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49467ba00ed292c49fac7b8369f3055f16a73196f23984014172f580dec7b7cf
3
  size 162934501
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:599b6cf815824832480a957835de20f2fb6f9fe2a51fece8ad90ddab79816071
3
  size 162934501
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e82bd5e5e964db7f6dbf891762cacec10852f2811198604a5b25178c46e8cdf
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37e03f7c5107969cc1329932dd9cfeb378051a709bd5fa18e9c0569938b2a7c7
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.06909113749136361,
6
  "eval_steps": 500,
7
- "global_step": 1100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1673,6 +1673,81 @@
1673
  "rewards/margins": -38.8865966796875,
1674
  "rewards/rejected": -276.6972961425781,
1675
  "step": 1100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1676
  }
1677
  ],
1678
  "logging_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.07223164374097105,
6
  "eval_steps": 500,
7
+ "global_step": 1150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1673
  "rewards/margins": -38.8865966796875,
1674
  "rewards/rejected": -276.6972961425781,
1675
  "step": 1100
1676
+ },
1677
+ {
1678
+ "epoch": 0.06971923874128509,
1679
+ "grad_norm": 7.386142730712891,
1680
+ "learning_rate": 0.00019874079093938475,
1681
+ "logits/chosen": -1.8908523321151733,
1682
+ "logits/rejected": -1.890451192855835,
1683
+ "logps/chosen": -2971.73095703125,
1684
+ "logps/rejected": -2527.33984375,
1685
+ "loss": 62.1834,
1686
+ "rewards/accuracies": 0.39375001192092896,
1687
+ "rewards/chosen": -272.0863037109375,
1688
+ "rewards/margins": -42.23844528198242,
1689
+ "rewards/rejected": -229.8478546142578,
1690
+ "step": 1110
1691
+ },
1692
+ {
1693
+ "epoch": 0.07034733999120658,
1694
+ "grad_norm": 57.4601936340332,
1695
+ "learning_rate": 0.00019871299308267236,
1696
+ "logits/chosen": -2.0698752403259277,
1697
+ "logits/rejected": -2.07073974609375,
1698
+ "logps/chosen": -3427.44482421875,
1699
+ "logps/rejected": -3055.614990234375,
1700
+ "loss": 60.3318,
1701
+ "rewards/accuracies": 0.38749998807907104,
1702
+ "rewards/chosen": -316.5343933105469,
1703
+ "rewards/margins": -34.9571418762207,
1704
+ "rewards/rejected": -281.5772705078125,
1705
+ "step": 1120
1706
+ },
1707
+ {
1708
+ "epoch": 0.07097544124112808,
1709
+ "grad_norm": 73.67867279052734,
1710
+ "learning_rate": 0.000198684893725599,
1711
+ "logits/chosen": -2.4461870193481445,
1712
+ "logits/rejected": -2.4461569786071777,
1713
+ "logps/chosen": -2597.948974609375,
1714
+ "logps/rejected": -2381.15234375,
1715
+ "loss": 43.2422,
1716
+ "rewards/accuracies": 0.4375,
1717
+ "rewards/chosen": -234.696044921875,
1718
+ "rewards/margins": -20.43332290649414,
1719
+ "rewards/rejected": -214.26272583007812,
1720
+ "step": 1130
1721
+ },
1722
+ {
1723
+ "epoch": 0.07160354249104955,
1724
+ "grad_norm": 24.876911163330078,
1725
+ "learning_rate": 0.00019865649295398893,
1726
+ "logits/chosen": -2.420064687728882,
1727
+ "logits/rejected": -2.4179110527038574,
1728
+ "logps/chosen": -2495.120849609375,
1729
+ "logps/rejected": -2211.05419921875,
1730
+ "loss": 44.6958,
1731
+ "rewards/accuracies": 0.375,
1732
+ "rewards/chosen": -223.16287231445312,
1733
+ "rewards/margins": -26.39817237854004,
1734
+ "rewards/rejected": -196.76467895507812,
1735
+ "step": 1140
1736
+ },
1737
+ {
1738
+ "epoch": 0.07223164374097105,
1739
+ "grad_norm": 1182.2891845703125,
1740
+ "learning_rate": 0.00019862779085458697,
1741
+ "logits/chosen": -2.5218708515167236,
1742
+ "logits/rejected": -2.5206217765808105,
1743
+ "logps/chosen": -2362.462158203125,
1744
+ "logps/rejected": -2134.76416015625,
1745
+ "loss": 38.0256,
1746
+ "rewards/accuracies": 0.40625,
1747
+ "rewards/chosen": -211.2278594970703,
1748
+ "rewards/margins": -21.650144577026367,
1749
+ "rewards/rejected": -189.57769775390625,
1750
+ "step": 1150
1751
  }
1752
  ],
1753
  "logging_steps": 10,