Upload model/checkpoint.json with huggingface_hub
Browse files- model/checkpoint.json +79 -0
model/checkpoint.json
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"state": {
|
| 3 |
+
"phase_index": 4,
|
| 4 |
+
"epoch_index": 5,
|
| 5 |
+
"global_step": 8440,
|
| 6 |
+
"best_metric": 1.857707867548929,
|
| 7 |
+
"metrics": {
|
| 8 |
+
"loss": 1.857707867548929,
|
| 9 |
+
"running_terms": {
|
| 10 |
+
"state_consistency": 699.7146280673333,
|
| 11 |
+
"constraint_satisfaction": 101.4670789560328,
|
| 12 |
+
"semantic_alignment": 129.8230349663645,
|
| 13 |
+
"gos_coherence": 1246.1481617949903,
|
| 14 |
+
"multi_state_margin": 1679.8049648925662
|
| 15 |
+
},
|
| 16 |
+
"last_batch": {
|
| 17 |
+
"state_consistency": 0.4334685243666172,
|
| 18 |
+
"constraint_satisfaction": 0.09194006177131087,
|
| 19 |
+
"semantic_alignment": 0.0810413584113121,
|
| 20 |
+
"gos_coherence": 0.7358924970030785,
|
| 21 |
+
"multi_state_margin": 1.115578532218933
|
| 22 |
+
}
|
| 23 |
+
},
|
| 24 |
+
"config_snapshot": {
|
| 25 |
+
"seed": 42,
|
| 26 |
+
"device": "cuda",
|
| 27 |
+
"curriculum": {
|
| 28 |
+
"phase_index": 4,
|
| 29 |
+
"objectives": {
|
| 30 |
+
"state_consistency": 1.0,
|
| 31 |
+
"constraint_satisfaction": 1.0,
|
| 32 |
+
"semantic_alignment": 1.0,
|
| 33 |
+
"gos_coherence": 0.1,
|
| 34 |
+
"multi_state_margin": 1.0
|
| 35 |
+
},
|
| 36 |
+
"phase_advanced": true
|
| 37 |
+
},
|
| 38 |
+
"model": {
|
| 39 |
+
"vocab_size": 5000,
|
| 40 |
+
"affix_vocab_size": 40,
|
| 41 |
+
"kbbi_input_dim": 16,
|
| 42 |
+
"bsu_config": {
|
| 43 |
+
"d_morpheme": 64,
|
| 44 |
+
"d_semantic": 64,
|
| 45 |
+
"d_role": 32,
|
| 46 |
+
"d_context": 64,
|
| 47 |
+
"dropout": 0.1
|
| 48 |
+
},
|
| 49 |
+
"meb_config": {
|
| 50 |
+
"n_layers": 4,
|
| 51 |
+
"n_dep_heads": 4,
|
| 52 |
+
"dropout": 0.1
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
}
|
| 56 |
+
},
|
| 57 |
+
"metadata": {
|
| 58 |
+
"loss": 1.857707867548929,
|
| 59 |
+
"diagnostics": {
|
| 60 |
+
"running_terms": {
|
| 61 |
+
"state_consistency": 699.7146280673333,
|
| 62 |
+
"constraint_satisfaction": 101.4670789560328,
|
| 63 |
+
"semantic_alignment": 129.8230349663645,
|
| 64 |
+
"gos_coherence": 1246.1481617949903,
|
| 65 |
+
"multi_state_margin": 1679.8049648925662
|
| 66 |
+
},
|
| 67 |
+
"last_batch": {
|
| 68 |
+
"state_consistency": 0.4334685243666172,
|
| 69 |
+
"constraint_satisfaction": 0.09194006177131087,
|
| 70 |
+
"semantic_alignment": 0.0810413584113121,
|
| 71 |
+
"gos_coherence": 0.7358924970030785,
|
| 72 |
+
"multi_state_margin": 1.115578532218933
|
| 73 |
+
},
|
| 74 |
+
"validation": {},
|
| 75 |
+
"test": {},
|
| 76 |
+
"avg_loss": 1.857707867548929
|
| 77 |
+
}
|
| 78 |
+
}
|
| 79 |
+
}
|