manavdhamecha77 committed on
Commit
c5be891
·
verified ·
1 Parent(s): 2ff09d8

Upload 5 files

Browse files
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForTokenClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "embedding_size": 768,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "B",
14
+ "1": "I",
15
+ "2": "O"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "B": 0,
21
+ "I": 1,
22
+ "O": 2
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 3,
30
+ "position_embedding_type": "absolute",
31
+ "transformers_version": "4.56.2",
32
+ "type_vocab_size": 2,
33
+ "use_cache": true,
34
+ "vocab_size": 250000
35
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[PAD]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "<as>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "6": {
52
+ "content": "<bd>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "7": {
60
+ "content": "<bn>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "8": {
68
+ "content": "<dg>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "9": {
76
+ "content": "<en>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "10": {
84
+ "content": "<gom>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "11": {
92
+ "content": "<gu>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "12": {
100
+ "content": "<hi>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "13": {
108
+ "content": "<kha>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "14": {
116
+ "content": "<kn>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "15": {
124
+ "content": "<ks>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "16": {
132
+ "content": "<mai>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "17": {
140
+ "content": "<ml>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "18": {
148
+ "content": "<mni>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "19": {
156
+ "content": "<mr>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "20": {
164
+ "content": "<ne>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "21": {
172
+ "content": "<or>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "22": {
180
+ "content": "<pa>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "23": {
188
+ "content": "<sa>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "24": {
196
+ "content": "<sd>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "25": {
204
+ "content": "<sat>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "26": {
212
+ "content": "<ta>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "27": {
220
+ "content": "<te>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "28": {
228
+ "content": "<ur>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ }
235
+ },
236
+ "clean_up_tokenization_spaces": false,
237
+ "cls_token": "[CLS]",
238
+ "extra_special_tokens": {},
239
+ "mask_token": "[MASK]",
240
+ "model_max_length": 1000000000000000019884624838656,
241
+ "pad_token": "[PAD]",
242
+ "sep_token": "[SEP]",
243
+ "tokenizer_class": "PreTrainedTokenizerFast",
244
+ "unk_token": "[UNK]"
245
+ }
trainer_state.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 20.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1240,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.8064516129032258,
14
+ "grad_norm": 2.2094340324401855,
15
+ "learning_rate": 2.8814516129032256e-05,
16
+ "loss": 0.821,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 1.6129032258064515,
21
+ "grad_norm": 4.53880500793457,
22
+ "learning_rate": 2.760483870967742e-05,
23
+ "loss": 0.3767,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 2.4193548387096775,
28
+ "grad_norm": 2.487438917160034,
29
+ "learning_rate": 2.6395161290322583e-05,
30
+ "loss": 0.228,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 3.225806451612903,
35
+ "grad_norm": 1.6553895473480225,
36
+ "learning_rate": 2.5185483870967742e-05,
37
+ "loss": 0.1713,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 4.032258064516129,
42
+ "grad_norm": 0.6422418355941772,
43
+ "learning_rate": 2.3975806451612904e-05,
44
+ "loss": 0.1186,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 4.838709677419355,
49
+ "grad_norm": 1.2269980907440186,
50
+ "learning_rate": 2.2766129032258065e-05,
51
+ "loss": 0.0827,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 5.645161290322581,
56
+ "grad_norm": 1.165654182434082,
57
+ "learning_rate": 2.1556451612903224e-05,
58
+ "loss": 0.0598,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 6.451612903225806,
63
+ "grad_norm": 1.4293478727340698,
64
+ "learning_rate": 2.034677419354839e-05,
65
+ "loss": 0.0373,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 7.258064516129032,
70
+ "grad_norm": 1.201370120048523,
71
+ "learning_rate": 1.913709677419355e-05,
72
+ "loss": 0.0339,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 8.064516129032258,
77
+ "grad_norm": 1.2729601860046387,
78
+ "learning_rate": 1.792741935483871e-05,
79
+ "loss": 0.0318,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 8.870967741935484,
84
+ "grad_norm": 0.45044800639152527,
85
+ "learning_rate": 1.671774193548387e-05,
86
+ "loss": 0.0234,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 9.67741935483871,
91
+ "grad_norm": 0.26697471737861633,
92
+ "learning_rate": 1.5508064516129033e-05,
93
+ "loss": 0.0219,
94
+ "step": 600
95
+ },
96
+ {
97
+ "epoch": 10.483870967741936,
98
+ "grad_norm": 0.787188708782196,
99
+ "learning_rate": 1.4298387096774195e-05,
100
+ "loss": 0.0132,
101
+ "step": 650
102
+ },
103
+ {
104
+ "epoch": 11.290322580645162,
105
+ "grad_norm": 2.1090033054351807,
106
+ "learning_rate": 1.3088709677419355e-05,
107
+ "loss": 0.0106,
108
+ "step": 700
109
+ },
110
+ {
111
+ "epoch": 12.096774193548388,
112
+ "grad_norm": 0.08617769926786423,
113
+ "learning_rate": 1.1879032258064517e-05,
114
+ "loss": 0.0097,
115
+ "step": 750
116
+ },
117
+ {
118
+ "epoch": 12.903225806451612,
119
+ "grad_norm": 0.5370274782180786,
120
+ "learning_rate": 1.0669354838709679e-05,
121
+ "loss": 0.0096,
122
+ "step": 800
123
+ },
124
+ {
125
+ "epoch": 13.709677419354838,
126
+ "grad_norm": 0.9249340891838074,
127
+ "learning_rate": 9.459677419354839e-06,
128
+ "loss": 0.009,
129
+ "step": 850
130
+ },
131
+ {
132
+ "epoch": 14.516129032258064,
133
+ "grad_norm": 0.43685030937194824,
134
+ "learning_rate": 8.25e-06,
135
+ "loss": 0.0068,
136
+ "step": 900
137
+ },
138
+ {
139
+ "epoch": 15.32258064516129,
140
+ "grad_norm": 0.44842422008514404,
141
+ "learning_rate": 7.040322580645162e-06,
142
+ "loss": 0.005,
143
+ "step": 950
144
+ },
145
+ {
146
+ "epoch": 16.129032258064516,
147
+ "grad_norm": 0.44923698902130127,
148
+ "learning_rate": 5.8306451612903235e-06,
149
+ "loss": 0.0054,
150
+ "step": 1000
151
+ },
152
+ {
153
+ "epoch": 16.93548387096774,
154
+ "grad_norm": 0.49068453907966614,
155
+ "learning_rate": 4.620967741935484e-06,
156
+ "loss": 0.0043,
157
+ "step": 1050
158
+ },
159
+ {
160
+ "epoch": 17.741935483870968,
161
+ "grad_norm": 0.147027388215065,
162
+ "learning_rate": 3.4112903225806454e-06,
163
+ "loss": 0.0028,
164
+ "step": 1100
165
+ },
166
+ {
167
+ "epoch": 18.548387096774192,
168
+ "grad_norm": 1.196764349937439,
169
+ "learning_rate": 2.2016129032258064e-06,
170
+ "loss": 0.0035,
171
+ "step": 1150
172
+ },
173
+ {
174
+ "epoch": 19.35483870967742,
175
+ "grad_norm": 0.022890331223607063,
176
+ "learning_rate": 9.919354838709678e-07,
177
+ "loss": 0.0027,
178
+ "step": 1200
179
+ }
180
+ ],
181
+ "logging_steps": 50,
182
+ "max_steps": 1240,
183
+ "num_input_tokens_seen": 0,
184
+ "num_train_epochs": 20,
185
+ "save_steps": 500,
186
+ "stateful_callbacks": {
187
+ "TrainerControl": {
188
+ "args": {
189
+ "should_epoch_stop": false,
190
+ "should_evaluate": false,
191
+ "should_log": false,
192
+ "should_save": true,
193
+ "should_training_stop": true
194
+ },
195
+ "attributes": {}
196
+ }
197
+ },
198
+ "total_flos": 1192381370848800.0,
199
+ "train_batch_size": 8,
200
+ "trial_name": null,
201
+ "trial_params": null
202
+ }