aleynahukmet committed on
Commit
e71f1db
·
verified ·
1 Parent(s): 403a38d

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,780 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - sentence-similarity
5
+ - feature-extraction
6
+ - generated_from_trainer
7
+ - dataset_size:9623924
8
+ - loss:MSELoss
9
+ base_model: BAAI/bge-m3
10
+ widget:
11
+ - source_sentence: Ak Hunlar'ın kültürel etkileşimleri ve mirasları hakkında ne söyleyebiliriz?
12
+ Ak Hunlar'ın diğer kültürler üzerindeki etkileri ve izleri nelerdir?
13
+ sentences:
14
+ - Film, hangi oyun yazarının hayatını konu almaktadır?
15
+ - Bir Eskişehir-Afyonkarahisar tren yolculuğu ne kadar sürmektedir?
16
+ - Mektupta, Türkiye'nin adaya tek taraflı müdahalesinin Türk ve Yunan tarafları
17
+ arasında savaşa yol açabileceği ve NATO üyesi olan bu iki ülkenin savaşmasının
18
+ kabul edilemez olduğu ifade edilmiştir. Türkiye'nin müdahale kararı almadan önce
19
+ müttefiklerine danışması gerektiği anımsatılmıştır. Ayrıca bu savaşın Sovyetler
20
+ Birliği'nin de Türkiye'ye müdahale ihtimalini doğuracağı ve NATO'nun böyle bir
21
+ durumda Türkiye'yi savunma konusunda isteksiz olacağı ima edilmiştir. ABD'nin
22
+ Türkiye'ye sağladığı askeri malzemenin bu müdahalede kullanılmasına izin verilmeyeceği
23
+ belirtilmiştir. Mektubun ardından Türkiye müdahale kararından vazgeçmiştir. İsmet
24
+ İnönü 21 Haziran 1964'te ABD'ye giderek başkan Johnson ile bir görüşmede bulunmuştur.
25
+ - source_sentence: Evet, metinde teslimiyetçilik, edilgenlik veya boyun eğme olarak
26
+ da tanımlanmaktadır.
27
+ sentences:
28
+ - Cezary Kucharski'nin doğduğu tarih nedir?
29
+ - Beylerbeyi Camii, 2013 yılında yapılan restorasyon çalışmaları sonrasında ne durumda?
30
+ - "İkinci Dünya Savaşı esnasında ve sonrasında elektroniklerin doğasından kaynaklanan\
31
+ \ birçok güvenilir olmama durumu ve ürün yorgunluğu gündeme geldi. 1945'te M.A.\
32
+ \ Miner, ASME (Amerikan Makine Mühendisleri Topluluğu) Dergisi içerisinde \"Yorulma\
33
+ \ Esnasında Birikimli Hasar\" adında taslak bir yazı paylaştı. Ordu için uygulanan\
34
+ \ ilk güvenilirlik hususu, Radar Sistemleri ve diğer elektronik parçalarda kullanılan,\
35
+ \ yine güvenilirlik analizi sayesinde kanıtlanmış, oldukça arıza çıkarmaya yatkın\
36
+ \ ve maliyetli bir vakum silindiri idi. Elektrik ve Elektronik Mühendisleri Enstitüsü,\
37
+ \ 1948 yılında Güvenilirlik Topluluğunu kurmuştur. 1950 yılı içerisinde, asker\
38
+ \ tarafında, Elektronik Ekipman Güvenilirliği Tavsiye Grubu kurulmuştur. Bu grup,\
39
+ \ 3 ana çalışma yolu tavsiye etmiştir. Bunlar:\n\n Parça güvenilirliğinin arttırılması,\n\
40
+ \ Tedarikçiler için kalite ve güvenilirlik gereksinimlerinin tanımlanması,\n Saha\
41
+ \ verilerinin toplanması ve kök analiz yapılması."
42
+ - source_sentence: Belgrad'ın ele geçirilmesinde Klingenberg'in rolü nedir ve bu olay
43
+ nasıl gerçekleşti?
44
+ sentences:
45
+ - Jimmy White ve Peter Ebdon.
46
+ - DualSense kontrolörünün titreşim özelliği hakkında detaylı bilgi verir misiniz?
47
+ - "Kozluk, Kocaeli ilinin İzmit ilçesine bağlı bir mahalledir.\n\nNüfus\n\nKaynakça\
48
+ \ \n\nİzmit'in mahalleleri"
49
+ - source_sentence: 1996 yılında kurulmuştur. Ağırlıklı olarak standart caz repertuvarından
50
+ parçalar sunmuşlardır.
51
+ sentences:
52
+ - San Leucio'nun coğrafi konumu hakkında bilgi verir misiniz?
53
+ - Kinik felsefesinin öncüsüdür.
54
+ - Aydın Doğu Demirkol'un vizyona girmesi planlanan sinema filmleri nelerdir ve yönetmenleri
55
+ kimlerdir?
56
+ - source_sentence: Serbest pazar prensiplerinin varlıklı ve yoksul futbol kulüpleri
57
+ arasındaki farkı büyütmesine yönelik kaygılar nedeniyle bu durum önemlidir.
58
+ sentences:
59
+ - Yazar, 12 Mart baskınlarının ve işkencelerinin sonucunda, ideolojik kimlikleriyle
60
+ küçük burjuva kimlikleri arasında çelişkiye düşen devrimcilerin rejime boyun eğmelerini
61
+ gösterme çabasındadır.
62
+ - "Verilen kesin süre \niçinde şikayetçi tarafından ilgili masraflar yatırıldığından\
63
+ \ PTT’ce söz konusu \nkeşfa.va.nsınıngeri önd.e-rilmesi sonucu talimat \nmahkemesince\
64
+ \ keşf yapılmamış ise de burada şikayetçiye atfedilebilecek bir kusur \nbulunmadığından,\
65
+ \ keşif avansının ilgili mahkemeye tekrar gönderilerek keşfin \nyapılmasının sağlanarak\
66
+ \ oluşacak sonuca göre bir karar verilmesi gerekir."
67
+ - This Kind of Bird Flies Backwards (Bu Cins Kuş Tersten Uçar) adlı ilk kitabı,
68
+ LeRoy Jones ve Hettie Jones'un kurduğu Totem Press tarafından 1958 yılında yayınlandı.
69
+ pipeline_tag: sentence-similarity
70
+ library_name: sentence-transformers
71
+ metrics:
72
+ - pearson_cosine
73
+ - spearman_cosine
74
+ - negative_mse
75
+ model-index:
76
+ - name: SentenceTransformer based on BAAI/bge-m3
77
+ results:
78
+ - task:
79
+ type: semantic-similarity
80
+ name: Semantic Similarity
81
+ dataset:
82
+ name: sts dev
83
+ type: sts-dev
84
+ metrics:
85
+ - type: pearson_cosine
86
+ value: 0.9378885799751235
87
+ name: Pearson Cosine
88
+ - type: spearman_cosine
89
+ value: 0.930037764519436
90
+ name: Spearman Cosine
91
+ - task:
92
+ type: knowledge-distillation
93
+ name: Knowledge Distillation
94
+ dataset:
95
+ name: Unknown
96
+ type: unknown
97
+ metrics:
98
+ - type: negative_mse
99
+ value: -0.010874464351218194
100
+ name: Negative MSE
101
+ - task:
102
+ type: semantic-similarity
103
+ name: Semantic Similarity
104
+ dataset:
105
+ name: sts test
106
+ type: sts-test
107
+ metrics:
108
+ - type: pearson_cosine
109
+ value: 0.9378994572414889
110
+ name: Pearson Cosine
111
+ - type: spearman_cosine
112
+ value: 0.9300802695581766
113
+ name: Spearman Cosine
114
+ ---
115
+
116
+ # SentenceTransformer based on BAAI/bge-m3
117
+
118
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3) on the tr-sentences dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
119
+
120
+ ## Model Details
121
+
122
+ ### Model Description
123
+ - **Model Type:** Sentence Transformer
124
+ - **Base model:** [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3) <!-- at revision 5617a9f61b028005a4858fdac845db406aefb181 -->
125
+ - **Maximum Sequence Length:** 8192 tokens
126
+ - **Output Dimensionality:** 1024 dimensions
127
+ - **Similarity Function:** Cosine Similarity
128
+ - **Training Dataset:**
129
+ - tr-sentences
130
+ <!-- - **Language:** Unknown -->
131
+ <!-- - **License:** Unknown -->
132
+
133
+ ### Model Sources
134
+
135
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
136
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
137
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
138
+
139
+ ### Full Model Architecture
140
+
141
+ ```
142
+ SentenceTransformer(
143
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: XLMRobertaModel
144
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
145
+ (2): Normalize()
146
+ )
147
+ ```
148
+
149
+ ## Usage
150
+
151
+ ### Direct Usage (Sentence Transformers)
152
+
153
+ First install the Sentence Transformers library:
154
+
155
+ ```bash
156
+ pip install -U sentence-transformers
157
+ ```
158
+
159
+ Then you can load this model and run inference.
160
+ ```python
161
+ from sentence_transformers import SentenceTransformer
162
+
163
+ # Download from the 🤗 Hub (replace the placeholder ID below with this model's Hub repository ID)
164
+ model = SentenceTransformer("sentence_transformers_model_id")
165
+ # Run inference
166
+ sentences = [
167
+ 'Serbest pazar prensiplerinin varlıklı ve yoksul futbol kulüpleri arasındaki farkı büyütmesine yönelik kaygılar nedeniyle bu durum önemlidir.',
168
+ 'Yazar, 12 Mart baskınlarının ve işkencelerinin sonucunda, ideolojik kimlikleriyle küçük burjuva kimlikleri arasında çelişkiye düşen devrimcilerin rejime boyun eğmelerini gösterme çabasındadır.',
169
+ "This Kind of Bird Flies Backwards (Bu Cins Kuş Tersten Uçar) adlı ilk kitabı, LeRoy Jones ve Hettie Jones'un kurduğu Totem Press tarafından 1958 yılında yayınlandı.",
170
+ ]
171
+ embeddings = model.encode(sentences)
172
+ print(embeddings.shape)
173
+ # (3, 1024)
174
+
175
+ # Get the similarity scores for the embeddings
176
+ similarities = model.similarity(embeddings, embeddings)
177
+ print(similarities.shape)
178
+ # torch.Size([3, 3])
179
+ ```
180
+
181
+ <!--
182
+ ### Direct Usage (Transformers)
183
+
184
+ <details><summary>Click to see the direct usage in Transformers</summary>
185
+
186
+ </details>
187
+ -->
188
+
189
+ <!--
190
+ ### Downstream Usage (Sentence Transformers)
191
+
192
+ You can finetune this model on your own dataset.
193
+
194
+ <details><summary>Click to expand</summary>
195
+
196
+ </details>
197
+ -->
198
+
199
+ <!--
200
+ ### Out-of-Scope Use
201
+
202
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
203
+ -->
204
+
205
+ ## Evaluation
206
+
207
+ ### Metrics
208
+
209
+ #### Semantic Similarity
210
+
211
+ * Datasets: `sts-dev` and `sts-test`
212
+ * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
213
+
214
+ | Metric | sts-dev | sts-test |
215
+ |:--------------------|:---------|:-----------|
216
+ | pearson_cosine | 0.9379 | 0.9379 |
217
+ | **spearman_cosine** | **0.9300** | **0.9301** |
218
+
219
+ #### Knowledge Distillation
220
+
221
+ * Evaluated with [<code>MSEEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.MSEEvaluator)
222
+
223
+ | Metric | Value |
224
+ |:-----------------|:------------|
225
+ | **negative_mse** | **-0.0109** |
226
+
227
+ <!--
228
+ ## Bias, Risks and Limitations
229
+
230
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
231
+ -->
232
+
233
+ <!--
234
+ ### Recommendations
235
+
236
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
237
+ -->
238
+
239
+ ## Training Details
240
+
241
+ ### Training Dataset
242
+
243
+ #### tr-sentences
244
+
245
+ * Dataset: tr-sentences
246
+ * Size: 9,623,924 training samples
247
+ * Columns: <code>sentence</code> and <code>label</code>
248
+ * Approximate statistics based on the first 1000 samples:
249
+ | | sentence | label |
250
+ |:--------|:-----------------------------------------------------------------------------------|:--------------------------------------|
251
+ | type | string | list |
252
+ | details | <ul><li>min: 5 tokens</li><li>mean: 55.78 tokens</li><li>max: 468 tokens</li></ul> | <ul><li>size: 1024 elements</li></ul> |
253
+ * Samples:
254
+ | sentence | label |
255
+ |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------|
256
+ | <code>NBA tarihinde bu ödülü en çok kaç kez kim kazanmıştır?</code> | <code>[-0.027497457340359688, -0.024517377838492393, -0.013820995576679707, 0.00024465256137773395, -0.020534219220280647, ...]</code> |
257
+ | <code>Romero ve yapımcı Richard P. Rubinstein, yeni bir proje için herhangi bir yerli yatırımcılara temin koyamadıklarını söyledi. Romero Şans eseri, İtalyan korku yönetmeni Dario Argento'ya ulaştı. bu film Yaşayan Ölülerin Gecesi filmin'in kritik savunucusudur, Argento filmin korku klasik arasında yer almasına yardımcı olmak için istekliydi. uluslararası dağıtım hakları karşılığında finansman sağlamak için, Romero ve Rubinstein bir araya geldi. Senaryoyu yazarken bir sahnede değişiklik yapmak için Argento Roma'yı Romero filme davet etti. İkisi de daha sonra arsa gelişmelerini tartışmak için bir olabilirdi. Romero Monroeville Mall'ın durumunun yanı sıra Oxford Kalkınma'da alışveriş merkezi sahipleri ile bağlantıları ile ek bir güvenli finansman başardı. Döküm tamamlandıktan sonra, başlıca çekim tarihinin 13 Kasım, 1977 tarihinde film'in Pensilvanya'da başlaması planlanıyordu.</code> | <code>[-0.02431895025074482, -0.03177526593208313, -0.010546382516622543, 0.0393124595284462, -0.03390512242913246, ...]</code> |
258
+ | <code>Evet, Nasuhlar ismi Adapazarı, Kandıra ve Yenipazar ilçelerinde farklı yer isimlerine aittir.</code> | <code>[0.0020795632153749466, -0.013080586679279804, -0.018256550654768944, 0.022429518401622772, -0.03087380714714527, ...]</code> |
259
+ * Loss: [<code>MSELoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#mseloss)
260
+
261
+ ### Evaluation Dataset
262
+
263
+ #### tr-sentences
264
+
265
+ * Dataset: tr-sentences
266
+ * Size: 9,623,924 evaluation samples
267
+ * Columns: <code>sentence</code> and <code>label</code>
268
+ * Approximate statistics based on the first 1000 samples:
269
+ | | sentence | label |
270
+ |:--------|:-----------------------------------------------------------------------------------|:--------------------------------------|
271
+ | type | string | list |
272
+ | details | <ul><li>min: 3 tokens</li><li>mean: 51.95 tokens</li><li>max: 614 tokens</li></ul> | <ul><li>size: 1024 elements</li></ul> |
273
+ * Samples:
274
+ | sentence | label |
275
+ |:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------|
276
+ | <code>Bernhard, şiirle yazarlık hayatına başlamış ve 1963'te "Frost" (Don) adlı ilk romanını yayınlamıştır. 1957'den itibaren serbest yazarlık yapmaya başlamış ve hayatı boyunca yazarlık sayesinde geçimini sağlamıştır.</code> | <code>[-0.019921669736504555, -0.007309767417609692, 0.01690034568309784, -0.03302725777029991, -0.003539217868819833, ...]</code> |
277
+ | <code>Sonraki maçta AJ Styles ile Kevin Owens, WWE Birleşik Devletler Şampiyonluğu kemeri için maça çıktı. Shane McMahon, maçın özel konuk hakemliğini yaptı. As Shane, Owens'ı kontrol etti. Styles, Owens'a Springboard 450 Splash yapmaya çalışırken yanlışlıkla Shane'e de yaptı. Owens, Styles'a Pop Up Powerbomb yaptıktan sonra Styles'ı tuşlamaya çalıştı ancak Styles son anda kurtuldu. Owens, Shane'in kararını beğenmeyince ikisi arasında kısa süreli bir tartışma oldu. Owens, Styles'ın Calf Crusher hareketini karşıladıktan sonra Styles'tan tekme yiyince Shane'in üzerine düştü. Styles, Owens'ı Calf Crusher ile pes ettirse de ringin dışında aşağıda yatan Shane bunu göremedi. Bunun üzerine Styles da Shane ile tartıştı. Styles, Owens'a Styles Clash yaptıktan sonra tuşa gitti ancak Owens son anda kurtuldu. Owens'ın yaptığı Pop Up Powerbomb'dan sonra Styles'ı tuşladı ancak Shane son anda Styles'ın ayağının iplerde olduğunu fark edince tuşu iptal etti. Owens ve Shane tartışmaya başladı ve Shane,</code> | <code>[0.04532943293452263, -0.007217255420982838, -0.019380981102585793, -0.0026675150729715824, 0.018997980281710625, ...]</code> |
278
+ | <code>Leylek yavruları, anne ve babaları tarafından yiyip kısmen sindirdikleri besinleri kusarak beslenirler. Anne leylek yavruları yağmur, fırtına ve güneşten korurken, baba leylek yavrularını beslemekle yükümlüdür.</code> | <code>[-0.055585864931344986, 0.045432090759277344, -0.04405859857797623, 0.0009241091320291162, -0.0689476728439331, ...]</code> |
279
+ * Loss: [<code>MSELoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#mseloss)
280
+
281
+ ### Training Hyperparameters
282
+ #### Non-Default Hyperparameters
283
+
284
+ - `eval_strategy`: steps
285
+ - `per_device_train_batch_size`: 64
286
+ - `per_device_eval_batch_size`: 64
287
+ - `learning_rate`: 0.0001
288
+ - `num_train_epochs`: 1
289
+ - `warmup_ratio`: 0.1
290
+ - `bf16`: True
291
+ - `load_best_model_at_end`: True
292
+
293
+ #### All Hyperparameters
294
+ <details><summary>Click to expand</summary>
295
+
296
+ - `overwrite_output_dir`: False
297
+ - `do_predict`: False
298
+ - `eval_strategy`: steps
299
+ - `prediction_loss_only`: True
300
+ - `per_device_train_batch_size`: 64
301
+ - `per_device_eval_batch_size`: 64
302
+ - `per_gpu_train_batch_size`: None
303
+ - `per_gpu_eval_batch_size`: None
304
+ - `gradient_accumulation_steps`: 1
305
+ - `eval_accumulation_steps`: None
306
+ - `torch_empty_cache_steps`: None
307
+ - `learning_rate`: 0.0001
308
+ - `weight_decay`: 0.0
309
+ - `adam_beta1`: 0.9
310
+ - `adam_beta2`: 0.999
311
+ - `adam_epsilon`: 1e-08
312
+ - `max_grad_norm`: 1.0
313
+ - `num_train_epochs`: 1
314
+ - `max_steps`: -1
315
+ - `lr_scheduler_type`: linear
316
+ - `lr_scheduler_kwargs`: {}
317
+ - `warmup_ratio`: 0.1
318
+ - `warmup_steps`: 0
319
+ - `log_level`: passive
320
+ - `log_level_replica`: warning
321
+ - `log_on_each_node`: True
322
+ - `logging_nan_inf_filter`: True
323
+ - `save_safetensors`: True
324
+ - `save_on_each_node`: False
325
+ - `save_only_model`: False
326
+ - `restore_callback_states_from_checkpoint`: False
327
+ - `no_cuda`: False
328
+ - `use_cpu`: False
329
+ - `use_mps_device`: False
330
+ - `seed`: 42
331
+ - `data_seed`: None
332
+ - `jit_mode_eval`: False
333
+ - `use_ipex`: False
334
+ - `bf16`: True
335
+ - `fp16`: False
336
+ - `fp16_opt_level`: O1
337
+ - `half_precision_backend`: auto
338
+ - `bf16_full_eval`: False
339
+ - `fp16_full_eval`: False
340
+ - `tf32`: None
341
+ - `local_rank`: 0
342
+ - `ddp_backend`: None
343
+ - `tpu_num_cores`: None
344
+ - `tpu_metrics_debug`: False
345
+ - `debug`: []
346
+ - `dataloader_drop_last`: False
347
+ - `dataloader_num_workers`: 0
348
+ - `dataloader_prefetch_factor`: None
349
+ - `past_index`: -1
350
+ - `disable_tqdm`: False
351
+ - `remove_unused_columns`: True
352
+ - `label_names`: None
353
+ - `load_best_model_at_end`: True
354
+ - `ignore_data_skip`: False
355
+ - `fsdp`: []
356
+ - `fsdp_min_num_params`: 0
357
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
358
+ - `fsdp_transformer_layer_cls_to_wrap`: None
359
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
360
+ - `deepspeed`: None
361
+ - `label_smoothing_factor`: 0.0
362
+ - `optim`: adamw_torch
363
+ - `optim_args`: None
364
+ - `adafactor`: False
365
+ - `group_by_length`: False
366
+ - `length_column_name`: length
367
+ - `ddp_find_unused_parameters`: None
368
+ - `ddp_bucket_cap_mb`: None
369
+ - `ddp_broadcast_buffers`: False
370
+ - `dataloader_pin_memory`: True
371
+ - `dataloader_persistent_workers`: False
372
+ - `skip_memory_metrics`: True
373
+ - `use_legacy_prediction_loop`: False
374
+ - `push_to_hub`: False
375
+ - `resume_from_checkpoint`: None
376
+ - `hub_model_id`: None
377
+ - `hub_strategy`: every_save
378
+ - `hub_private_repo`: None
379
+ - `hub_always_push`: False
380
+ - `gradient_checkpointing`: False
381
+ - `gradient_checkpointing_kwargs`: None
382
+ - `include_inputs_for_metrics`: False
383
+ - `include_for_metrics`: []
384
+ - `eval_do_concat_batches`: True
385
+ - `fp16_backend`: auto
386
+ - `push_to_hub_model_id`: None
387
+ - `push_to_hub_organization`: None
388
+ - `mp_parameters`:
389
+ - `auto_find_batch_size`: False
390
+ - `full_determinism`: False
391
+ - `torchdynamo`: None
392
+ - `ray_scope`: last
393
+ - `ddp_timeout`: 1800
394
+ - `torch_compile`: False
395
+ - `torch_compile_backend`: None
396
+ - `torch_compile_mode`: None
397
+ - `dispatch_batches`: None
398
+ - `split_batches`: None
399
+ - `include_tokens_per_second`: False
400
+ - `include_num_input_tokens_seen`: False
401
+ - `neftune_noise_alpha`: None
402
+ - `optim_target_modules`: None
403
+ - `batch_eval_metrics`: False
404
+ - `eval_on_start`: False
405
+ - `use_liger_kernel`: False
406
+ - `eval_use_gather_object`: False
407
+ - `average_tokens_across_devices`: False
408
+ - `prompts`: None
409
+ - `batch_sampler`: batch_sampler
410
+ - `multi_dataset_batch_sampler`: proportional
411
+
412
+ </details>
413
+
414
+ ### Training Logs
415
+ <details><summary>Click to expand</summary>
416
+
417
+ | Epoch | Step | Training Loss | Validation Loss | sts-dev_spearman_cosine | negative_mse | sts-test_spearman_cosine |
418
+ |:----------:|:----------:|:-------------:|:---------------:|:-----------------------:|:------------:|:------------------------:|
419
+ | 0 | 0 | - | - | 0.0291 | -0.1556 | - |
420
+ | 0.0033 | 500 | - | 0.0009 | 0.0699 | -0.0943 | - |
421
+ | 0.0067 | 1000 | 0.0011 | 0.0008 | 0.4391 | -0.0823 | - |
422
+ | 0.0100 | 1500 | - | 0.0007 | 0.5327 | -0.0744 | - |
423
+ | 0.0133 | 2000 | 0.0008 | 0.0007 | 0.5789 | -0.0691 | - |
424
+ | 0.0167 | 2500 | - | 0.0006 | 0.6090 | -0.0645 | - |
425
+ | 0.0200 | 3000 | 0.0007 | 0.0006 | 0.6253 | -0.0605 | - |
426
+ | 0.0233 | 3500 | - | 0.0006 | 0.6452 | -0.0571 | - |
427
+ | 0.0267 | 4000 | 0.0006 | 0.0005 | 0.6625 | -0.0537 | - |
428
+ | 0.0300 | 4500 | - | 0.0005 | 0.6673 | -0.0510 | - |
429
+ | 0.0333 | 5000 | 0.0006 | 0.0005 | 0.6855 | -0.0483 | - |
430
+ | 0.0367 | 5500 | - | 0.0005 | 0.7052 | -0.0458 | - |
431
+ | 0.0400 | 6000 | 0.0005 | 0.0004 | 0.7045 | -0.0439 | - |
432
+ | 0.0433 | 6500 | - | 0.0004 | 0.7274 | -0.0419 | - |
433
+ | 0.0466 | 7000 | 0.0005 | 0.0004 | 0.7454 | -0.0404 | - |
434
+ | 0.0500 | 7500 | - | 0.0004 | 0.7504 | -0.0388 | - |
435
+ | 0.0533 | 8000 | 0.0004 | 0.0004 | 0.7616 | -0.0374 | - |
436
+ | 0.0566 | 8500 | - | 0.0004 | 0.7666 | -0.0360 | - |
437
+ | 0.0600 | 9000 | 0.0004 | 0.0003 | 0.7721 | -0.0347 | - |
438
+ | 0.0633 | 9500 | - | 0.0003 | 0.7838 | -0.0337 | - |
439
+ | 0.0666 | 10000 | 0.0004 | 0.0003 | 0.7871 | -0.0326 | - |
440
+ | 0.0700 | 10500 | - | 0.0003 | 0.7920 | -0.0316 | - |
441
+ | 0.0733 | 11000 | 0.0004 | 0.0003 | 0.7949 | -0.0307 | - |
442
+ | 0.0766 | 11500 | - | 0.0003 | 0.8020 | -0.0298 | - |
443
+ | 0.0800 | 12000 | 0.0004 | 0.0003 | 0.8013 | -0.0290 | - |
444
+ | 0.0833 | 12500 | - | 0.0003 | 0.8141 | -0.0282 | - |
445
+ | 0.0866 | 13000 | 0.0003 | 0.0003 | 0.8179 | -0.0275 | - |
446
+ | 0.0900 | 13500 | - | 0.0003 | 0.8216 | -0.0267 | - |
447
+ | 0.0933 | 14000 | 0.0003 | 0.0003 | 0.8228 | -0.0261 | - |
448
+ | 0.0966 | 14500 | - | 0.0003 | 0.8270 | -0.0256 | - |
449
+ | 0.1000 | 15000 | 0.0003 | 0.0003 | 0.8263 | -0.0250 | - |
450
+ | 0.1033 | 15500 | - | 0.0002 | 0.8376 | -0.0243 | - |
451
+ | 0.1066 | 16000 | 0.0003 | 0.0002 | 0.8362 | -0.0238 | - |
452
+ | 0.1100 | 16500 | - | 0.0002 | 0.8386 | -0.0233 | - |
453
+ | 0.1133 | 17000 | 0.0003 | 0.0002 | 0.8411 | -0.0228 | - |
454
+ | 0.1166 | 17500 | - | 0.0002 | 0.8441 | -0.0225 | - |
455
+ | 0.1200 | 18000 | 0.0003 | 0.0002 | 0.8456 | -0.0221 | - |
456
+ | 0.1233 | 18500 | - | 0.0002 | 0.8483 | -0.0217 | - |
457
+ | 0.1266 | 19000 | 0.0003 | 0.0002 | 0.8556 | -0.0214 | - |
458
+ | 0.1299 | 19500 | - | 0.0002 | 0.8543 | -0.0211 | - |
459
+ | 0.1333 | 20000 | 0.0003 | 0.0002 | 0.8581 | -0.0208 | - |
460
+ | 0.1366 | 20500 | - | 0.0002 | 0.8595 | -0.0205 | - |
461
+ | 0.1399 | 21000 | 0.0003 | 0.0002 | 0.8589 | -0.0202 | - |
462
+ | 0.1433 | 21500 | - | 0.0002 | 0.8628 | -0.0200 | - |
463
+ | 0.1466 | 22000 | 0.0003 | 0.0002 | 0.8591 | -0.0197 | - |
464
+ | 0.1499 | 22500 | - | 0.0002 | 0.8644 | -0.0196 | - |
465
+ | 0.1533 | 23000 | 0.0002 | 0.0002 | 0.8703 | -0.0193 | - |
466
+ | 0.1566 | 23500 | - | 0.0002 | 0.8674 | -0.0190 | - |
467
+ | 0.1599 | 24000 | 0.0002 | 0.0002 | 0.8695 | -0.0189 | - |
468
+ | 0.1633 | 24500 | - | 0.0002 | 0.8686 | -0.0187 | - |
469
+ | 0.1666 | 25000 | 0.0002 | 0.0002 | 0.8730 | -0.0185 | - |
470
+ | 0.1699 | 25500 | - | 0.0002 | 0.8760 | -0.0184 | - |
471
+ | 0.1733 | 26000 | 0.0002 | 0.0002 | 0.8744 | -0.0181 | - |
472
+ | 0.1766 | 26500 | - | 0.0002 | 0.8775 | -0.0180 | - |
473
+ | 0.1799 | 27000 | 0.0002 | 0.0002 | 0.8755 | -0.0178 | - |
474
+ | 0.1833 | 27500 | - | 0.0002 | 0.8754 | -0.0177 | - |
475
+ | 0.1866 | 28000 | 0.0002 | 0.0002 | 0.8771 | -0.0176 | - |
476
+ | 0.1899 | 28500 | - | 0.0002 | 0.8808 | -0.0175 | - |
477
+ | 0.1933 | 29000 | 0.0002 | 0.0002 | 0.8807 | -0.0174 | - |
478
+ | 0.1966 | 29500 | - | 0.0002 | 0.8833 | -0.0172 | - |
479
+ | 0.1999 | 30000 | 0.0002 | 0.0002 | 0.8814 | -0.0171 | - |
480
+ | 0.2032 | 30500 | - | 0.0002 | 0.8819 | -0.0170 | - |
481
+ | 0.2066 | 31000 | 0.0002 | 0.0002 | 0.8831 | -0.0168 | - |
482
+ | 0.2099 | 31500 | - | 0.0002 | 0.8859 | -0.0167 | - |
483
+ | 0.2132 | 32000 | 0.0002 | 0.0002 | 0.8864 | -0.0166 | - |
484
+ | 0.2166 | 32500 | - | 0.0002 | 0.8848 | -0.0165 | - |
485
+ | 0.2199 | 33000 | 0.0002 | 0.0002 | 0.8871 | -0.0164 | - |
486
+ | 0.2232 | 33500 | - | 0.0002 | 0.8875 | -0.0163 | - |
487
+ | 0.2266 | 34000 | 0.0002 | 0.0002 | 0.8883 | -0.0162 | - |
488
+ | 0.2299 | 34500 | - | 0.0002 | 0.8892 | -0.0161 | - |
489
+ | 0.2332 | 35000 | 0.0002 | 0.0002 | 0.8879 | -0.0161 | - |
490
+ | 0.2366 | 35500 | - | 0.0002 | 0.8897 | -0.0160 | - |
491
+ | 0.2399 | 36000 | 0.0002 | 0.0002 | 0.8887 | -0.0159 | - |
492
+ | 0.2432 | 36500 | - | 0.0002 | 0.8919 | -0.0158 | - |
493
+ | 0.2466 | 37000 | 0.0002 | 0.0002 | 0.8931 | -0.0157 | - |
494
+ | 0.2499 | 37500 | - | 0.0002 | 0.8924 | -0.0156 | - |
495
+ | 0.2532 | 38000 | 0.0002 | 0.0002 | 0.8955 | -0.0156 | - |
496
+ | 0.2566 | 38500 | - | 0.0002 | 0.8941 | -0.0155 | - |
497
+ | 0.2599 | 39000 | 0.0002 | 0.0002 | 0.8940 | -0.0154 | - |
498
+ | 0.2632 | 39500 | - | 0.0002 | 0.8981 | -0.0154 | - |
499
+ | 0.2666 | 40000 | 0.0002 | 0.0002 | 0.8967 | -0.0153 | - |
500
+ | 0.2699 | 40500 | - | 0.0002 | 0.8959 | -0.0152 | - |
501
+ | 0.2732 | 41000 | 0.0002 | 0.0002 | 0.8982 | -0.0151 | - |
502
+ | 0.2766 | 41500 | - | 0.0002 | 0.8981 | -0.0151 | - |
503
+ | 0.2799 | 42000 | 0.0002 | 0.0002 | 0.8977 | -0.0151 | - |
504
+ | 0.2832 | 42500 | - | 0.0001 | 0.9001 | -0.0149 | - |
505
+ | 0.2865 | 43000 | 0.0002 | 0.0001 | 0.8987 | -0.0149 | - |
506
+ | 0.2899 | 43500 | - | 0.0001 | 0.8995 | -0.0148 | - |
507
+ | 0.2932 | 44000 | 0.0002 | 0.0001 | 0.8999 | -0.0148 | - |
508
+ | 0.2965 | 44500 | - | 0.0001 | 0.9014 | -0.0147 | - |
509
+ | 0.2999 | 45000 | 0.0002 | 0.0001 | 0.9013 | -0.0147 | - |
510
+ | 0.3032 | 45500 | - | 0.0001 | 0.9031 | -0.0146 | - |
511
+ | 0.3065 | 46000 | 0.0002 | 0.0001 | 0.9025 | -0.0146 | - |
512
+ | 0.3099 | 46500 | - | 0.0001 | 0.9023 | -0.0145 | - |
513
+ | 0.3132 | 47000 | 0.0002 | 0.0001 | 0.9016 | -0.0145 | - |
514
+ | 0.3165 | 47500 | - | 0.0001 | 0.9022 | -0.0144 | - |
515
+ | 0.3199 | 48000 | 0.0002 | 0.0001 | 0.9041 | -0.0143 | - |
516
+ | 0.3232 | 48500 | - | 0.0001 | 0.9044 | -0.0143 | - |
517
+ | 0.3265 | 49000 | 0.0002 | 0.0001 | 0.9045 | -0.0143 | - |
518
+ | 0.3299 | 49500 | - | 0.0001 | 0.9058 | -0.0142 | - |
519
+ | 0.3332 | 50000 | 0.0002 | 0.0001 | 0.9046 | -0.0142 | - |
520
+ | 0.3365 | 50500 | - | 0.0001 | 0.9062 | -0.0141 | - |
521
+ | 0.3399 | 51000 | 0.0002 | 0.0001 | 0.9068 | -0.0141 | - |
522
+ | 0.3432 | 51500 | - | 0.0001 | 0.9051 | -0.0140 | - |
523
+ | 0.3465 | 52000 | 0.0002 | 0.0001 | 0.9058 | -0.0140 | - |
524
+ | 0.3499 | 52500 | - | 0.0001 | 0.9067 | -0.0140 | - |
525
+ | 0.3532 | 53000 | 0.0002 | 0.0001 | 0.9054 | -0.0139 | - |
526
+ | 0.3565 | 53500 | - | 0.0001 | 0.9078 | -0.0139 | - |
527
+ | 0.3599 | 54000 | 0.0002 | 0.0001 | 0.9078 | -0.0138 | - |
528
+ | 0.3632 | 54500 | - | 0.0001 | 0.9082 | -0.0138 | - |
529
+ | 0.3665 | 55000 | 0.0002 | 0.0001 | 0.9061 | -0.0138 | - |
530
+ | 0.3698 | 55500 | - | 0.0001 | 0.9094 | -0.0137 | - |
531
+ | 0.3732 | 56000 | 0.0002 | 0.0001 | 0.9074 | -0.0137 | - |
532
+ | 0.3765 | 56500 | - | 0.0001 | 0.9099 | -0.0136 | - |
533
+ | 0.3798 | 57000 | 0.0002 | 0.0001 | 0.9095 | -0.0136 | - |
534
+ | 0.3832 | 57500 | - | 0.0001 | 0.9092 | -0.0136 | - |
535
+ | 0.3865 | 58000 | 0.0002 | 0.0001 | 0.9101 | -0.0135 | - |
536
+ | 0.3898 | 58500 | - | 0.0001 | 0.9100 | -0.0135 | - |
537
+ | 0.3932 | 59000 | 0.0002 | 0.0001 | 0.9089 | -0.0135 | - |
538
+ | 0.3965 | 59500 | - | 0.0001 | 0.9103 | -0.0134 | - |
539
+ | 0.3998 | 60000 | 0.0002 | 0.0001 | 0.9107 | -0.0134 | - |
540
+ | 0.4032 | 60500 | - | 0.0001 | 0.9104 | -0.0134 | - |
541
+ | 0.4065 | 61000 | 0.0002 | 0.0001 | 0.9093 | -0.0133 | - |
542
+ | 0.4098 | 61500 | - | 0.0001 | 0.9111 | -0.0133 | - |
543
+ | 0.4132 | 62000 | 0.0002 | 0.0001 | 0.9099 | -0.0133 | - |
544
+ | 0.4165 | 62500 | - | 0.0001 | 0.9105 | -0.0132 | - |
545
+ | 0.4198 | 63000 | 0.0002 | 0.0001 | 0.9113 | -0.0132 | - |
546
+ | 0.4232 | 63500 | - | 0.0001 | 0.9135 | -0.0132 | - |
547
+ | 0.4265 | 64000 | 0.0002 | 0.0001 | 0.9138 | -0.0131 | - |
548
+ | 0.4298 | 64500 | - | 0.0001 | 0.9131 | -0.0132 | - |
549
+ | 0.4332 | 65000 | 0.0002 | 0.0001 | 0.9124 | -0.0131 | - |
550
+ | 0.4365 | 65500 | - | 0.0001 | 0.9137 | -0.0131 | - |
551
+ | 0.4398 | 66000 | 0.0002 | 0.0001 | 0.9141 | -0.0130 | - |
552
+ | 0.4432 | 66500 | - | 0.0001 | 0.9144 | -0.0130 | - |
553
+ | 0.4465 | 67000 | 0.0002 | 0.0001 | 0.9148 | -0.0130 | - |
554
+ | 0.4498 | 67500 | - | 0.0001 | 0.9158 | -0.0129 | - |
555
+ | 0.4531 | 68000 | 0.0002 | 0.0001 | 0.9156 | -0.0129 | - |
556
+ | 0.4565 | 68500 | - | 0.0001 | 0.9151 | -0.0129 | - |
557
+ | 0.4598 | 69000 | 0.0002 | 0.0001 | 0.9152 | -0.0128 | - |
558
+ | 0.4631 | 69500 | - | 0.0001 | 0.9153 | -0.0129 | - |
559
+ | 0.4665 | 70000 | 0.0002 | 0.0001 | 0.9146 | -0.0128 | - |
560
+ | 0.4698 | 70500 | - | 0.0001 | 0.9152 | -0.0128 | - |
561
+ | 0.4731 | 71000 | 0.0002 | 0.0001 | 0.9165 | -0.0127 | - |
562
+ | 0.4765 | 71500 | - | 0.0001 | 0.9168 | -0.0128 | - |
563
+ | 0.4798 | 72000 | 0.0002 | 0.0001 | 0.9160 | -0.0127 | - |
564
+ | 0.4831 | 72500 | - | 0.0001 | 0.9162 | -0.0127 | - |
565
+ | 0.4865 | 73000 | 0.0002 | 0.0001 | 0.9161 | -0.0127 | - |
566
+ | 0.4898 | 73500 | - | 0.0001 | 0.9167 | -0.0126 | - |
567
+ | 0.4931 | 74000 | 0.0002 | 0.0001 | 0.9164 | -0.0126 | - |
568
+ | 0.4965 | 74500 | - | 0.0001 | 0.9174 | -0.0126 | - |
569
+ | 0.4998 | 75000 | 0.0002 | 0.0001 | 0.9181 | -0.0126 | - |
570
+ | 0.5031 | 75500 | - | 0.0001 | 0.9173 | -0.0126 | - |
571
+ | 0.5065 | 76000 | 0.0002 | 0.0001 | 0.9176 | -0.0125 | - |
572
+ | 0.5098 | 76500 | - | 0.0001 | 0.9161 | -0.0125 | - |
573
+ | 0.5131 | 77000 | 0.0002 | 0.0001 | 0.9186 | -0.0124 | - |
574
+ | 0.5165 | 77500 | - | 0.0001 | 0.9186 | -0.0125 | - |
575
+ | 0.5198 | 78000 | 0.0002 | 0.0001 | 0.9172 | -0.0124 | - |
576
+ | 0.5231 | 78500 | - | 0.0001 | 0.9179 | -0.0124 | - |
577
+ | 0.5264 | 79000 | 0.0002 | 0.0001 | 0.9193 | -0.0124 | - |
578
+ | 0.5298 | 79500 | - | 0.0001 | 0.9176 | -0.0124 | - |
579
+ | 0.5331 | 80000 | 0.0002 | 0.0001 | 0.9183 | -0.0123 | - |
580
+ | 0.5364 | 80500 | - | 0.0001 | 0.9170 | -0.0123 | - |
581
+ | 0.5398 | 81000 | 0.0002 | 0.0001 | 0.9184 | -0.0124 | - |
582
+ | 0.5431 | 81500 | - | 0.0001 | 0.9194 | -0.0123 | - |
583
+ | 0.5464 | 82000 | 0.0002 | 0.0001 | 0.9189 | -0.0123 | - |
584
+ | 0.5498 | 82500 | - | 0.0001 | 0.9201 | -0.0122 | - |
585
+ | 0.5531 | 83000 | 0.0002 | 0.0001 | 0.9191 | -0.0123 | - |
586
+ | 0.5564 | 83500 | - | 0.0001 | 0.9186 | -0.0122 | - |
587
+ | 0.5598 | 84000 | 0.0002 | 0.0001 | 0.9200 | -0.0122 | - |
588
+ | 0.5631 | 84500 | - | 0.0001 | 0.9201 | -0.0122 | - |
589
+ | 0.5664 | 85000 | 0.0002 | 0.0001 | 0.9196 | -0.0122 | - |
590
+ | 0.5698 | 85500 | - | 0.0001 | 0.9209 | -0.0121 | - |
591
+ | 0.5731 | 86000 | 0.0002 | 0.0001 | 0.9202 | -0.0122 | - |
592
+ | 0.5764 | 86500 | - | 0.0001 | 0.9207 | -0.0121 | - |
593
+ | 0.5798 | 87000 | 0.0002 | 0.0001 | 0.9213 | -0.0121 | - |
594
+ | 0.5831 | 87500 | - | 0.0001 | 0.9206 | -0.0121 | - |
595
+ | 0.5864 | 88000 | 0.0002 | 0.0001 | 0.9201 | -0.0121 | - |
596
+ | 0.5898 | 88500 | - | 0.0001 | 0.9201 | -0.0120 | - |
597
+ | 0.5931 | 89000 | 0.0002 | 0.0001 | 0.9207 | -0.0120 | - |
598
+ | 0.5964 | 89500 | - | 0.0001 | 0.9213 | -0.0120 | - |
599
+ | 0.5998 | 90000 | 0.0002 | 0.0001 | 0.9214 | -0.0120 | - |
600
+ | 0.6031 | 90500 | - | 0.0001 | 0.9224 | -0.0120 | - |
601
+ | 0.6064 | 91000 | 0.0002 | 0.0001 | 0.9220 | -0.0120 | - |
602
+ | 0.6097 | 91500 | - | 0.0001 | 0.9225 | -0.0119 | - |
603
+ | 0.6131 | 92000 | 0.0002 | 0.0001 | 0.9211 | -0.0120 | - |
604
+ | 0.6164 | 92500 | - | 0.0001 | 0.9229 | -0.0119 | - |
605
+ | 0.6197 | 93000 | 0.0002 | 0.0001 | 0.9205 | -0.0119 | - |
606
+ | 0.6231 | 93500 | - | 0.0001 | 0.9223 | -0.0119 | - |
607
+ | 0.6264 | 94000 | 0.0002 | 0.0001 | 0.9227 | -0.0119 | - |
608
+ | 0.6297 | 94500 | - | 0.0001 | 0.9232 | -0.0119 | - |
609
+ | 0.6331 | 95000 | 0.0002 | 0.0001 | 0.9223 | -0.0118 | - |
610
+ | 0.6364 | 95500 | - | 0.0001 | 0.9216 | -0.0118 | - |
611
+ | 0.6397 | 96000 | 0.0002 | 0.0001 | 0.9226 | -0.0118 | - |
612
+ | 0.6431 | 96500 | - | 0.0001 | 0.9225 | -0.0118 | - |
613
+ | 0.6464 | 97000 | 0.0002 | 0.0001 | 0.9235 | -0.0118 | - |
614
+ | 0.6497 | 97500 | - | 0.0001 | 0.9226 | -0.0118 | - |
615
+ | 0.6531 | 98000 | 0.0002 | 0.0001 | 0.9237 | -0.0118 | - |
616
+ | 0.6564 | 98500 | - | 0.0001 | 0.9228 | -0.0117 | - |
617
+ | 0.6597 | 99000 | 0.0002 | 0.0001 | 0.9236 | -0.0117 | - |
618
+ | 0.6631 | 99500 | - | 0.0001 | 0.9230 | -0.0117 | - |
619
+ | 0.6664 | 100000 | 0.0002 | 0.0001 | 0.9239 | -0.0117 | - |
620
+ | 0.6697 | 100500 | - | 0.0001 | 0.9238 | -0.0117 | - |
621
+ | 0.6731 | 101000 | 0.0002 | 0.0001 | 0.9243 | -0.0117 | - |
622
+ | 0.6764 | 101500 | - | 0.0001 | 0.9239 | -0.0116 | - |
623
+ | 0.6797 | 102000 | 0.0002 | 0.0001 | 0.9238 | -0.0117 | - |
624
+ | 0.6831 | 102500 | - | 0.0001 | 0.9238 | -0.0117 | - |
625
+ | 0.6864 | 103000 | 0.0002 | 0.0001 | 0.9237 | -0.0116 | - |
626
+ | 0.6897 | 103500 | - | 0.0001 | 0.9248 | -0.0116 | - |
627
+ | 0.6930 | 104000 | 0.0002 | 0.0001 | 0.9250 | -0.0116 | - |
628
+ | 0.6964 | 104500 | - | 0.0001 | 0.9253 | -0.0116 | - |
629
+ | 0.6997 | 105000 | 0.0002 | 0.0001 | 0.9254 | -0.0116 | - |
630
+ | 0.7030 | 105500 | - | 0.0001 | 0.9254 | -0.0116 | - |
631
+ | 0.7064 | 106000 | 0.0002 | 0.0001 | 0.9243 | -0.0115 | - |
632
+ | 0.7097 | 106500 | - | 0.0001 | 0.9253 | -0.0115 | - |
633
+ | 0.7130 | 107000 | 0.0002 | 0.0001 | 0.9265 | -0.0115 | - |
634
+ | 0.7164 | 107500 | - | 0.0001 | 0.9259 | -0.0115 | - |
635
+ | 0.7197 | 108000 | 0.0002 | 0.0001 | 0.9264 | -0.0115 | - |
636
+ | 0.7230 | 108500 | - | 0.0001 | 0.9253 | -0.0115 | - |
637
+ | 0.7264 | 109000 | 0.0002 | 0.0001 | 0.9263 | -0.0115 | - |
638
+ | 0.7297 | 109500 | - | 0.0001 | 0.9254 | -0.0115 | - |
639
+ | 0.7330 | 110000 | 0.0002 | 0.0001 | 0.9259 | -0.0114 | - |
640
+ | 0.7364 | 110500 | - | 0.0001 | 0.9261 | -0.0114 | - |
641
+ | 0.7397 | 111000 | 0.0002 | 0.0001 | 0.9257 | -0.0114 | - |
642
+ | 0.7430 | 111500 | - | 0.0001 | 0.9260 | -0.0114 | - |
643
+ | 0.7464 | 112000 | 0.0002 | 0.0001 | 0.9264 | -0.0114 | - |
644
+ | 0.7497 | 112500 | - | 0.0001 | 0.9262 | -0.0114 | - |
645
+ | 0.7530 | 113000 | 0.0002 | 0.0001 | 0.9270 | -0.0114 | - |
646
+ | 0.7564 | 113500 | - | 0.0001 | 0.9263 | -0.0114 | - |
647
+ | 0.7597 | 114000 | 0.0002 | 0.0001 | 0.9264 | -0.0114 | - |
648
+ | 0.7630 | 114500 | - | 0.0001 | 0.9261 | -0.0114 | - |
649
+ | 0.7663 | 115000 | 0.0002 | 0.0001 | 0.9262 | -0.0113 | - |
650
+ | 0.7697 | 115500 | - | 0.0001 | 0.9267 | -0.0113 | - |
651
+ | 0.7730 | 116000 | 0.0002 | 0.0001 | 0.9272 | -0.0113 | - |
652
+ | 0.7763 | 116500 | - | 0.0001 | 0.9270 | -0.0113 | - |
653
+ | 0.7797 | 117000 | 0.0002 | 0.0001 | 0.9267 | -0.0113 | - |
654
+ | 0.7830 | 117500 | - | 0.0001 | 0.9266 | -0.0113 | - |
655
+ | 0.7863 | 118000 | 0.0002 | 0.0001 | 0.9273 | -0.0113 | - |
656
+ | 0.7897 | 118500 | - | 0.0001 | 0.9273 | -0.0113 | - |
657
+ | 0.7930 | 119000 | 0.0002 | 0.0001 | 0.9278 | -0.0112 | - |
658
+ | 0.7963 | 119500 | - | 0.0001 | 0.9273 | -0.0112 | - |
659
+ | 0.7997 | 120000 | 0.0002 | 0.0001 | 0.9281 | -0.0112 | - |
660
+ | 0.8030 | 120500 | - | 0.0001 | 0.9277 | -0.0112 | - |
661
+ | 0.8063 | 121000 | 0.0002 | 0.0001 | 0.9275 | -0.0112 | - |
662
+ | 0.8097 | 121500 | - | 0.0001 | 0.9284 | -0.0112 | - |
663
+ | 0.8130 | 122000 | 0.0002 | 0.0001 | 0.9274 | -0.0112 | - |
664
+ | 0.8163 | 122500 | - | 0.0001 | 0.9276 | -0.0112 | - |
665
+ | 0.8197 | 123000 | 0.0002 | 0.0001 | 0.9281 | -0.0112 | - |
666
+ | 0.8230 | 123500 | - | 0.0001 | 0.9271 | -0.0112 | - |
667
+ | 0.8263 | 124000 | 0.0001 | 0.0001 | 0.9275 | -0.0112 | - |
668
+ | 0.8297 | 124500 | - | 0.0001 | 0.9285 | -0.0112 | - |
669
+ | 0.8330 | 125000 | 0.0001 | 0.0001 | 0.9282 | -0.0112 | - |
670
+ | 0.8363 | 125500 | - | 0.0001 | 0.9278 | -0.0112 | - |
671
+ | 0.8397 | 126000 | 0.0001 | 0.0001 | 0.9277 | -0.0111 | - |
672
+ | 0.8430 | 126500 | - | 0.0001 | 0.9274 | -0.0111 | - |
673
+ | 0.8463 | 127000 | 0.0001 | 0.0001 | 0.9277 | -0.0111 | - |
674
+ | 0.8496 | 127500 | - | 0.0001 | 0.9281 | -0.0111 | - |
675
+ | 0.8530 | 128000 | 0.0001 | 0.0001 | 0.9283 | -0.0111 | - |
676
+ | 0.8563 | 128500 | - | 0.0001 | 0.9281 | -0.0111 | - |
677
+ | 0.8596 | 129000 | 0.0001 | 0.0001 | 0.9288 | -0.0111 | - |
678
+ | 0.8630 | 129500 | - | 0.0001 | 0.9287 | -0.0111 | - |
679
+ | 0.8663 | 130000 | 0.0001 | 0.0001 | 0.9284 | -0.0111 | - |
680
+ | 0.8696 | 130500 | - | 0.0001 | 0.9281 | -0.0111 | - |
681
+ | 0.8730 | 131000 | 0.0001 | 0.0001 | 0.9285 | -0.0111 | - |
682
+ | 0.8763 | 131500 | - | 0.0001 | 0.9286 | -0.0111 | - |
683
+ | 0.8796 | 132000 | 0.0001 | 0.0001 | 0.9288 | -0.0111 | - |
684
+ | 0.8830 | 132500 | - | 0.0001 | 0.9288 | -0.0110 | - |
685
+ | 0.8863 | 133000 | 0.0001 | 0.0001 | 0.9288 | -0.0110 | - |
686
+ | 0.8896 | 133500 | - | 0.0001 | 0.9296 | -0.0110 | - |
687
+ | 0.8930 | 134000 | 0.0001 | 0.0001 | 0.9293 | -0.0110 | - |
688
+ | 0.8963 | 134500 | - | 0.0001 | 0.9298 | -0.0110 | - |
689
+ | 0.8996 | 135000 | 0.0001 | 0.0001 | 0.9292 | -0.0110 | - |
690
+ | 0.9030 | 135500 | - | 0.0001 | 0.9293 | -0.0110 | - |
691
+ | 0.9063 | 136000 | 0.0001 | 0.0001 | 0.9293 | -0.0110 | - |
692
+ | 0.9096 | 136500 | - | 0.0001 | 0.9293 | -0.0110 | - |
693
+ | 0.9130 | 137000 | 0.0001 | 0.0001 | 0.9293 | -0.0110 | - |
694
+ | 0.9163 | 137500 | - | 0.0001 | 0.9295 | -0.0110 | - |
695
+ | 0.9196 | 138000 | 0.0001 | 0.0001 | 0.9292 | -0.0110 | - |
696
+ | 0.9230 | 138500 | - | 0.0001 | 0.9293 | -0.0110 | - |
697
+ | 0.9263 | 139000 | 0.0001 | 0.0001 | 0.9296 | -0.0110 | - |
698
+ | 0.9296 | 139500 | - | 0.0001 | 0.9292 | -0.0110 | - |
699
+ | 0.9329 | 140000 | 0.0001 | 0.0001 | 0.9296 | -0.0110 | - |
700
+ | 0.9363 | 140500 | - | 0.0001 | 0.9299 | -0.0109 | - |
701
+ | 0.9396 | 141000 | 0.0001 | 0.0001 | 0.9294 | -0.0109 | - |
702
+ | 0.9429 | 141500 | - | 0.0001 | 0.9295 | -0.0109 | - |
703
+ | 0.9463 | 142000 | 0.0001 | 0.0001 | 0.9294 | -0.0109 | - |
704
+ | 0.9496 | 142500 | - | 0.0001 | 0.9298 | -0.0109 | - |
705
+ | 0.9529 | 143000 | 0.0001 | 0.0001 | 0.9298 | -0.0109 | - |
706
+ | 0.9563 | 143500 | - | 0.0001 | 0.9298 | -0.0109 | - |
707
+ | 0.9596 | 144000 | 0.0001 | 0.0001 | 0.9296 | -0.0109 | - |
708
+ | 0.9629 | 144500 | - | 0.0001 | 0.9297 | -0.0109 | - |
709
+ | 0.9663 | 145000 | 0.0001 | 0.0001 | 0.9297 | -0.0109 | - |
710
+ | 0.9696 | 145500 | - | 0.0001 | 0.9299 | -0.0109 | - |
711
+ | 0.9729 | 146000 | 0.0001 | 0.0001 | 0.9299 | -0.0109 | - |
712
+ | 0.9763 | 146500 | - | 0.0001 | 0.9297 | -0.0109 | - |
713
+ | 0.9796 | 147000 | 0.0001 | 0.0001 | 0.9300 | -0.0109 | - |
714
+ | 0.9829 | 147500 | - | 0.0001 | 0.9298 | -0.0109 | - |
715
+ | 0.9863 | 148000 | 0.0001 | 0.0001 | 0.9298 | -0.0109 | - |
716
+ | 0.9896 | 148500 | - | 0.0001 | 0.9300 | -0.0109 | - |
717
+ | 0.9929 | 149000 | 0.0001 | 0.0001 | 0.9299 | -0.0109 | - |
718
+ | **0.9963** | **149500** | **-** | **0.0001** | **0.9301** | **-0.0109** | **-** |
719
+ | 0.9996 | 150000 | 0.0001 | 0.0001 | 0.9300 | -0.0109 | - |
720
+ | 1.0 | 150062 | - | - | - | - | 0.9301 |
721
+
722
+ * The bold row denotes the saved checkpoint.
723
+ </details>
724
+
725
+ ### Framework Versions
726
+ - Python: 3.12.4
727
+ - Sentence Transformers: 3.3.1
728
+ - Transformers: 4.47.1
729
+ - PyTorch: 2.5.1+cu124
730
+ - Accelerate: 1.0.1
731
+ - Datasets: 2.19.0
732
+ - Tokenizers: 0.21.0
733
+
734
+ ## Citation
735
+
736
+ ### BibTeX
737
+
738
+ #### Sentence Transformers
739
+ ```bibtex
740
+ @inproceedings{reimers-2019-sentence-bert,
741
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
742
+ author = "Reimers, Nils and Gurevych, Iryna",
743
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
744
+ month = "11",
745
+ year = "2019",
746
+ publisher = "Association for Computational Linguistics",
747
+ url = "https://arxiv.org/abs/1908.10084",
748
+ }
749
+ ```
750
+
751
+ #### MSELoss
752
+ ```bibtex
753
+ @inproceedings{reimers-2020-multilingual-sentence-bert,
754
+ title = "Making Monolingual Sentence Embeddings Multilingual using Knowledge Distillation",
755
+ author = "Reimers, Nils and Gurevych, Iryna",
756
+ booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing",
757
+ month = "11",
758
+ year = "2020",
759
+ publisher = "Association for Computational Linguistics",
760
+ url = "https://arxiv.org/abs/2004.09813",
761
+ }
762
+ ```
763
+
764
+ <!--
765
+ ## Glossary
766
+
767
+ *Clearly define terms in order to be accessible across audiences.*
768
+ -->
769
+
770
+ <!--
771
+ ## Model Card Authors
772
+
773
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
774
+ -->
775
+
776
+ <!--
777
+ ## Model Card Contact
778
+
779
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
780
+ -->
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "BAAI/bge-m3",
3
+ "architectures": [
4
+ "XLMRobertaModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1024,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 8194,
17
+ "model_type": "xlm-roberta",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 4,
20
+ "output_past": true,
21
+ "pad_token_id": 1,
22
+ "position_embedding_type": "absolute",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.47.1",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 250002
28
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.3.1",
4
+ "transformers": "4.47.1",
5
+ "pytorch": "2.5.1+cu124"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7262989751a87e4067635a51aeb0045c863f5e18a753fa749ad99eb35372f7d
3
+ size 1263329232
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 8192,
3
+ "do_lower_case": false
4
+ }
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4f7e21bec3fb0044ca0bb2d50eb5d4d8c596273c422baef84466d2c73748b9c
3
+ size 17083053
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 8192,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "sp_model_kwargs": {},
54
+ "tokenizer_class": "XLMRobertaTokenizer",
55
+ "unk_token": "<unk>"
56
+ }