nicolauduran45 committed on
Commit
dc536cc
·
verified ·
1 Parent(s): baffba6

nicolauduran45/climate-check-reranker

Browse files
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "id2label": {
12
+ "0": "LABEL_0"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "label2id": {
17
+ "LABEL_0": 0
18
+ },
19
+ "layer_norm_eps": 1e-12,
20
+ "max_position_embeddings": 512,
21
+ "model_type": "bert",
22
+ "num_attention_heads": 12,
23
+ "num_hidden_layers": 6,
24
+ "pad_token_id": 0,
25
+ "position_embedding_type": "absolute",
26
+ "sentence_transformers": {
27
+ "activation_fn": "torch.nn.modules.linear.Identity",
28
+ "version": "4.1.0"
29
+ },
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.51.3",
32
+ "type_vocab_size": 2,
33
+ "use_cache": true,
34
+ "vocab_size": 30522
35
+ }
eval/CrossEncoderRerankingEvaluator_climate-science-eval_results_@10.csv ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ epoch,steps,MAP,MRR@10,NDCG@10
2
+ 1.0,101,0.5691197419138596,0.5676184725097769,0.6679989404424285
3
+ 2.0,202,0.5638170002300437,0.5624137336093858,0.6643217667508755
4
+ 3.0,303,0.5558319863668835,0.5544801012192316,0.6582215365988808
5
+ 1.0,129,0.5086280999004784,0.5045864964343225,0.613350555873281
6
+ 2.0,258,0.5374024928372755,0.5358235564757303,0.6433150788570099
7
+ 3.0,387,0.5368503879373444,0.5346675868414998,0.6409086539085324
final/README.md ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - cross-encoder
8
+ - generated_from_trainer
9
+ - dataset_size:2057
10
+ - loss:BinaryCrossEntropyLoss
11
+ base_model: cross-encoder/ms-marco-MiniLM-L6-v2
12
+ pipeline_tag: text-ranking
13
+ library_name: sentence-transformers
14
+ metrics:
15
+ - map
16
+ - mrr@10
17
+ - ndcg@10
18
+ model-index:
19
+ - name: Climate-Science-Reranker
20
+ results:
21
+ - task:
22
+ type: cross-encoder-reranking
23
+ name: Cross Encoder Reranking
24
+ dataset:
25
+ name: climate science eval
26
+ type: climate-science-eval
27
+ metrics:
28
+ - type: map
29
+ value: 0.5374
30
+ name: Map
31
+ - type: mrr@10
32
+ value: 0.5358
33
+ name: Mrr@10
34
+ - type: ndcg@10
35
+ value: 0.6433
36
+ name: Ndcg@10
37
+ ---
38
+
39
+ # Climate-Science-Reranker
40
+
41
+ This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model fine-tuned from [cross-encoder/ms-marco-MiniLM-L6-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L6-v2) using the [sentence-transformers](https://www.SBERT.net) library. It computes scores for pairs of texts, which can be used for text reranking and semantic search.
42
+
43
+ ## Model Details
44
+
45
+ ### Model Description
46
+ - **Model Type:** Cross Encoder
47
+ - **Base model:** [cross-encoder/ms-marco-MiniLM-L6-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L6-v2) <!-- at revision ce0834f22110de6d9222af7a7a03628121708969 -->
48
+ - **Maximum Sequence Length:** 512 tokens
49
+ - **Number of Output Labels:** 1 label
50
+ <!-- - **Training Dataset:** Unknown -->
51
+ - **Language:** en
52
+ - **License:** apache-2.0
53
+
54
+ ### Model Sources
55
+
56
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
57
+ - **Documentation:** [Cross Encoder Documentation](https://www.sbert.net/docs/cross_encoder/usage/usage.html)
58
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
59
+ - **Hugging Face:** [Cross Encoders on Hugging Face](https://huggingface.co/models?library=sentence-transformers&other=cross-encoder)
60
+
61
+ ## Usage
62
+
63
+ ### Direct Usage (Sentence Transformers)
64
+
65
+ First install the Sentence Transformers library:
66
+
67
+ ```bash
68
+ pip install -U sentence-transformers
69
+ ```
70
+
71
+ Then you can load this model and run inference.
72
+ ```python
73
+ from sentence_transformers import CrossEncoder
74
+
75
+ # Download from the 🤗 Hub
76
+ model = CrossEncoder("nicolauduran45/climate-check-reranker")
77
+ # Get scores for pairs of texts
78
+ pairs = [
79
+ ['New study suggests global warming might be less severe than previously predicted by models. #ClimateChange #Science', 'Abstract. Using a recent theoretical approach, we study how global warming impacts the thermodynamics of the climate system by performing experiments with a simplified yet Earth-like climate model. The intensity of the Lorenz energy cycle, the Carnot efficiency, the material entropy production, and the degree of irreversibility of the system change monotonically with the CO2 concentration. Moreover, these quantities feature an approximately linear behaviour with respect to the logarithm of the CO2 concentration in a relatively wide range. These generalized sensitivities suggest that the climate becomes less efficient, more irreversible, and features higher entropy production as it becomes warmer, with changes in the latent heat fluxes playing a predominant role. These results may be of help for explaining recent findings obtained with state of the art climate models regarding how increases in CO2 concentration impact the vertical stratification of the tropical and extratropical atmosphere and the position of the storm tracks.'],
80
+ ["Heard someone saying human-made carbon might be a cause of climate change, but there's no proof it's a big deal. 🤔 #climatechange #science", "Climate change has been recognized as the foremost environmental problem of the twentyfirst century and has been a subject of considerable debate and controversy.It is predicted to lead to adverse, irreversible impacts on the earth and the ecosystem as a whole.Although it is difficult to connect specific weather events to climate change, increases in global temperature has been predicted to cause broader changes, including glacial retreat, arctic shrinkage and worldwide sea level rise.Climate change has been implicated in mass mortalities of several aquatic species including plants, fish, corals and mammals.The present chapter has been divided in to two parts; the first part discusses the causes and general concerns of global climate change and the second part deals, specifically, on the impacts of climate change on fisheries and aquaculture, possible mitigation options and development of suitable monitoring tools. www.intechopen.comClimate Change and Variability 120 anthropogenic greenhouse gas (GHG) emissions over the decades are major contributors to the global warming. 
Natural processes affecting the earth's temperatureSun is the primary source of energy on earth.Though the sun's output is nearly constant, small changes over an extended period of time can lead to climate change.The earth's climate changes are in response to many natural processes like orbital forcing (variations in its orbit around the Sun), volcanic eruptions, and atmospheric greenhouse gas concentrations.Changes in atmospheric concentrations of greenhouse gases and aerosols, land-cover and solar radiation alter the energy balance of the climate system and causes warming or cooling of the earth's atmosphere.Volcanic eruptions emit many gases and one of the most important of these is sulfur dioxide (SO 2 ) which forms sulfate aerosol (SO 4 ) in the atmosphere. Greenhouse gasesGreenhouse gases (GHGs) are those gaseous constituents of the atmosphere, both natural and anthropogenic, that are responsible for the greenhouse effect, leading to an increase in the amount of infrared or thermal radiation near the surface.While water vapor (H 2 O), carbon dioxide (CO 2 ), nitrous oxide (N 2 O), methane (CH 4 ), and ozone (O 3 ) are the primary greenhouse gases in the Earth's atmosphere, there are a number of entirely human-made greenhouse gases in the atmosphere, such as the halocarbons and other chlorine-and bromine-containing substances.Halocarbons such as CFCs (chlorofluorocarbons) are completely artificial (man-made), and are produced from the chemical industry in which they are used as coolants and in foam blowing.Increases in CO 2 are the single largest factor contributing more than 60% of humanenhanced increases and more than 90% of rapid increase in past decade.Most CO 2 emissions are from the burning of fossil fuels such as coal, oil, and gas.Rising CO 2 is also related to deforestation, which eliminates an important carbon sink of the terrestrial biosphere (www.ncdc.noaa.gov/oa/climate/globalwarming.html;Shea et al., 2007).Currently, the atmosphere contains about 
370 ppm of CO 2 , which is the highest concentration in 420000 years and perhaps as long as 2 million years.Estimates of CO 2 concentrations at the end of the 21 st century range from 490 to 1260 ppm, or a 75% to 350% increase above preindustrial concentrations (WMO World Data Centre for Greenhouse Gases.Greenhouse gas bulletin, 2006; Shea KM and the Committee on Environmental Health, 2007) . Impacts of climate changeAlthough it is difficult to connect specific weather events to global warming, an increase in global temperatures may in turn cause broader changes, including glacial retreat, arctic shrinkage, and worldwide sea level rise.Changes in the amount and pattern of precipitation may result in flooding and drought.Other effects may include changes in agricultural yields, addition of new trade routes, reduced summer stream flows, species extinctions, and increases in the range of disease vectors (Understanding and responding to Climate Change."],
81
+ ['New research shows Antarctic is actually cooling down.. #ClimateScience #Antarctica', 'An annually resolved ice-core record from West Antarctica indicates that warming driven by local insolation resulting from sea-ice decline began in that region about 2,000 years before warming in East Antarctica, reconciling two alternative explanations for deglacial warming in the Southern Hemisphere. There are two main theories vying to explain Antarctic climate changes at the time of Northern Hemisphere deglaciations. One holds that changes in ocean circulation — driven by changes in Northern Hemisphere insolation — govern Southern Hemisphere climate. The other argues for a dominant influence from local changes in insolation. It has been difficult to differentiate between the two because of the low resolution of many ice-core records. Now Tyler Fudge and colleagues present an annually resolved ice-core record from the West Antarctic Ice Sheet Divide site and reveal an increase in snowfall about 18,000 years ago, preceded by a distinct warming 20,000 years ago. Changes in local insolation and nearby sea ice appear to be the cause of the early warming, suggesting that East and West Antarctica may respond to different deglacial forcings. The cause of warming in the Southern Hemisphere during the most recent deglaciation remains a matter of debate1,2. Hypotheses for a Northern Hemisphere trigger, through oceanic redistributions of heat, are based in part on the abrupt onset of warming seen in East Antarctic ice cores and dated to 18,000 years ago, which is several thousand years after high-latitude Northern Hemisphere summer insolation intensity began increasing from its minimum, approximately 24,000 years ago3,4. An alternative explanation is that local solar insolation changes cause the Southern Hemisphere to warm independently2,5. Here we present results from a new, annually resolved ice-core record from West Antarctica that reconciles these two views. 
The records show that 18,000 years ago snow accumulation in West Antarctica began increasing, coincident with increasing carbon dioxide concentrations, warming in East Antarctica and cooling in the Northern Hemisphere6 associated with an abrupt decrease in Atlantic meridional overturning circulation7. However, significant warming in West Antarctica began at least 2,000 years earlier. Circum-Antarctic sea-ice decline, driven by increasing local insolation, is the likely cause of this warming. The marine-influenced West Antarctic records suggest a more active role for the Southern Ocean in the onset of deglaciation than is inferred from ice cores in the East Antarctic interior, which are largely isolated from sea-ice changes.'],
82
+ ['There are significant #environment benefits to cutting animal products out of the diet', 'Abstract Climate change is the most significant threat to the Great Barrier Reef (GBR). While Australians express appreciation and concern for the GBR, it is not clear whether they connect climate‐related action with reef conservation. An online survey of 4,285 Australians asked “…what types of actions could people like you do that would be helpful for the GBR?” Only 4.1% mentioned a specific action related to mitigating climate change; another 3.8% mentioned climate change but no specific action. The most common responses related to reducing plastic pollution (25.6%). These findings demonstrate that most Australians have poor capacity to identify individual climate‐related actions as helpful for reef protection, and that generic calls to action—such as “protect the reef”—are unlikely to elicit climate‐related actions. As such, reef conservation initiatives must explicitly promote actions—in the home and in society—that reduce emissions and support the transition to a low carbon society.'],
83
+ ["According to a recent study, there's no link between CO2 emissions and temperature changes in our lifetime. #ClimateChange #Science", 'Abstract It has been speculated that future global warming will change the frequency and severity of tropical and extratropical storms. The U.S. east coast is heavily impacted by such storms, so it is important to determine their natural temporal variability for the last century during which global warming has been relatively small. Storm surge data obtained from hourly tide gauge records provide a unique quantitative measure of storm duration and intensity, unlike qualitative estimates based on eyewitness reports or meteorological hindcasts. To demonstrate the potential of storm surge data for climate analysis, the authors have evaluated 10 very long records of water level anomalies. An analysis of the hourly tide gauge records along the U.S. east coast shows a considerable interdecadal variation but no discernible long-term trend in the number and intensity of moderate and severe coastal storms during this century. However, sea level rise over the last century has exacerbated the damage to fixed structu...'],
84
+ ]
85
+ scores = model.predict(pairs)
86
+ print(scores.shape)
87
+ # (5,)
88
+
89
+ # Or rank different texts based on similarity to a single text
90
+ ranks = model.rank(
91
+ 'New study suggests global warming might be less severe than previously predicted by models. #ClimateChange #Science',
92
+ [
93
+ 'Abstract. Using a recent theoretical approach, we study how global warming impacts the thermodynamics of the climate system by performing experiments with a simplified yet Earth-like climate model. The intensity of the Lorenz energy cycle, the Carnot efficiency, the material entropy production, and the degree of irreversibility of the system change monotonically with the CO2 concentration. Moreover, these quantities feature an approximately linear behaviour with respect to the logarithm of the CO2 concentration in a relatively wide range. These generalized sensitivities suggest that the climate becomes less efficient, more irreversible, and features higher entropy production as it becomes warmer, with changes in the latent heat fluxes playing a predominant role. These results may be of help for explaining recent findings obtained with state of the art climate models regarding how increases in CO2 concentration impact the vertical stratification of the tropical and extratropical atmosphere and the position of the storm tracks.',
94
+ "Climate change has been recognized as the foremost environmental problem of the twentyfirst century and has been a subject of considerable debate and controversy.It is predicted to lead to adverse, irreversible impacts on the earth and the ecosystem as a whole.Although it is difficult to connect specific weather events to climate change, increases in global temperature has been predicted to cause broader changes, including glacial retreat, arctic shrinkage and worldwide sea level rise.Climate change has been implicated in mass mortalities of several aquatic species including plants, fish, corals and mammals.The present chapter has been divided in to two parts; the first part discusses the causes and general concerns of global climate change and the second part deals, specifically, on the impacts of climate change on fisheries and aquaculture, possible mitigation options and development of suitable monitoring tools. www.intechopen.comClimate Change and Variability 120 anthropogenic greenhouse gas (GHG) emissions over the decades are major contributors to the global warming. Natural processes affecting the earth's temperatureSun is the primary source of energy on earth.Though the sun's output is nearly constant, small changes over an extended period of time can lead to climate change.The earth's climate changes are in response to many natural processes like orbital forcing (variations in its orbit around the Sun), volcanic eruptions, and atmospheric greenhouse gas concentrations.Changes in atmospheric concentrations of greenhouse gases and aerosols, land-cover and solar radiation alter the energy balance of the climate system and causes warming or cooling of the earth's atmosphere.Volcanic eruptions emit many gases and one of the most important of these is sulfur dioxide (SO 2 ) which forms sulfate aerosol (SO 4 ) in the atmosphere. 
Greenhouse gasesGreenhouse gases (GHGs) are those gaseous constituents of the atmosphere, both natural and anthropogenic, that are responsible for the greenhouse effect, leading to an increase in the amount of infrared or thermal radiation near the surface.While water vapor (H 2 O), carbon dioxide (CO 2 ), nitrous oxide (N 2 O), methane (CH 4 ), and ozone (O 3 ) are the primary greenhouse gases in the Earth's atmosphere, there are a number of entirely human-made greenhouse gases in the atmosphere, such as the halocarbons and other chlorine-and bromine-containing substances.Halocarbons such as CFCs (chlorofluorocarbons) are completely artificial (man-made), and are produced from the chemical industry in which they are used as coolants and in foam blowing.Increases in CO 2 are the single largest factor contributing more than 60% of humanenhanced increases and more than 90% of rapid increase in past decade.Most CO 2 emissions are from the burning of fossil fuels such as coal, oil, and gas.Rising CO 2 is also related to deforestation, which eliminates an important carbon sink of the terrestrial biosphere (www.ncdc.noaa.gov/oa/climate/globalwarming.html;Shea et al., 2007).Currently, the atmosphere contains about 370 ppm of CO 2 , which is the highest concentration in 420000 years and perhaps as long as 2 million years.Estimates of CO 2 concentrations at the end of the 21 st century range from 490 to 1260 ppm, or a 75% to 350% increase above preindustrial concentrations (WMO World Data Centre for Greenhouse Gases.Greenhouse gas bulletin, 2006; Shea KM and the Committee on Environmental Health, 2007) . 
Impacts of climate changeAlthough it is difficult to connect specific weather events to global warming, an increase in global temperatures may in turn cause broader changes, including glacial retreat, arctic shrinkage, and worldwide sea level rise.Changes in the amount and pattern of precipitation may result in flooding and drought.Other effects may include changes in agricultural yields, addition of new trade routes, reduced summer stream flows, species extinctions, and increases in the range of disease vectors (Understanding and responding to Climate Change.",
95
+ 'An annually resolved ice-core record from West Antarctica indicates that warming driven by local insolation resulting from sea-ice decline began in that region about 2,000 years before warming in East Antarctica, reconciling two alternative explanations for deglacial warming in the Southern Hemisphere. There are two main theories vying to explain Antarctic climate changes at the time of Northern Hemisphere deglaciations. One holds that changes in ocean circulation — driven by changes in Northern Hemisphere insolation — govern Southern Hemisphere climate. The other argues for a dominant influence from local changes in insolation. It has been difficult to differentiate between the two because of the low resolution of many ice-core records. Now Tyler Fudge and colleagues present an annually resolved ice-core record from the West Antarctic Ice Sheet Divide site and reveal an increase in snowfall about 18,000 years ago, preceded by a distinct warming 20,000 years ago. Changes in local insolation and nearby sea ice appear to be the cause of the early warming, suggesting that East and West Antarctica may respond to different deglacial forcings. The cause of warming in the Southern Hemisphere during the most recent deglaciation remains a matter of debate1,2. Hypotheses for a Northern Hemisphere trigger, through oceanic redistributions of heat, are based in part on the abrupt onset of warming seen in East Antarctic ice cores and dated to 18,000 years ago, which is several thousand years after high-latitude Northern Hemisphere summer insolation intensity began increasing from its minimum, approximately 24,000 years ago3,4. An alternative explanation is that local solar insolation changes cause the Southern Hemisphere to warm independently2,5. Here we present results from a new, annually resolved ice-core record from West Antarctica that reconciles these two views. 
The records show that 18,000 years ago snow accumulation in West Antarctica began increasing, coincident with increasing carbon dioxide concentrations, warming in East Antarctica and cooling in the Northern Hemisphere6 associated with an abrupt decrease in Atlantic meridional overturning circulation7. However, significant warming in West Antarctica began at least 2,000 years earlier. Circum-Antarctic sea-ice decline, driven by increasing local insolation, is the likely cause of this warming. The marine-influenced West Antarctic records suggest a more active role for the Southern Ocean in the onset of deglaciation than is inferred from ice cores in the East Antarctic interior, which are largely isolated from sea-ice changes.',
96
+ 'Abstract Climate change is the most significant threat to the Great Barrier Reef (GBR). While Australians express appreciation and concern for the GBR, it is not clear whether they connect climate‐related action with reef conservation. An online survey of 4,285 Australians asked “…what types of actions could people like you do that would be helpful for the GBR?” Only 4.1% mentioned a specific action related to mitigating climate change; another 3.8% mentioned climate change but no specific action. The most common responses related to reducing plastic pollution (25.6%). These findings demonstrate that most Australians have poor capacity to identify individual climate‐related actions as helpful for reef protection, and that generic calls to action—such as “protect the reef”—are unlikely to elicit climate‐related actions. As such, reef conservation initiatives must explicitly promote actions—in the home and in society—that reduce emissions and support the transition to a low carbon society.',
97
+ 'Abstract It has been speculated that future global warming will change the frequency and severity of tropical and extratropical storms. The U.S. east coast is heavily impacted by such storms, so it is important to determine their natural temporal variability for the last century during which global warming has been relatively small. Storm surge data obtained from hourly tide gauge records provide a unique quantitative measure of storm duration and intensity, unlike qualitative estimates based on eyewitness reports or meteorological hindcasts. To demonstrate the potential of storm surge data for climate analysis, the authors have evaluated 10 very long records of water level anomalies. An analysis of the hourly tide gauge records along the U.S. east coast shows a considerable interdecadal variation but no discernible long-term trend in the number and intensity of moderate and severe coastal storms during this century. However, sea level rise over the last century has exacerbated the damage to fixed structu...',
98
+ ]
99
+ )
100
+ # [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
101
+ ```
102
+
103
+ <!--
104
+ ### Direct Usage (Transformers)
105
+
106
+ <details><summary>Click to see the direct usage in Transformers</summary>
107
+
108
+ </details>
109
+ -->
110
+
111
+ <!--
112
+ ### Downstream Usage (Sentence Transformers)
113
+
114
+ You can finetune this model on your own dataset.
115
+
116
+ <details><summary>Click to expand</summary>
117
+
118
+ </details>
119
+ -->
120
+
121
+ <!--
122
+ ### Out-of-Scope Use
123
+
124
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
125
+ -->
126
+
127
+ ## Evaluation
128
+
129
+ ### Metrics
130
+
131
+ #### Cross Encoder Reranking
132
+
133
+ * Dataset: `climate-science-eval`
134
+ * Evaluated with [<code>CrossEncoderRerankingEvaluator</code>](https://sbert.net/docs/package_reference/cross_encoder/evaluation.html#sentence_transformers.cross_encoder.evaluation.CrossEncoderRerankingEvaluator) with these parameters:
135
+ ```json
136
+ {
137
+ "at_k": 10,
138
+ "always_rerank_positives": true
139
+ }
140
+ ```
141
+
142
+ | Metric | Value |
143
+ |:------------|:---------------------|
144
+ | map | 0.5374 (+0.3696) |
145
+ | mrr@10 | 0.5358 (+0.3812) |
146
+ | **ndcg@10** | **0.6433 (+0.4521)** |
147
+
148
+ <!--
149
+ ## Bias, Risks and Limitations
150
+
151
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
152
+ -->
153
+
154
+ <!--
155
+ ### Recommendations
156
+
157
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
158
+ -->
159
+
160
+ ## Training Details
161
+
162
+ ### Training Dataset
163
+
164
+ #### Unnamed Dataset
165
+
166
+ * Size: 2,057 training samples
167
+ * Columns: <code>query</code>, <code>answer</code>, and <code>label</code>
168
+ * Approximate statistics based on the first 1000 samples:
169
+ | | query | answer | label |
170
+ |:--------|:-------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------|:------------------------------------------------|
171
+ | type | string | string | int |
172
+ | details | <ul><li>min: 27 characters</li><li>mean: 113.61 characters</li><li>max: 236 characters</li></ul> | <ul><li>min: 231 characters</li><li>mean: 1952.39 characters</li><li>max: 23789 characters</li></ul> | <ul><li>0: ~73.70%</li><li>1: ~26.30%</li></ul> |
173
+ * Samples:
174
+ | query | answer | label |
175
+ |:---------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|
176
+ | <code>New study suggests global warming might be less severe than previously predicted by models. #ClimateChange #Science</code> | <code>Abstract. Using a recent theoretical approach, we study how global warming impacts the thermodynamics of the climate system by performing experiments with a simplified yet Earth-like climate model. The intensity of the Lorenz energy cycle, the Carnot efficiency, the material entropy production, and the degree of irreversibility of the system change monotonically with the CO2 concentration. Moreover, these quantities feature an approximately linear behaviour with respect to the logarithm of the CO2 concentration in a relatively wide range. These generalized sensitivities suggest that the climate becomes less efficient, more irreversible, and features higher entropy production as it becomes warmer, with changes in the latent heat fluxes playing a predominant role. These results may be of help for explaining recent findings obtained with state of the art climate models regarding how increases in CO2 concentration impact the vertical stratification of the tropical and extratropical atmosph...</code> | <code>0</code> |
177
+ | <code>Heard someone saying human-made carbon might be a cause of climate change, but there's no proof it's a big deal. 🤔 #climatechange #science</code> | <code>Climate change has been recognized as the foremost environmental problem of the twentyfirst century and has been a subject of considerable debate and controversy.It is predicted to lead to adverse, irreversible impacts on the earth and the ecosystem as a whole.Although it is difficult to connect specific weather events to climate change, increases in global temperature has been predicted to cause broader changes, including glacial retreat, arctic shrinkage and worldwide sea level rise.Climate change has been implicated in mass mortalities of several aquatic species including plants, fish, corals and mammals.The present chapter has been divided in to two parts; the first part discusses the causes and general concerns of global climate change and the second part deals, specifically, on the impacts of climate change on fisheries and aquaculture, possible mitigation options and development of suitable monitoring tools. www.intechopen.comClimate Change and Variability 120 anthropogenic gree...</code> | <code>1</code> |
178
+ | <code>New research shows Antarctic is actually cooling down.. #ClimateScience #Antarctica</code> | <code>An annually resolved ice-core record from West Antarctica indicates that warming driven by local insolation resulting from sea-ice decline began in that region about 2,000 years before warming in East Antarctica, reconciling two alternative explanations for deglacial warming in the Southern Hemisphere. There are two main theories vying to explain Antarctic climate changes at the time of Northern Hemisphere deglaciations. One holds that changes in ocean circulation — driven by changes in Northern Hemisphere insolation — govern Southern Hemisphere climate. The other argues for a dominant influence from local changes in insolation. It has been difficult to differentiate between the two because of the low resolution of many ice-core records. Now Tyler Fudge and colleagues present an annually resolved ice-core record from the West Antarctic Ice Sheet Divide site and reveal an increase in snowfall about 18,000 years ago, preceded by a distinct warming 20,000 years ago. Changes in local insol...</code> | <code>0</code> |
179
+ * Loss: [<code>BinaryCrossEntropyLoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#binarycrossentropyloss) with these parameters:
180
+ ```json
181
+ {
182
+ "activation_fn": "torch.nn.modules.linear.Identity",
183
+ "pos_weight": 6
184
+ }
185
+ ```
186
+
187
+ ### Training Hyperparameters
188
+ #### Non-Default Hyperparameters
189
+
190
+ - `eval_strategy`: epoch
191
+ - `per_device_train_batch_size`: 16
192
+ - `per_device_eval_batch_size`: 16
193
+ - `learning_rate`: 2e-05
194
+ - `warmup_ratio`: 0.1
195
+ - `fp16`: True
196
+ - `dataloader_num_workers`: 4
197
+ - `load_best_model_at_end`: True
198
+
199
+ #### All Hyperparameters
200
+ <details><summary>Click to expand</summary>
201
+
202
+ - `overwrite_output_dir`: False
203
+ - `do_predict`: False
204
+ - `eval_strategy`: epoch
205
+ - `prediction_loss_only`: True
206
+ - `per_device_train_batch_size`: 16
207
+ - `per_device_eval_batch_size`: 16
208
+ - `per_gpu_train_batch_size`: None
209
+ - `per_gpu_eval_batch_size`: None
210
+ - `gradient_accumulation_steps`: 1
211
+ - `eval_accumulation_steps`: None
212
+ - `torch_empty_cache_steps`: None
213
+ - `learning_rate`: 2e-05
214
+ - `weight_decay`: 0.0
215
+ - `adam_beta1`: 0.9
216
+ - `adam_beta2`: 0.999
217
+ - `adam_epsilon`: 1e-08
218
+ - `max_grad_norm`: 1.0
219
+ - `num_train_epochs`: 3
220
+ - `max_steps`: -1
221
+ - `lr_scheduler_type`: linear
222
+ - `lr_scheduler_kwargs`: {}
223
+ - `warmup_ratio`: 0.1
224
+ - `warmup_steps`: 0
225
+ - `log_level`: passive
226
+ - `log_level_replica`: warning
227
+ - `log_on_each_node`: True
228
+ - `logging_nan_inf_filter`: True
229
+ - `save_safetensors`: True
230
+ - `save_on_each_node`: False
231
+ - `save_only_model`: False
232
+ - `restore_callback_states_from_checkpoint`: False
233
+ - `no_cuda`: False
234
+ - `use_cpu`: False
235
+ - `use_mps_device`: False
236
+ - `seed`: 42
237
+ - `data_seed`: None
238
+ - `jit_mode_eval`: False
239
+ - `use_ipex`: False
240
+ - `bf16`: False
241
+ - `fp16`: True
242
+ - `fp16_opt_level`: O1
243
+ - `half_precision_backend`: auto
244
+ - `bf16_full_eval`: False
245
+ - `fp16_full_eval`: False
246
+ - `tf32`: None
247
+ - `local_rank`: 0
248
+ - `ddp_backend`: None
249
+ - `tpu_num_cores`: None
250
+ - `tpu_metrics_debug`: False
251
+ - `debug`: []
252
+ - `dataloader_drop_last`: False
253
+ - `dataloader_num_workers`: 4
254
+ - `dataloader_prefetch_factor`: None
255
+ - `past_index`: -1
256
+ - `disable_tqdm`: False
257
+ - `remove_unused_columns`: True
258
+ - `label_names`: None
259
+ - `load_best_model_at_end`: True
260
+ - `ignore_data_skip`: False
261
+ - `fsdp`: []
262
+ - `fsdp_min_num_params`: 0
263
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
264
+ - `tp_size`: 0
265
+ - `fsdp_transformer_layer_cls_to_wrap`: None
266
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
267
+ - `deepspeed`: None
268
+ - `label_smoothing_factor`: 0.0
269
+ - `optim`: adamw_torch
270
+ - `optim_args`: None
271
+ - `adafactor`: False
272
+ - `group_by_length`: False
273
+ - `length_column_name`: length
274
+ - `ddp_find_unused_parameters`: None
275
+ - `ddp_bucket_cap_mb`: None
276
+ - `ddp_broadcast_buffers`: False
277
+ - `dataloader_pin_memory`: True
278
+ - `dataloader_persistent_workers`: False
279
+ - `skip_memory_metrics`: True
280
+ - `use_legacy_prediction_loop`: False
281
+ - `push_to_hub`: False
282
+ - `resume_from_checkpoint`: None
283
+ - `hub_model_id`: None
284
+ - `hub_strategy`: every_save
285
+ - `hub_private_repo`: None
286
+ - `hub_always_push`: False
287
+ - `gradient_checkpointing`: False
288
+ - `gradient_checkpointing_kwargs`: None
289
+ - `include_inputs_for_metrics`: False
290
+ - `include_for_metrics`: []
291
+ - `eval_do_concat_batches`: True
292
+ - `fp16_backend`: auto
293
+ - `push_to_hub_model_id`: None
294
+ - `push_to_hub_organization`: None
295
+ - `mp_parameters`:
296
+ - `auto_find_batch_size`: False
297
+ - `full_determinism`: False
298
+ - `torchdynamo`: None
299
+ - `ray_scope`: last
300
+ - `ddp_timeout`: 1800
301
+ - `torch_compile`: False
302
+ - `torch_compile_backend`: None
303
+ - `torch_compile_mode`: None
304
+ - `include_tokens_per_second`: False
305
+ - `include_num_input_tokens_seen`: False
306
+ - `neftune_noise_alpha`: None
307
+ - `optim_target_modules`: None
308
+ - `batch_eval_metrics`: False
309
+ - `eval_on_start`: False
310
+ - `use_liger_kernel`: False
311
+ - `eval_use_gather_object`: False
312
+ - `average_tokens_across_devices`: False
313
+ - `prompts`: None
314
+ - `batch_sampler`: batch_sampler
315
+ - `multi_dataset_batch_sampler`: proportional
316
+
317
+ </details>
318
+
319
+ ### Training Logs
320
+ | Epoch | Step | Training Loss | climate-science-eval_ndcg@10 |
321
+ |:-------:|:-------:|:-------------:|:----------------------------:|
322
+ | 0.0078 | 1 | 4.4699 | - |
323
+ | 1.0 | 129 | - | 0.6134 (+0.4221) |
324
+ | **2.0** | **258** | **-** | **0.6433 (+0.4521)** |
325
+ | 3.0 | 387 | - | 0.6409 (+0.4497) |
326
+ | -1 | -1 | - | 0.6433 (+0.4521) |
327
+
328
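+ * The bold row denotes the saved checkpoint. Values in parentheses are the change in NDCG@10 relative to the original (un-reranked) candidate ordering. The final row with epoch and step `-1` is an evaluation run outside the training loop; because `load_best_model_at_end` is enabled, it scores the reloaded best checkpoint and therefore matches the step-258 result.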
329
+
330
+ ### Framework Versions
331
+ - Python: 3.11.12
332
+ - Sentence Transformers: 4.1.0
333
+ - Transformers: 4.51.3
334
+ - PyTorch: 2.6.0+cu124
335
+ - Accelerate: 1.6.0
336
+ - Datasets: 3.6.0
337
+ - Tokenizers: 0.21.1
338
+
339
+ ## Citation
340
+
341
+ ### BibTeX
342
+
343
+ #### Sentence Transformers
344
+ ```bibtex
345
+ @inproceedings{reimers-2019-sentence-bert,
346
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
347
+ author = "Reimers, Nils and Gurevych, Iryna",
348
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
349
+ month = "11",
350
+ year = "2019",
351
+ publisher = "Association for Computational Linguistics",
352
+ url = "https://arxiv.org/abs/1908.10084",
353
+ }
354
+ ```
355
+
356
+ <!--
357
+ ## Glossary
358
+
359
+ *Clearly define terms in order to be accessible across audiences.*
360
+ -->
361
+
362
+ <!--
363
+ ## Model Card Authors
364
+
365
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
366
+ -->
367
+
368
+ <!--
369
+ ## Model Card Contact
370
+
371
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
372
+ -->
final/config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "id2label": {
12
+ "0": "LABEL_0"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "label2id": {
17
+ "LABEL_0": 0
18
+ },
19
+ "layer_norm_eps": 1e-12,
20
+ "max_position_embeddings": 512,
21
+ "model_type": "bert",
22
+ "num_attention_heads": 12,
23
+ "num_hidden_layers": 6,
24
+ "pad_token_id": 0,
25
+ "position_embedding_type": "absolute",
26
+ "sentence_transformers": {
27
+ "activation_fn": "torch.nn.modules.linear.Identity",
28
+ "version": "4.1.0"
29
+ },
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.51.3",
32
+ "type_vocab_size": 2,
33
+ "use_cache": true,
34
+ "vocab_size": 30522
35
+ }
final/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:979fc41c25ae713ec3eec27176d2dea58a605c06b2461b630f9c42f324bf5537
3
+ size 90866412
final/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
final/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
final/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
final/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:979fc41c25ae713ec3eec27176d2dea58a605c06b2461b630f9c42f324bf5537
3
+ size 90866412
runs/May14_14-13-26_4dbe68c1c791/events.out.tfevents.1747232019.4dbe68c1c791.3104.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a1b092b1521f96bc54e0b3a6e3c07d404bc9d8a4ae32c84deb34a7de8cc2cfb
3
+ size 6377
runs/May14_14-15-01_4dbe68c1c791/events.out.tfevents.1747232104.4dbe68c1c791.3104.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:958b7109d6b81d34faa4aa3ce602bdacad0cfca9d05fb63435ead89b9844e457
3
+ size 5747
runs/May14_14-21-16_4dbe68c1c791/events.out.tfevents.1747232479.4dbe68c1c791.3104.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97af20c79e83c376f02d94d2879770e1ba35722d83f72e38e8424b2e34545806
3
+ size 8071
runs/May14_14-25-47_4dbe68c1c791/events.out.tfevents.1747232750.4dbe68c1c791.3104.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:610c776df753082bab96db2f377d8e135c5666aa89c08bcc686ba61498e89d4a
3
+ size 8081
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc6a752c01ca6461c5922d067f4345b9fb01fe24e636a0e9390837765654b57
3
+ size 5880
vocab.txt ADDED
The diff for this file is too large to render. See raw diff