Add new SentenceTransformer model.
- 1_Pooling/config.json +10 -0
- README.md +454 -0
- config.json +32 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +57 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
    "word_embedding_dimension": 768,
    "pooling_mode_cls_token": true,
    "pooling_mode_mean_tokens": false,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false,
    "pooling_mode_weightedmean_tokens": false,
    "pooling_mode_lasttoken": false,
    "include_prompt": true
}
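Per this config, the model uses CLS-token pooling: the sentence embedding is simply the Transformer's hidden state at the first ([CLS]) token, with no averaging over tokens. A minimal sketch of that operation (tensor shapes are assumptions for illustration, not code from this repo):

```python
import torch

def cls_pooling(token_embeddings: torch.Tensor) -> torch.Tensor:
    # token_embeddings: (batch_size, seq_len, 768), the Transformer module's output.
    # CLS pooling keeps only the first token's hidden state: (batch_size, 768).
    return token_embeddings[:, 0]
```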
README.md
ADDED
@@ -0,0 +1,454 @@
---
base_model: BAAI/bge-base-en-v1.5
datasets: []
language: []
library_name: sentence-transformers
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:1340
- loss:MultipleNegativesRankingLoss
widget:
- source_sentence: Who popularized the term 'Dalit'?
  sentences:
  - Fakhruddin Ali Ahmed was the fifth President of India from 1974 to 1977 and also the 2nd President of India to die in office.
  - Arunachal Pradesh or South Tibet is a state between India and China. The country that owns this region is disputed. China says that they own it and call it South Tibet (Zangnan 藏南). In 2017, China started renaming places in this territory. In 2019 China destroyed 30,000 "incorrect" world maps that showed South Tibet as part of India.
  - '"Dalit" refers to socially, economically and historically marginalized communities predominantly in India. It also means "broken/scattered" in Sanskrit and Hindi. The term "dalits" was in use as a translation for the British Raj census classification of "Depressed Classes" prior to 1935. It was popularised by the economist and reformer B. R. Ambedkar (1891–1956), who included all depressed people irrespective of their caste into the definition of dalits. Hence the first group he made was called the "Labour Party" and included as its members all people of the society who were kept depressed, including women, small scale farmers and people from backward castes.'
- source_sentence: What is India's contribution to the Olympic Movement?
  sentences:
  - Prem Pal Singh Rawat (in India called Maharaji and in the past called Guru Maharaj Ji and Balyogeshwar) was born in India on December 10, 1957. He teaches inner peace by the use of what he calls "Knowledge". Groups that have helped him are the Divine Light Mission, Elan Vital (1983), and The Prem Rawat Foundation (2001).
  - 'Boota Singh (Gurmukhi: ਬੂਟਾ ਸਿੰਘ; Shahmukhi: بوٹا سنگھ), sometimes spelled as Buta Singh, was a Sikh soldier in the British Army. He served in Burma during World War II, under the command of Lord Mountbatten. He is very well known in India and Pakistan. He is famous for his tragic love story with Zainab, a Muslim girl who he rescued from the riots during the partition of India in 1947.'
  - India at the Olympics is a history which includes 32 games in 19 countries and 800+ athletes. Since 1900, India has contributed to the growth of the "Olympic Movement".
- source_sentence: What is significant about the fort in Jhansi?
  sentences:
  - Western India is a region of the Republic of India, it includes Gujarat, Madhya Pradesh and Maharashtra.
  - The Government of India Act 1858 was an Act of the Parliament of the United Kingdom (21 & 22 Vict. c. 106) passed on August 2, 1858. Its provisions called for the liquidation of the British East India Company (who had up to this point been ruling British India under the auspices of Parliament) and the transference of its functions to the British Crown.
  - Jhansi is a historic city of India between the rivers Pahunj and Betwa in the northern state of Uttar Pradesh, close to the border with Madhya Pradesh. Jhansi is the administrative headquarters of Jhansi District and Jhansi Division. The original walled city grew up around its stone fort, which was built in 1613. The city is well connected to all other major towns in Uttar Pradesh by road and railway networks. It is called "gateway to Bundelkhand". Jhansi was besieged and taken by British forces in 1858 during the Indian Rebellion of 1857.
- source_sentence: How is Dhanteras celebrated in Nepal?
  sentences:
  - The National Stock Exchange of India Limited (NSE), is a Mumbai-based stock exchange. It is the biggest stock exchange in India and the third biggest in the world in terms of amounts of transactions. NSE is mutually-owned by a set of leading financial institutions, banks, insurance companies and other financial intermediaries in India but its ownership and management operate as separate groups. As of 2006, the NSE VSAT terminals, 2799 in total, cover more than 1500 cities across India. In July 2007, the NSE had a total market capitalization of 42,74,509 crore INR making it the second-largest stock market in South Asia in terms of market-capitalization.
  - 'Dhanteras (Sanskrit: धनतेरस), also known as Dhanatrayodashi () or Dhanvantari Trayodashi, is the first day of the festival of Diwali in India and the festival of Tihar in Nepal.'
  - Perur taluk is a taluk in Coimbatore district, Tamil Nadu, India associated with the neighbourhood of Perur. It was created by Government of Tamil Nadu in 2013.
- source_sentence: What political roles did Rao hold in Andhra Pradesh?
  sentences:
  - The 2023 ICC Cricket World Cup is scheduled to be hosted by India and India was selected as the host at an International Cricket Council (ICC) meeting in London in June 2013. This will be the 13th Cricket World Cup competition. It will be the fourth time that India will be the host. This will be the first time that India has hosted the tournament on its own. India hosted previous World Cup tournaments in 1987 (with Pakistan), 1996 (with Pakistan and Sri Lanka) and 2011 (with Sri Lanka and Bangladesh). The semi final will be played at Wankhede Stadium. And final will be played at Eden Gardens, Kolkata.
  - Ayyavazhi (, "path of the father"), is a religion with one god that started in South India in the middle of the 19th century. The 'zhi' () in the word, 'Ayyavazhi', is a retroflex, ri.
  - Balli Durga Prasad Rao (15 June 1956 – 16 September 2020) was an Indian politician. He was elected to the Lok Sabha, lower house of the Parliament of India in the 2019 Indian general election. He was a member of the YSR Congress Party. Rao was also a member of the Andhra Pradesh MLA from 1985 to 1989, 1994 to 1999, and 2009 to 2014.
---

# SentenceTransformer based on BAAI/bge-base-en-v1.5

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) <!-- at revision a5beb1e3e68b9ab74eb54cfd186867f64f240e1a -->
- **Maximum Sequence Length:** 512 tokens
- **Output Dimensionality:** 768 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("dipanjanS/bge-base-en-v1.5-fte")
# Run inference
sentences = [
    'What political roles did Rao hold in Andhra Pradesh?',
    'Balli Durga Prasad Rao (15 June 1956 – 16 September 2020) was an Indian politician. He was elected to the Lok Sabha, lower house of the Parliament of India in the 2019 Indian general election. He was a member of the YSR Congress Party. Rao was also a member of the Andhra Pradesh MLA from 1985 to 1989, 1994 to 1999, and 2009 to 2014.',
    'Ayyavazhi (, "path of the father"), is a religion with one god that started in South India in the middle of the 19th century. The \'zhi\' () in the word, \'Ayyavazhi\', is a retroflex, ri.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```

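Because the model's final module is `Normalize()`, the embeddings returned by `encode` are unit-length, so cosine similarity and dot product give identical scores. A quick sanity check you could append to the snippet above (assuming `embeddings` is the NumPy array returned there):

```python
import numpy as np

# Every row should have an L2 norm of ~1.0 thanks to the trailing Normalize() module.
print(np.linalg.norm(embeddings, axis=1))  # ≈ [1. 1. 1.]
```
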
<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 1,340 training samples
* Columns: <code>question</code> and <code>context</code>
* Approximate statistics based on the first 1000 samples:
  |         | question | context |
  |:--------|:---------|:--------|
  | type    | string   | string  |
  | details | <ul><li>min: 6 tokens</li><li>mean: 12.39 tokens</li><li>max: 24 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 83.99 tokens</li><li>max: 510 tokens</li></ul> |
* Samples:
  | question | context |
  |:---------|:--------|
  | <code>What is Basil commonly known as?</code> | <code>Basil ("Ocimum basilicum") ( or ) is a plant of the Family Lamiaceae. It is also known as Sweet Basil or Tulsi. It is a tender low-growing herb that is grown as a perennial in warm, tropical climates. Basil is originally native to India and other tropical regions of Asia. It has been cultivated there for more than 5,000 years. It is prominently featured in many cuisines throughout the world. Some of them are Italian, Thai, Vietnamese and Laotian cuisines. It grows to between 30–60 cm tall. It has light green, silky leaves 3–5 cm long and 1–3 cm broad. The leaves are opposite each other. The flowers are quite big. They are white in color and arranged as a spike.</code> |
  | <code>Where is Basil originally native to?</code> | <code>Basil ("Ocimum basilicum") ( or ) is a plant of the Family Lamiaceae. It is also known as Sweet Basil or Tulsi. It is a tender low-growing herb that is grown as a perennial in warm, tropical climates. Basil is originally native to India and other tropical regions of Asia. It has been cultivated there for more than 5,000 years. It is prominently featured in many cuisines throughout the world. Some of them are Italian, Thai, Vietnamese and Laotian cuisines. It grows to between 30–60 cm tall. It has light green, silky leaves 3–5 cm long and 1–3 cm broad. The leaves are opposite each other. The flowers are quite big. They are white in color and arranged as a spike.</code> |
  | <code>What is the significance of the Roerich Pact?</code> | <code>The Roerich Pact is a treaty on Protection of Artistic and Scientific Institutions and Historic Monuments, signed by the representatives of 21 states in the Oval Office of the White House on 15 April 1935. As of January 1, 1990, the Roerich Pact had been ratified by ten nations: Brazil, Chile, Colombia, Cuba, the Dominican Republic, El Salvador, Guatemala, Mexico, the United States, and Venezuela. It went into effect on 26 August 1935. The Government of India approved the Treaty in 1948, but did not take any further formal action. The Roerich Pact is also known as "Pax Cultura" ("Cultural Peace" or "Peace through Culture"). The most important part of the Roerich Pact is the legal recognition that the protection of culture is always more important than any military necessity.</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

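For context, MultipleNegativesRankingLoss treats every other context in the batch as a negative for a given question: cosine similarities are scaled by 20 and cross-entropy pulls each question toward its own context. A minimal sketch of the idea (illustrative, not the library's exact implementation):

```python
import torch
import torch.nn.functional as F

def mnrl(questions: torch.Tensor, contexts: torch.Tensor, scale: float = 20.0) -> torch.Tensor:
    """In-batch-negatives ranking loss over L2-normalized (batch, dim) embeddings."""
    scores = scale * questions @ contexts.T   # (batch, batch) scaled cosine similarities
    labels = torch.arange(questions.size(0))  # the matching context sits on the diagonal
    return F.cross_entropy(scores, labels)
```
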
### Evaluation Dataset

#### Unnamed Dataset

* Size: 100 evaluation samples
* Columns: <code>question</code> and <code>context</code>
* Approximate statistics based on the first 1000 samples:
  |         | question | context |
  |:--------|:---------|:--------|
  | type    | string   | string  |
  | details | <ul><li>min: 7 tokens</li><li>mean: 12.36 tokens</li><li>max: 19 tokens</li></ul> | <ul><li>min: 12 tokens</li><li>mean: 84.15 tokens</li><li>max: 235 tokens</li></ul> |
* Samples:
  | question | context |
  |:---------|:--------|
  | <code>What is the demographic composition of Kolathur?</code> | <code>Kolathur () is a town in Salem district in the Indian state of Tamil Nadu. As of the 2001 India census, Kolathur had a population of 10,319. Males make up 53% of the population and females 47%. A total of 9% of the population is under 6 years of age.</code> |
  | <code>What is notable about India's democracy?</code> | <code>India is a country in Asia. It has an area of . It is at the center of South Asia. India has more than 1.2 billion (1,210,000,000) people, which is the second largest population in the world. It is the seventh largest country in the world by area and the largest country in South Asia. It is also the most populous democracy in the world.</code> |
  | <code>Who was the Chief Justice of India before Dipak Misra?</code> | <code>Justice Dipak Misra (born 3 October 1953) was the Judge of the Supreme Court and the Chief Justice of India. He took over as the 45th Chief Justice of India (CJI), succeeding the 44th CJI, Justice J. S. Khehar.</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `learning_rate`: 3e-06
- `max_steps`: 332
- `warmup_ratio`: 0.1
- `fp16`: True
- `batch_sampler`: no_duplicates

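As a rough sketch, these non-default values correspond to a sentence-transformers v3 training run along the following lines (the toy dataset and the output directory name are assumptions for illustration, not taken from this repo):

```python
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
from sentence_transformers.losses import MultipleNegativesRankingLoss
from sentence_transformers.training_args import BatchSamplers, SentenceTransformerTrainingArguments

model = SentenceTransformer("BAAI/bge-base-en-v1.5")

# Toy (question, context) pairs standing in for the 1,340-sample dataset.
pairs = {
    "question": ["Where is Basil originally native to?"],
    "context": ["Basil is originally native to India and other tropical regions of Asia."],
}
train_dataset = eval_dataset = Dataset.from_dict(pairs)

args = SentenceTransformerTrainingArguments(
    output_dir="bge-base-en-v1.5-fte",  # assumed name
    eval_strategy="steps",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=3e-6,
    max_steps=332,
    warmup_ratio=0.1,
    fp16=True,
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # no duplicate texts within a batch
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    loss=MultipleNegativesRankingLoss(model),
)
trainer.train()
```
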
#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `learning_rate`: 3e-06
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 3.0
- `max_steps`: 332
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `batch_sampler`: no_duplicates
- `multi_dataset_batch_sampler`: proportional

</details>

### Training Logs
| Epoch  | Step | Training Loss | loss   |
|:------:|:----:|:-------------:|:------:|
| 0.2381 | 20   | 0.1832        | 0.0491 |
| 0.4762 | 40   | 0.1118        | 0.0246 |
| 0.7143 | 60   | 0.0991        | 0.0152 |
| 0.9524 | 80   | 0.0518        | 0.0106 |
| 1.1905 | 100  | 0.0665        | 0.0073 |
| 1.4286 | 120  | 0.0539        | 0.0058 |
| 1.6667 | 140  | 0.0548        | 0.0048 |
| 1.9048 | 160  | 0.0354        | 0.0041 |
| 2.1429 | 180  | 0.038         | 0.0034 |
| 2.3810 | 200  | 0.0592        | 0.0030 |
| 2.6190 | 220  | 0.0203        | 0.0027 |
| 2.8571 | 240  | 0.0441        | 0.0025 |
| 3.0952 | 260  | 0.023         | 0.0024 |
| 3.3333 | 280  | 0.0452        | 0.0023 |
| 3.5714 | 300  | 0.0128        | 0.0022 |
| 3.8095 | 320  | 0.0495        | 0.0022 |

### Framework Versions
- Python: 3.10.12
- Sentence Transformers: 3.0.1
- Transformers: 4.42.4
- PyTorch: 2.3.1+cu121
- Accelerate: 0.32.1
- Datasets: 2.20.0
- Tokenizers: 0.19.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,32 @@
{
    "_name_or_path": "BAAI/bge-base-en-v1.5",
    "architectures": [
        "BertModel"
    ],
    "attention_probs_dropout_prob": 0.1,
    "classifier_dropout": null,
    "gradient_checkpointing": false,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
    "id2label": {
        "0": "LABEL_0"
    },
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "label2id": {
        "LABEL_0": 0
    },
    "layer_norm_eps": 1e-12,
    "max_position_embeddings": 512,
    "model_type": "bert",
    "num_attention_heads": 12,
    "num_hidden_layers": 12,
    "pad_token_id": 0,
    "position_embedding_type": "absolute",
    "torch_dtype": "float32",
    "transformers_version": "4.42.4",
    "type_vocab_size": 2,
    "use_cache": true,
    "vocab_size": 30522
}
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
    "__version__": {
        "sentence_transformers": "3.0.1",
        "transformers": "4.42.4",
        "pytorch": "2.3.1+cu121"
    },
    "prompts": {},
    "default_prompt_name": null,
    "similarity_fn_name": null
}
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d353208aafcd7ee302242c87c086a74d3cafd4b9c5be8a33d54b876cc10f82a0
size 437951328
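(The pointer's size field is consistent with BERT-base weights stored in float32: roughly 109.5M parameters × 4 bytes ≈ 438 MB, plus a small safetensors header.)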
modules.json
ADDED
@@ -0,0 +1,20 @@
[
    {
        "idx": 0,
        "name": "0",
        "path": "",
        "type": "sentence_transformers.models.Transformer"
    },
    {
        "idx": 1,
        "name": "1",
        "path": "1_Pooling",
        "type": "sentence_transformers.models.Pooling"
    },
    {
        "idx": 2,
        "name": "2",
        "path": "2_Normalize",
        "type": "sentence_transformers.models.Normalize"
    }
]
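modules.json declares the three-module pipeline that `SentenceTransformer(...)` reassembles at load time. A hedged sketch of building the same stack by hand with the library's module classes:

```python
from sentence_transformers import SentenceTransformer, models

# Rebuild the Transformer -> Pooling -> Normalize stack declared above.
word_embedding = models.Transformer("BAAI/bge-base-en-v1.5", max_seq_length=512)
pooling = models.Pooling(word_embedding.get_word_embedding_dimension(), pooling_mode="cls")
model = SentenceTransformer(modules=[word_embedding, pooling, models.Normalize()])
```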
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
    "max_seq_length": 512,
    "do_lower_case": true
}
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
    "cls_token": {
        "content": "[CLS]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "mask_token": {
        "content": "[MASK]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "pad_token": {
        "content": "[PAD]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "sep_token": {
        "content": "[SEP]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "unk_token": {
        "content": "[UNK]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
{
    "added_tokens_decoder": {
        "0": {
            "content": "[PAD]",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "100": {
            "content": "[UNK]",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "101": {
            "content": "[CLS]",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "102": {
            "content": "[SEP]",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "103": {
            "content": "[MASK]",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        }
    },
    "clean_up_tokenization_spaces": true,
    "cls_token": "[CLS]",
    "do_basic_tokenize": true,
    "do_lower_case": true,
    "mask_token": "[MASK]",
    "model_max_length": 512,
    "never_split": null,
    "pad_token": "[PAD]",
    "sep_token": "[SEP]",
    "strip_accents": null,
    "tokenize_chinese_chars": true,
    "tokenizer_class": "BertTokenizer",
    "unk_token": "[UNK]"
}
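For reference, `added_tokens_decoder` pins the standard BERT special-token ids ([PAD]=0, [UNK]=100, [CLS]=101, [SEP]=102, [MASK]=103). A quick check (assuming the repo id used in the Usage section above):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("dipanjanS/bge-base-en-v1.5-fte")
print(tok.pad_token_id, tok.unk_token_id, tok.cls_token_id, tok.sep_token_id, tok.mask_token_id)
# 0 100 101 102 103
```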
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff