Add SetFit model
Browse files- 1_Pooling/config.json +10 -0
- README.md +286 -0
- config.json +32 -0
- config_sentence_transformers.json +10 -0
- config_setfit.json +4 -0
- model.safetensors +3 -0
- model_head.pkl +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +57 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: BAAI/bge-base-en-v1.5
|
3 |
+
library_name: setfit
|
4 |
+
metrics:
|
5 |
+
- accuracy
|
6 |
+
pipeline_tag: text-classification
|
7 |
+
tags:
|
8 |
+
- setfit
|
9 |
+
- sentence-transformers
|
10 |
+
- text-classification
|
11 |
+
- generated_from_setfit_trainer
|
12 |
+
widget:
|
13 |
+
- text: 'The provided answer is quite disjointed and does not directly address the
|
14 |
+
specific question about accessing the company''s training resources. The listed
|
15 |
+
methods are more related to accessing various internal tools and processes rather
|
16 |
+
than directly answering the question. Let''s break down the problems:
|
17 |
+
|
18 |
+
|
19 |
+
1. **System and personal documents**: Mentions accessing personal documents, contracts,
|
20 |
+
and making reimbursements, which are not directly related to training resources.
|
21 |
+
|
22 |
+
2. **Password Manager (1Password)**: This piece of information about managing
|
23 |
+
passwords is irrelevant to accessing training resources.
|
24 |
+
|
25 |
+
3. **Tresorit**: Focuses on secure information sharing, not on training resources.
|
26 |
+
|
27 |
+
4. **Coffee session for feedback**: This is related to clarifying feedback, not
|
28 |
+
directly about accessing training resources.
|
29 |
+
|
30 |
+
5. **Learning Budget**: This section is somewhat relevant but is more about requesting
|
31 |
+
financial support for training rather than providing a direct method to access
|
32 |
+
training resources.
|
33 |
+
|
34 |
+
|
35 |
+
The answer needs to clearly outline specific steps or platforms dedicated to training
|
36 |
+
resources, which is mostly missing here.
|
37 |
+
|
38 |
+
|
39 |
+
Final Result: **Bad**'
|
40 |
+
- text: 'The answer succinctly addresses the question by stating that finance@ORGANIZATION_2.<89312988>
|
41 |
+
should be contacted for questions about travel reimbursement. This is correctly
|
42 |
+
derived from the provided document, which specifies that questions about travel
|
43 |
+
costs and reimbursements should be directed to the finance email.
|
44 |
+
|
45 |
+
|
46 |
+
Final evaluation: Good'
|
47 |
+
- text: 'The answer provided correctly mentions the essential aspects outlined in
|
48 |
+
the document, such as the importance for team leads to actively consider the possibility
|
49 |
+
of team members leaving, to flag these situations to HR, analyze problems, provide
|
50 |
+
feedback, and take proactive steps in various issues like underperformance or
|
51 |
+
lack of growth. The answer also captures the significance of creating a supportive
|
52 |
+
environment, maintaining alignment with the company''s vision and mission, ensuring
|
53 |
+
work-life balance, and providing regular feedback and praise.
|
54 |
+
|
55 |
+
|
56 |
+
However, while the answer is generally comprehensive, it could be slightly more
|
57 |
+
direct and concise in its communication. The document points to the necessity
|
58 |
+
for team leads to think about potential exits to preemptively address issues and
|
59 |
+
essentially prevent situations that may necessitate separation if they aren''t
|
60 |
+
managed well. This could have been emphasized more clearly.
|
61 |
+
|
62 |
+
|
63 |
+
Overall, the answer aligns well with the content and intent of the document.
|
64 |
+
|
65 |
+
|
66 |
+
Final evaluation: Good'
|
67 |
+
- text: 'Reasoning:
|
68 |
+
|
69 |
+
The answer provided is relevant to the question as it directs the user to go to
|
70 |
+
the website and check job ads and newsletters for more information about ORGANIZATION.
|
71 |
+
However, it lacks comprehensive details. It only partially addresses how one can
|
72 |
+
understand ORGANIZATION''s product, challenges, and future since the documents
|
73 |
+
suggest accessing job ads and newsletters, and no further content or documents
|
74 |
+
were leveraged to provide insights into product details, current challenges, or
|
75 |
+
future plans.
|
76 |
+
|
77 |
+
|
78 |
+
Final evaluation: Bad'
|
79 |
+
- text: 'Evaluation:
|
80 |
+
|
81 |
+
The answer provides a detailed response directly addressing the question, mentioning
|
82 |
+
that the ORGANIZATION_2 and key individuals like Thomas Barnes and Charlotte Herrera
|
83 |
+
play a supportive role in the farewell process. This includes handling paperwork,
|
84 |
+
providing guidance, and assisting with tough conversations. The answer aligns
|
85 |
+
well with the details provided in the document.
|
86 |
+
|
87 |
+
|
88 |
+
The final evaluation: Good'
|
89 |
+
inference: true
|
90 |
+
model-index:
|
91 |
+
- name: SetFit with BAAI/bge-base-en-v1.5
|
92 |
+
results:
|
93 |
+
- task:
|
94 |
+
type: text-classification
|
95 |
+
name: Text Classification
|
96 |
+
dataset:
|
97 |
+
name: Unknown
|
98 |
+
type: unknown
|
99 |
+
split: test
|
100 |
+
metrics:
|
101 |
+
- type: accuracy
|
102 |
+
value: 0.6268656716417911
|
103 |
+
name: Accuracy
|
104 |
+
---
|
105 |
+
|
106 |
+
# SetFit with BAAI/bge-base-en-v1.5
|
107 |
+
|
108 |
+
This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
|
109 |
+
|
110 |
+
The model has been trained using an efficient few-shot learning technique that involves:
|
111 |
+
|
112 |
+
1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
|
113 |
+
2. Training a classification head with features from the fine-tuned Sentence Transformer.
|
114 |
+
|
115 |
+
## Model Details
|
116 |
+
|
117 |
+
### Model Description
|
118 |
+
- **Model Type:** SetFit
|
119 |
+
- **Sentence Transformer body:** [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5)
|
120 |
+
- **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
|
121 |
+
- **Maximum Sequence Length:** 512 tokens
|
122 |
+
- **Number of Classes:** 2 classes
|
123 |
+
<!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
|
124 |
+
<!-- - **Language:** Unknown -->
|
125 |
+
<!-- - **License:** Unknown -->
|
126 |
+
|
127 |
+
### Model Sources
|
128 |
+
|
129 |
+
- **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
|
130 |
+
- **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
|
131 |
+
- **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
|
132 |
+
|
133 |
+
### Model Labels
|
134 |
+
| Label | Examples |
|
135 |
+
|:------|:---------|
|
136 |
+
| 0 | <ul><li>'The answer provided is a general approach to saving money, offering advice on spending wisely, making good investments, and considering personal goals. It also suggests discussing with a lead or ORGANIZATION_2 for guidance and highlights the importance of health and setting priorities.\n\nHowever, the answer lacks direct grounding in the provided document, which primarily discusses budgeting for studies and spending on learning and development. The document emphasizes evaluating expenditures based on their benefits to personal and organizational goals but does not offer explicit general financial saving tips.\n\nDue to this lack of specific correlation and grounding in the document, the evaluation is:\n\nBad'</li><li>'The answer provided reads incoherently due to the insertion of "Cassandra Rivera Heather Nelson" and other names in inappropriate places throughout the text, making it difficult to assess its alignment with the original document.\n\nFurthermore, the inserted names disrupt the meaning of the text, making it unclear if all relevant points from the document are covered accurately. The structure of the sentences becomes disjointed and presents an overall lack of clarity. \n\nGiven the nature of these errors, it\'s impossible to fairly evaluate whether the answer strictly adheres to the details provided in the source documents. Therefore, based on the clarity, coherence, and alignment to the original text, the final result is:\n\nBad'</li><li>"The provided answer does not satisfactorily respond to the question about accessing the company's training resources. Instead, it contains unrelated information about document management, security protocols, feedback processes, and learning budget requests. The relevant information about accessing training resources is clearly missing or obscure.\n\n**Reasoning:**\n\n1. **Irrelevant Details**: The answer includes details about the usage of a password manager, secure sharing of information, and expense reimbursement, none of which pertain to accessing training resources.\n\n2. **Lack of Specificity**: No explicit method or platform for accessing training resources is mentioned, which is the core inquiry.\n\n3. **Missed Key Point**: The document points towards systems used for personal documents and reimbursement requests but fails to highlight training resource access points.\n\nFinal evaluation: **Bad**"</li></ul> |
|
137 |
+
| 1 | <ul><li>'The answer demonstrates a clear connection to the provided document, outlining key tips and principles for giving feedback as requested by the question. The response includes the importance of timing, focusing on the situation rather than the person, being clear and direct, and the goal of helping rather than shaming. It also mentions the importance of appreciation and receiving feedback with an open mind. \n\nHowever, there are some typographical errors and misplaced words (e.g., "emichelle James Johnson MDamples") that detract slightly from the clarity. Despite these minor issues, the content provided accurately reflects the information in the source document and comprehensively addresses the question. Therefore, the final evaluation is:\n\nGood'</li><li>"The given answer provides a general explanation of why it is important to proactively share information from high-level meetings, but it lacks grounding in the provided document. \n\nWhile the answer discusses the benefits of transparency, alignment with the organization's vision and mission, and fostering an open work environment, it does not directly quote or refer to specific points in the document. This weakens the argument, as it seems more like an independent explanation rather than an answer strictly based on the provided material.\n\nThe document mentions the importance of employees knowing why they are doing what they do and the necessity of closing the information gap by proactively sharing what was discussed in high-level meetings. This specific point from Document 4 could have been directly referenced to make the answer more aligned with the source material.\n\nThus, the answer, although conceptually correct, does not appropriately leverage the provided document.\n\nFinal result: Bad"</li><li>"The answer is partially correct since it provides the most important guidance on how to report car travel expenses for reimbursement. However, it contains inaccuracies and omissions that could cause confusion. \n\n1. The email addresses cited in the answer don’t match those in the document.\n2. There’s mention of requesting a parking card for a specific date (2004-04-14), which implies an inaccurate, irrelevant detail that might mislead the reader.\n3. The answer doesn't explicitly suggest that travel cost reimbursement is handled monthly, which is a crucial piece of information.\n\nConsidering these elements, the evaluation is as follows:\n\n**Evaluation:**\nThe essential details related to car travel reimbursement are present, but incorrect email addresses and irrelevant details might mislead or cause inconvenience for employees.\n\nThe final evaluation: **Bad**"</li></ul> |
|
138 |
+
|
139 |
+
## Evaluation
|
140 |
+
|
141 |
+
### Metrics
|
142 |
+
| Label | Accuracy |
|
143 |
+
|:--------|:---------|
|
144 |
+
| **all** | 0.6269 |
|
145 |
+
|
146 |
+
## Uses
|
147 |
+
|
148 |
+
### Direct Use for Inference
|
149 |
+
|
150 |
+
First install the SetFit library:
|
151 |
+
|
152 |
+
```bash
|
153 |
+
pip install setfit
|
154 |
+
```
|
155 |
+
|
156 |
+
Then you can load this model and run inference.
|
157 |
+
|
158 |
+
```python
|
159 |
+
from setfit import SetFitModel
|
160 |
+
|
161 |
+
# Download from the 🤗 Hub
|
162 |
+
model = SetFitModel.from_pretrained("Netta1994/setfit_baai_newrelic_gpt-4o_cot-few_shot_only_reasoning_1726750220.456809")
|
163 |
+
# Run inference
|
164 |
+
preds = model("""The answer succinctly addresses the question by stating that finance@ORGANIZATION_2.<89312988> should be contacted for questions about travel reimbursement. This is correctly derived from the provided document, which specifies that questions about travel costs and reimbursements should be directed to the finance email.
|
165 |
+
|
166 |
+
Final evaluation: Good""")
|
167 |
+
```
|
168 |
+
|
169 |
+
<!--
|
170 |
+
### Downstream Use
|
171 |
+
|
172 |
+
*List how someone could finetune this model on their own dataset.*
|
173 |
+
-->
|
174 |
+
|
175 |
+
<!--
|
176 |
+
### Out-of-Scope Use
|
177 |
+
|
178 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
179 |
+
-->
|
180 |
+
|
181 |
+
<!--
|
182 |
+
## Bias, Risks and Limitations
|
183 |
+
|
184 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
185 |
+
-->
|
186 |
+
|
187 |
+
<!--
|
188 |
+
### Recommendations
|
189 |
+
|
190 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
191 |
+
-->
|
192 |
+
|
193 |
+
## Training Details
|
194 |
+
|
195 |
+
### Training Set Metrics
|
196 |
+
| Training set | Min | Median | Max |
|
197 |
+
|:-------------|:----|:--------|:----|
|
198 |
+
| Word count | 30 | 85.7538 | 210 |
|
199 |
+
|
200 |
+
| Label | Training Sample Count |
|
201 |
+
|:------|:----------------------|
|
202 |
+
| 0 | 32 |
|
203 |
+
| 1 | 33 |
|
204 |
+
|
205 |
+
### Training Hyperparameters
|
206 |
+
- batch_size: (16, 16)
|
207 |
+
- num_epochs: (5, 5)
|
208 |
+
- max_steps: -1
|
209 |
+
- sampling_strategy: oversampling
|
210 |
+
- num_iterations: 20
|
211 |
+
- body_learning_rate: (2e-05, 2e-05)
|
212 |
+
- head_learning_rate: 2e-05
|
213 |
+
- loss: CosineSimilarityLoss
|
214 |
+
- distance_metric: cosine_distance
|
215 |
+
- margin: 0.25
|
216 |
+
- end_to_end: False
|
217 |
+
- use_amp: False
|
218 |
+
- warmup_proportion: 0.1
|
219 |
+
- l2_weight: 0.01
|
220 |
+
- seed: 42
|
221 |
+
- eval_max_steps: -1
|
222 |
+
- load_best_model_at_end: False
|
223 |
+
|
224 |
+
### Training Results
|
225 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
226 |
+
|:------:|:----:|:-------------:|:---------------:|
|
227 |
+
| 0.0061 | 1 | 0.2304 | - |
|
228 |
+
| 0.3067 | 50 | 0.2556 | - |
|
229 |
+
| 0.6135 | 100 | 0.244 | - |
|
230 |
+
| 0.9202 | 150 | 0.1218 | - |
|
231 |
+
| 1.2270 | 200 | 0.0041 | - |
|
232 |
+
| 1.5337 | 250 | 0.0022 | - |
|
233 |
+
| 1.8405 | 300 | 0.0017 | - |
|
234 |
+
| 2.1472 | 350 | 0.0017 | - |
|
235 |
+
| 2.4540 | 400 | 0.0015 | - |
|
236 |
+
| 2.7607 | 450 | 0.0014 | - |
|
237 |
+
| 3.0675 | 500 | 0.0013 | - |
|
238 |
+
| 3.3742 | 550 | 0.0013 | - |
|
239 |
+
| 3.6810 | 600 | 0.0012 | - |
|
240 |
+
| 3.9877 | 650 | 0.0012 | - |
|
241 |
+
| 4.2945 | 700 | 0.0012 | - |
|
242 |
+
| 4.6012 | 750 | 0.0012 | - |
|
243 |
+
| 4.9080 | 800 | 0.0012 | - |
|
244 |
+
|
245 |
+
### Framework Versions
|
246 |
+
- Python: 3.10.14
|
247 |
+
- SetFit: 1.1.0
|
248 |
+
- Sentence Transformers: 3.1.0
|
249 |
+
- Transformers: 4.44.0
|
250 |
+
- PyTorch: 2.4.1+cu121
|
251 |
+
- Datasets: 2.19.2
|
252 |
+
- Tokenizers: 0.19.1
|
253 |
+
|
254 |
+
## Citation
|
255 |
+
|
256 |
+
### BibTeX
|
257 |
+
```bibtex
|
258 |
+
@article{https://doi.org/10.48550/arxiv.2209.11055,
|
259 |
+
doi = {10.48550/ARXIV.2209.11055},
|
260 |
+
url = {https://arxiv.org/abs/2209.11055},
|
261 |
+
author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
|
262 |
+
keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
|
263 |
+
title = {Efficient Few-Shot Learning Without Prompts},
|
264 |
+
publisher = {arXiv},
|
265 |
+
year = {2022},
|
266 |
+
copyright = {Creative Commons Attribution 4.0 International}
|
267 |
+
}
|
268 |
+
```
|
269 |
+
|
270 |
+
<!--
|
271 |
+
## Glossary
|
272 |
+
|
273 |
+
*Clearly define terms in order to be accessible across audiences.*
|
274 |
+
-->
|
275 |
+
|
276 |
+
<!--
|
277 |
+
## Model Card Authors
|
278 |
+
|
279 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
280 |
+
-->
|
281 |
+
|
282 |
+
<!--
|
283 |
+
## Model Card Contact
|
284 |
+
|
285 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
286 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "BAAI/bge-base-en-v1.5",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"id2label": {
|
13 |
+
"0": "LABEL_0"
|
14 |
+
},
|
15 |
+
"initializer_range": 0.02,
|
16 |
+
"intermediate_size": 3072,
|
17 |
+
"label2id": {
|
18 |
+
"LABEL_0": 0
|
19 |
+
},
|
20 |
+
"layer_norm_eps": 1e-12,
|
21 |
+
"max_position_embeddings": 512,
|
22 |
+
"model_type": "bert",
|
23 |
+
"num_attention_heads": 12,
|
24 |
+
"num_hidden_layers": 12,
|
25 |
+
"pad_token_id": 0,
|
26 |
+
"position_embedding_type": "absolute",
|
27 |
+
"torch_dtype": "float32",
|
28 |
+
"transformers_version": "4.44.0",
|
29 |
+
"type_vocab_size": 2,
|
30 |
+
"use_cache": true,
|
31 |
+
"vocab_size": 30522
|
32 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.1.0",
|
4 |
+
"transformers": "4.44.0",
|
5 |
+
"pytorch": "2.4.1+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
config_setfit.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"normalize_embeddings": false,
|
3 |
+
"labels": null
|
4 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2da6e45b31db132d81d4c5e76bd9f9d2f3e811065bba93c41e2b061bdc39a8c5
|
3 |
+
size 437951328
|
model_head.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3609d95f5b36a966b7dd5e98869dce62e93ac81633130bfb76299c51f945112
|
3 |
+
size 7007
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": true
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 512,
|
50 |
+
"never_split": null,
|
51 |
+
"pad_token": "[PAD]",
|
52 |
+
"sep_token": "[SEP]",
|
53 |
+
"strip_accents": null,
|
54 |
+
"tokenize_chinese_chars": true,
|
55 |
+
"tokenizer_class": "BertTokenizer",
|
56 |
+
"unk_token": "[UNK]"
|
57 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|