MAP@25: 0.44064028964485386
Files changed:

- 1_Pooling/config.json +1 -1
- README.md +104 -279
- config.json +20 -32
- config_sentence_transformers.json +1 -1
- model.safetensors +2 -2
- sentence_bert_config.json +2 -2
- tokenizer.json +1 -1
- tokenizer_config.json +3 -1
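The MAP@25 figure in the commit title is the retrieval metric from the Eedi misconception-mapping task this model targets: each question gets up to 25 ranked misconception candidates, and credit is the reciprocal rank at which the single correct one appears. A minimal sketch of that computation (the function name and toy data are illustrative, not from this repo):

```python
import numpy as np

def map_at_25(ranked_predictions, true_ids):
    """MAP@25 with one relevant item per query: the reciprocal of the
    rank at which the true id appears in the top 25, else 0."""
    scores = []
    for preds, true_id in zip(ranked_predictions, true_ids):
        score = 0.0
        for rank, pred in enumerate(preds[:25], start=1):
            if pred == true_id:
                score = 1.0 / rank
                break
        scores.append(score)
    return float(np.mean(scores))

# True id 7 found at rank 2 -> 0.5; true id 9 missing -> 0.0; mean = 0.25
print(map_at_25([[3, 7, 1], [4, 5, 6]], [7, 9]))
```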
1_Pooling/config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "word_embedding_dimension":
+  "word_embedding_dimension": 1024,
   "pooling_mode_cls_token": true,
   "pooling_mode_mean_tokens": false,
   "pooling_mode_max_tokens": false,
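The only edit here fills in the embedding width; CLS-token pooling stays enabled. For readers unfamiliar with the setting, a rough sketch of what CLS pooling does, using plain Hugging Face APIs (the base checkpoint name is a stand-in, not this model's internals):

```python
import torch
from transformers import AutoModel, AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")   # stand-in encoder
enc = AutoModel.from_pretrained("bert-base-uncased")

batch = tok(["An example sentence."], return_tensors="pt")
with torch.no_grad():
    hidden = enc(**batch).last_hidden_state   # (batch, seq_len, hidden)

# pooling_mode_cls_token: true -> the sentence embedding is the hidden
# state of the first ([CLS]) token; mean and max pooling are disabled.
embedding = hidden[:, 0]                      # (batch, hidden)
```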
README.md
CHANGED
@@ -1,5 +1,4 @@
 ---
-base_model: Alibaba-NLP/gte-base-en-v1.5
 library_name: sentence-transformers
 pipeline_tag: sentence-similarity
 tags:
@@ -7,187 +6,73 @@ tags:
 - sentence-similarity
 - feature-extraction
 - generated_from_trainer
-- dataset_size:
+- dataset_size:218496
 - loss:MultipleNegativesRankingLoss
 widget:
-- source_sentence:
-
-
-
-
-  What would be a possible ratio of boys to girls?
-
-
-  Options:
-
-  A. 3: 7
-
-  B. 3: 4
-
-  C. 3: 10
-
-  D. 4: 3
-
-
-  Correct Answer: 3: 4
-
-
-  Incorrect Answer: 4: 3
-
-
-  Predicted Misconception: Confusing fractions with whole number ratios directly.'
+- source_sentence: "when dividing involving a multiple of 10, gives an answer 10 times\
+    \ bigger than it should be\n\ndivide decimals by 10(multiplying and dividing with\
+    \ decimals).\nquestion: 43.2 \\div 10= \ncorrect answer: 4.32 \nincorrect answer:\
+    \ 33.2"
 sentences:
-- Does not
--
-
--
-
-
-
-
-
-
-  Options:
-
-  A. Always true
-
-  B. Sometimes true
-
-  C. Never true
-
-  D. I don''t
-
-  know
-
-
-  Correct Answer: Never true
-
-
-  Incorrect Answer: Always true
-
-
-  Predicted Misconception: Division is commutative, like addition.'
+- Does not recognise that a shape translated would not change orientation
+- Thinks you can find missing values in a given table by treating the row as linear
+  and adding on the difference between the first two values given.
+- Subtracts instead of divides
+- source_sentence: "incorrectly cancels what they believe is a factor in algebraic\
+    \ fractions\n\nsimplify an algebraic fraction by factorising the numerator(simplifying\
+    \ algebraic fractions).\nquestion: simplify the following, if possible: \\frac{m^{2}+2\
+    \ m-3}{m-3} \ncorrect answer: does not simplify\nincorrect answer: m+1"
 sentences:
--
--
--
-
-
-
-
-
-
-
-
-
-  A. 5.0
-
-  B. 0.4
-
-  C. 0.2
-
-  D. 4.20
-
-
-  Correct Answer: 0.2
-
-
-  Incorrect Answer: 5.0
-
-
-  Predicted Misconception: Believing that dividing a number greater than 1 by another
-  number greater than 1 results in a decimal greater than 1.'
+- Does not know units of area should be squared
+- Thinks all lines on a net will form edges in 3D
+- 'Does not know that to factorise a quadratic expression, to find two numbers that
+  add to give the coefficient of the x term, and multiply to give the non variable
+  term
+
+  '
+- source_sentence: "believes that the order of operations does not affect the answer\
+    \ to a calculation\n\nuse the order of operations to carry out calculations involving\
+    \ powers(bidmas).\nquestion: \\[\n3 \\times 2+4-5\n\\]\nwhere do the brackets\
+    \ need to go to make the answer equal 13 ?\ncorrect answer: 3 \\times(2+4)-5 \n\
+    incorrect answer: does not need brackets"
 sentences:
--
-
-
-
--
-
-
-
-
-
-  (
-
-  (a+b / c)=(a+b) / c
-
-  )
-
-  Is the above relationship...
-
-
-  Options:
-
-  A. always true
-
-  B. sometimes true
-
-  C. never true
-
-  D. Need more information
-
-
-  Correct Answer: always true
-
-
-  Incorrect Answer: never true
-
-
-  Predicted Misconception: Belief that division distributes over addition.'
+- Thinks that when you cancel identical terms from the numerator and denominator,
+  they just disappear
+- Believes both the x and y co-ordinates of the x-intercept of a quadratic are derived
+  from the constants in the factorised form.
+- 'Confuses the order of operations, believes addition comes before multiplication '
+- source_sentence: "believes that the order of operations does not affect the answer\
+    \ to a calculation\n\nuse the order of operations to carry out calculations involving\
+    \ powers(bidmas).\nquestion: \\[\n3 \\times 2+4-5\n\\]\nwhere do the brackets\
+    \ need to go to make the answer equal 13 ?\ncorrect answer: 3 \\times(2+4)-5 \n\
+    incorrect answer: does not need brackets"
 sentences:
--
--
--
-
-
-
-
-  Question: A straight line on squared paper. Points P, Q and R lie on this line.
-  The leftmost end of the line is labelled P. If you travel right 4 squares and
-  up 1 square you get to point Q. If you then travel 8 squares right and 2 squares
-  up from Q you reach point R. What is the ratio of P Q: P R ?
-
-
-  Options:
-
-  A. 1: 12
-
-  B. 1: 4
-
-  C. 1: 2
-
-  D. 1: 3
-
-
-  Correct Answer: 1: 3
-
-
-  Incorrect Answer: 1: 2
-
-
-  Predicted Misconception: Misunderstanding the ratio calculation by not considering
-  the correct horizontal and vertical distances between points P, Q, and R.'
+- 'Confuses the order of operations, believes addition comes before multiplication '
+- Does not recognise the properties of a kite
+- 'Confuses the order of operations, believes addition comes before multiplication '
+- source_sentence: "believes percentages cannot be converted into fractions\n\nconvert\
+    \ two digit integer percentages to fractions(converting between fractions and\
+    \ percentages).\nquestion: convert this percentage to a fraction\n 62 \\% \ncorrect\
+    \ answer: \\frac{31}{50} \nincorrect answer: none of these"
 sentences:
--
--
--
+- Believes the gradients of perpendicular lines are reciprocals of the same sign
+- Does not know the properties of a rectangle
+- Does not understand a percentage is out of 100
 ---
 
-# SentenceTransformer
+# SentenceTransformer
 
-This is a [sentence-transformers](https://www.SBERT.net) model
+This is a [sentence-transformers](https://www.SBERT.net) model trained. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
 
 ## Model Details
 
 ### Model Description
 - **Model Type:** Sentence Transformer
-- **Base model:** [
-- **Maximum Sequence Length:**
-- **Output Dimensionality:**
+<!-- - **Base model:** [Unknown](https://huggingface.co/unknown) -->
+- **Maximum Sequence Length:** 512 tokens
+- **Output Dimensionality:** 1024 tokens
 - **Similarity Function:** Cosine Similarity
-- **Training Dataset:**
-  - csv
+<!-- - **Training Dataset:** Unknown -->
 <!-- - **Language:** Unknown -->
 <!-- - **License:** Unknown -->
 
@@ -201,8 +86,8 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [A
 
 ```
 SentenceTransformer(
-  (0): Transformer({'max_seq_length':
-  (1): Pooling({'word_embedding_dimension':
+  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel
+  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
 )
 ```
 
@@ -224,13 +109,13 @@ from sentence_transformers import SentenceTransformer
 model = SentenceTransformer("Gurveer05/gte-base-eedi-2024")
 # Run inference
 sentences = [
-    '
-    '
-    '
+    'believes percentages cannot be converted into fractions\n\nconvert two digit integer percentages to fractions(converting between fractions and percentages).\nquestion: convert this percentage to a fraction\n 62 \\% \ncorrect answer: \\frac{31}{50} \nincorrect answer: none of these',
+    'Does not understand a percentage is out of 100',
+    'Believes the gradients of perpendicular lines are reciprocals of the same sign',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
-# [3,
+# [3, 1024]
 
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
@@ -278,48 +163,22 @@ You can finetune this model on your own dataset.
 
 ### Training Dataset
 
-####
-
-* Dataset: csv
-* Size: 12,210 training samples
-* Columns: <code>qa_pair_text</code>, <code>MisconceptionName</code>, and <code>negative</code>
-* Approximate statistics based on the first 1000 samples:
-  |         | qa_pair_text | MisconceptionName | negative |
-  |:--------|:-------------|:------------------|:---------|
-  | type    | string       | string            | string   |
-  | details | <ul><li>min: 54 tokens</li><li>mean: 124.3 tokens</li><li>max: 618 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 15.16 tokens</li><li>max: 39 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 14.49 tokens</li><li>max: 40 tokens</li></ul> |
-* Samples:
-  | qa_pair_text | MisconceptionName | negative |
-  |:-------------|:------------------|:---------|
-  | <code>Construct: Construct frequency tables.<br><br>Question: Dave has recorded the number of pets his classmates have in the frequency table on the right. <br> Number of pets | Frequency <br> 0 | 4 <br> 1 | 6 <br> 2 | 3 <br> 3 | 2 <br> 4 | 5 <br><br> If Dave wanted to work out the total number of pets own by his classmates, what would be a useful column to include?<br><br>Options:<br>A. Number of pets x Frequency<br>B. Number of pets / Frequency<br>C. Number of pets + Frequency<br>D. Number of pets -<br>Frequency<br><br>Correct Answer: Number of pets x Frequency<br><br>Incorrect Answer: Number of pets + Frequency<br><br>Predicted Misconception: Adding frequency to the number of pets to find total pets.</code> | <code>Adds instead of multiplies when calculating total frequency</code> | <code>Believes the mean is total frequency divided by sum of categories given</code> |
-  | <code>Construct: Convert between any other time periods.<br><br>Question: To work out how many hours in a year you could do...<br><br>Options:<br>A. 365 x 7<br>B. 365 x 60<br>C. 365 x 12<br>D. 365 x 24<br><br>Correct Answer: 365 x 24<br><br>Incorrect Answer: 365 x 60<br><br>Predicted Misconception: Multiplying days by hours per minute instead of hours per day.</code> | <code>Answers as if there are 60 hours in a day</code> | <code>Confuses an equation with an expression</code> |
-  | <code>Construct: Given information about one part, work out other parts.<br><br>Question: Jess and Heena share some sweets in the ratio 3;: 5 .<br>Jess gets 15 sweets.<br>How many sweets does Heena get?<br><br>Options:<br>A. 17<br>B. 9<br>C. 5<br>D. 25<br><br>Correct Answer: 25<br><br>Incorrect Answer: 17<br><br>Predicted Misconception: Misunderstanding the direct proportionality between the ratio and actual quantities.</code> | <code>Thinks a difference of one part in a ratio means the quantities will differ by one unit</code> | <code>Believes dividing two positives will give a negative answer</code> |
-* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
-  ```json
-  {
-      "scale": 20.0,
-      "similarity_fct": "cos_sim"
-  }
-  ```
 
-### Evaluation Dataset
 
-
-
-* Dataset: csv
-* Size: 9,640 evaluation samples
-* Columns: <code>qa_pair_text</code>, <code>MisconceptionName</code>, and <code>negative</code>
+#### Unnamed Dataset
+
+* Size: 218,496 training samples
+* Columns: <code>FullText</code>, <code>GroundTruthMisconception</code>, and <code>PredictMisconception</code>
 * Approximate statistics based on the first 1000 samples:
-  | |
-
-  | type | string
-  | details | <ul><li>min:
+  |         | FullText | GroundTruthMisconception | PredictMisconception |
+  |:--------|:---------|:-------------------------|:---------------------|
+  | type    | string   | string                   | string               |
+  | details | <ul><li>min: 58 tokens</li><li>mean: 95.45 tokens</li><li>max: 159 tokens</li></ul> | <ul><li>min: 8 tokens</li><li>mean: 17.0 tokens</li><li>max: 38 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.27 tokens</li><li>max: 40 tokens</li></ul> |
 * Samples:
-  |
-
-  | <code>
-  | <code>
-  | <code>
+  | FullText | GroundTruthMisconception | PredictMisconception |
+  |:---------|:-------------------------|:---------------------|
+  | <code>believes that the order of operations does not affect the answer to a calculation<br><br>use the order of operations to carry out calculations involving powers(bidmas).<br>question: \[<br>3 \times 2+4-5<br>\]<br>where do the brackets need to go to make the answer equal 13 ?<br>correct answer: 3 \times(2+4)-5 <br>incorrect answer: does not need brackets</code> | <code>Confuses the order of operations, believes addition comes before multiplication </code> | <code>Believes infinite gradient is not possible in real life. </code> |
+  | <code>believes that the order of operations does not affect the answer to a calculation<br><br>use the order of operations to carry out calculations involving powers(bidmas).<br>question: \[<br>3 \times 2+4-5<br>\]<br>where do the brackets need to go to make the answer equal 13 ?<br>correct answer: 3 \times(2+4)-5 <br>incorrect answer: does not need brackets</code> | <code>Confuses the order of operations, believes addition comes before multiplication </code> | <code>Struggles to draw 3D shapes on isometric paper</code> |
  | <code>believes that the order of operations does not affect the answer to a calculation<br><br>use the order of operations to carry out calculations involving powers(bidmas).<br>question: \[<br>3 \times 2+4-5<br>\]<br>where do the brackets need to go to make the answer equal 13 ?<br>correct answer: 3 \times(2+4)-5 <br>incorrect answer: does not need brackets</code> | <code>Confuses the order of operations, believes addition comes before multiplication </code> | <code>Believes an upward slope on a distance-time graph means travelling back towards the starting point.</code> |
 * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
   ```json
   {
@@ -331,20 +190,14 @@ You can finetune this model on your own dataset.
 ### Training Hyperparameters
 #### Non-Default Hyperparameters
 
-- `eval_strategy`: steps
-- `per_device_train_batch_size`: 32
-- `per_device_eval_batch_size`: 32
 - `gradient_accumulation_steps`: 16
-- `
+- `eval_accumulation_steps`: 16
+- `learning_rate`: 2e-05
 - `weight_decay`: 0.01
-- `num_train_epochs`:
-- `lr_scheduler_type`:
-- `lr_scheduler_kwargs`: {'num_cycles': 20}
+- `num_train_epochs`: 2
+- `lr_scheduler_type`: cosine_with_restarts
 - `warmup_ratio`: 0.1
 - `fp16`: True
-- `load_best_model_at_end`: True
-- `gradient_checkpointing`: True
-- `gradient_checkpointing_kwargs`: {'use_reentrant': False}
 - `batch_sampler`: no_duplicates
 
 #### All Hyperparameters
@@ -352,25 +205,24 @@ You can finetune this model on your own dataset.
 
 - `overwrite_output_dir`: False
 - `do_predict`: False
-- `eval_strategy`:
+- `eval_strategy`: no
 - `prediction_loss_only`: True
-- `per_device_train_batch_size`:
-- `per_device_eval_batch_size`:
+- `per_device_train_batch_size`: 8
+- `per_device_eval_batch_size`: 8
 - `per_gpu_train_batch_size`: None
 - `per_gpu_eval_batch_size`: None
 - `gradient_accumulation_steps`: 16
-- `eval_accumulation_steps`:
-- `
-- `learning_rate`: 1e-05
+- `eval_accumulation_steps`: 16
+- `learning_rate`: 2e-05
 - `weight_decay`: 0.01
 - `adam_beta1`: 0.9
 - `adam_beta2`: 0.999
 - `adam_epsilon`: 1e-08
 - `max_grad_norm`: 1.0
-- `num_train_epochs`:
+- `num_train_epochs`: 2
 - `max_steps`: -1
-- `lr_scheduler_type`:
-- `lr_scheduler_kwargs`: {
+- `lr_scheduler_type`: cosine_with_restarts
+- `lr_scheduler_kwargs`: {}
 - `warmup_ratio`: 0.1
 - `warmup_steps`: 0
 - `log_level`: passive
@@ -407,7 +259,7 @@ You can finetune this model on your own dataset.
 - `disable_tqdm`: False
 - `remove_unused_columns`: True
 - `label_names`: None
-- `load_best_model_at_end`:
+- `load_best_model_at_end`: False
 - `ignore_data_skip`: False
 - `fsdp`: []
 - `fsdp_min_num_params`: 0
@@ -434,8 +286,8 @@ You can finetune this model on your own dataset.
 - `hub_strategy`: every_save
 - `hub_private_repo`: False
 - `hub_always_push`: False
-- `gradient_checkpointing`:
-- `gradient_checkpointing_kwargs`:
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
 - `include_inputs_for_metrics`: False
 - `eval_do_concat_batches`: True
 - `fp16_backend`: auto
@@ -458,61 +310,34 @@ You can finetune this model on your own dataset.
 - `optim_target_modules`: None
 - `batch_eval_metrics`: False
 - `eval_on_start`: False
-- `eval_use_gather_object`: False
 - `batch_sampler`: no_duplicates
 - `multi_dataset_batch_sampler`: proportional
 
 </details>
 
 ### Training Logs
-| Epoch
-
-| 0.
-|
-|
-|
-|
-|
-|
-|
-|
-|
-|
-
-| 6.4712 | 78 | 1.0166 | - |
-| 6.9738 | 84 | 1.0095 | 1.0651 |
-| 7.4660 | 90 | 0.8951 | - |
-| 7.9686 | 96 | 0.8782 | 1.0386 |
-| 8.4607 | 102 | 0.8305 | - |
-| 8.9634 | 108 | 0.809 | 1.0174 |
-| 9.4555 | 114 | 0.7202 | - |
-| 9.9581 | 120 | 0.7403 | 1.0041 |
-| 10.4503 | 126 | 0.6737 | - |
-| 10.9529 | 132 | 0.6499 | 0.9903 |
-| 11.4450 | 138 | 0.6149 | - |
-| 11.9476 | 144 | 0.6185 | 0.9889 |
-| 12.4398 | 150 | 0.5492 | - |
-| **12.9424** | **156** | **0.5595** | **0.9878** |
-| 13.4346 | 162 | 0.5146 | - |
-| 13.9372 | 168 | 0.5097 | 0.9927 |
-| 14.4293 | 174 | 0.4584 | - |
-| 14.9319 | 180 | 0.4746 | 0.9912 |
-| 15.4241 | 186 | 0.4331 | - |
-| 15.9267 | 192 | 0.424 | 1.0016 |
-| 16.4188 | 198 | 0.3946 | - |
-| 16.9215 | 204 | 0.4077 | 1.0002 |
-| 17.4136 | 210 | 0.366 | - |
-| 17.9162 | 216 | 0.3721 | 1.0070 |
-
-* The bold row denotes the saved checkpoint.
+| Epoch  | Step | Training Loss |
+|:------:|:----:|:-------------:|
+| 0.1757 | 300  | 0.9143        |
+| 0.3515 | 600  | 0.8284        |
+| 0.5272 | 900  | 0.8444        |
+| 0.7030 | 1200 | 0.7999        |
+| 0.8787 | 1500 | 0.8901        |
+| 1.0512 | 1800 | 0.8029        |
+| 1.2269 | 2100 | 0.6198        |
+| 1.4027 | 2400 | 0.5837        |
+| 1.5784 | 2700 | 0.603         |
+| 1.7542 | 3000 | 0.5336        |
+| 1.9299 | 3300 | 0.5977        |
+
 
 ### Framework Versions
-- Python: 3.
-- Sentence Transformers: 3.1.
-- Transformers: 4.
-- PyTorch: 2.
-- Accelerate: 0.
-- Datasets:
+- Python: 3.12.3
+- Sentence Transformers: 3.1.0
+- Transformers: 4.42.3
+- PyTorch: 2.3.0+cu121
+- Accelerate: 0.32.1
+- Datasets: 3.0.0
 - Tokenizers: 0.19.1
 
 ## Citation
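The card's loss section corresponds to the standard sentence-transformers recipe: in-batch negatives plus one explicit hard negative per row, scored with cosine similarity scaled by 20. A minimal training sketch under those settings, using the classic `fit` API and toy triples in place of the real FullText / GroundTruthMisconception / PredictMisconception rows (the actual training script is not part of this commit):

```python
from torch.utils.data import DataLoader
from sentence_transformers import InputExample, SentenceTransformer, losses

model = SentenceTransformer("Gurveer05/gte-base-eedi-2024")

# Each example is (anchor, positive, hard negative); toy data only.
train_examples = [
    InputExample(texts=[
        "question text plus predicted misconception",   # FullText
        "the ground-truth misconception",               # GroundTruthMisconception
        "a plausible but wrong misconception",          # PredictMisconception
    ]),
]
loader = DataLoader(train_examples, shuffle=True, batch_size=8)

# scale=20.0 with cosine similarity matches the JSON parameters above.
loss = losses.MultipleNegativesRankingLoss(model, scale=20.0)

model.fit(train_objectives=[(loader, loss)], epochs=2, warmup_steps=100)
```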
config.json
CHANGED
@@ -1,44 +1,32 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "/kaggle/input/eedi-finetuned-bge-public/Eedi-finetuned-bge",
   "architectures": [
-    "
+    "BertModel"
   ],
-  "attention_probs_dropout_prob": 0.
-  "auto_map": {
-    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
-    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
-    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
-    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
-    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
-    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
-    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
-  },
+  "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
+  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size":
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "LABEL_0"
+  },
   "initializer_range": 0.02,
-  "intermediate_size":
+  "intermediate_size": 4096,
+  "label2id": {
+    "LABEL_0": 0
+  },
   "layer_norm_eps": 1e-12,
-  "
-  "
-  "
-  "
-  "model_type": "new",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pack_qkv": true,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
   "pad_token_id": 0,
-  "position_embedding_type": "
-  "rope_scaling": {
-    "factor": 2.0,
-    "type": "ntk"
-  },
-  "rope_theta": 500000,
+  "position_embedding_type": "absolute",
   "torch_dtype": "float32",
   "transformers_version": "4.44.0",
-  "type_vocab_size":
-  "
-  "
-  "vocab_size": 30528
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
 }
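Despite the gte-base name on the repo, the new config is a plain BERT encoder sized like BERT-large (24 layers, 16 heads, hidden size 1024), consistent with the 1.34 GB safetensors file below. A quick post-download sanity check might look like:

```python
from transformers import AutoConfig

# Load the repo's config.json and confirm the values shown in this diff.
cfg = AutoConfig.from_pretrained("Gurveer05/gte-base-eedi-2024")
assert cfg.model_type == "bert"
assert cfg.hidden_size == 1024 and cfg.num_hidden_layers == 24
print(cfg.max_position_embeddings)  # 512
```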
config_sentence_transformers.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "__version__": {
-    "sentence_transformers": "3.
+    "sentence_transformers": "3.2.1",
     "transformers": "4.44.0",
     "pytorch": "2.4.0"
   },
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ce2a1e027d4bd6ed1d80c768f08f44843917e99ed19af1106cdf5237ab5db08d
+size 1340612432
sentence_bert_config.json
CHANGED
@@ -1,4 +1,4 @@
 {
-  "max_seq_length":
-  "do_lower_case":
+  "max_seq_length": 512,
+  "do_lower_case": true
 }
tokenizer.json
CHANGED
@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length":
+    "max_length": 512,
     "strategy": "LongestFirst",
     "stride": 0
   },
tokenizer_config.json
CHANGED
@@ -43,10 +43,12 @@
   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
   "do_lower_case": true,
   "mask_token": "[MASK]",
   "max_length": 512,
-  "model_max_length":
+  "model_max_length": 512,
+  "never_split": null,
   "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
   "pad_token_type_id": 0,
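With sentence_bert_config.json, tokenizer.json, and tokenizer_config.json now all agreeing on a 512-token cap and lowercasing, longer inputs are silently truncated at encode time. A small check (sketch):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Gurveer05/gte-base-eedi-2024")
ids = tok("word " * 1000, truncation=True)["input_ids"]
print(len(ids))           # 512 -- capped at model_max_length
print(tok.do_lower_case)  # True
```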