MAP@25: 0.44064028964485386
Files changed:

- 1_Pooling/config.json +1 -1
- README.md +104 -279
- config.json +20 -32
- config_sentence_transformers.json +1 -1
- model.safetensors +2 -2
- sentence_bert_config.json +2 -2
- tokenizer.json +1 -1
- tokenizer_config.json +3 -1
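The MAP@25 figure in the commit title is the retrieval metric from the Eedi misconception-mapping task this model targets: each question gets up to 25 ranked misconception candidates, and credit is the reciprocal rank at which the single correct one appears. A minimal sketch of that computation (the function name and toy data are illustrative, not from this repo):

```python
import numpy as np

def map_at_25(ranked_predictions, true_ids):
    """MAP@25 with one relevant item per query: the reciprocal of the
    rank at which the true id appears in the top 25, else 0."""
    scores = []
    for preds, true_id in zip(ranked_predictions, true_ids):
        score = 0.0
        for rank, pred in enumerate(preds[:25], start=1):
            if pred == true_id:
                score = 1.0 / rank
                break
        scores.append(score)
    return float(np.mean(scores))

# True id 7 found at rank 2 -> 0.5; true id 9 missing -> 0.0; mean = 0.25
print(map_at_25([[3, 7, 1], [4, 5, 6]], [7, 9]))
```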
1_Pooling/config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "word_embedding_dimension":
+  "word_embedding_dimension": 1024,
   "pooling_mode_cls_token": true,
   "pooling_mode_mean_tokens": false,
   "pooling_mode_max_tokens": false,
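The only edit here fills in the embedding width; CLS-token pooling stays enabled. For readers unfamiliar with the setting, a rough sketch of what CLS pooling does, using plain Hugging Face APIs (the base checkpoint name is a stand-in, not this model's internals):

```python
import torch
from transformers import AutoModel, AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")   # stand-in encoder
enc = AutoModel.from_pretrained("bert-base-uncased")

batch = tok(["An example sentence."], return_tensors="pt")
with torch.no_grad():
    hidden = enc(**batch).last_hidden_state   # (batch, seq_len, hidden)

# pooling_mode_cls_token: true -> the sentence embedding is the hidden
# state of the first ([CLS]) token; mean and max pooling are disabled.
embedding = hidden[:, 0]                      # (batch, hidden)
```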
README.md
CHANGED
@@ -1,5 +1,4 @@
 ---
-base_model: Alibaba-NLP/gte-base-en-v1.5
 library_name: sentence-transformers
 pipeline_tag: sentence-similarity
 tags:
@@ -7,187 +6,73 @@ tags:
 - sentence-similarity
 - feature-extraction
 - generated_from_trainer
-- dataset_size:
+- dataset_size:218496
 - loss:MultipleNegativesRankingLoss
 widget:
-- source_sentence:
-
-
-
-
-  What would be a possible ratio of boys to girls?
-
-
-  Options:
-
-  A. 3: 7
-
-  B. 3: 4
-
-  C. 3: 10
-
-  D. 4: 3
-
-
-  Correct Answer: 3: 4
-
-
-  Incorrect Answer: 4: 3
-
-
-  Predicted Misconception: Confusing fractions with whole number ratios directly.'
+- source_sentence: "when dividing involving a multiple of 10, gives an answer 10 times\
+    \ bigger than it should be\n\ndivide decimals by 10(multiplying and dividing with\
+    \ decimals).\nquestion: 43.2 \\div 10= \ncorrect answer: 4.32 \nincorrect answer:\
+    \ 33.2"
 sentences:
-- Does not
--
-
--
-
-
-
-
-
-
-  Options:
-
-  A. Always true
-
-  B. Sometimes true
-
-  C. Never true
-
-  D. I don''t
-
-  know
-
-
-  Correct Answer: Never true
-
-
-  Incorrect Answer: Always true
-
-
-  Predicted Misconception: Division is commutative, like addition.'
+- Does not recognise that a shape translated would not change orientation
+- Thinks you can find missing values in a given table by treating the row as linear
+  and adding on the difference between the first two values given.
+- Subtracts instead of divides
+- source_sentence: "incorrectly cancels what they believe is a factor in algebraic\
+    \ fractions\n\nsimplify an algebraic fraction by factorising the numerator(simplifying\
+    \ algebraic fractions).\nquestion: simplify the following, if possible: \\frac{m^{2}+2\
+    \ m-3}{m-3} \ncorrect answer: does not simplify\nincorrect answer: m+1"
 sentences:
--
--
--
-
-
-
-
-
-
-
-
-
-  A. 5.0
-
-  B. 0.4
-
-  C. 0.2
-
-  D. 4.20
-
-
-  Correct Answer: 0.2
-
-
-  Incorrect Answer: 5.0
-
-
-  Predicted Misconception: Believing that dividing a number greater than 1 by another
-  number greater than 1 results in a decimal greater than 1.'
+- Does not know units of area should be squared
+- Thinks all lines on a net will form edges in 3D
+- 'Does not know that to factorise a quadratic expression, to find two numbers that
+  add to give the coefficient of the x term, and multiply to give the non variable
+  term
+
+  '
+- source_sentence: "believes that the order of operations does not affect the answer\
+    \ to a calculation\n\nuse the order of operations to carry out calculations involving\
+    \ powers(bidmas).\nquestion: \\[\n3 \\times 2+4-5\n\\]\nwhere do the brackets\
+    \ need to go to make the answer equal 13 ?\ncorrect answer: 3 \\times(2+4)-5 \n\
+    incorrect answer: does not need brackets"
 sentences:
--
-
-
-
--
-
-
-
-
-
-  (
-
-  (a+b / c)=(a+b) / c
-
-  )
-
-  Is the above relationship...
-
-
-  Options:
-
-  A. always true
-
-  B. sometimes true
-
-  C. never true
-
-  D. Need more information
-
-
-  Correct Answer: always true
-
-
-  Incorrect Answer: never true
-
-
-  Predicted Misconception: Belief that division distributes over addition.'
+- Thinks that when you cancel identical terms from the numerator and denominator,
+  they just disappear
+- Believes both the x and y co-ordinates of the x-intercept of a quadratic are derived
+  from the constants in the factorised form.
+- 'Confuses the order of operations, believes addition comes before multiplication '
+- source_sentence: "believes that the order of operations does not affect the answer\
+    \ to a calculation\n\nuse the order of operations to carry out calculations involving\
+    \ powers(bidmas).\nquestion: \\[\n3 \\times 2+4-5\n\\]\nwhere do the brackets\
+    \ need to go to make the answer equal 13 ?\ncorrect answer: 3 \\times(2+4)-5 \n\
+    incorrect answer: does not need brackets"
 sentences:
--
--
--
-
-
-
-
-  Question: A straight line on squared paper. Points P, Q and R lie on this line.
-  The leftmost end of the line is labelled P. If you travel right 4 squares and
-  up 1 square you get to point Q. If you then travel 8 squares right and 2 squares
-  up from Q you reach point R. What is the ratio of P Q: P R ?
-
-
-  Options:
-
-  A. 1: 12
-
-  B. 1: 4
-
-  C. 1: 2
-
-  D. 1: 3
-
-
-  Correct Answer: 1: 3
-
-
-  Incorrect Answer: 1: 2
-
-
-  Predicted Misconception: Misunderstanding the ratio calculation by not considering
-  the correct horizontal and vertical distances between points P, Q, and R.'
+- 'Confuses the order of operations, believes addition comes before multiplication '
+- Does not recognise the properties of a kite
+- 'Confuses the order of operations, believes addition comes before multiplication '
+- source_sentence: "believes percentages cannot be converted into fractions\n\nconvert\
+    \ two digit integer percentages to fractions(converting between fractions and\
+    \ percentages).\nquestion: convert this percentage to a fraction\n 62 \\% \ncorrect\
+    \ answer: \\frac{31}{50} \nincorrect answer: none of these"
 sentences:
--
--
--
+- Believes the gradients of perpendicular lines are reciprocals of the same sign
+- Does not know the properties of a rectangle
+- Does not understand a percentage is out of 100
 ---
 
-# SentenceTransformer
+# SentenceTransformer
 
-This is a [sentence-transformers](https://www.SBERT.net) model
+This is a [sentence-transformers](https://www.SBERT.net) model trained. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
 
 ## Model Details
 
 ### Model Description
 - **Model Type:** Sentence Transformer
-- **Base model:** [
-- **Maximum Sequence Length:**
-- **Output Dimensionality:**
+<!-- - **Base model:** [Unknown](https://huggingface.co/unknown) -->
+- **Maximum Sequence Length:** 512 tokens
+- **Output Dimensionality:** 1024 tokens
 - **Similarity Function:** Cosine Similarity
-- **Training Dataset:**
-  - csv
+<!-- - **Training Dataset:** Unknown -->
 <!-- - **Language:** Unknown -->
 <!-- - **License:** Unknown -->
 
@@ -201,8 +86,8 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [A
 
 ```
 SentenceTransformer(
-  (0): Transformer({'max_seq_length':
-  (1): Pooling({'word_embedding_dimension':
+  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel
+  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
 )
 ```
 
@@ -224,13 +109,13 @@ from sentence_transformers import SentenceTransformer
 model = SentenceTransformer("Gurveer05/gte-base-eedi-2024")
 # Run inference
 sentences = [
-    '
-    '
-    '
+    'believes percentages cannot be converted into fractions\n\nconvert two digit integer percentages to fractions(converting between fractions and percentages).\nquestion: convert this percentage to a fraction\n 62 \\% \ncorrect answer: \\frac{31}{50} \nincorrect answer: none of these',
+    'Does not understand a percentage is out of 100',
+    'Believes the gradients of perpendicular lines are reciprocals of the same sign',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
-# [3,
+# [3, 1024]
 
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
@@ -278,48 +163,22 @@ You can finetune this model on your own dataset.
 
 ### Training Dataset
 
-####
-
-* Dataset: csv
-* Size: 12,210 training samples
-* Columns: <code>qa_pair_text</code>, <code>MisconceptionName</code>, and <code>negative</code>
-* Approximate statistics based on the first 1000 samples:
-  |         | qa_pair_text | MisconceptionName | negative |
-  |:--------|:-------------|:------------------|:---------|
-  | type    | string       | string            | string   |
-  | details | <ul><li>min: 54 tokens</li><li>mean: 124.3 tokens</li><li>max: 618 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 15.16 tokens</li><li>max: 39 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 14.49 tokens</li><li>max: 40 tokens</li></ul> |
-* Samples:
-  | qa_pair_text | MisconceptionName | negative |
-  |:-------------|:------------------|:---------|
-  | <code>Construct: Construct frequency tables.<br><br>Question: Dave has recorded the number of pets his classmates have in the frequency table on the right. <br> Number of pets | Frequency <br> 0 | 4 <br> 1 | 6 <br> 2 | 3 <br> 3 | 2 <br> 4 | 5 <br><br> If Dave wanted to work out the total number of pets own by his classmates, what would be a useful column to include?<br><br>Options:<br>A. Number of pets x Frequency<br>B. Number of pets / Frequency<br>C. Number of pets + Frequency<br>D. Number of pets -<br>Frequency<br><br>Correct Answer: Number of pets x Frequency<br><br>Incorrect Answer: Number of pets + Frequency<br><br>Predicted Misconception: Adding frequency to the number of pets to find total pets.</code> | <code>Adds instead of multiplies when calculating total frequency</code> | <code>Believes the mean is total frequency divided by sum of categories given</code> |
-  | <code>Construct: Convert between any other time periods.<br><br>Question: To work out how many hours in a year you could do...<br><br>Options:<br>A. 365 x 7<br>B. 365 x 60<br>C. 365 x 12<br>D. 365 x 24<br><br>Correct Answer: 365 x 24<br><br>Incorrect Answer: 365 x 60<br><br>Predicted Misconception: Multiplying days by hours per minute instead of hours per day.</code> | <code>Answers as if there are 60 hours in a day</code> | <code>Confuses an equation with an expression</code> |
-  | <code>Construct: Given information about one part, work out other parts.<br><br>Question: Jess and Heena share some sweets in the ratio 3;: 5 .<br>Jess gets 15 sweets.<br>How many sweets does Heena get?<br><br>Options:<br>A. 17<br>B. 9<br>C. 5<br>D. 25<br><br>Correct Answer: 25<br><br>Incorrect Answer: 17<br><br>Predicted Misconception: Misunderstanding the direct proportionality between the ratio and actual quantities.</code> | <code>Thinks a difference of one part in a ratio means the quantities will differ by one unit</code> | <code>Believes dividing two positives will give a negative answer</code> |
-* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
-  ```json
-  {
-      "scale": 20.0,
-      "similarity_fct": "cos_sim"
-  }
-  ```
 
-### Evaluation Dataset
 
-
-
-* Dataset: csv
-* Size: 9,640 evaluation samples
-* Columns: <code>qa_pair_text</code>, <code>MisconceptionName</code>, and <code>negative</code>
+#### Unnamed Dataset
+
+* Size: 218,496 training samples
+* Columns: <code>FullText</code>, <code>GroundTruthMisconception</code>, and <code>PredictMisconception</code>
 * Approximate statistics based on the first 1000 samples:
-  | |
-
-  | type | string
-  | details | <ul><li>min:
+  |         | FullText | GroundTruthMisconception | PredictMisconception |
+  |:--------|:---------|:-------------------------|:---------------------|
+  | type    | string   | string                   | string               |
+  | details | <ul><li>min: 58 tokens</li><li>mean: 95.45 tokens</li><li>max: 159 tokens</li></ul> | <ul><li>min: 8 tokens</li><li>mean: 17.0 tokens</li><li>max: 38 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.27 tokens</li><li>max: 40 tokens</li></ul> |
 * Samples:
-  |
-
-  | <code>
-  | <code>
-  | <code>
+  | FullText | GroundTruthMisconception | PredictMisconception |
+  |:---------|:-------------------------|:---------------------|
+  | <code>believes that the order of operations does not affect the answer to a calculation<br><br>use the order of operations to carry out calculations involving powers(bidmas).<br>question: \[<br>3 \times 2+4-5<br>\]<br>where do the brackets need to go to make the answer equal 13 ?<br>correct answer: 3 \times(2+4)-5 <br>incorrect answer: does not need brackets</code> | <code>Confuses the order of operations, believes addition comes before multiplication </code> | <code>Believes infinite gradient is not possible in real life. </code> |
+  | <code>believes that the order of operations does not affect the answer to a calculation<br><br>use the order of operations to carry out calculations involving powers(bidmas).<br>question: \[<br>3 \times 2+4-5<br>\]<br>where do the brackets need to go to make the answer equal 13 ?<br>correct answer: 3 \times(2+4)-5 <br>incorrect answer: does not need brackets</code> | <code>Confuses the order of operations, believes addition comes before multiplication </code> | <code>Struggles to draw 3D shapes on isometric paper</code> |
  | <code>believes that the order of operations does not affect the answer to a calculation<br><br>use the order of operations to carry out calculations involving powers(bidmas).<br>question: \[<br>3 \times 2+4-5<br>\]<br>where do the brackets need to go to make the answer equal 13 ?<br>correct answer: 3 \times(2+4)-5 <br>incorrect answer: does not need brackets</code> | <code>Confuses the order of operations, believes addition comes before multiplication </code> | <code>Believes an upward slope on a distance-time graph means travelling back towards the starting point.</code> |
 * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
   ```json
   {
@@ -331,20 +190,14 @@ You can finetune this model on your own dataset.
 ### Training Hyperparameters
 #### Non-Default Hyperparameters
 
-- `eval_strategy`: steps
-- `per_device_train_batch_size`: 32
-- `per_device_eval_batch_size`: 32
 - `gradient_accumulation_steps`: 16
-- `
+- `eval_accumulation_steps`: 16
+- `learning_rate`: 2e-05
 - `weight_decay`: 0.01
-- `num_train_epochs`:
-- `lr_scheduler_type`:
-- `lr_scheduler_kwargs`: {'num_cycles': 20}
+- `num_train_epochs`: 2
+- `lr_scheduler_type`: cosine_with_restarts
 - `warmup_ratio`: 0.1
 - `fp16`: True
-- `load_best_model_at_end`: True
-- `gradient_checkpointing`: True
-- `gradient_checkpointing_kwargs`: {'use_reentrant': False}
 - `batch_sampler`: no_duplicates
 
 #### All Hyperparameters
@@ -352,25 +205,24 @@ You can finetune this model on your own dataset.
 
 - `overwrite_output_dir`: False
 - `do_predict`: False
-- `eval_strategy`:
+- `eval_strategy`: no
 - `prediction_loss_only`: True
-- `per_device_train_batch_size`:
-- `per_device_eval_batch_size`:
+- `per_device_train_batch_size`: 8
+- `per_device_eval_batch_size`: 8
 - `per_gpu_train_batch_size`: None
 - `per_gpu_eval_batch_size`: None
 - `gradient_accumulation_steps`: 16
-- `eval_accumulation_steps`:
-- `
-- `learning_rate`: 1e-05
+- `eval_accumulation_steps`: 16
+- `learning_rate`: 2e-05
 - `weight_decay`: 0.01
 - `adam_beta1`: 0.9
 - `adam_beta2`: 0.999
 - `adam_epsilon`: 1e-08
 - `max_grad_norm`: 1.0
-- `num_train_epochs`:
+- `num_train_epochs`: 2
 - `max_steps`: -1
-- `lr_scheduler_type`:
-- `lr_scheduler_kwargs`: {
+- `lr_scheduler_type`: cosine_with_restarts
+- `lr_scheduler_kwargs`: {}
 - `warmup_ratio`: 0.1
 - `warmup_steps`: 0
 - `log_level`: passive
@@ -407,7 +259,7 @@ You can finetune this model on your own dataset.
 - `disable_tqdm`: False
 - `remove_unused_columns`: True
 - `label_names`: None
-- `load_best_model_at_end`:
+- `load_best_model_at_end`: False
 - `ignore_data_skip`: False
 - `fsdp`: []
 - `fsdp_min_num_params`: 0
@@ -434,8 +286,8 @@ You can finetune this model on your own dataset.
 - `hub_strategy`: every_save
 - `hub_private_repo`: False
 - `hub_always_push`: False
-- `gradient_checkpointing`:
-- `gradient_checkpointing_kwargs`:
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
 - `include_inputs_for_metrics`: False
 - `eval_do_concat_batches`: True
 - `fp16_backend`: auto
@@ -458,61 +310,34 @@ You can finetune this model on your own dataset.
 - `optim_target_modules`: None
 - `batch_eval_metrics`: False
 - `eval_on_start`: False
-- `eval_use_gather_object`: False
 - `batch_sampler`: no_duplicates
 - `multi_dataset_batch_sampler`: proportional
 
 </details>
 
 ### Training Logs
-| Epoch
-
-| 0.
-|
-|
-|
-|
-|
-|
-|
-|
-|
-|
-
-| 6.4712 | 78 | 1.0166 | - |
-| 6.9738 | 84 | 1.0095 | 1.0651 |
-| 7.4660 | 90 | 0.8951 | - |
-| 7.9686 | 96 | 0.8782 | 1.0386 |
-| 8.4607 | 102 | 0.8305 | - |
-| 8.9634 | 108 | 0.809 | 1.0174 |
-| 9.4555 | 114 | 0.7202 | - |
-| 9.9581 | 120 | 0.7403 | 1.0041 |
-| 10.4503 | 126 | 0.6737 | - |
-| 10.9529 | 132 | 0.6499 | 0.9903 |
-| 11.4450 | 138 | 0.6149 | - |
-| 11.9476 | 144 | 0.6185 | 0.9889 |
-| 12.4398 | 150 | 0.5492 | - |
-| **12.9424** | **156** | **0.5595** | **0.9878** |
-| 13.4346 | 162 | 0.5146 | - |
-| 13.9372 | 168 | 0.5097 | 0.9927 |
-| 14.4293 | 174 | 0.4584 | - |
-| 14.9319 | 180 | 0.4746 | 0.9912 |
-| 15.4241 | 186 | 0.4331 | - |
-| 15.9267 | 192 | 0.424 | 1.0016 |
-| 16.4188 | 198 | 0.3946 | - |
-| 16.9215 | 204 | 0.4077 | 1.0002 |
-| 17.4136 | 210 | 0.366 | - |
-| 17.9162 | 216 | 0.3721 | 1.0070 |
-
-* The bold row denotes the saved checkpoint.
+| Epoch  | Step | Training Loss |
+|:------:|:----:|:-------------:|
+| 0.1757 | 300  | 0.9143        |
+| 0.3515 | 600  | 0.8284        |
+| 0.5272 | 900  | 0.8444        |
+| 0.7030 | 1200 | 0.7999        |
+| 0.8787 | 1500 | 0.8901        |
+| 1.0512 | 1800 | 0.8029        |
+| 1.2269 | 2100 | 0.6198        |
+| 1.4027 | 2400 | 0.5837        |
+| 1.5784 | 2700 | 0.603         |
+| 1.7542 | 3000 | 0.5336        |
+| 1.9299 | 3300 | 0.5977        |
+
 
 ### Framework Versions
-- Python: 3.
-- Sentence Transformers: 3.1.
-- Transformers: 4.
-- PyTorch: 2.
-- Accelerate: 0.
-- Datasets:
+- Python: 3.12.3
+- Sentence Transformers: 3.1.0
+- Transformers: 4.42.3
+- PyTorch: 2.3.0+cu121
+- Accelerate: 0.32.1
+- Datasets: 3.0.0
 - Tokenizers: 0.19.1
 
 ## Citation
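The card's loss section corresponds to the standard sentence-transformers recipe: in-batch negatives plus one explicit hard negative per row, scored with cosine similarity scaled by 20. A minimal training sketch under those settings, using the classic `fit` API and toy triples in place of the real FullText / GroundTruthMisconception / PredictMisconception rows (the actual training script is not part of this commit):

```python
from torch.utils.data import DataLoader
from sentence_transformers import InputExample, SentenceTransformer, losses

model = SentenceTransformer("Gurveer05/gte-base-eedi-2024")

# Each example is (anchor, positive, hard negative); toy data only.
train_examples = [
    InputExample(texts=[
        "question text plus predicted misconception",   # FullText
        "the ground-truth misconception",               # GroundTruthMisconception
        "a plausible but wrong misconception",          # PredictMisconception
    ]),
]
loader = DataLoader(train_examples, shuffle=True, batch_size=8)

# scale=20.0 with cosine similarity matches the JSON parameters above.
loss = losses.MultipleNegativesRankingLoss(model, scale=20.0)

model.fit(train_objectives=[(loader, loss)], epochs=2, warmup_steps=100)
```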
config.json
CHANGED
@@ -1,44 +1,32 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "/kaggle/input/eedi-finetuned-bge-public/Eedi-finetuned-bge",
   "architectures": [
-    "
+    "BertModel"
   ],
-  "attention_probs_dropout_prob": 0.
-  "auto_map": {
-    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
-    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
-    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
-    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
-    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
-    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
-    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
-  },
+  "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
+  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size":
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "LABEL_0"
+  },
   "initializer_range": 0.02,
-  "intermediate_size":
+  "intermediate_size": 4096,
+  "label2id": {
+    "LABEL_0": 0
+  },
   "layer_norm_eps": 1e-12,
-  "
-  "
-  "
-  "
-  "model_type": "new",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pack_qkv": true,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
   "pad_token_id": 0,
-  "position_embedding_type": "
-  "rope_scaling": {
-    "factor": 2.0,
-    "type": "ntk"
-  },
-  "rope_theta": 500000,
+  "position_embedding_type": "absolute",
   "torch_dtype": "float32",
   "transformers_version": "4.44.0",
-  "type_vocab_size":
-  "
-  "
-  "vocab_size": 30528
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
 }
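Despite the gte-base name on the repo, the new config is a plain BERT encoder sized like BERT-large (24 layers, 16 heads, hidden size 1024), consistent with the 1.34 GB safetensors file below. A quick post-download sanity check might look like:

```python
from transformers import AutoConfig

# Load the repo's config.json and confirm the values shown in this diff.
cfg = AutoConfig.from_pretrained("Gurveer05/gte-base-eedi-2024")
assert cfg.model_type == "bert"
assert cfg.hidden_size == 1024 and cfg.num_hidden_layers == 24
print(cfg.max_position_embeddings)  # 512
```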
config_sentence_transformers.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "__version__": {
-    "sentence_transformers": "3.
+    "sentence_transformers": "3.2.1",
     "transformers": "4.44.0",
     "pytorch": "2.4.0"
   },
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ce2a1e027d4bd6ed1d80c768f08f44843917e99ed19af1106cdf5237ab5db08d
+size 1340612432
sentence_bert_config.json
CHANGED
@@ -1,4 +1,4 @@
 {
-  "max_seq_length":
-  "do_lower_case":
+  "max_seq_length": 512,
+  "do_lower_case": true
 }
tokenizer.json
CHANGED
@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length":
+    "max_length": 512,
     "strategy": "LongestFirst",
     "stride": 0
   },
tokenizer_config.json
CHANGED
@@ -43,10 +43,12 @@
   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
   "do_lower_case": true,
   "mask_token": "[MASK]",
   "max_length": 512,
-  "model_max_length":
+  "model_max_length": 512,
+  "never_split": null,
   "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
   "pad_token_type_id": 0,
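With sentence_bert_config.json, tokenizer.json, and tokenizer_config.json now all agreeing on a 512-token cap and lowercasing, longer inputs are silently truncated at encode time. A small check (sketch):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Gurveer05/gte-base-eedi-2024")
ids = tok("word " * 1000, truncation=True)["input_ids"]
print(len(ids))           # 512 -- capped at model_max_length
print(tok.do_lower_case)  # True
```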