yaniseuranova committed on
Commit
1e9ec87
1 Parent(s): 77e267e

Add SetFit model

Browse files
1_Pooling/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "word_embedding_dimension": 1024,
3
- "pooling_mode_cls_token": true,
4
- "pooling_mode_mean_tokens": false,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
 
1
  {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
README.md CHANGED
@@ -5,7 +5,7 @@ tags:
5
  - sentence-transformers
6
  - text-classification
7
  - generated_from_setfit_trainer
8
- base_model: BAAI/bge-m3
9
  metrics:
10
  - accuracy
11
  widget:
@@ -22,7 +22,7 @@ widget:
22
  pipeline_tag: text-classification
23
  inference: true
24
  model-index:
25
- - name: SetFit with BAAI/bge-m3
26
  results:
27
  - task:
28
  type: text-classification
@@ -37,9 +37,9 @@ model-index:
37
  name: Accuracy
38
  ---
39
 
40
- # SetFit with BAAI/bge-m3
41
 
42
- This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
43
 
44
  The model has been trained using an efficient few-shot learning technique that involves:
45
 
@@ -50,9 +50,9 @@ The model has been trained using an efficient few-shot learning technique that i
50
 
51
  ### Model Description
52
  - **Model Type:** SetFit
53
- - **Sentence Transformer body:** [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3)
54
  - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
55
- - **Maximum Sequence Length:** 8192 tokens
56
  - **Number of Classes:** 2 classes
57
  <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
58
  <!-- - **Language:** Unknown -->
@@ -152,41 +152,41 @@ preds = model("What distinguishes a transforming industry from one that merely i
152
  - load_best_model_at_end: True
153
 
154
  ### Training Results
155
- | Epoch | Step | Training Loss | Validation Loss |
156
- |:-------:|:-------:|:-------------:|:---------------:|
157
- | 0.0021 | 1 | 0.2141 | - |
158
- | 0.1033 | 50 | 0.0725 | - |
159
- | 0.2066 | 100 | 0.0013 | - |
160
- | 0.3099 | 150 | 0.0007 | - |
161
- | 0.4132 | 200 | 0.0007 | - |
162
- | 0.5165 | 250 | 0.0005 | - |
163
- | 0.6198 | 300 | 0.0005 | - |
164
- | 0.7231 | 350 | 0.0004 | - |
165
- | 0.8264 | 400 | 0.0005 | - |
166
- | 0.9298 | 450 | 0.0003 | - |
167
- | **1.0** | **484** | **-** | **0.0** |
168
- | 1.0331 | 500 | 0.0004 | - |
169
- | 1.1364 | 550 | 0.0003 | - |
170
- | 1.2397 | 600 | 0.0004 | - |
171
- | 1.3430 | 650 | 0.0003 | - |
172
- | 1.4463 | 700 | 0.0003 | - |
173
- | 1.5496 | 750 | 0.0002 | - |
174
- | 1.6529 | 800 | 0.0003 | - |
175
- | 1.7562 | 850 | 0.0002 | - |
176
- | 1.8595 | 900 | 0.0002 | - |
177
- | 1.9628 | 950 | 0.0002 | - |
178
- | 2.0 | 968 | - | 0.0 |
179
- | 2.0661 | 1000 | 0.0002 | - |
180
- | 2.1694 | 1050 | 0.0003 | - |
181
- | 2.2727 | 1100 | 0.0002 | - |
182
- | 2.3760 | 1150 | 0.0002 | - |
183
- | 2.4793 | 1200 | 0.0002 | - |
184
- | 2.5826 | 1250 | 0.0002 | - |
185
- | 2.6860 | 1300 | 0.0002 | - |
186
- | 2.7893 | 1350 | 0.0002 | - |
187
- | 2.8926 | 1400 | 0.0002 | - |
188
- | 2.9959 | 1450 | 0.0002 | - |
189
- | 3.0 | 1452 | - | 0.0 |
190
 
191
  * The bold row denotes the saved checkpoint.
192
  ### Framework Versions
 
5
  - sentence-transformers
6
  - text-classification
7
  - generated_from_setfit_trainer
8
+ base_model: sentence-transformers/all-mpnet-base-v2
9
  metrics:
10
  - accuracy
11
  widget:
 
22
  pipeline_tag: text-classification
23
  inference: true
24
  model-index:
25
+ - name: SetFit with sentence-transformers/all-mpnet-base-v2
26
  results:
27
  - task:
28
  type: text-classification
 
37
  name: Accuracy
38
  ---
39
 
40
+ # SetFit with sentence-transformers/all-mpnet-base-v2
41
 
42
+ This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
43
 
44
  The model has been trained using an efficient few-shot learning technique that involves:
45
 
 
50
 
51
  ### Model Description
52
  - **Model Type:** SetFit
53
+ - **Sentence Transformer body:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2)
54
  - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
55
+ - **Maximum Sequence Length:** 384 tokens
56
  - **Number of Classes:** 2 classes
57
  <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
58
  <!-- - **Language:** Unknown -->
 
152
  - load_best_model_at_end: True
153
 
154
  ### Training Results
155
+ | Epoch | Step | Training Loss | Validation Loss |
156
+ |:-------:|:--------:|:-------------:|:---------------:|
157
+ | 0.0021 | 1 | 0.301 | - |
158
+ | 0.1033 | 50 | 0.1244 | - |
159
+ | 0.2066 | 100 | 0.0021 | - |
160
+ | 0.3099 | 150 | 0.0006 | - |
161
+ | 0.4132 | 200 | 0.0002 | - |
162
+ | 0.5165 | 250 | 0.0002 | - |
163
+ | 0.6198 | 300 | 0.0001 | - |
164
+ | 0.7231 | 350 | 0.0001 | - |
165
+ | 0.8264 | 400 | 0.0001 | - |
166
+ | 0.9298 | 450 | 0.0001 | - |
167
+ | 1.0 | 484 | - | 0.0001 |
168
+ | 1.0331 | 500 | 0.0001 | - |
169
+ | 1.1364 | 550 | 0.0001 | - |
170
+ | 1.2397 | 600 | 0.0001 | - |
171
+ | 1.3430 | 650 | 0.0 | - |
172
+ | 1.4463 | 700 | 0.0001 | - |
173
+ | 1.5496 | 750 | 0.0001 | - |
174
+ | 1.6529 | 800 | 0.0001 | - |
175
+ | 1.7562 | 850 | 0.0001 | - |
176
+ | 1.8595 | 900 | 0.0 | - |
177
+ | 1.9628 | 950 | 0.0 | - |
178
+ | 2.0 | 968 | - | 0.0001 |
179
+ | 2.0661 | 1000 | 0.0001 | - |
180
+ | 2.1694 | 1050 | 0.0001 | - |
181
+ | 2.2727 | 1100 | 0.0 | - |
182
+ | 2.3760 | 1150 | 0.0 | - |
183
+ | 2.4793 | 1200 | 0.0 | - |
184
+ | 2.5826 | 1250 | 0.0 | - |
185
+ | 2.6860 | 1300 | 0.0001 | - |
186
+ | 2.7893 | 1350 | 0.0 | - |
187
+ | 2.8926 | 1400 | 0.0001 | - |
188
+ | 2.9959 | 1450 | 0.0 | - |
189
+ | **3.0** | **1452** | **-** | **0.0001** |
190
 
191
  * The bold row denotes the saved checkpoint.
192
  ### Framework Versions
config.json CHANGED
@@ -1,28 +1,24 @@
1
  {
2
- "_name_or_path": "checkpoints/step_484",
3
  "architectures": [
4
- "XLMRobertaModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": 0,
8
- "classifier_dropout": null,
9
  "eos_token_id": 2,
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
- "hidden_size": 1024,
13
  "initializer_range": 0.02,
14
- "intermediate_size": 4096,
15
  "layer_norm_eps": 1e-05,
16
- "max_position_embeddings": 8194,
17
- "model_type": "xlm-roberta",
18
- "num_attention_heads": 16,
19
- "num_hidden_layers": 24,
20
- "output_past": true,
21
  "pad_token_id": 1,
22
- "position_embedding_type": "absolute",
23
  "torch_dtype": "float32",
24
  "transformers_version": "4.39.0",
25
- "type_vocab_size": 1,
26
- "use_cache": true,
27
- "vocab_size": 250002
28
  }
 
1
  {
2
+ "_name_or_path": "checkpoints/step_1452",
3
  "architectures": [
4
+ "MPNetModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": 0,
 
8
  "eos_token_id": 2,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
  "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
  "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "mpnet",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
 
19
  "pad_token_id": 1,
20
+ "relative_attention_num_buckets": 32,
21
  "torch_dtype": "float32",
22
  "transformers_version": "4.39.0",
23
+ "vocab_size": 30527
 
 
24
  }
config_sentence_transformers.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "__version__": {
3
- "sentence_transformers": "2.2.2",
4
- "transformers": "4.33.0",
5
- "pytorch": "2.1.2+cu121"
6
  },
7
  "prompts": {},
8
  "default_prompt_name": null
 
1
  {
2
  "__version__": {
3
+ "sentence_transformers": "2.0.0",
4
+ "transformers": "4.6.1",
5
+ "pytorch": "1.8.1"
6
  },
7
  "prompts": {},
8
  "default_prompt_name": null
config_setfit.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "normalize_embeddings": false,
3
  "labels": [
4
  "lexical",
5
  "semantic"
6
- ]
 
7
  }
 
1
  {
 
2
  "labels": [
3
  "lexical",
4
  "semantic"
5
+ ],
6
+ "normalize_embeddings": false
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b33180d678dcfc0fdadea9d0349b233782e46cce322e45bd85d8323898d9fdb5
3
- size 2271064456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3aecb73df1f4899ecef5a4732119cf7319250902ae704c7a28e0ffc99a99eda
3
+ size 437967672
model_head.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52b6eb505197312d7321f12c0fc66b1e083991737f8ff6fedb5dd3d4e33695f0
3
- size 9087
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03051f677d184f53e2f1ee6703c4aba2d3bf9688700c5baa2bcd55b8790363d4
3
+ size 7039
sentence_bert_config.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "max_seq_length": 8192,
3
  "do_lower_case": false
4
  }
 
1
  {
2
+ "max_seq_length": 384,
3
  "do_lower_case": false
4
  }
special_tokens_map.json CHANGED
@@ -42,7 +42,7 @@
42
  "single_word": false
43
  },
44
  "unk_token": {
45
- "content": "<unk>",
46
  "lstrip": false,
47
  "normalized": false,
48
  "rstrip": false,
 
42
  "single_word": false
43
  },
44
  "unk_token": {
45
+ "content": "[UNK]",
46
  "lstrip": false,
47
  "normalized": false,
48
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1af481bd08ed9347cf9d3d07c24e5de75a10983819de076436400609e6705686
3
- size 17083075
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46fb1f735006f52c1f9744bb05f7bc1544ec8475955af30396212c7737558d1e
3
+ size 710932
tokenizer_config.json CHANGED
@@ -27,12 +27,20 @@
27
  "3": {
28
  "content": "<unk>",
29
  "lstrip": false,
 
 
 
 
 
 
 
 
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
- "250001": {
36
  "content": "<mask>",
37
  "lstrip": true,
38
  "normalized": false,
@@ -44,19 +52,21 @@
44
  "bos_token": "<s>",
45
  "clean_up_tokenization_spaces": true,
46
  "cls_token": "<s>",
 
47
  "eos_token": "</s>",
48
  "mask_token": "<mask>",
49
- "max_length": 8192,
50
- "model_max_length": 8192,
51
  "pad_to_multiple_of": null,
52
  "pad_token": "<pad>",
53
  "pad_token_type_id": 0,
54
  "padding_side": "right",
55
  "sep_token": "</s>",
56
- "sp_model_kwargs": {},
57
  "stride": 0,
58
- "tokenizer_class": "XLMRobertaTokenizer",
 
 
59
  "truncation_side": "right",
60
  "truncation_strategy": "longest_first",
61
- "unk_token": "<unk>"
62
  }
 
27
  "3": {
28
  "content": "<unk>",
29
  "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
  },
43
+ "30526": {
44
  "content": "<mask>",
45
  "lstrip": true,
46
  "normalized": false,
 
52
  "bos_token": "<s>",
53
  "clean_up_tokenization_spaces": true,
54
  "cls_token": "<s>",
55
+ "do_lower_case": true,
56
  "eos_token": "</s>",
57
  "mask_token": "<mask>",
58
+ "max_length": 128,
59
+ "model_max_length": 512,
60
  "pad_to_multiple_of": null,
61
  "pad_token": "<pad>",
62
  "pad_token_type_id": 0,
63
  "padding_side": "right",
64
  "sep_token": "</s>",
 
65
  "stride": 0,
66
+ "strip_accents": null,
67
+ "tokenize_chinese_chars": true,
68
+ "tokenizer_class": "MPNetTokenizer",
69
  "truncation_side": "right",
70
  "truncation_strategy": "longest_first",
71
+ "unk_token": "[UNK]"
72
  }