SeungHeeKim committed on
Commit
3df2dce
1 Parent(s): 356a09a

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ base_model: microsoft/deberta-v3-large
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - pearsonr
8
+ - spearmanr
9
+ model-index:
10
+ - name: enc_cross_encoder__lr_7e-6__wd_0.1__trans_False__obj_mse__tri_None__s_42
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # enc_cross_encoder__lr_7e-6__wd_0.1__trans_False__obj_mse__tri_None__s_42
18
+
19
+ This model is a fine-tuned version of [microsoft/deberta-v3-large](https://huggingface.co/microsoft/deberta-v3-large) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.0917
22
+ - Mse: 0.0917
23
+ - Pearsonr: 0.4893
24
+ - Spearmanr: 0.4924
25
+
26
+ ## Model description
27
+
28
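+ A cross-encoder (`CrossEncoderForClassification`) with a single output label, fine-tuned from DeBERTa-v3-large with a mean-squared-error (`mse`) objective. More information needed.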
29
+
30
+ ## Intended uses & limitations
31
+
32
+ More information needed
33
+
34
+ ## Training and evaluation data
35
+
36
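+ The dataset name was not recorded. The run used 11,342 training samples and 2,834 evaluation samples. More information needed.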
37
+
38
+ ## Training procedure
39
+
40
+ ### Training hyperparameters
41
+
42
+ The following hyperparameters were used during training:
43
+ - learning_rate: 7e-06
44
+ - train_batch_size: 8
45
+ - eval_batch_size: 8
46
+ - seed: 42
47
+ - gradient_accumulation_steps: 4
48
+ - total_train_batch_size: 32
49
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
50
+ - lr_scheduler_type: linear
51
+ - lr_scheduler_warmup_ratio: 0.1
52
+ - num_epochs: 3.0
53
+
54
+ ### Training results
55
+
56
+ | Training Loss | Epoch | Step | Validation Loss | Mse | Pearsonr | Spearmanr |
57
+ |:-------------:|:-----:|:----:|:---------------:|:------:|:--------:|:---------:|
58
+ | No log | 1.0 | 354 | 0.0982 | 0.0982 | 0.3975 | 0.4064 |
59
+ | 0.1166 | 2.0 | 709 | 0.0880 | 0.0880 | 0.4867 | 0.4889 |
60
+ | 0.0903 | 3.0 | 1062 | 0.0917 | 0.0917 | 0.4893 | 0.4924 |
61
+
62
+
63
+ ### Framework versions
64
+
65
+ - Transformers 4.37.2
66
+ - Pytorch 2.0.1
67
+ - Datasets 2.19.1
68
+ - Tokenizers 0.15.2
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.09168503433465958,
4
+ "eval_mse": 0.09168502688407898,
5
+ "eval_pearsonr": 0.48928741659848096,
6
+ "eval_runtime": 16.8465,
7
+ "eval_samples": 2834,
8
+ "eval_samples_per_second": 168.225,
9
+ "eval_spearmanr": 0.4923729823700353,
10
+ "eval_steps_per_second": 21.073,
11
+ "train_loss": 0.10231328010559082,
12
+ "train_runtime": 760.799,
13
+ "train_samples": 11342,
14
+ "train_samples_per_second": 44.724,
15
+ "train_steps_per_second": 1.396
16
+ }
config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-large",
3
+ "architectures": [
4
+ "CrossEncoderForClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "cache_dir": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 1024,
11
+ "id2label": {
12
+ "0": "LABEL_0"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 4096,
16
+ "label2id": {
17
+ "LABEL_0": 0
18
+ },
19
+ "layer_norm_eps": 1e-07,
20
+ "max_position_embeddings": 512,
21
+ "max_relative_positions": -1,
22
+ "model_name_or_path": "microsoft/deberta-v3-large",
23
+ "model_revision": "main",
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 16,
27
+ "num_hidden_layers": 24,
28
+ "objective": "mse",
29
+ "pad_token_id": 0,
30
+ "pooler_dropout": 0,
31
+ "pooler_hidden_act": "gelu",
32
+ "pooler_hidden_size": 1024,
33
+ "pooler_type": "cls",
34
+ "pos_att_type": [
35
+ "p2c",
36
+ "c2p"
37
+ ],
38
+ "position_biased_input": false,
39
+ "position_buckets": 256,
40
+ "relative_attention": true,
41
+ "share_att_key": true,
42
+ "torch_dtype": "float32",
43
+ "transform": false,
44
+ "transformers_version": "4.37.2",
45
+ "triencoder_head": "None",
46
+ "type_vocab_size": 0,
47
+ "use_auth_token": false,
48
+ "vocab_size": 128100
49
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.09168503433465958,
4
+ "eval_mse": 0.09168502688407898,
5
+ "eval_pearsonr": 0.48928741659848096,
6
+ "eval_runtime": 16.8465,
7
+ "eval_samples": 2834,
8
+ "eval_samples_per_second": 168.225,
9
+ "eval_spearmanr": 0.4923729823700353,
10
+ "eval_steps_per_second": 21.073
11
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46f875a6a62b9880d89b1f86157a77563c13432438b480a10bb0ae72f30d6add
3
+ size 1740302700
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
test_predictions.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "sp_model_kwargs": {},
54
+ "split_by_punct": false,
55
+ "tokenizer_class": "DebertaV2Tokenizer",
56
+ "unk_token": "[UNK]",
57
+ "vocab_type": "spm"
58
+ }
train_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.09168503433465958,
4
+ "eval_mse": 0.09168502688407898,
5
+ "eval_pearsonr": 0.48928741659848096,
6
+ "eval_runtime": 16.8465,
7
+ "eval_samples": 2834,
8
+ "eval_samples_per_second": 168.225,
9
+ "eval_spearmanr": 0.4923729823700353,
10
+ "eval_steps_per_second": 21.073
11
+ }
trainer_state.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.9957686882933707,
5
+ "eval_steps": 500,
6
+ "global_step": 1062,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_loss": 0.09817185997962952,
14
+ "eval_mse": 0.09817185997962952,
15
+ "eval_pearsonr": 0.39754295469203044,
16
+ "eval_runtime": 16.9607,
17
+ "eval_samples_per_second": 167.092,
18
+ "eval_spearmanr": 0.40639190714439083,
19
+ "eval_steps_per_second": 20.931,
20
+ "step": 354
21
+ },
22
+ {
23
+ "epoch": 1.41,
24
+ "learning_rate": 4.119371727748691e-06,
25
+ "loss": 0.1166,
26
+ "step": 500
27
+ },
28
+ {
29
+ "epoch": 2.0,
30
+ "eval_loss": 0.08804947137832642,
31
+ "eval_mse": 0.08804947882890701,
32
+ "eval_pearsonr": 0.48670442010425546,
33
+ "eval_runtime": 16.8904,
34
+ "eval_samples_per_second": 167.788,
35
+ "eval_spearmanr": 0.48893639844454545,
36
+ "eval_steps_per_second": 21.018,
37
+ "step": 709
38
+ },
39
+ {
40
+ "epoch": 2.82,
41
+ "learning_rate": 4.5445026178010473e-07,
42
+ "loss": 0.0903,
43
+ "step": 1000
44
+ },
45
+ {
46
+ "epoch": 3.0,
47
+ "eval_loss": 0.09168503433465958,
48
+ "eval_mse": 0.09168502688407898,
49
+ "eval_pearsonr": 0.48928741659848096,
50
+ "eval_runtime": 16.7916,
51
+ "eval_samples_per_second": 168.774,
52
+ "eval_spearmanr": 0.4923729823700353,
53
+ "eval_steps_per_second": 21.141,
54
+ "step": 1062
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "step": 1062,
59
+ "total_flos": 3498948102716928.0,
60
+ "train_loss": 0.10231328010559082,
61
+ "train_runtime": 760.799,
62
+ "train_samples_per_second": 44.724,
63
+ "train_steps_per_second": 1.396
64
+ }
65
+ ],
66
+ "logging_steps": 500,
67
+ "max_steps": 1062,
68
+ "num_input_tokens_seen": 0,
69
+ "num_train_epochs": 3,
70
+ "save_steps": 500,
71
+ "total_flos": 3498948102716928.0,
72
+ "train_batch_size": 8,
73
+ "trial_name": null,
74
+ "trial_params": null
75
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a88a741293877e35cd713b4bc437a6b7c20ebccab68a4ec55117c1136e100f2
3
+ size 4411