langdonholmes committed on
Commit
8ac50d2
1 Parent(s): 6ecf7b0

Update spaCy pipeline

Browse files
.ipynb_checkpoints/meta-checkpoint.json CHANGED
@@ -2,13 +2,13 @@
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
- "spacy_version":">=3.3.1,<3.4.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
9
  "url":"",
10
  "license":"",
11
- "spacy_git_version":"5fb597f77",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -36,18 +36,18 @@
36
 
37
  ],
38
  "performance":{
39
- "ents_f":0.7145877378,
40
- "ents_p":0.6353383459,
41
- "ents_r":0.8164251208,
42
  "ents_per_type":{
43
  "STUDENT":{
44
- "p":0.6353383459,
45
- "r":0.8164251208,
46
- "f":0.7145877378
47
  }
48
  },
49
- "speed":21016.7125076686,
50
- "transformer_loss":264.9285582366,
51
- "ner_loss":160.91844294
52
  }
53
  }
 
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
+ "spacy_version":">=3.4.1,<3.5.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
9
  "url":"",
10
  "license":"",
11
+ "spacy_git_version":"5c2a00cef",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
36
 
37
  ],
38
  "performance":{
39
+ "ents_f":0.8366013072,
40
+ "ents_p":0.8311688312,
41
+ "ents_r":0.8421052632,
42
  "ents_per_type":{
43
  "STUDENT":{
44
+ "p":0.8311688312,
45
+ "r":0.8421052632,
46
+ "f":0.8366013072
47
  }
48
  },
49
+ "speed":22300.9340042652,
50
+ "transformer_loss":562550.263513213,
51
+ "ner_loss":311.5489451973
52
  }
53
  }
README.md CHANGED
@@ -5,7 +5,7 @@ tags:
5
  language:
6
  - en
7
  model-index:
8
- - name: en-student-name-detector
9
  results:
10
  - task:
11
  name: NER
@@ -18,6 +18,39 @@ model-index:
18
  type: recall
19
  value: 0.8421052632
20
  - name: NER F Score
21
- type: f-score
22
  value: 0.8366013072
23
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  language:
6
  - en
7
  model-index:
8
+ - name: en_student_name_detector
9
  results:
10
  - task:
11
  name: NER
 
18
  type: recall
19
  value: 0.8421052632
20
  - name: NER F Score
21
+ type: f_score
22
  value: 0.8366013072
23
  ---
24
+ | Feature | Description |
25
+ | --- | --- |
26
+ | **Name** | `en_student_name_detector` |
27
+ | **Version** | `0.0.1` |
28
+ | **spaCy** | `>=3.4.1,<3.5.0` |
29
+ | **Default Pipeline** | `transformer`, `ner` |
30
+ | **Components** | `transformer`, `ner` |
31
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
32
+ | **Sources** | n/a |
33
+ | **License** | n/a |
34
+ | **Author** | n/a |
35
+
36
+ ### Label Scheme
37
+
38
+ <details>
39
+
40
+ <summary>View label scheme (1 label for 1 component)</summary>
41
+
42
+ | Component | Labels |
43
+ | --- | --- |
44
+ | **`ner`** | `STUDENT` |
45
+
46
+ </details>
47
+
48
+ ### Accuracy
49
+
50
+ | Type | Score |
51
+ | --- | --- |
52
+ | `ENTS_F` | 83.66 |
53
+ | `ENTS_P` | 83.12 |
54
+ | `ENTS_R` | 84.21 |
55
+ | `TRANSFORMER_LOSS` | 562550.26 |
56
+ | `NER_LOSS` | 311.55 |
config.cfg CHANGED
@@ -31,7 +31,7 @@ update_with_oracle_cut_size = 100
31
  @architectures = "spacy.TransitionBasedParser.v2"
32
  state_type = "ner"
33
  extra_state_tokens = true
34
- hidden_width = 128
35
  maxout_pieces = 2
36
  use_upper = false
37
  nO = null
@@ -49,13 +49,9 @@ set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotati
49
 
50
  [components.transformer.model]
51
  @architectures = "spacy-transformers.TransformerModel.v3"
52
- name = "allenai/longformer-base-4096"
53
  mixed_precision = false
54
-
55
- [components.transformer.model.get_spans]
56
- @span_getters = "spacy-transformers.strided_spans.v1"
57
- window = 4096
58
- stride = 2048
59
 
60
  [components.transformer.model.grad_scaler_config]
61
 
@@ -105,13 +101,8 @@ buffer = 256
105
  get_length = null
106
 
107
  [training.logger]
108
- @loggers = "spacy.WandbLogger.v3"
109
- project_name = "monitor_spacy_training"
110
- remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
111
- log_dataset_dir = "corpus"
112
- model_log_interval = 1000
113
- entity = null
114
- run_name = null
115
 
116
  [training.optimizer]
117
  @optimizers = "Adam.v1"
 
31
  @architectures = "spacy.TransitionBasedParser.v2"
32
  state_type = "ner"
33
  extra_state_tokens = true
34
+ hidden_width = 64
35
  maxout_pieces = 2
36
  use_upper = false
37
  nO = null
 
49
 
50
  [components.transformer.model]
51
  @architectures = "spacy-transformers.TransformerModel.v3"
52
+ name = "../2022_06_05_deidentification_of_student_writing_in_moocs/bin/results/v1"
53
  mixed_precision = false
54
+ get_spans = {"@span_getters":"spacy-transformers.doc_spans.v1"}
 
 
 
 
55
 
56
  [components.transformer.model.grad_scaler_config]
57
 
 
101
  get_length = null
102
 
103
  [training.logger]
104
+ @loggers = "spacy.ConsoleLogger.v1"
105
+ progress_bar = false
 
 
 
 
 
106
 
107
  [training.optimizer]
108
  @optimizers = "Adam.v1"
en_student_name_detector-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7471fd88a60cd66107c1a0a13589ca36f88458c385edae26957454d19eb8c42
3
- size 521550516
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e81239b4aa7def59db35c9e1dc5e2fbceb016b47065349ff469b3f01fb8b2cda
3
+ size 525251536
ner/model CHANGED
Binary files a/ner/model and b/ner/model differ
 
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves�l{"0":{},"1":{"STUDENT":6982},"2":{"STUDENT":6982},"3":{"STUDENT":6982},"4":{"STUDENT":6982,"":1},"5":{"":1}}�cfg��neg_key�
 
1
+ ��moves�l{"0":{},"1":{"STUDENT":1663},"2":{"STUDENT":1663},"3":{"STUDENT":1663},"4":{"STUDENT":1663,"":1},"5":{"":1}}�cfg��neg_key�
tokenizer CHANGED
The diff for this file is too large to render. See raw diff
 
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9fe8dcc988f1b1ed32ef9459967839727264ba8fa14eb025fa2e256b29d9509
3
- size 598140907
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8208b2db8c6fb3c3eecf7e77a110a905a01b4a11e9735002977bc2aa6e5f5af2
3
+ size 598141086
vocab/strings.json CHANGED
The diff for this file is too large to render. See raw diff