KoichiYasuoka
committed on
Commit
•
8385fd2
1
Parent(s):
4dfc957
model improved for transformers 4.42
Browse files
- config.json +1 -12
- maker.sh +1 -37
- pytorch_model-00001-of-00006.bin +1 -1
- pytorch_model-00002-of-00006.bin +1 -1
- pytorch_model-00003-of-00006.bin +1 -1
- pytorch_model-00004-of-00006.bin +1 -1
- pytorch_model-00005-of-00006.bin +1 -1
- pytorch_model-00006-of-00006.bin +1 -1
- pytorch_model.bin.index.json +3 -3
- tokenizer_config.json +1 -0
- upos.py +1 -40
config.json
CHANGED
@@ -3,22 +3,11 @@
|
|
3 |
"MistralForTokenClassification"
|
4 |
],
|
5 |
"attention_dropout": 0.0,
|
6 |
-
"auto_map": {
|
7 |
-
"AutoModelForTokenClassification": "upos.MistralForTokenClassification"
|
8 |
-
},
|
9 |
"bos_token_id": 1,
|
10 |
"custom_pipelines": {
|
11 |
"upos": {
|
12 |
"impl": "upos.BellmanFordTokenClassificationPipeline",
|
13 |
"pt": "AutoModelForTokenClassification"
|
14 |
-
},
|
15 |
-
"token-classification": {
|
16 |
-
"impl": "upos.RawTokenClassificationPipeline",
|
17 |
-
"pt": "AutoModelForTokenClassification"
|
18 |
-
},
|
19 |
-
"ner": {
|
20 |
-
"impl": "upos.RawTokenClassificationPipeline",
|
21 |
-
"pt": "AutoModelForTokenClassification"
|
22 |
}
|
23 |
},
|
24 |
"eos_token_id": 2,
|
@@ -161,7 +150,7 @@
|
|
161 |
"tie_word_embeddings": false,
|
162 |
"tokenizer_class": "LlamaTokenizerFast",
|
163 |
"torch_dtype": "float32",
|
164 |
-
"transformers_version": "4.
|
165 |
"use_cache": true,
|
166 |
"use_transformers_inputs": true,
|
167 |
"vocab_size": 48000
|
|
|
3 |
"MistralForTokenClassification"
|
4 |
],
|
5 |
"attention_dropout": 0.0,
|
|
|
|
|
|
|
6 |
"bos_token_id": 1,
|
7 |
"custom_pipelines": {
|
8 |
"upos": {
|
9 |
"impl": "upos.BellmanFordTokenClassificationPipeline",
|
10 |
"pt": "AutoModelForTokenClassification"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
}
|
12 |
},
|
13 |
"eos_token_id": 2,
|
|
|
150 |
"tie_word_embeddings": false,
|
151 |
"tokenizer_class": "LlamaTokenizerFast",
|
152 |
"torch_dtype": "float32",
|
153 |
+
"transformers_version": "4.42.4",
|
154 |
"use_cache": true,
|
155 |
"use_transformers_inputs": true,
|
156 |
"vocab_size": 48000
|
maker.sh
CHANGED
@@ -30,45 +30,9 @@ cat << 'EOF' > $TMPB
|
|
30 |
#! /usr/bin/env deepspeed
|
31 |
src="exRakutenAI-7B"
|
32 |
tgt="KoichiYasuoka/RakutenAI-7B-upos"
|
33 |
-
from transformers import LlamaTokenizerFast,
|
34 |
-
from transformers.modeling_outputs import TokenClassifierOutput
|
35 |
from tokenizers.normalizers import Replace
|
36 |
|
37 |
-
class MistralForTokenClassification(MistralPreTrainedModel):
|
38 |
-
def __init__(self,config):
|
39 |
-
from torch import nn
|
40 |
-
super().__init__(config)
|
41 |
-
self.num_labels=config.num_labels
|
42 |
-
self.model=MistralModel(config)
|
43 |
-
if hasattr(config,"classifier_dropout") and config.classifier_dropout is not None:
|
44 |
-
classifier_dropout=config.classifier_dropout
|
45 |
-
elif hasattr(config,"hidden_dropout") and config.hidden_dropout is not None:
|
46 |
-
classifier_dropout=config.hidden_dropout
|
47 |
-
else:
|
48 |
-
classifier_dropout=0.1
|
49 |
-
self.dropout=nn.Dropout(classifier_dropout)
|
50 |
-
self.classifier=nn.Linear(config.hidden_size,config.num_labels)
|
51 |
-
self.post_init()
|
52 |
-
def get_input_embeddings(self):
|
53 |
-
return self.model.embed_tokens
|
54 |
-
def set_input_embeddings(self,value):
|
55 |
-
self.model.embed_tokens=value
|
56 |
-
def forward(self,input_ids=None,past_key_values=None,attention_mask=None,position_ids=None,inputs_embeds=None,labels=None,use_cache=None,output_attentions=None,output_hidden_states=None,return_dict=None):
|
57 |
-
return_dict=return_dict if return_dict is not None else self.config.use_return_dict
|
58 |
-
transformer_outputs=self.model(input_ids,past_key_values=past_key_values,attention_mask=attention_mask,position_ids=position_ids,inputs_embeds=inputs_embeds,use_cache=use_cache,output_attentions=output_attentions,output_hidden_states=output_hidden_states,return_dict=return_dict)
|
59 |
-
hidden_states=transformer_outputs[0]
|
60 |
-
hidden_states=self.dropout(hidden_states)
|
61 |
-
logits=self.classifier(hidden_states)
|
62 |
-
loss=None
|
63 |
-
if labels is not None:
|
64 |
-
from torch import nn
|
65 |
-
loss_fct=nn.CrossEntropyLoss()
|
66 |
-
loss=loss_fct(logits.view(-1,self.num_labels),labels.view(-1))
|
67 |
-
if not return_dict:
|
68 |
-
output=(logits,)+transformer_outputs[2:]
|
69 |
-
return ((loss,)+output) if loss is not None else output
|
70 |
-
return TokenClassifierOutput(loss=loss,logits=logits,hidden_states=transformer_outputs.hidden_states,attentions=transformer_outputs.attentions)
|
71 |
-
|
72 |
class UPOSFileDataset(object):
|
73 |
def __init__(self,conllu,tokenizer):
|
74 |
self.conllu=open(conllu,"r",encoding="utf-8")
|
|
|
30 |
#! /usr/bin/env deepspeed
|
31 |
src="exRakutenAI-7B"
|
32 |
tgt="KoichiYasuoka/RakutenAI-7B-upos"
|
33 |
+
from transformers import LlamaTokenizerFast,MistralForTokenClassification,AutoConfig,DataCollatorForTokenClassification,TrainingArguments,Trainer
|
|
|
34 |
from tokenizers.normalizers import Replace
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
class UPOSFileDataset(object):
|
37 |
def __init__(self,conllu,tokenizer):
|
38 |
self.conllu=open(conllu,"r",encoding="utf-8")
|
pytorch_model-00001-of-00006.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4913773120
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4964869128383677fa5606d0a4ed8d1671e017f9fcee8d5bb76c86698bd0bec7
|
3 |
size 4913773120
|
pytorch_model-00002-of-00006.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999825256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:987a14b03e439ece74b9c28d2334fc5a9a84e8ad17e6e6f1c81ad09c70d7fbb0
|
3 |
size 4999825256
|
pytorch_model-00003-of-00006.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999825316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff7f12de394e69c4ab16ee0aad77095085d75f3def0ecf8de9b521690a22c536
|
3 |
size 4999825316
|
pytorch_model-00004-of-00006.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4832018324
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aef0b1302652d535cf187d1c5a833e1508671b7487f2f19001b7cfa8ec778454
|
3 |
size 4832018324
|
pytorch_model-00005-of-00006.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999825320
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9657d37d2e4f7c275f3960d5d31ef541223074220ffe5af727b7eed35685da40
|
3 |
size 4999825320
|
pytorch_model-00006-of-00006.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3960601264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:333bc2435071a127df8259f18bfeef267f86423bcef9e9380dbe70750c85a49f
|
3 |
size 3960601264
|
pytorch_model.bin.index.json
CHANGED
@@ -3,8 +3,6 @@
|
|
3 |
"total_size": 28705767664
|
4 |
},
|
5 |
"weight_map": {
|
6 |
-
"classifier.bias": "pytorch_model-00006-of-00006.bin",
|
7 |
-
"classifier.weight": "pytorch_model-00006-of-00006.bin",
|
8 |
"model.embed_tokens.weight": "pytorch_model-00001-of-00006.bin",
|
9 |
"model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
|
10 |
"model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
|
@@ -294,6 +292,8 @@
|
|
294 |
"model.layers.9.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
|
295 |
"model.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
|
296 |
"model.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
|
297 |
-
"model.norm.weight": "pytorch_model-00006-of-00006.bin"
|
|
|
|
|
298 |
}
|
299 |
}
|
|
|
3 |
"total_size": 28705767664
|
4 |
},
|
5 |
"weight_map": {
|
|
|
|
|
6 |
"model.embed_tokens.weight": "pytorch_model-00001-of-00006.bin",
|
7 |
"model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
|
8 |
"model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
|
|
|
292 |
"model.layers.9.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
|
293 |
"model.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
|
294 |
"model.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
|
295 |
+
"model.norm.weight": "pytorch_model-00006-of-00006.bin",
|
296 |
+
"score.bias": "pytorch_model-00006-of-00006.bin",
|
297 |
+
"score.weight": "pytorch_model-00006-of-00006.bin"
|
298 |
}
|
299 |
}
|
tokenizer_config.json
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
{
|
2 |
"add_bos_token": true,
|
3 |
"add_eos_token": false,
|
|
|
4 |
"added_tokens_decoder": {
|
5 |
"0": {
|
6 |
"content": "<unk>",
|
|
|
1 |
{
|
2 |
"add_bos_token": true,
|
3 |
"add_eos_token": false,
|
4 |
+
"add_prefix_space": null,
|
5 |
"added_tokens_decoder": {
|
6 |
"0": {
|
7 |
"content": "<unk>",
|
upos.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
-
from transformers import TokenClassificationPipeline
|
2 |
-
from transformers.modeling_outputs import TokenClassifierOutput
|
3 |
|
4 |
class BellmanFordTokenClassificationPipeline(TokenClassificationPipeline):
|
5 |
def __init__(self,**kwargs):
|
@@ -40,41 +39,3 @@ class BellmanFordTokenClassificationPipeline(TokenClassificationPipeline):
|
|
40 |
t["text"]=model_outputs["sentence"][t["start"]:t["end"]]
|
41 |
return w
|
42 |
|
43 |
-
class RawTokenClassificationPipeline(TokenClassificationPipeline):
|
44 |
-
def check_model_type(self,supported_models):
|
45 |
-
pass
|
46 |
-
|
47 |
-
class MistralForTokenClassification(MistralPreTrainedModel):
|
48 |
-
def __init__(self,config):
|
49 |
-
from torch import nn
|
50 |
-
super().__init__(config)
|
51 |
-
self.num_labels=config.num_labels
|
52 |
-
self.model=MistralModel(config)
|
53 |
-
if hasattr(config,"classifier_dropout") and config.classifier_dropout is not None:
|
54 |
-
classifier_dropout=config.classifier_dropout
|
55 |
-
elif hasattr(config,"hidden_dropout") and config.hidden_dropout is not None:
|
56 |
-
classifier_dropout=config.hidden_dropout
|
57 |
-
else:
|
58 |
-
classifier_dropout=0.1
|
59 |
-
self.dropout=nn.Dropout(classifier_dropout)
|
60 |
-
self.classifier=nn.Linear(config.hidden_size,config.num_labels)
|
61 |
-
self.post_init()
|
62 |
-
def get_input_embeddings(self):
|
63 |
-
return self.model.embed_tokens
|
64 |
-
def set_input_embeddings(self,value):
|
65 |
-
self.model.embed_tokens=value
|
66 |
-
def forward(self,input_ids=None,past_key_values=None,attention_mask=None,position_ids=None,inputs_embeds=None,labels=None,use_cache=None,output_attentions=None,output_hidden_states=None,return_dict=None):
|
67 |
-
return_dict=return_dict if return_dict is not None else self.config.use_return_dict
|
68 |
-
transformer_outputs=self.model(input_ids,past_key_values=past_key_values,attention_mask=attention_mask,position_ids=position_ids,inputs_embeds=inputs_embeds,use_cache=use_cache,output_attentions=output_attentions,output_hidden_states=output_hidden_states,return_dict=return_dict)
|
69 |
-
hidden_states=transformer_outputs[0]
|
70 |
-
hidden_states=self.dropout(hidden_states)
|
71 |
-
logits=self.classifier(hidden_states)
|
72 |
-
loss=None
|
73 |
-
if labels is not None:
|
74 |
-
from torch import nn
|
75 |
-
loss_fct=nn.CrossEntropyLoss()
|
76 |
-
loss=loss_fct(logits.view(-1,self.num_labels),labels.view(-1))
|
77 |
-
if not return_dict:
|
78 |
-
output=(logits,)+transformer_outputs[2:]
|
79 |
-
return ((loss,)+output) if loss is not None else output
|
80 |
-
return TokenClassifierOutput(loss=loss,logits=logits,hidden_states=transformer_outputs.hidden_states,attentions=transformer_outputs.attentions)
|
|
|
1 |
+
from transformers import TokenClassificationPipeline
|
|
|
2 |
|
3 |
class BellmanFordTokenClassificationPipeline(TokenClassificationPipeline):
|
4 |
def __init__(self,**kwargs):
|
|
|
39 |
t["text"]=model_outputs["sentence"][t["start"]:t["end"]]
|
40 |
return w
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|