Update BertForJointParsing.py
BertForJointParsing.py (+4, -0)
@@ -273,6 +273,8 @@ def combine_token_wordpieces(input_ids: List[int], offset_mapping: torch.Tensor,
     ret = []
     special_toks = tokenizer.all_special_tokens
     special_toks.remove(tokenizer.unk_token)
+    special_toks.remove(tokenizer.mask_token)
+
     for token, offsets in zip(tokenizer.convert_ids_to_tokens(input_ids), offset_mapping):
         if token in special_toks: continue
         if token.startswith('##'):
@@ -287,6 +289,8 @@ def ner_parse_logits(input_ids: List[List[int]], sentences: List[str], tokenizer
 
     special_toks = tokenizer.all_special_tokens
     special_toks.remove(tokenizer.unk_token)
+    special_toks.remove(tokenizer.mask_token)
+
     for batch_idx in range(len(sentences)):
 
         ret = []
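The change removes the mask token from the skip list in both functions, so [MASK] tokens in the input are kept and aligned to output words rather than silently dropped, the same way [UNK] already was. Below is a minimal sketch of that skip-list behaviour, assuming a standard 'bert-base-uncased' tokenizer and a made-up input sentence rather than this model's own tokenizer or the surrounding functions from the file:

```python
# Minimal sketch (assumes 'bert-base-uncased'; not the model's own tokenizer)
# of the skip-list behaviour both patched functions rely on: special tokens
# are filtered out of the output, except those removed from the skip list.
from transformers import BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

special_toks = tokenizer.all_special_tokens   # e.g. ['[UNK]', '[SEP]', '[PAD]', '[CLS]', '[MASK]']
special_toks.remove(tokenizer.unk_token)      # keep [UNK] tokens in the output (already the case)
special_toks.remove(tokenizer.mask_token)     # keep [MASK] tokens too (what this commit adds)

input_ids = tokenizer('the [MASK] sat')['input_ids']
kept = [tok for tok in tokenizer.convert_ids_to_tokens(input_ids)
        if tok not in special_toks]
print(kept)  # ['the', '[MASK]', 'sat']: [CLS]/[SEP] still skipped, [MASK] now retained
```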