Upload BertForJointParsing.py
Browse files- BertForJointParsing.py +19 -0
BertForJointParsing.py
CHANGED
@@ -187,6 +187,25 @@ class BertForJointParsing(BertPreTrainedModel):
|
|
187 |
)
|
188 |
|
189 |
def predict(self, sentences: Union[str, List[str]], tokenizer: BertTokenizerFast, padding='longest', truncation=True, compute_syntax_mst=True, per_token_ner=False, output_style: Literal['json', 'ud', 'iahlt_ud'] = 'json'):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
is_single_sentence = isinstance(sentences, str)
|
191 |
if is_single_sentence:
|
192 |
sentences = [sentences]
|
|
|
187 |
)
|
188 |
|
189 |
def predict(self, sentences: Union[str, List[str]], tokenizer: BertTokenizerFast, padding='longest', truncation=True, compute_syntax_mst=True, per_token_ner=False, output_style: Literal['json', 'ud', 'iahlt_ud'] = 'json'):
|
190 |
+
"""
|
191 |
+
Predicts various linguistic features using the DictaBERT model.
|
192 |
+
|
193 |
+
This function takes a sentence or a list of sentences in Hebrew and applies the BERT model to predict multiple linguistic attributes simultaneously. These include syntax, named entity recognition (NER), morphological analysis, lexical information, and text segmentation.
|
194 |
+
|
195 |
+
Parameters:
|
196 |
+
sentences (Union[str, List[str]]): A single sentence or a list of sentences in Hebrew.
|
197 |
+
tokenizer (BertTokenizerFast): The tokenizer used for preprocessing the input sentences.
|
198 |
+
padding (str, optional): The strategy for padding sentences. Defaults to 'longest'.
|
199 |
+
truncation (bool, optional): Flag to enable or disable truncation. Defaults to True.
|
200 |
+
compute_syntax_mst (bool, optional): If True, computes the maximum spanning tree for syntax prediction. Defaults to True.
|
201 |
+
per_token_ner (bool, optional): If True, performs NER for each token. Defaults to False.
|
202 |
+
output_style (Literal['json', 'ud', 'iahlt_ud'], optional): The format of the output. Choices are 'json', 'ud' (Universal Dependencies), or 'iahlt_ud' (UD in the style of IAHLT). Defaults to 'json'.
|
203 |
+
|
204 |
+
Returns:
|
205 |
+
The linguistic analysis of the input sentences, formatted according to output_style.
|
206 |
+
|
207 |
+
This function is integral to comprehensive linguistic analysis in applications involving Hebrew text, and it supports a variety of NLP tasks.
|
208 |
+
"""
|
209 |
is_single_sentence = isinstance(sentences, str)
|
210 |
if is_single_sentence:
|
211 |
sentences = [sentences]
|