dicta-il
/

dictabert-joint

Feature Extraction

text-embeddings-inference

Model card Files Files and versions Community

Shaltiel committed on Jan 13

Commit

88c6751

•

1 Parent(s): 0dd6a49

Upload BertForJointParsing.py

Files changed (1) hide show

BertForJointParsing.py +19 -0

BertForJointParsing.py CHANGED Viewed

@@ -187,6 +187,25 @@ class BertForJointParsing(BertPreTrainedModel):
         )
     def predict(self, sentences: Union[str, List[str]], tokenizer: BertTokenizerFast, padding='longest', truncation=True, compute_syntax_mst=True, per_token_ner=False, output_style: Literal['json', 'ud', 'iahlt_ud'] = 'json'):
         is_single_sentence = isinstance(sentences, str)
         if is_single_sentence:
             sentences = [sentences]

         )
     def predict(self, sentences: Union[str, List[str]], tokenizer: BertTokenizerFast, padding='longest', truncation=True, compute_syntax_mst=True, per_token_ner=False, output_style: Literal['json', 'ud', 'iahlt_ud'] = 'json'):
+        """
+        Predicts various linguistic features using the DictaBERT model.
+        This function takes a sentence or a list of sentences in Hebrew and applies the BERT model to predict multiple linguistic attributes simultaneously. These include syntax, named entity recognition (NER), morphological analysis, lexical information, and text segmentation.
+        Parameters:
+        sentences (Union[str, List[str]]): A single sentence or a list of sentences in Hebrew.
+        tokenizer (BertTokenizerFast): The tokenizer used for preprocessing the input sentences.
+        padding (str, optional): The strategy for padding sentences. Defaults to 'longest'.
+        truncation (bool, optional): Flag to enable or disable truncation. Defaults to True.
+        compute_syntax_mst (bool, optional): If True, computes the maximum spanning tree for syntax prediction. Defaults to True.
+        per_token_ner (bool, optional): If True, performs NER for each token. Defaults to False.
+        output_style (Literal['json', 'ud', 'iahlt_ud'], optional): The format of the output. Choices are 'json', 'ud' (Universal Dependencies), or 'iahlt_ud' (UD in the style of IAHLT). Defaults to 'json'.
+        Returns:
+        The linguistic analysis of the input sentences, formatted according to output_style.
+        The function is integral for comprehensive linguistic analysis in applications involving Hebrew text, catering to a variety of NLP tasks.
+        """
         is_single_sentence = isinstance(sentences, str)
         if is_single_sentence:
             sentences = [sentences]