anakin87 committed
Commit d6bdb02
1 Parent(s): a027256

class entailment_checker

Browse files
- README.md +1 -1
- app.py → Rock_fact_checker.py +0 -0
- app_utils/entailment_checker.py +66 -0
- pages/{app.py → Info.py} +0 -0
- pages/info.py +0 -3
README.md
CHANGED
@@ -5,7 +5,7 @@ colorFrom: purple
 colorTo: blue
 sdk: streamlit
 sdk_version: 1.10.0
-app_file:
+app_file: rock_fact_checker.py
 pinned: false
 license: apache-2.0
 ---
app.py → Rock_fact_checker.py
RENAMED
File without changes
app_utils/entailment_checker.py
ADDED
@@ -0,0 +1,66 @@
from typing import List, Optional

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig

from haystack.nodes.base import BaseComponent
from haystack.modeling.utils import initialize_device_settings
from haystack.schema import Document, Answer, Span


class EntailmentChecker(BaseComponent):
    """
    This node checks the entailment between every document content and the query.
    It enriches the documents' metadata with entailment_info.
    """

    outgoing_edges = 1

    def __init__(
        self,
        model_name_or_path: str = "roberta-large-mnli",
        model_version: Optional[str] = None,
        tokenizer: Optional[str] = None,
        use_gpu: bool = True,
        batch_size: int = 16,
    ):
        """
        Load a Natural Language Inference model from Transformers.

        :param model_name_or_path: Directory of a saved model or the name of a public model.
            See https://huggingface.co/models for a full list of available models.
        :param model_version: The version of the model to use from the Hugging Face model hub. Can be a tag name, branch name, or commit hash.
        :param tokenizer: Name of the tokenizer (usually the same as the model).
        :param use_gpu: Whether to use GPU (if available).
        :param batch_size: Number of Documents to be processed at a time (currently stored but not used for batching).
        """
        super().__init__()

        self.devices, _ = initialize_device_settings(use_cuda=use_gpu, multi_gpu=False)

        tokenizer = tokenizer or model_name_or_path
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            pretrained_model_name_or_path=model_name_or_path, revision=model_version
        )
        self.batch_size = batch_size
        self.model.to(str(self.devices[0]))

        # The model config maps class ids to the NLI labels (e.g. contradiction, neutral, entailment)
        id2label = AutoConfig.from_pretrained(model_name_or_path).id2label
        self.labels = [id2label[k].lower() for k in sorted(id2label)]
        if "entailment" not in self.labels:
            raise ValueError("The model config must contain an entailment value in the id2label dict.")

    def run(self, query: str, documents: List[Document]):
        # Each document content is the premise; the query is the hypothesis to verify
        for doc in documents:
            entailment_dict = self.get_entailment(premise=doc.content, hypothesis=query)
            doc.meta["entailment_info"] = entailment_dict
        return {"documents": documents}, "output_1"

    def run_batch(self):
        pass

    def get_entailment(self, premise, hypothesis):
        with torch.no_grad():
            # Encode the premise/hypothesis pair separated by the tokenizer's sep token
            inputs = self.tokenizer(
                f"{premise}{self.tokenizer.sep_token}{hypothesis}", return_tensors="pt"
            ).to(self.devices[0])
            out = self.model(**inputs)
            logits = out.logits
            # Softmax over the logits yields one probability per NLI label
            probs = torch.nn.functional.softmax(logits, dim=-1)[0, :].cpu().detach().numpy()
            entailment_dict = {k.lower(): v for k, v in zip(self.labels, probs)}
        return entailment_dict
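For reference, a minimal sketch of how the new node could be exercised on its own, outside a full Haystack pipeline. The query and document texts below are made up for illustration, and the import path simply mirrors the file location added in this commit:

from haystack.schema import Document

from app_utils.entailment_checker import EntailmentChecker

checker = EntailmentChecker(model_name_or_path="roberta-large-mnli", use_gpu=False)

# Hypothetical evidence documents and statement to check (illustrative only)
docs = [
    Document(content="Freddie Mercury was the lead singer of Queen."),
    Document(content="Freddie Mercury was born in Zanzibar in 1946."),
]
output, edge = checker.run(query="Freddie Mercury was Queen's lead singer", documents=docs)

for doc in output["documents"]:
    # entailment_info maps each NLI label (entailment / neutral / contradiction) to its probability
    print(doc.content, doc.meta["entailment_info"])

Downstream code can then read doc.meta["entailment_info"] to decide how strongly each retrieved document supports or contradicts the statement.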
pages/{app.py → Info.py}
RENAMED
File without changes
pages/info.py
DELETED
@@ -1,3 +0,0 @@
-import streamlit as st
-
-st.title("Test")