Spaces:
Sleeping
minhdang14902
committed on
Update app.py
app.py CHANGED
@@ -1,6 +1,7 @@
 import streamlit as st
 import torch
-
+import pytorch_lightning as pl
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline, T5Tokenizer, T5ForConditionalGeneration
 import nltk
 from transformers.models.roberta.modeling_roberta import *
 from transformers import RobertaForQuestionAnswering
@@ -354,6 +355,82 @@ def extract_answer(inputs, outputs, tokenizer):
     })
     return plain_result
 
+# ---- T5 fallback: generate an answer when extractive QA finds nothing ----
+from torch.optim import AdamW  # missing in the original commit; needed by configure_optimizers below
+DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+INPUT_MAX_LEN = 128   # adjusted input length
+OUTPUT_MAX_LEN = 256  # adjusted output length
+
+MODEL_NAME = "VietAI/vit5-base"
+
+tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME, model_max_length=INPUT_MAX_LEN)
+
+class T5Model(pl.LightningModule):
+    def __init__(self):
+        super().__init__()
+        self.model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME, return_dict=True)
+
+    def forward(self, input_ids, attention_mask, labels=None):
+        output = self.model(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            labels=labels
+        )
+        return output.loss, output.logits
+
+    def training_step(self, batch, batch_idx):
+        input_ids = batch["input_ids"].to(DEVICE)
+        attention_mask = batch["attention_mask"].to(DEVICE)
+        labels = batch["target"].to(DEVICE)
+        loss, logits = self(input_ids, attention_mask, labels)
+        self.log("train_loss", loss, prog_bar=True, logger=True)
+        return {'loss': loss}
+
+    def validation_step(self, batch, batch_idx):
+        input_ids = batch["input_ids"].to(DEVICE)
+        attention_mask = batch["attention_mask"].to(DEVICE)
+        labels = batch["target"].to(DEVICE)
+        loss, logits = self(input_ids, attention_mask, labels)
+        self.log("val_loss", loss, prog_bar=True, logger=True)
+        return {'val_loss': loss}
+
+    def configure_optimizers(self):
+        return AdamW(self.parameters(), lr=0.0001)
+
+train_model = T5Model.load_from_checkpoint('./data-law/law-model-v1.ckpt')
+train_model.freeze()  # inference only
+
+
+def generate_question(question):
+    inputs_encoding = tokenizer(
+        question,
+        add_special_tokens=True,
+        max_length=INPUT_MAX_LEN,
+        padding='max_length',
+        truncation='only_first',
+        return_attention_mask=True,
+        return_tensors="pt"
+    ).to(DEVICE)
+
+    generate_ids = train_model.model.generate(
+        input_ids=inputs_encoding["input_ids"],
+        attention_mask=inputs_encoding["attention_mask"],
+        max_length=OUTPUT_MAX_LEN,  # was INPUT_MAX_LEN in the commit; generation should use the output limit
+        num_beams=4,
+        num_return_sequences=1,
+        no_repeat_ngram_size=2,
+        early_stopping=True,
+    )
+
+    preds = [
+        tokenizer.decode(gen_id, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+        for gen_id in generate_ids
+    ]
+
+    response = " ".join(preds[0].split())
+    return response
+
 # st.title("Chatbot Roberta")
 # st.write("Hi! Tôi là trợ lý của bạn trong việc trả lời các câu hỏi.")
 # text = st.text_input("User: ", key="input")
@@ -398,7 +475,7 @@ def get_response(text):
     answer, context = chatRoberta(text)
     result = answer[0]['answer']
     if result == "":
-        return
+        return generate_question(text)  # fall back to T5 generation when Roberta returns no answer
     return result
 
 st.title("General Law Chatbot")
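A note on the new block above: Streamlit re-executes app.py on every user interaction, so the module-level T5Model.load_from_checkpoint call repeats on each rerun. A minimal sketch of caching that load with st.cache_resource follows; the wrapper name load_t5 is mine, and none of this is part of the commit:

# Sketch only (not in this commit): cache the checkpoint load across Streamlit reruns.
# Assumes the same ./data-law/law-model-v1.ckpt path and the T5Model class from app.py.
import streamlit as st

@st.cache_resource
def load_t5():
    model = T5Model.load_from_checkpoint('./data-law/law-model-v1.ckpt')
    model.freeze()  # inference only, no gradients needed
    return model

train_model = load_t5()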
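Taken together, get_response is now two-stage: the Roberta extractor is tried first, and an empty extractive answer routes the question through the T5 generator instead of silently returning None as before. A hypothetical smoke test of that fallback path, assuming app.py's chatRoberta and generate_question have loaded; the sample questions are made up:

# Hypothetical smoke test, not part of app.py.
if __name__ == "__main__":
    questions = [
        "Người lao động được nghỉ phép bao nhiêu ngày mỗi năm?",  # "How many days of leave per year?"
        "một câu hỏi không có trong dữ liệu",                     # "a question not covered by the data"
    ]
    for q in questions:
        print(q, "->", get_response(q))  # falls back to T5 when the extractive answer is empty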