|
import os |
|
|
|
|
|
# Groq API key, read from the "groq2" environment variable.
# The value is None when that variable is not set.
groq_api_key = os.getenv("groq2")
|
|
|
|
|
from langchain_groq import ChatGroq |
|
|
|
# Groq-hosted chat model shared by the question/answer generation step and
# the retrieval chain defined later in this file.
# NOTE(review): hosted model ids get retired over time — confirm that
# "llama-3.1-70b-versatile" is still served before relying on it.
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    api_key=groq_api_key,
)
|
|
|
from langchain.prompts import ChatPromptTemplate, PromptTemplate |
|
from langchain.output_parsers import ResponseSchema, StructuredOutputParser |
|
|
|
import PyPDF2 |
|
|
|
# Prompt used to turn one chunk of PDF text into a single question/answer
# pair. It has two placeholders:
#   {context}             - the chunk of source text
#   {format_instructions} - the JSON layout the reply must follow
TEMPLATE = """
You are a helpful agent. Your task is to generate a meaningful question and an answer using the following provided "{context}"

You MUST obey the following criteria:
- No preamble.
- Restrict the question to the context information provided and provide answer with its details in summary.
- Do NOT create a question that cannot be answered from the context.
- Phrase the question so that it does NOT refer to specific context.
- For instance, do NOT use phrases like 'given the provided context' or 'in this work' in the question or 'according to the text' in the answer because if the question is asked elsewhere it would not be provided specific context. Replace these terms with specific details.
- Please do NOT repeat the provided context.
- Please Only generate a question and an answer without any sentence in advance such as "Here is the generated question and answer:".
- Please follow the JSON recommended format below.
- Please ensure that the output is a valid JSON object.
{format_instructions}
"""

# Chat prompt built from the template above; both placeholders are filled in
# when the prompt is formatted for a chunk.
prompt = ChatPromptTemplate.from_template(TEMPLATE)
|
# Fields the LLM must return for each chunk. ResponseSchema objects are what
# StructuredOutputParser.from_response_schemas is declared to accept, so they
# are built explicitly instead of passing plain dicts and relying on implicit
# coercion.
response_schemas = [
    ResponseSchema(
        name="Question",
        description="The generated question from the provided context",
    ),
    ResponseSchema(
        name="Answer",
        description="The corresponding answer from the provided context",
    ),
]

# Parser that turns the LLM reply into a dict keyed by the schema names.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# JSON-only formatting instructions injected into the prompt's
# {format_instructions} placeholder.
format_instructions = output_parser.get_format_instructions(only_json=True)
|
|
|
|
|
# Google Drive folder whose PDF files are turned into question/answer pairs.
folder_path = "/content/drive/MyDrive/Chatbot"

# Accumulates the generated (question, answer) tuples.
data: list = []
|
|
|
|
|
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path`` as one string.

    Pages are concatenated in document order with no separator between them.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # The pages must be read while the file is still open.
        text = "".join(page.extract_text() for page in pdf_reader.pages)
    return text
|
|
|
|
|
# Raised by the structured output parser when a reply is not valid JSON or
# lacks an expected key.
from langchain_core.exceptions import OutputParserException

# Generate one (question, answer) pair per 200-character slice of every PDF
# found directly inside `folder_path`, appending the pairs to `data`.
for filename in os.listdir(folder_path):
    if not filename.endswith(".pdf"):
        continue

    pdf_path = os.path.join(folder_path, filename)

    # Any failure other than an unparseable reply (unreadable PDF, API or
    # network error) is reported and ends the processing of this file only.
    try:
        context = extract_text_from_pdf(pdf_path)
        chunks = [context[i:i + 200] for i in range(0, len(context), 200)]

        for chunk in chunks:
            # A whitespace-only slice has nothing to ask a question about,
            # so do not spend an LLM call on it.
            if not chunk.strip():
                continue

            messages = prompt.format_messages(
                context=chunk,
                format_instructions=format_instructions,
            )
            response = llm.invoke(messages)

            # One malformed reply must not discard the remaining chunks of
            # the file: report it and move on to the next chunk.
            try:
                output_dict = output_parser.parse(response.content)
            except OutputParserException as e:
                print(f"Error processing file {filename}: {e}")
                continue

            question = output_dict["Question"]
            answer = output_dict["Answer"]
            data.append((question, answer))
    except Exception as e:
        print(f"Error processing file {filename}: {e}")
|
|
|
import PyPDF2 |
|
|
|
|
|
def extract_text_from_pdf(pdf_path):
    """Read the PDF at ``pdf_path`` and return all of its page text joined together.

    This rebinds the function of the same name defined earlier in the file;
    the two definitions do the same thing.
    """
    with open(pdf_path, 'rb') as handle:
        page_texts = [page.extract_text() for page in PyPDF2.PdfReader(handle).pages]
    return "".join(page_texts)
|
|
|
|
|
def chunk_text(text, max_length=500):
    """Split ``text`` into consecutive slices of at most ``max_length`` characters.

    The slices do not overlap and the last one may be shorter. An empty
    ``text`` gives an empty list.
    """
    starts = range(0, len(text), max_length)
    return [text[start:start + max_length] for start in starts]
|
|
|
|
|
# The law text that serves as the main retrieval corpus.
pdf_path = "/content/drive/MyDrive/LAW Nº 59 ON THE CRIME OF GENOCIDE IDEOLOGY AND RELATED CRIMES.pdf"

# Stays empty when the PDF cannot be read or contains no text.
context_data = []

try:
    pdf_text = extract_text_from_pdf(pdf_path)

    if not pdf_text:
        print("No text found in the PDF.")
    else:
        # Split the law into 500-character pieces and keep them as the
        # raw context entries.
        chunks = chunk_text(pdf_text, max_length=500)
        context_data = list(chunks)

        # Echo every piece, each followed by a 40-dash rule.
        for entry in context_data:
            print(entry)
            print("-" * 40)
except Exception as e:
    print(f"Error reading the PDF: {e}")
|
|
|
# Append the generated (question, answer) tuples to the raw text chunks so
# that both kinds of entry end up in the same list.
context_data.extend(data)


def _as_text(element):
    """Render one context entry as the plain string that gets indexed."""
    if isinstance(element, tuple):
        question, answer = element
        return f"Question: {question} Answer: {answer}"
    if isinstance(element, str):
        return element
    return str(element)


# One string per context entry, in the same order as `context_data`.
processed_texts = [_as_text(element) for element in context_data]
|
|
|
|
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
# Embedding model handed to the Chroma vector store as its embedding function.
embed_model = HuggingFaceEmbeddings(
    model_name="mixedbread-ai/mxbai-embed-large-v1",
)
|
|
|
from google.colab import drive |
|
# Mount Google Drive at /content/drive.
# NOTE(review): files under /content/drive are already read earlier in this
# file — confirm Drive is mounted before those reads when the script is run
# top to bottom.
drive.mount("/content/drive")
|
|
|
|
|
from langchain_chroma import Chroma |
|
|
|
# Chroma collection holding the embedded law chunks and question/answer
# texts. It is persisted in the current working directory.
# NOTE(review): because the collection is persisted and texts are added
# without explicit ids, re-running the script presumably appends the same
# texts again — confirm whether that is acceptable.
vectorstore = Chroma(
    collection_name="laws_dataset",
    embedding_function=embed_model,
    persist_directory="./",
)

# Skip the call when there is nothing to index instead of handing the store
# an empty batch (this happens when no PDF text could be extracted and no
# question/answer pair was generated).
if processed_texts:
    vectorstore.add_texts(processed_texts)
else:
    print("No texts to index; the vector store was left unchanged.")
|
|
|
from langchain_core.prompts import PromptTemplate |
|
|
|
|
|
# Answering prompt: {context} receives the retrieved material and {question}
# the user's message.
template = """You are a legal expert specializing in providing precise and reliable legal assistance.
Use the provided legal context to answer the question with clear and accurate legal advice.
If the context is irrelevant or insufficient, state so concisely without elaboration.
Do not discuss or analyze the context unless absolutely necessary for clarity.
Ensure your response is professional, detailed in summary and rooted in legal reasoning

Legal Context: {context}

Question: {question}

Legal Advice:"""

rag_prompt = PromptTemplate.from_template(template)

# Retriever over the vector store, created with its default settings.
retriever = vectorstore.as_retriever()
|
|
|
from langchain_core.output_parsers import StrOutputParser |
|
from langchain_core.runnables import RunnablePassthrough |
|
|
|
def _format_docs(docs):
    """Join the text of the retrieved documents into one prompt-ready string."""
    return "\n\n".join(doc.page_content for doc in docs)


# Retrieval chain: the incoming question is sent to the retriever and also
# passed through unchanged; both feed the prompt, whose output goes to the
# LLM and is finally reduced to a plain string.
# The retrieved documents are converted to plain text first, so the prompt
# receives their content rather than the repr of a list of Document objects.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)
|
|
|
import gradio as gr |
|
|
|
def rag_memory_stream(message, history):
    """Stream the chain's answer to ``message`` as a growing string.

    After each piece produced by ``rag_chain.stream`` the full text received
    so far is yielded. ``history`` is accepted but not used.
    """
    pieces = []
    for piece in rag_chain.stream(message):
        pieces.append(piece)
        yield "".join(pieces)
|
|
|
|
|
# Example prompt offered in the chat UI.
examples = [
    ["What is the main purpose of Law Nº 59/2018 of 22/8/2018?"],
]

# Text shown under the title. The wording is corrected from
# "Regal AI Assistance" to "Legal AI Assistant".
description = (
    "This Legal AI Assistant specializes in LAW Nº 59/2018 OF 22/8/2018 "
    "ON THE CRIME OF GENOCIDE IDEOLOGY AND RELATED CRIMES."
)

# Heading of the chat UI.
title = "⚖️ Chat with me and learn Laws! ⚖️"
|
|
|
|
|
# Dark theme passed to the chat UI as custom CSS: black page, white serif
# text, dark chatbot and textbox panels, blue buttons.
# NOTE(review): the .gr-* selectors depend on Gradio's own class names —
# confirm they match the installed Gradio version.
custom_css = """
body {
background-color: black;
color: white;
font-family: "Times New Roman", serif;
}
.gradio-container {
font-family: "Times New Roman", serif;
color: white;
}
.gr-chatbot {
background-color: #222; /* Dark background for chatbot */
border: 1px solid #555;
border-radius: 10px;
padding: 10px;
margin-bottom: 20px;
}
.gr-textbox {
background-color: #333; /* Slightly lighter than background */
color: white;
border: 1px solid #555;
border-radius: 5px;
}
.gr-button {
background-color: #007bff; /* Blue button */
color: white;
border: none;
border-radius: 5px;
font-size: 16px;
padding: 10px 20px;
cursor: pointer;
}
.gr-button:hover {
background-color: #0056b3; /* Darker blue on hover */
}
"""
|
|
|
|
|
# Settings for the chat UI, gathered in one place before the interface is
# built.
chat_options = {
    "fn": rag_memory_stream,
    "type": "messages",
    "title": title,
    "description": description,
    "examples": examples,
    "fill_height": True,
    "theme": "soft",
    "css": custom_css,
}

demo = gr.ChatInterface(**chat_options)

# Start the web UI only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|