Spaces:

UKURIKIYEYEZU
/

Regal_assistance_Chatbot

Running

App Files Files Community

Regal_assistance_Chatbot / app.py

UKURIKIYEYEZU

Update app.py

ef66d7b verified 11 days ago

raw

history blame

8.3 kB

	import os

	# Read the Groq API key from the "groq2" environment variable; the value is
	# None when that variable is not set.
	groq_api_key = os.getenv('groq2')

	# Chat model used both for Q&A generation and for the RAG chain in this file.
	# NOTE(review): confirm this model id is still served by the provider.
	from langchain_groq import ChatGroq

	llm = ChatGroq(
	    api_key=groq_api_key,
	    model="llama-3.1-70b-versatile",
	)

	from langchain.prompts import ChatPromptTemplate, PromptTemplate
	from langchain.output_parsers import ResponseSchema, StructuredOutputParser

	import PyPDF2
	# Components for generating one question/answer pair per text chunk.
	# TEMPLATE is the prompt text; it has two placeholders, {context} (the text
	# chunk) and {format_instructions} (the output-format description built below).
	TEMPLATE = """
	You are a helpful agent. Your task is to generate a meaningful question and an answer using the following provided "{context}"

	You MUST obey the following criteria:
	- No preamble.
	- Restrict the question to the context information provided and provide answer with its details in summary.
	- Do NOT create a question that cannot be answered from the context.
	- Phrase the question so that it does NOT refer to specific context.
	- For instance, do NOT use phrases like 'given the provided context' or 'in this work' in the question or 'according to the text' in the answer because if the question is asked elsewhere it would not be provided specific context. Replace these terms with specific details.
	- Please do NOT repeat the provided context.
	- Please Only generate a question and an answer without any sentence in advance such as "Here is the generated question and answer:".
	- Please follow the JSON recommended format below.
	- Please ensure that the output is a valid JSON object.
	{format_instructions}
	"""

	# Chat prompt built from TEMPLATE; filled in per chunk via format_messages().
	prompt = ChatPromptTemplate.from_template(template=TEMPLATE)
	# Fields requested from the model. The "Question" and "Answer" names are the
	# keys read back from the parsed output later in this file.
	# NOTE(review): plain dicts are passed although ResponseSchema is imported
	# above - presumably they are coerced to ResponseSchema objects; confirm.
	response_schemas = [
	{"name": "Question", "description": "The generated question from the provided context"},
	{"name": "Answer", "description": "The corresponding answer from the provided context"}
	]
	# Parser that turns the model's reply into a dict with the fields above.
	output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
	# Text substituted for the {format_instructions} placeholder of the prompt.
	format_instructions = output_parser.get_format_instructions(only_json=True)

	# Folder whose *.pdf files are used to generate question/answer pairs.
	# The path lies under /content/drive, the Google Drive mount point used in
	# this file.
	folder_path = "/content/drive/MyDrive/Chatbot"

	# Collected (question, answer) tuples, one per successfully parsed LLM reply.
	data = []

	# Function to extract text from PDF
	def extract_text_from_pdf(pdf_path):
	    """Return the text of every page of a PDF, concatenated in page order.

	    Args:
	        pdf_path: Path of the PDF file to read.

	    Returns:
	        The extracted page texts joined with no separator. A page whose
	        extraction yields nothing contributes an empty string.

	    Raises:
	        OSError: If the file cannot be opened.
	    """
	    with open(pdf_path, "rb") as file:
	        reader = PyPDF2.PdfReader(file)
	        # Join once instead of growing a string with "+=" per page, and
	        # treat a None extraction result as empty so the concatenation
	        # cannot fail with a TypeError. The join runs inside the "with"
	        # block because the pages are read from the open file.
	        return "".join(page.extract_text() or "" for page in reader.pages)

	# Number of characters of PDF text sent to the LLM per request
	QA_CHUNK_SIZE = 200


	def _generate_qa_pair(chunk):
	    """Ask the LLM for one question/answer pair about `chunk`.

	    Args:
	        chunk: Piece of PDF text used as the prompt context.

	    Returns:
	        A (question, answer) tuple taken from the parsed LLM reply.

	    Raises:
	        KeyError: If the parsed reply lacks "Question" or "Answer".
	        Exception: Whatever the LLM call or the output parser raises.
	    """
	    messages = prompt.format_messages(context=chunk, format_instructions=format_instructions)
	    response = llm.invoke(messages)
	    output_dict = output_parser.parse(response.content)
	    return output_dict["Question"], output_dict["Answer"]


	# Process each PDF in the folder
	if not os.path.isdir(folder_path):
	    # Report a missing folder instead of letting os.listdir() abort the script.
	    print(f"PDF folder not found, skipping Q&A generation: {folder_path}")
	else:
	    for filename in os.listdir(folder_path):
	        if not filename.endswith(".pdf"):
	            continue
	        pdf_path = os.path.join(folder_path, filename)

	        try:
	            # Extract text from the PDF
	            context = extract_text_from_pdf(pdf_path)
	        except Exception as e:
	            print(f"Error processing file {filename}: {e}")
	            continue

	        # Split the text into fixed-size chunks and query the LLM per chunk
	        for start in range(0, len(context), QA_CHUNK_SIZE):
	            chunk = context[start:start + QA_CHUNK_SIZE]
	            if not chunk.strip():
	                # Nothing to ask about in a whitespace-only chunk
	                continue
	            try:
	                data.append(_generate_qa_pair(chunk))
	            except Exception as e:
	                # Handle failures per chunk: one failed request or malformed
	                # reply must not discard the remaining chunks of this file.
	                print(f"Error processing file {filename} (chunk at offset {start}): {e}")

	import PyPDF2

	# Function to extract text from a PDF
	# NOTE: this re-defines the function of the same name declared earlier in
	# this file; from this point on, this definition is the one that is called.
	def extract_text_from_pdf(pdf_path):
	    """Return the text of every page of a PDF, concatenated in page order.

	    Args:
	        pdf_path: Path of the PDF file to read.

	    Returns:
	        The extracted page texts joined with no separator. A page whose
	        extraction yields nothing contributes an empty string.

	    Raises:
	        OSError: If the file cannot be opened.
	    """
	    with open(pdf_path, 'rb') as file:
	        reader = PyPDF2.PdfReader(file)
	        # Build the result with a single join rather than repeated "+=", and
	        # map a None extraction result to "" so concatenation cannot raise
	        # a TypeError. The pages are consumed while the file is still open.
	        return "".join(page.extract_text() or "" for page in reader.pages)

	# Function to chunk text into pieces of max_length
	def chunk_text(text, max_length=500):
	    """Split `text` into consecutive slices of at most `max_length` items.

	    Every slice except possibly the last has exactly `max_length` items;
	    an empty `text` yields an empty list.
	    """
	    offsets = range(0, len(text), max_length)
	    return [text[offset:offset + max_length] for offset in offsets]

	# PDF whose raw text is split into chunks below
	pdf_path = "/content/drive/MyDrive/LAW Nº 59 ON THE CRIME OF GENOCIDE IDEOLOGY AND RELATED CRIMES.pdf"
	# Text chunks of that PDF; stays empty when reading fails or no text is found
	context_data = []

	try:
	    pdf_text = extract_text_from_pdf(pdf_path)

	    if not pdf_text:
	        print("No text found in the PDF.")
	    else:
	        # Keep every 500-character chunk as a plain string
	        chunks = chunk_text(pdf_text, max_length=500)
	        context_data = list(chunks)

	        # Print each chunk followed by a separator line for readability
	        separator = "-" * 40
	        for entry in context_data:
	            print(entry)
	            print(separator)
	except Exception as e:
	    print(f"Error reading the PDF: {e}")

	# Append the generated (question, answer) tuples to the raw text chunks
	context_data.extend(data)


	def _to_indexable_text(element):
	    """Render one context entry as the string added to the vector store.

	    Tuples are unpacked as (question, answer) and formatted; strings are
	    passed through unchanged; anything else is converted with str().
	    """
	    if isinstance(element, tuple):
	        question, answer = element
	        return f"Question: {question} Answer: {answer}"
	    if isinstance(element, str):
	        return element
	    return str(element)


	processed_texts = [_to_indexable_text(element) for element in context_data]

	## Embedding model!
	from langchain_huggingface import HuggingFaceEmbeddings
	embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")

	# Mount Google Drive when running inside Google Colab. The google.colab
	# package is normally only importable there, so tolerate its absence
	# instead of crashing the whole app with an ImportError elsewhere.
	# NOTE(review): the Drive paths used earlier in this file are read before
	# this mount runs; consider moving the mount to the top of the file.
	try:
	    from google.colab import drive
	except ImportError:
	    drive = None
	    print("google.colab is not available; skipping Google Drive mount.")
	else:
	    drive.mount('/content/drive')

	# Create the vector store (persisted in the current working directory)
	from langchain_chroma import Chroma

	vectorstore = Chroma(
	    collection_name="laws_dataset",  # Changed the name to be compliant
	    embedding_function=embed_model,
	    persist_directory="./",
	)

	# Add data to the vector store.
	# The collection is persisted, so adding the same texts on every start
	# would pile up duplicates. Each text therefore gets an id derived from its
	# content: re-adding an already stored text overwrites it instead of
	# creating a second copy. Duplicates inside this batch are dropped first
	# because ids must be unique within one call.
	import hashlib

	unique_texts = list(dict.fromkeys(processed_texts))
	if unique_texts:
	    text_ids = [
	        hashlib.sha256(text.encode("utf-8", errors="replace")).hexdigest()
	        for text in unique_texts
	    ]
	    vectorstore.add_texts(unique_texts, ids=text_ids)
	else:
	    # Skip the call entirely rather than sending an empty batch
	    print("No texts to add to the vector store.")

	from langchain_core.prompts import PromptTemplate


	# Prompt used to answer user questions. It has two placeholders:
	# {context} (the retrieved legal text) and {question} (the user's message).
	template = ("""You are a legal expert specializing in providing precise and reliable legal assistance.
	Use the provided legal context to answer the question with clear and accurate legal advice.
	If the context is irrelevant or insufficient, state so concisely without elaboration.
	Do not discuss or analyze the context unless absolutely necessary for clarity.
	Ensure your response is professional, detailed in summary and rooted in legal reasoning

	Legal Context: {context}

	Question: {question}

	Legal Advice:""")


	# Prompt template object consumed by the RAG chain defined below.
	rag_prompt = PromptTemplate.from_template(template)

	# Retriever over the vector store; returns the documents matching a query
	retriever = vectorstore.as_retriever()

	from langchain_core.output_parsers import StrOutputParser
	from langchain_core.runnables import RunnablePassthrough


	def _format_docs(docs):
	    """Join the text of the retrieved documents into one context string."""
	    return "\n\n".join(doc.page_content for doc in docs)


	# RAG pipeline: the incoming question is sent to the retriever and also
	# passed through unchanged; both fill the prompt, which goes to the LLM, and
	# the reply is reduced to plain text.
	# The retrieved documents are converted to their text before entering the
	# prompt, so the model sees the passages themselves rather than the repr of
	# a list of Document objects.
	rag_chain = (
	    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
	    | rag_prompt
	    | llm
	    | StrOutputParser()
	)

	import gradio as gr

	def rag_memory_stream(message, history):
	    """Stream the RAG chain's answer to `message` as a growing string.

	    After each piece produced by `rag_chain.stream(message)`, yields all
	    text received so far. `history` is accepted but not used.
	    """
	    received = []
	    for piece in rag_chain.stream(message):
	        received.append(piece)
	        yield "".join(received)

	# Heading passed to the chat interface
	title = "⚖️ Chat with me and learn Laws! ⚖️"

	# Descriptive text passed to the chat interface
	description = (
	    "This Regal AI Assistance specializes in LAW Nº 59/2018 OF 22/8/2018 "
	    "ON THE CRIME OF GENOCIDE IDEOLOGY AND RELATED CRIMES."
	)

	# Example prompts: a list whose single entry is itself a one-item list
	examples = [["What is the main purpose of Law Nº 59/2018 of 22/8/2018?"]]

	# Custom CSS for styling the interface: black page background with white
	# "Times New Roman" text, plus rules for the .gr-chatbot, .gr-textbox and
	# .gr-button selectors (dark panels, blue button with a darker hover state).
	# NOTE(review): confirm these selectors match the class names rendered by
	# the installed Gradio version.
	custom_css = """
	body {
	background-color: black;
	color: white;
	font-family: "Times New Roman", serif;
	}
	.gradio-container {
	font-family: "Times New Roman", serif;
	color: white;
	}
	.gr-chatbot {
	background-color: #222; /* Dark background for chatbot */
	border: 1px solid #555;
	border-radius: 10px;
	padding: 10px;
	margin-bottom: 20px;
	}
	.gr-textbox {
	background-color: #333; /* Slightly lighter than background */
	color: white;
	border: 1px solid #555;
	border-radius: 5px;
	}
	.gr-button {
	background-color: #007bff; /* Blue button */
	color: white;
	border: none;
	border-radius: 5px;
	font-size: 16px;
	padding: 10px 20px;
	cursor: pointer;
	}
	.gr-button:hover {
	background-color: #0056b3; /* Darker blue on hover */
	}
	"""

	# Options for the chat interface: the streaming RAG callback plus the
	# title, description, example prompts, theme and custom CSS defined above.
	interface_options = dict(
	    fn=rag_memory_stream,
	    type="messages",
	    title=title,
	    description=description,
	    fill_height=True,
	    examples=examples,
	    theme="soft",
	    css=custom_css,
	)

	# Create the Chat Interface
	demo = gr.ChatInterface(**interface_options)

	# Start the app only when this file is run as a script
	if __name__ == "__main__":
	    demo.launch()