# NOTE: removed Hugging Face Spaces page artifacts ("Spaces:" / "Running")
# that were accidentally captured into this file — they were not valid Python.
# -*- coding: utf-8 -*-
"""app.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/14T27f82OgH2BZgVkanyyUKMrM1KBBJjM
"""
# --- Imports (stdlib, then third-party) ------------------------------------
import os

import fitz  # PyMuPDF — PDF text extraction
from langchain.prompts import PromptTemplate
from langchain_community.llms import Together
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# SECURITY: a live Together API key was previously hard-coded on this line and
# committed to source control — treat that key as leaked and rotate it.
# The key must now be supplied via the TOGETHER_API_KEY environment variable.
if not os.environ.get("TOGETHER_API_KEY"):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it before running this app."
    )
def extract_and_split_pdf(pdf_path, split_key="ENDOFTUT"):
    """Extract all text from a PDF and split it into chunks.

    Args:
        pdf_path: Path to the PDF file to read.
        split_key: Delimiter string that separates logical chunks
            inside the document text.

    Returns:
        list[str]: The full document text split on ``split_key``.
    """
    # Context manager guarantees the PDF handle is released even on error
    # (the original never called document.close(), leaking the handle).
    with fitz.open(pdf_path) as document:
        # str.join avoids the quadratic cost of repeated += concatenation.
        all_text = "".join(
            document.load_page(page_num).get_text()
            for page_num in range(document.page_count)
        )
    return all_text.split(split_key)
# ---------------------------------------------------------------------------
# Build the retrieval corpus: PDF tutorial chunks + curated Q&A pairs.
# ---------------------------------------------------------------------------
pdf_path = "Mech-chunks.pdf"
combined_list = extract_and_split_pdf(pdf_path)

# Append each question/answer pair from the JSON file as its own chunk.
import json

# encoding pinned to UTF-8 so decoding does not depend on the platform's
# locale default (the original relied on the implicit locale encoding).
with open('output_data (1).json', 'r', encoding='utf-8') as file:
    data = json.load(file)

for item in data:
    question = item.get("question", "")
    answer = item.get("answer", "")
    # NOTE(review): a literal '?' is appended even when the question already
    # ends with one — kept as-is so the indexed text stays unchanged.
    combined_list.append(f"Q: {question}?\nA: {answer}\n")
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

# Embed every chunk with a BGE sentence encoder and index them in FAISS.
embedding_model = HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5')
db = FAISS.from_texts(combined_list, embedding_model)

# Retriever facade over the index: top-4 nearest chunks by similarity.
retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 4})
# Smoke-test the index: fetch the 5 closest chunks for a sample question and
# print them so retrieval quality can be eyeballed before wiring up the bot.
query = "what are the tools needed to change Brake Discs on a Car?"
retrieved_docs = db.similarity_search(query, k=5)
for position, doc in enumerate(retrieved_docs, start=1):
    print(f"Document {position}:", doc.page_content)
# Main chat model: Llama 3.1 70B Instruct Turbo served by Together AI.
llm = Together(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    max_tokens=512,  # cap on generated answer length
    temperature=0.2,  # low temperature -> mostly deterministic answers
    #top_p=0.7,
    #top_k=50,
    repetition_penalty=1.1,  # mild penalty to discourage repeated phrases
)
# Safety classifier: Llama Guard 3 8B, used to screen user input.
# max_tokens is tiny because only a short "safe"/"unsafe" verdict is needed.
Llama_guard = Together(
    model="meta-llama/Meta-Llama-Guard-3-8B",
    max_tokens=10,
    temperature=0.2,
    #top_p=0.7,
    #top_k=50,
    repetition_penalty=1.1,
)
# Llama Guard moderation prompt. The {answer} placeholder receives the text
# to classify; the model is expected to reply "safe" or "unsafe".
# NOTE(review): the category exceptions mention "3D and Blender creation",
# which looks copied from a different project — consider tailoring the policy
# to this car-mechanic assistant. Also, standard Llama Guard prompts ask for
# violated categories on a second line when unsafe; this one suppresses it.
# The string below is runtime prompt data and is left byte-identical.
guard_temp_ans = """
<|system|>
Task: Check if there is unsafe content in conversations according our safety policy with the below categories.
<BEGIN UNSAFE CONTENT CATEGORIES>
1: Violence and Hate.
Should not
- Help people plan or engage in violence.
- Encourage people to engage in violence.
- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their race, color, religion, national origin, sexual orientation, gender, gender identity, or disability.
- Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.
- Provide information that can be used to promote or engage in violence, hate, or discrimination.
- Provide guidance or instructions on how to engage in violent or hateful behavior, except if related to 3D and Blender creation for artistic purposes.
2: Sexual Content.
Should not
- Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities.
- Create or provide explicit depictions of sexual acts or nudity.
- Provide information that could be used to create sexually explicit content
3: Criminal Planning.
Should not
- Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.
- Encourage people to engage in criminal activities or condone criminal behavior.
- Provide detailed instructions or information that could be used to plan or execute criminal activities, except if related to 3D and Blender creation for artistic purposes.
4: Guns and Illegal Weapons.
Should not
- Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them.
- Encourage people to engage in firearm or illegal weapons crimes or condone such behavior.
- Provide information that could be used to create or use illegal weapons, except if related to 3D and Blender creation for artistic purposes.
5: Regulated or Controlled Substances.
Should not
- Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis.
- Assist or encourage people to create such substances when it is illegal to do so.
- Provide information that could be used to create or use regulated or controlled substances, except if related to 3D and Blender creation for artistic purposes.
6: Self-Harm.
Should not
- Encourage people to harm themselves.
- Romanticize or condone self-harm or suicide.
- Provide information regarding the methods of suicide or self-harm.
- Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves.
- Provide information that could be used to promote or engage in self-harm, except if related to 3D and Blender creation for artistic purposes.
<END UNSAFE CONTENT CATEGORIES>
<BEGIN CONVERSATION>
{answer}
<END CONVERSATION>
Provide your safety assessment for {answer} in the above conversation:
- First line must read 'safe' or 'unsafe'.
- If unsafe, do not include a second line.
"""
# Wrap the guard prompt and the Llama Guard model into one runnable:
# input {"answer": <text>} -> "safe"/"unsafe" verdict string.
prompt_ans_guard = ChatPromptTemplate.from_template(guard_temp_ans)
guard_chain = prompt_ans_guard | Llama_guard
def moderate(chat):
    """Classify *chat* with Llama Guard and return its verdict text.

    The previous implementation called ``apply_chat_template`` / ``decode``
    on the ``Together`` wrapper (which has no such methods) and referenced
    undefined names ``device`` and ``model``, so it raised NameError on
    every call. It is rebuilt on top of ``guard_chain``, which already
    formats the guard prompt and invokes the hosted Llama Guard model.

    Args:
        chat: Text (or conversation transcript) to moderate.

    Returns:
        str: The raw guard verdict, expected to start with 'safe' or 'unsafe'.
    """
    return guard_chain.invoke({"answer": chat})
# Prompt fed to the main LLM on every user turn. The garbled instruction text
# in the original ("use Use following piece", "these human will ask",
# "If the question is start with") is fixed so the model receives clean,
# unambiguous instructions; placeholders and structure are unchanged.
prompt_template = PromptTemplate(
    input_variables=["context", "question", "history"],
    template=(
        """
    You are a mechanic assistant and your name is MechBot. Humans will ask you questions about cars.
    Use the following pieces of context and the chat history to answer the question.
    If you don't know the answer, just say you don't know.
    If the question starts with "how to", answer with steps and mention the tools if you know them.
    Chat History: ({history})
    Context: ({context})
    Question: {question}
    Answer:
    """
    ),
)

# Full answering pipeline: fill the prompt, call the LLM, return plain text.
llm_chain = prompt_template | llm | StrOutputParser()
def answer_question(question, gh):
    """Answer one user turn for the Gradio chat interface.

    Args:
        question: The user's message.
        gh: Chat history supplied by gr.ChatInterface — unused here; a
            module-level ``history`` string is maintained instead.
            NOTE(review): consider deriving history from ``gh`` so state
            survives multiple browser sessions correctly.

    Returns:
        str: The model's answer, or a fixed refusal when Llama Guard
        flags the input as unsafe.
    """
    global counter
    global history
    global reter

    # Screen the raw user input with Llama Guard before spending LLM tokens.
    if "unsafe" in guard_chain.invoke({"answer": question}):
        return ("I'm sorry, but I can't respond to that question as it may "
                "contain inappropriate content.")

    # Retrieve the 2 most similar chunks; join builds the context in one
    # pass instead of repeated += concatenation.
    retrieved_docs = db.similarity_search(question, k=2)
    reter = "".join(doc.page_content + "\n" for doc in retrieved_docs)

    # Bound the rolling history so the prompt stays within the model's
    # context window (truncation may cut mid-entry; accepted trade-off).
    if len(history) > 3000:
        history = history[-2000:]

    # Log the exact prompt for debugging retrieval/history issues.
    print("Formatted Prompt:")
    print(prompt_template.format(context=reter, history=history, question=question))

    answer = llm_chain.invoke({"context": reter, "history": history, "question": question})
    history += f"\nuser question: {question}\nAI answer: {answer}"
    counter += 1
    return answer
import gradio as gr

# Module-level conversation state consumed by answer_question().
history = ""  # rolling transcript: "user question: ...\nAI answer: ..."
counter = 1  # number of answered turns (incremented on each call)

# Create the Chat interface
iface = gr.ChatInterface(
    answer_question,  # Use the improved answer_question function
    title="Mech-bot: Your Car Mechanic Assistant",
    description="Ask any car mechanic-related questions, and Mech-bot will try its best to assist you.",
    submit_btn="Ask",
    clear_btn="Clear Chat"
)
# Launch the Gradio interface
# debug=True keeps the process attached and prints server-side errors.
iface.launch(debug=True)