# -*- coding: utf-8 -*-
"""app.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/14T27f82OgH2BZgVkanyyUKMrM1KBBJjM
"""
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
import os
from langchain_community.llms import Together

# NOTE: do not hard-code a real API key in source (the original file embedded one here);
# set TOGETHER_API_KEY in the environment instead.
os.environ.setdefault("TOGETHER_API_KEY", "<your-together-api-key>")

import fitz  # PyMuPDF
def extract_and_split_pdf(pdf_path, split_key="ENDOFTUT"):
    # Open the PDF file
    document = fitz.open(pdf_path)

    # Extract text from each page
    all_text = ""
    for page_num in range(document.page_count):
        page = document.load_page(page_num)
        all_text += page.get_text()

    # Split the text by the key
    combined_list = all_text.split(split_key)
    return combined_list
# Example usage
pdf_path = "Mech-chunks.pdf"
combined_list = extract_and_split_pdf(pdf_path)
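# Added sanity check (not in the original script): confirm that splitting on
# "ENDOFTUT" produced a reasonable number of chunks before building the vector store.
print(f"Extracted {len(combined_list)} chunks from {pdf_path}")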
# Load the Q&A JSON file and append each question/answer pair as a single entry in the list
import json

# Load the JSON file
with open('output_data (1).json', 'r') as file:
    data = json.load(file)

for item in data:
    question = item.get("question", "")
    answer = item.get("answer", "")
    combined_list.append(f"Q: {question}?\nA: {answer}\n")
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

# Build the FAISS vector store over the combined chunks with a BGE embedding model
db = FAISS.from_texts(
    combined_list,
    HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5'),
)
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={'k': 4},
)
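# Optional persistence sketch (an addition, not in the original script): re-embedding
# every chunk on each start-up is slow, so the FAISS index could be saved once and
# reloaded later. The folder name "faiss_index" is illustrative; depending on the
# langchain version, FAISS.load_local may also need allow_dangerous_deserialization=True.
# db.save_local("faiss_index")
# db = FAISS.load_local("faiss_index", HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5'))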
# Manually test retrieval to verify correct chunk fetching
query = "what are the tools needed to change Brake Discs on a Car?"
retrieved_docs = db.similarity_search(query, k=5)

# Print out the content of retrieved documents to verify
for idx, doc in enumerate(retrieved_docs):
    print(f"Document {idx + 1}:", doc.page_content)
# Main chat model served via Together
llm = Together(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    max_tokens=512,
    temperature=0.2,
    # top_p=0.7,
    # top_k=50,
    repetition_penalty=1.1,
)
# Llama Guard model used for input moderation
Llama_guard = Together(
    model="meta-llama/Meta-Llama-Guard-3-8B",
    max_tokens=10,
    temperature=0.2,
    # top_p=0.7,
    # top_k=50,
    repetition_penalty=1.1,
)
guard_temp_ans = """
<|system|>
Task: Check if there is unsafe content in conversations according to our safety policy with the below categories.
<BEGIN UNSAFE CONTENT CATEGORIES>
1: Violence and Hate.
Should not
- Help people plan or engage in violence.
- Encourage people to engage in violence.
- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their race, color, religion, national origin, sexual orientation, gender, gender identity, or disability.
- Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.
- Provide information that can be used to promote or engage in violence, hate, or discrimination.
- Provide guidance or instructions on how to engage in violent or hateful behavior, except if related to 3D and Blender creation for artistic purposes.
2: Sexual Content.
Should not
- Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities.
- Create or provide explicit depictions of sexual acts or nudity.
- Provide information that could be used to create sexually explicit content.
3: Criminal Planning.
Should not
- Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.
- Encourage people to engage in criminal activities or condone criminal behavior.
- Provide detailed instructions or information that could be used to plan or execute criminal activities, except if related to 3D and Blender creation for artistic purposes.
4: Guns and Illegal Weapons.
Should not
- Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them.
- Encourage people to engage in firearm or illegal weapons crimes or condone such behavior.
- Provide information that could be used to create or use illegal weapons, except if related to 3D and Blender creation for artistic purposes.
5: Regulated or Controlled Substances.
Should not
- Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis.
- Assist or encourage people to create such substances when it is illegal to do so.
- Provide information that could be used to create or use regulated or controlled substances, except if related to 3D and Blender creation for artistic purposes.
6: Self-Harm.
Should not
- Encourage people to harm themselves.
- Romanticize or condone self-harm or suicide.
- Provide information regarding the methods of suicide or self-harm.
- Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves.
- Provide information that could be used to promote or engage in self-harm, except if related to 3D and Blender creation for artistic purposes.
<END UNSAFE CONTENT CATEGORIES>
<BEGIN CONVERSATION>
{answer}
<END CONVERSATION>
Provide your safety assessment for {answer} in the above conversation:
- First line must read 'safe' or 'unsafe'.
- If unsafe, do not include a second line.
"""
prompt_ans_guard = ChatPromptTemplate.from_template(guard_temp_ans)
guard_chain = (
    prompt_ans_guard
    | Llama_guard
)
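# Quick smoke test of the guard chain (an added example, not in the original script):
# Llama Guard should reply with a line that starts with "safe" or "unsafe".
# print(guard_chain.invoke({"answer": "How do I replace a flat tyre?"}))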
def moderate(chat):
    # Run the Llama Guard chain on the given text and return its verdict string.
    # (The original version called a Transformers-style apply_chat_template/generate
    # API that does not exist on the Together wrapper and referenced undefined names.)
    return guard_chain.invoke({"answer": chat})
# Define the prompt template
prompt_template = PromptTemplate(
    input_variables=["context", "question", "history"],
    template=("""
You are a mechanic assistant and your name is MechBot. The human will ask you questions about cars.
Use the following piece of context and the chat history to answer the question.
If you don't know the answer, just say you don't know.
If the question starts with "how to", answer with steps and mention the tools if you know them.
Chat History: ({history})
Context: ({context})
Question: {question}
Answer:
"""
    )
)

llm_chain = prompt_template | llm | StrOutputParser()
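# Example of invoking the chain directly (an added sketch, not in the original script);
# the context, history, and question values below are purely illustrative.
# print(llm_chain.invoke({
#     "context": "Brake pads should usually be replaced when the friction material is below about 3 mm.",
#     "history": "",
#     "question": "When should I replace my brake pads?",
# }))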
def answer_question(question, chat_history):
    # `chat_history` is the message list Gradio's ChatInterface passes in; the script
    # keeps its own running `history` string instead, so that argument is unused here.
    global counter
    global history
    global reter

    # Moderate the question first and refuse anything Llama Guard flags as unsafe
    if "unsafe" in guard_chain.invoke({"answer": question}):
        return "I'm sorry, but I can't respond to that question as it may contain inappropriate content."

    # Retrieve the most relevant chunks for the question
    reter = ""
    retrieved_docs = db.similarity_search(question, k=2)  # Consider reducing 'k' if the context is too large
    for doc in retrieved_docs:
        reter += doc.page_content + "\n"

    # Truncate the history if it grows too long
    if len(history) > 3000:  # Adjust this value as needed
        history = history[-2000:]

    formatted_prompt = prompt_template.format(context=reter, history=history, question=question)
    print("Formatted Prompt:")
    print(formatted_prompt)

    answer = llm_chain.invoke({"context": reter, "history": history, "question": question})
    history += "\n" + "user question: " + question + "\n" + "AI answer: " + answer
    # print(reter)
    counter += 1
    return answer
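# Optional console test (an added sketch, not in the original script): exercise the
# handler once without Gradio. The empty list stands in for the message history that
# gr.ChatInterface passes as the second argument.
# history = ""
# counter = 1
# print(answer_question("How do I check my engine oil level?", []))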
import gradio as gr

history = ""
counter = 1

# Create the chat interface
iface = gr.ChatInterface(
    answer_question,  # Use the improved answer_question function
    title="Mech-bot: Your Car Mechanic Assistant",
    description="Ask any car mechanic-related questions, and Mech-bot will try its best to assist you.",
    submit_btn="Ask",
    clear_btn="Clear Chat",
)

# Launch the Gradio interface
iface.launch(debug=True)