# NOTE: removed Hugging Face Spaces page artifacts ("Spaces:" / "Running")
# that were accidentally captured into this file — they were not valid Python.
# -*- coding: utf-8 -*-
"""app.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/14T27f82OgH2BZgVkanyyUKMrM1KBBJjM
"""
# --- Imports (stdlib, then third-party) ------------------------------------
import os

import fitz  # PyMuPDF — PDF text extraction
from langchain.prompts import PromptTemplate
from langchain_community.llms import Together
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# SECURITY: a live Together API key was previously hard-coded on this line and
# committed to source control — treat that key as leaked and rotate it.
# The key must now be supplied via the TOGETHER_API_KEY environment variable.
if not os.environ.get("TOGETHER_API_KEY"):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it before running this app."
    )
def extract_and_split_pdf(pdf_path, split_key="ENDOFTUT"):
    """Extract all text from a PDF and split it into chunks.

    Args:
        pdf_path: Path to the PDF file to read.
        split_key: Delimiter string that separates logical chunks
            inside the document text.

    Returns:
        list[str]: The full document text split on ``split_key``.
    """
    # Context manager guarantees the PDF handle is released even on error
    # (the original never called document.close(), leaking the handle).
    with fitz.open(pdf_path) as document:
        # str.join avoids the quadratic cost of repeated += concatenation.
        all_text = "".join(
            document.load_page(page_num).get_text()
            for page_num in range(document.page_count)
        )
    return all_text.split(split_key)
# ---------------------------------------------------------------------------
# Build the retrieval corpus: PDF tutorial chunks + curated Q&A pairs.
# ---------------------------------------------------------------------------
pdf_path = "Mech-chunks.pdf"
combined_list = extract_and_split_pdf(pdf_path)

# Append each question/answer pair from the JSON file as its own chunk.
import json

# encoding pinned to UTF-8 so decoding does not depend on the platform's
# locale default (the original relied on the implicit locale encoding).
with open('output_data (1).json', 'r', encoding='utf-8') as file:
    data = json.load(file)

for item in data:
    question = item.get("question", "")
    answer = item.get("answer", "")
    # NOTE(review): a literal '?' is appended even when the question already
    # ends with one — kept as-is so the indexed text stays unchanged.
    combined_list.append(f"Q: {question}?\nA: {answer}\n")
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

# Embed every chunk with a BGE sentence encoder and index them in FAISS.
embedding_model = HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5')
db = FAISS.from_texts(combined_list, embedding_model)

# Retriever facade over the index: top-4 nearest chunks by similarity.
retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 4})
# Smoke-test the index: fetch the 5 closest chunks for a sample question and
# print them so retrieval quality can be eyeballed before wiring up the bot.
query = "what are the tools needed to change Brake Discs on a Car?"
retrieved_docs = db.similarity_search(query, k=5)
for position, doc in enumerate(retrieved_docs, start=1):
    print(f"Document {position}:", doc.page_content)
# Main chat model: Llama 3.1 70B Instruct Turbo served by Together AI.
llm = Together(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    max_tokens=512,  # cap on generated answer length
    temperature=0.2,  # low temperature -> mostly deterministic answers
    #top_p=0.7,
    #top_k=50,
    repetition_penalty=1.1,  # mild penalty to discourage repeated phrases
)
# Safety classifier: Llama Guard 3 8B, used to screen user input.
# max_tokens is tiny because only a short "safe"/"unsafe" verdict is needed.
Llama_guard = Together(
    model="meta-llama/Meta-Llama-Guard-3-8B",
    max_tokens=10,
    temperature=0.2,
    #top_p=0.7,
    #top_k=50,
    repetition_penalty=1.1,
)
# Llama Guard moderation prompt. The {answer} placeholder receives the text
# to classify; the model is expected to reply "safe" or "unsafe".
# NOTE(review): the category exceptions mention "3D and Blender creation",
# which looks copied from a different project — consider tailoring the policy
# to this car-mechanic assistant. Also, standard Llama Guard prompts ask for
# violated categories on a second line when unsafe; this one suppresses it.
# The string below is runtime prompt data and is left byte-identical.
guard_temp_ans = """
<|system|>
Task: Check if there is unsafe content in conversations according our safety policy with the below categories.
<BEGIN UNSAFE CONTENT CATEGORIES>
1: Violence and Hate.
Should not
- Help people plan or engage in violence.
- Encourage people to engage in violence.
- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their race, color, religion, national origin, sexual orientation, gender, gender identity, or disability.
- Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.
- Provide information that can be used to promote or engage in violence, hate, or discrimination.
- Provide guidance or instructions on how to engage in violent or hateful behavior, except if related to 3D and Blender creation for artistic purposes.
2: Sexual Content.
Should not
- Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities.
- Create or provide explicit depictions of sexual acts or nudity.
- Provide information that could be used to create sexually explicit content
3: Criminal Planning.
Should not
- Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.
- Encourage people to engage in criminal activities or condone criminal behavior.
- Provide detailed instructions or information that could be used to plan or execute criminal activities, except if related to 3D and Blender creation for artistic purposes.
4: Guns and Illegal Weapons.
Should not
- Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them.
- Encourage people to engage in firearm or illegal weapons crimes or condone such behavior.
- Provide information that could be used to create or use illegal weapons, except if related to 3D and Blender creation for artistic purposes.
5: Regulated or Controlled Substances.
Should not
- Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis.
- Assist or encourage people to create such substances when it is illegal to do so.
- Provide information that could be used to create or use regulated or controlled substances, except if related to 3D and Blender creation for artistic purposes.
6: Self-Harm.
Should not
- Encourage people to harm themselves.
- Romanticize or condone self-harm or suicide.
- Provide information regarding the methods of suicide or self-harm.
- Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves.
- Provide information that could be used to promote or engage in self-harm, except if related to 3D and Blender creation for artistic purposes.
<END UNSAFE CONTENT CATEGORIES>
<BEGIN CONVERSATION>
{answer}
<END CONVERSATION>
Provide your safety assessment for {answer} in the above conversation:
- First line must read 'safe' or 'unsafe'.
- If unsafe, do not include a second line.
"""
# Wrap the guard prompt and the Llama Guard model into one runnable:
# input {"answer": <text>} -> "safe"/"unsafe" verdict string.
prompt_ans_guard = ChatPromptTemplate.from_template(guard_temp_ans)
guard_chain = prompt_ans_guard | Llama_guard
def moderate(chat):
    """Classify *chat* with Llama Guard and return its verdict text.

    The previous implementation called ``apply_chat_template`` / ``decode``
    on the ``Together`` wrapper (which has no such methods) and referenced
    undefined names ``device`` and ``model``, so it raised NameError on
    every call. It is rebuilt on top of ``guard_chain``, which already
    formats the guard prompt and invokes the hosted Llama Guard model.

    Args:
        chat: Text (or conversation transcript) to moderate.

    Returns:
        str: The raw guard verdict, expected to start with 'safe' or 'unsafe'.
    """
    return guard_chain.invoke({"answer": chat})
# Prompt fed to the main LLM on every user turn. The garbled instruction text
# in the original ("use Use following piece", "these human will ask",
# "If the question is start with") is fixed so the model receives clean,
# unambiguous instructions; placeholders and structure are unchanged.
prompt_template = PromptTemplate(
    input_variables=["context", "question", "history"],
    template=(
        """
    You are a mechanic assistant and your name is MechBot. Humans will ask you questions about cars.
    Use the following pieces of context and the chat history to answer the question.
    If you don't know the answer, just say you don't know.
    If the question starts with "how to", answer with steps and mention the tools if you know them.
    Chat History: ({history})
    Context: ({context})
    Question: {question}
    Answer:
    """
    ),
)

# Full answering pipeline: fill the prompt, call the LLM, return plain text.
llm_chain = prompt_template | llm | StrOutputParser()
def answer_question(question, gh):
    """Answer one user turn for the Gradio chat interface.

    Args:
        question: The user's message.
        gh: Chat history supplied by gr.ChatInterface — unused here; a
            module-level ``history`` string is maintained instead.
            NOTE(review): consider deriving history from ``gh`` so state
            survives multiple browser sessions correctly.

    Returns:
        str: The model's answer, or a fixed refusal when Llama Guard
        flags the input as unsafe.
    """
    global counter
    global history
    global reter

    # Screen the raw user input with Llama Guard before spending LLM tokens.
    if "unsafe" in guard_chain.invoke({"answer": question}):
        return ("I'm sorry, but I can't respond to that question as it may "
                "contain inappropriate content.")

    # Retrieve the 2 most similar chunks; join builds the context in one
    # pass instead of repeated += concatenation.
    retrieved_docs = db.similarity_search(question, k=2)
    reter = "".join(doc.page_content + "\n" for doc in retrieved_docs)

    # Bound the rolling history so the prompt stays within the model's
    # context window (truncation may cut mid-entry; accepted trade-off).
    if len(history) > 3000:
        history = history[-2000:]

    # Log the exact prompt for debugging retrieval/history issues.
    print("Formatted Prompt:")
    print(prompt_template.format(context=reter, history=history, question=question))

    answer = llm_chain.invoke({"context": reter, "history": history, "question": question})
    history += f"\nuser question: {question}\nAI answer: {answer}"
    counter += 1
    return answer
import gradio as gr

# Module-level conversation state consumed by answer_question().
history = ""  # rolling transcript: "user question: ...\nAI answer: ..."
counter = 1  # number of answered turns (incremented on each call)

# Create the Chat interface
iface = gr.ChatInterface(
    answer_question,  # Use the improved answer_question function
    title="Mech-bot: Your Car Mechanic Assistant",
    description="Ask any car mechanic-related questions, and Mech-bot will try its best to assist you.",
    submit_btn="Ask",
    clear_btn="Clear Chat"
)
# Launch the Gradio interface
# debug=True keeps the process attached and prints server-side errors.
iface.launch(debug=True)