# -*- coding: utf-8 -*-
"""app.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/14T27f82OgH2BZgVkanyyUKMrM1KBBJjM
"""

import json
import os

import fitz  # PyMuPDF
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Together
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate

# SECURITY: a live Together API key used to be hard-coded on this line. It has
# been removed from the source; rotate that key and provide a fresh one through
# the TOGETHER_API_KEY environment variable instead.
if not os.environ.get("TOGETHER_API_KEY"):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it before starting the app."
    )


def extract_and_split_pdf(pdf_path, split_key="ENDOFTUT"):
    """Return the text of a PDF split into chunks at every ``split_key``.

    Args:
        pdf_path: Path of the PDF file to read.
        split_key: Marker string that separates chunks in the extracted text.

    Returns:
        A list of text chunks in document order. Chunks that contain only
        whitespace (for example the remainder after a trailing marker) are
        dropped so they are never embedded as empty documents.
    """
    # The context manager closes the file handle even if extraction fails.
    with fitz.open(pdf_path) as document:
        all_text = "".join(page.get_text() for page in document)
    return [chunk for chunk in all_text.split(split_key) if chunk.strip()]


def _format_qa_entry(item):
    """Render one ``{"question", "answer"}`` record as a ``Q:``/``A:`` chunk."""
    # Strip an existing trailing "?" so the entry does not end up with "??".
    question = str(item.get("question", "")).rstrip().rstrip("?")
    answer = item.get("answer", "")
    return f"Q: {question}?\nA: {answer}\n"


# Build the corpus: tutorial chunks from the PDF plus one chunk per Q&A pair.
pdf_path = "Mech-chunks.pdf"
combined_list = extract_and_split_pdf(pdf_path)

with open("output_data (1).json", "r", encoding="utf-8") as file:
    data = json.load(file)

combined_list.extend(_format_qa_entry(item) for item in data)

# Embed every chunk and index it for similarity search.
db = FAISS.from_texts(
    combined_list,
    HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5"),
)

retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

# Manually test retrieval to verify correct chunk fetching
query = "what are the tools needed to change Brake Discs on a Car?"
import logging

import gradio as gr

logger = logging.getLogger(__name__)

retrieved_docs = db.similarity_search(query, k=5)

# Print out the content of retrieved documents to verify
for idx, doc in enumerate(retrieved_docs, start=1):
    print(f"Document {idx}:", doc.page_content)

# Model that writes the answers.
llm = Together(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    max_tokens=512,
    temperature=0.2,
    repetition_penalty=1.1,
)

# Safety classifier; only the short 'safe'/'unsafe' verdict is needed.
Llama_guard = Together(
    model="meta-llama/Meta-Llama-Guard-3-8B",
    max_tokens=10,
    temperature=0.2,
    repetition_penalty=1.1,
)

# NOTE(review): the "3D and Blender creation" exemptions in this policy look
# copied from another project -- confirm they are intended for a car-mechanic
# assistant before relying on them.
guard_temp_ans = """
<|system|>
Task: Check if there is unsafe content in conversations according our safety policy with the below categories.

1: Violence and Hate.
Should not
- Help people plan or engage in violence.
- Encourage people to engage in violence.
- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their race, color, religion, national origin, sexual orientation, gender, gender identity, or disability.
- Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.
- Provide information that can be used to promote or engage in violence, hate, or discrimination.
- Provide guidance or instructions on how to engage in violent or hateful behavior, except if related to 3D and Blender creation for artistic purposes.

2: Sexual Content.
Should not
- Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities.
- Create or provide explicit depictions of sexual acts or nudity.
- Provide information that could be used to create sexually explicit content

3: Criminal Planning.
Should not
- Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.
- Encourage people to engage in criminal activities or condone criminal behavior.
- Provide detailed instructions or information that could be used to plan or execute criminal activities, except if related to 3D and Blender creation for artistic purposes.

4: Guns and Illegal Weapons.
Should not
- Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them.
- Encourage people to engage in firearm or illegal weapons crimes or condone such behavior.
- Provide information that could be used to create or use illegal weapons, except if related to 3D and Blender creation for artistic purposes.

5: Regulated or Controlled Substances.
Should not
- Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis.
- Assist or encourage people to create such substances when it is illegal to do so.
- Provide information that could be used to create or use regulated or controlled substances, except if related to 3D and Blender creation for artistic purposes.

6: Self-Harm.
Should not
- Encourage people to harm themselves.
- Romanticize or condone self-harm or suicide.
- Provide information regarding the methods of suicide or self-harm.
- Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves.
- Provide information that could be used to promote or engage in self-harm, except if related to 3D and Blender creation for artistic purposes.

{answer}

Provide your safety assessment for {answer} in the above conversation:
- First line must read 'safe' or 'unsafe'.
- If unsafe, do not include a second line.
"""

prompt_ans_guard = ChatPromptTemplate.from_template(guard_temp_ans)

guard_chain = prompt_ans_guard | Llama_guard


def moderate(chat):
    """Return the guard model's verdict text for a conversation.

    The previous body called tokenizer methods (``apply_chat_template``,
    ``decode``) on the ``Together`` LLM and used the undefined names
    ``device`` and ``model``, so it could never run. Moderation now goes
    through ``guard_chain``, the same path ``answer_question`` uses.

    Args:
        chat: Either the text to check, or a list of
            ``{"role": ..., "content": ...}`` message dicts.

    Returns:
        The guard model's completion with surrounding whitespace removed.
        The guard prompt asks for a first line of 'safe' or 'unsafe'.
    """
    if isinstance(chat, str):
        conversation = chat
    else:
        conversation = "\n".join(
            f"{message['role']}: {message['content']}" for message in chat
        )
    return guard_chain.invoke({"answer": conversation}).strip()


# Define the prompt template
prompt_template = PromptTemplate(
    input_variables=["context", "question", "history"],
    template=("""
You are a mechanic assistant and your name is MechBot, these human will ask you questions about Cars,
use Use following piece of context and chat history to answer the question.
If you don't know the answer, just say you don't know.
If the question is start with how to, answer with steps and mention the tools if you know it.

Chat History: ({history})

Context: ({context})

Question: {question}
Answer:

"""),
)

llm_chain = prompt_template | llm | StrOutputParser()

REFUSAL_MESSAGE = (
    "I'm sorry, but I can't respond to that question as it may contain "
    "inappropriate content."
)
HISTORY_CHAR_LIMIT = 3000  # History longer than this is truncated ...
HISTORY_CHARS_KEPT = 2000  # ... to this many trailing characters.


def _history_pairs(turns):
    """Yield ``(user_text, bot_text)`` pairs from a Gradio chat history.

    Accepts both history layouts Gradio can pass: a list of two-item
    ``[user, bot]`` entries, or a list of ``{"role", "content"}`` dicts.
    """
    pending_user = None
    for turn in turns or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            if role == "user":
                pending_user = turn.get("content")
            elif role == "assistant" and pending_user is not None:
                yield pending_user, turn.get("content")
                pending_user = None
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            yield turn[0], turn[1]


def _format_history(turns):
    """Render earlier turns of one chat session as text for the prompt."""
    parts = []
    for user_text, bot_text in _history_pairs(turns):
        # Skip turns that are not plain text or have no answer yet.
        if not isinstance(user_text, str) or not isinstance(bot_text, str):
            continue
        # Refused questions were never part of the history; keep it that way.
        if bot_text == REFUSAL_MESSAGE:
            continue
        parts.append(f"\nuser question: {user_text}\nAI answer: {bot_text}")
    return "".join(parts)


def answer_question(question, gh):
    """Answer one chat message using retrieved context and the chat history.

    The history comes from ``gh``, which Gradio supplies per session. The
    earlier version appended every exchange to a module-level string, which
    mixed the conversations of all connected users and was not reset by the
    "Clear Chat" button.

    Args:
        question: The user's message.
        gh: The session's previous turns as passed by ``gr.ChatInterface``.

    Returns:
        The model's answer, a fixed refusal message when the guard model's
        reply contains "unsafe", or a short hint when the message is blank.
    """
    if not question or not question.strip():
        return "Please enter a question about your car."

    if "unsafe" in guard_chain.invoke({"answer": question}):
        return REFUSAL_MESSAGE

    # Consider reducing 'k' if context is too large
    matches = db.similarity_search(question, k=2)
    context = "".join(f"{match.page_content}\n" for match in matches)

    # Keep only the most recent part of a long conversation.
    history = _format_history(gh)
    if len(history) > HISTORY_CHAR_LIMIT:
        history = history[-HISTORY_CHARS_KEPT:]

    inputs = {"context": context, "history": history, "question": question}
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Formatted Prompt:\n%s", prompt_template.format(**inputs))

    return llm_chain.invoke(inputs)


# Create the Chat interface
iface = gr.ChatInterface(
    answer_question,
    title="Mech-bot: Your Car Mechanic Assistant",
    description="Ask any car mechanic-related questions, and Mech-bot will try its best to assist you.",
    submit_btn="Ask",
    clear_btn="Clear Chat",
)

# Launch the Gradio interface
if __name__ == "__main__":
    iface.launch(debug=True)