import gradio as gr
from huggingface_hub import InferenceClient
import pandas as pd
import transformers
import torch
from sentence_transformers import SentenceTransformer, util
""" | |
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co./docs/huggingface_hub/v0.22.2/en/guides/inference | |
""" | |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta") | |
# Load the SBERT model used for retrieval
sbert_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Function to initialize the Llama 3 8B model pipeline
def initiate_pipeline():
    model = "meta-llama/Meta-Llama-3-8B-Instruct"
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    return transformers.pipeline(
        "text-generation",
        model=model,
        model_kwargs={"torch_dtype": torch.bfloat16},
        device=device,
    )
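# Note: torch.bfloat16 assumes hardware with bf16 support (e.g. recent NVIDIA
# GPUs); on older GPUs torch.float16, or torch.float32 on CPU, may be the
# safer dtype choice.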
# Initialize the model
llama_model = initiate_pipeline()

# Load the Q&A pairs from the CSV
qa_data = pd.read_csv("rag_juri_cv.csv")
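# The code below assumes rag_juri_cv.csv has (at least) two columns,
# QUESTION and ANSWER, one Q&A pair per row, for example:
#   QUESTION,ANSWER
#   "Where did Juri study?","Juri studied at ..."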
# Function to retrieve the top 5 relevant Q&A pairs using Sentence-BERT
def retrieve_top_k(query, k=5):
    # Combine the questions from the CSV into a list
    questions = qa_data['QUESTION'].tolist()
    # Encode the questions and the query using Sentence-BERT
    question_embeddings = sbert_model.encode(questions, convert_to_tensor=True)
    query_embedding = sbert_model.encode(query, convert_to_tensor=True)
    # Compute cosine similarities between the query and all questions
    cosine_scores = util.pytorch_cos_sim(query_embedding, question_embeddings).flatten()
    # Get the indices of the top k most similar questions; clamp k to the
    # number of questions, and convert to NumPy so pandas .iloc accepts them
    top_k_indices = torch.topk(cosine_scores, k=min(k, len(questions))).indices.cpu().numpy()
    # Retrieve the corresponding Q&A pairs
    top_k_qa = qa_data.iloc[top_k_indices]
    return top_k_qa
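# Example use (illustrative only): retrieve_top_k("What languages does Juri speak?")
# returns a 5-row slice of qa_data ordered by cosine similarity to the query.
#
# Possible optimization (a sketch, not part of the original app): the question
# embeddings never change, so they could be encoded once at startup and reused,
# e.g.
#   QUESTION_EMBEDDINGS = sbert_model.encode(qa_data['QUESTION'].tolist(),
#                                            convert_to_tensor=True)
# and retrieve_top_k would then skip the per-call encoding of the questions.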
def chatbot(query):
    # Retrieve the top 5 relevant Q&A pairs
    top_k_qa = retrieve_top_k(query)
    # Generate the prefix, context, and suffix for the prompt,
    # using the Llama 3 instruct chat-template tokens
    prefix = """
<|begin_of_text|><|start_header_id|>user<|end_header_id|>You are a chatbot specialized in answering questions about Juri Grosjean's CV.
Please only use the information provided in the context to answer the question.
Here is the question to answer:
""" + query + "\n\n"
    context = "This is the context information to answer the question:\n"
    for index, row in top_k_qa.iterrows():
        context += f"Information {index}: {row['ANSWER']}\n\n"
    suffix = "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
    prompt = prefix + context + suffix
    # Generate a response
    outputs = llama_model(
        prompt,
        max_new_tokens=500,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # Extract and return the chatbot's answer: generated_text includes the
    # prompt, so slice it off rather than splitting on the string "assistant",
    # which would break if the answer itself contains that word
    output = outputs[0]["generated_text"]
    return output[len(prompt):].strip()
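# Alternative (a sketch): the transformers text-generation pipeline also
# accepts return_full_text=False, which makes it return only the completion,
# so no slicing would be needed:
#   outputs = llama_model(prompt, max_new_tokens=500, do_sample=True,
#                         temperature=0.6, top_p=0.9, return_full_text=False)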
# Set up the Gradio interface
demo = gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(lines=5, placeholder="Ask a question about Juri Grosjean's CV"),
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()