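"""Streamlit resume chatbot ("HariGPT").

Answers questions about Harikrishna Dev using a LangChain
ConversationalRetrievalChain over a FAISS index built from a PDF resume
and a CSV of background information, and logs every exchange to a
Firebase Firestore collection for quality assurance.
"""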
import datetime
import json
import os
import uuid

import firebase_admin
import streamlit as st
from firebase_admin import credentials, firestore
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import load_prompt
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
# Function to check if a string is valid JSON
def is_valid_json(data):
    try:
        json.loads(data)
        return True
    except json.JSONDecodeError:
        return False
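# Firebase credentials are supplied as a JSON string via the "firebase_json_key"
# environment variable or Streamlit secrets. This is expected to be a standard
# Firebase service-account key (fields such as "type", "project_id",
# "private_key", and "client_email").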
if "firebase_json_key" in os.environ:
firebase_json_key = os.getenv("firebase_json_key")
else:
firebase_json_key = st.secrets["firebase_json_key"]
firebase_credentials = json.loads(firebase_json_key)
# Function to initialize the connection to Firebase Firestore
@st.cache_resource
def init_connection():
    # Guard against "app already exists" errors on Streamlit reruns
    if not firebase_admin._apps:
        cred = credentials.Certificate(firebase_credentials)
        firebase_admin.initialize_app(cred)
    return firestore.client()
# Attempt to connect to Firebase Firestore
try:
    db = init_connection()
except Exception as e:
    st.write("Failed to connect to Firebase:", e)

# Access the Firebase Firestore collection
if 'db' in locals():
    conversations_collection = db.collection('conversations')
else:
    st.write("Unable to access conversations collection. Firebase connection not established.")
# Retrieve OpenAI API key
if "OPENAI_API_KEY" in os.environ:
    openai_api_key = os.getenv("OPENAI_API_KEY")
else:
    openai_api_key = st.secrets["OPENAI_API_KEY"]
# Streamlit app title and disclaimer
st.title("HariGPT - Hari's resume bot")
st.image("images/about.jpg", use_column_width=True)
with st.expander("⚠️Disclaimer"):
    st.write("""This bot is an LLM assistant built on the GPT-4o-mini model that answers questions about Hari's professional background and qualifications. Conversations are recorded in a database for quality assurance and improvement purposes. Please be respectful and avoid asking personal or inappropriate questions.""")
# Define file paths and load initial settings
path = os.path.dirname(__file__)
prompt_template = path+"/templates/template.json"
prompt = load_prompt(prompt_template)
faiss_index = path+"/faiss_index"
data_source = path+"/data/about_hari.csv"
pdf_source = path+"/data/HarikrishnaDev_DataScientist.pdf"
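# templates/template.json is assumed to hold a serialized LangChain prompt that
# instructs the model to reply with a JSON object containing "answered",
# "response", and "questions" keys, which are parsed in conversational_chat below.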
# Function to store a conversation turn in Firebase
def store_conversation(conversation_id, user_message, bot_message, answered):
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    data = {
        "conversation_id": conversation_id,
        "timestamp": timestamp,
        "user_message": user_message,
        "bot_message": bot_message,
        "answered": answered
    }
    conversations_collection.add(data)
# Initialize OpenAI embeddings (pass the key explicitly in case it only lives in st.secrets)
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
# Load the FAISS index, or create a new one if it doesn't exist
if os.path.exists(faiss_index):
    vectors = FAISS.load_local(faiss_index, embeddings, allow_dangerous_deserialization=True)
else:
    # Load and chunk data from the PDF and CSV sources
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=400,
        chunk_overlap=40
    )
    pdf_loader = PyPDFLoader(pdf_source)
    pdf_data = pdf_loader.load_and_split(text_splitter=text_splitter)
    csv_loader = CSVLoader(file_path=data_source, encoding="utf-8")
    csv_data = csv_loader.load()
    data = pdf_data + csv_data
    # Create embeddings for the documents and save the index
    vectors = FAISS.from_documents(data, embeddings)
    vectors.save_local(faiss_index)
# Initialize the conversational retrieval chain
retriever = vectors.as_retriever(search_type="similarity",
                                 search_kwargs={"k": 6, "include_metadata": True, "score_threshold": 0.6})
chain = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(temperature=0.5, model_name='gpt-4o-mini', openai_api_key=openai_api_key),
    retriever=retriever, return_source_documents=True, verbose=True, chain_type="stuff",
    max_tokens_limit=4097, combine_docs_chain_kwargs={"prompt": prompt})
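# The chain condenses each question with the chat history into a standalone
# query, retrieves the top k=6 chunks from the FAISS index, and "stuffs" them
# into the loaded prompt before calling the model.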
# Function to handle a single conversational turn
def conversational_chat(query):
    with st.spinner("Thinking..."):
        result = chain({
            "system": "You are a Resume Bot, a comprehensive, interactive resource for exploring Harikrishna (Harry) Dev's background, skills, and expertise. Be polite and provide answers based on the provided context only. Answer only questions about Harikrishna Dev and his work experience; if a question is not relevant to Harikrishna, reply that you are a Resume bot. You may construct example projects from the skills and projects in the provided context if the question asks about a skill set related to Machine Learning, Database management, or Computer science.",
            "question": query,
            "chat_history": st.session_state['history']
        })
    # Check whether the model returned valid JSON; otherwise fall back to a canned error payload
    if is_valid_json(result["answer"]):
        data = json.loads(result["answer"])
    else:
        data = json.loads('{"answered":"false", "response":"Hmm... Something is not right. I\'m experiencing technical difficulties. Try asking your question again or ask another question about Hari\'s professional background and qualifications. Thank you for your understanding.", "questions":["What is Hari\'s professional experience?","What projects has Hari worked on?","What are Hari\'s career goals?"]}')
    answered = data.get("answered")
    response = data.get("response")
    questions = data.get("questions")
    full_response = "--"
    # Append the user query and bot response to the chat history
    st.session_state['history'].append((query, response))
    # Build the displayed response based on the answer status
    if ('I am tuned to only answer questions' in response) or (response == ""):
        full_response = """Unfortunately, I can't answer this question. My capabilities are limited to providing information about Hari's professional background and qualifications. If you have other inquiries, I recommend reaching out to Hari on [LinkedIn](https://www.linkedin.com/in/harikrishnad1997/). I can answer questions like: \n - What is Hari's educational background? \n - Can you list Hari's professional experience? \n - What skills does Hari possess? \n"""
        store_conversation(st.session_state["uuid"], query, full_response, answered)
    else:
        markdown_list = ""
        for item in questions:
            markdown_list += f"- {item}\n"
        full_response = response + "\n\n What else would you like to know about Hari? You can ask me: \n" + markdown_list
        store_conversation(st.session_state["uuid"], query, full_response, answered)
    return full_response
# Initialize session variables if not already present
if "uuid" not in st.session_state:
    st.session_state["uuid"] = str(uuid.uuid4())
if "openai_model" not in st.session_state:
    st.session_state["openai_model"] = "gpt-4o-mini"
if "messages" not in st.session_state:
    st.session_state.messages = []
# Show a welcome message from the assistant
with st.chat_message("assistant"):
    message_placeholder = st.empty()
    welcome_message = """
Welcome! I'm **Resume Bot**, a virtual assistant designed to provide insights into Harikrishna (Harry) Dev's background and qualifications.

Feel free to inquire about any aspect of Hari's profile, such as his educational journey, internships, professional projects, areas of expertise in data science and AI, or his future goals. For example, you can ask about:

- His Master's in Business Analytics with a focus on Data Science from UTD
- His track record in roles at companies like Daifuku, Flipkart, and Walmart
- His proficiency in programming languages, ML frameworks, data visualization, and cloud platforms
- His passion for leveraging transformative technologies to drive innovation and positive societal impact

What would you like to know first? I'm ready to answer your questions in detail.
"""
    message_placeholder.markdown(welcome_message)
if 'history' not in st.session_state:
    st.session_state['history'] = []

# Display previous chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# Process user input and display the bot response
# (the input variable is named user_query so it doesn't shadow the loaded prompt template)
if user_query := st.chat_input("Ask me about Hari"):
    st.session_state.messages.append({"role": "user", "content": user_query})
    with st.chat_message("user"):
        st.markdown(user_query)
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = conversational_chat(user_query)
        message_placeholder.markdown(full_response)
    st.session_state.messages.append({"role": "assistant", "content": full_response})