import openai
import os
import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv

# Set Streamlit page configuration (must be the first Streamlit call in the script).
st.set_page_config(page_title="Chat with Notes and AI", page_icon=":books:", layout="wide")

# Load environment variables from .env file
load_dotenv()

# Retrieve OpenAI API key from environment; fail fast if it is missing.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("OpenAI API key not found. Set it in the .env file or environment variables.")

openai.api_key = OPENAI_API_KEY


def generate_openai_response(instruction, context=None):
    """Generate a chat completion for *instruction*, optionally grounded in *context*.

    Args:
        instruction: The user's question or prompt.
        context: Optional retrieved notes text to ground the answer in.

    Returns:
        The assistant's reply as a string, or an ``"Error: ..."`` string on failure
        (errors are returned, not raised, so the Streamlit UI can display them).
    """
    try:
        messages = [{"role": "system", "content": "You are a helpful assistant."}]
        # Provide the retrieved context BEFORE the question so the model has
        # read the grounding material by the time it sees the instruction.
        if context:
            messages.append({"role": "user", "content": f"Context: {context}"})
        messages.append({"role": "user", "content": instruction})
        # NOTE: openai.ChatCompletion is the pre-1.0 openai SDK interface;
        # keep the installed openai package <1.0 or migrate to openai.OpenAI().
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=messages,
            max_tokens=1200,
            temperature=0.7,
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # Surface the failure text to the caller/UI instead of crashing the app.
        return f"Error: {str(e)}"


def get_text_files_content(folder):
    """Concatenate the contents of every ``.txt`` file in *folder*.

    Files are read in sorted name order for deterministic output, and the
    pieces are assembled with ``str.join`` (linear) instead of repeated
    string concatenation (quadratic). Each file's content is newline-terminated.
    """
    parts = []
    for filename in sorted(os.listdir(folder)):
        if filename.endswith('.txt'):
            with open(os.path.join(folder, filename), 'r', encoding='utf-8') as file:
                parts.append(file.read())
    return "".join(part + "\n" for part in parts)


def get_chunks(raw_text):
    """Split *raw_text* into overlapping chunks suitable for embedding."""
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,   # Small chunks keep retrieval and embedding fast.
        chunk_overlap=200, # Overlap preserves context across chunk boundaries.
        length_function=len,
    )
    return text_splitter.split_text(raw_text)


def get_vectorstore(chunks):
    """Build a FAISS vectorstore over *chunks* using OpenAI embeddings.

    NOTE: embedding calls the OpenAI API and is both slow and billable.
    """
    embeddings = OpenAIEmbeddings()
    return FAISS.from_texts(texts=chunks, embedding=embeddings)
def handle_question(question, vectorstore=None):
    """Answer *question*, using vectorstore retrieval for context when available.

    Args:
        question: The user's question.
        vectorstore: Optional FAISS store built over the selected notes.

    Returns:
        The model's answer string (or an ``"Error: ..."`` string from
        ``generate_openai_response`` on failure).
    """
    if vectorstore:
        # Retrieve the two most similar chunks and join them as context.
        documents = vectorstore.similarity_search(question, k=2)
        context = "\n".join(doc.page_content for doc in documents)
        context = context[:1000]  # Cap context size to keep the prompt small/fast.
        return generate_openai_response(question, context)
    # No notes loaded: fall back to an instruction-only prompt.
    return generate_openai_response(question)


def main():
    """Streamlit entry point: select notes, preview them, and chat about them."""
    st.title("Chat with Notes :books:")

    # Session state caches the vectorstore AND the selection that built it,
    # so Streamlit reruns (triggered by every widget interaction) do not
    # re-embed the same notes — embedding is slow and costs OpenAI API calls.
    if "vectorstore" not in st.session_state:
        st.session_state.vectorstore = None
    if "vectorstore_key" not in st.session_state:
        st.session_state.vectorstore_key = None

    data_folder = "data"     # One sub-folder of .txt notes per Current Affairs subject.
    essay_folder = "essays"  # Standalone .txt essay files.

    content_type = st.sidebar.radio("Select Content Type:", ["Current Affairs", "Essays"])

    # Populate the subject list for the chosen content type; an absent folder
    # yields an empty list rather than an OSError.
    if content_type == "Current Affairs":
        subjects = ([f for f in os.listdir(data_folder)
                     if os.path.isdir(os.path.join(data_folder, f))]
                    if os.path.exists(data_folder) else [])
    else:  # "Essays"
        subjects = ([f.replace(".txt", "") for f in os.listdir(essay_folder)
                     if f.endswith('.txt')]
                    if os.path.exists(essay_folder) else [])

    selected_subject = st.sidebar.selectbox("Select a Subject:", subjects)

    # Load the raw notes text for the selection.
    raw_text = ""
    if content_type == "Current Affairs" and selected_subject:
        raw_text = get_text_files_content(os.path.join(data_folder, selected_subject))
    elif content_type == "Essays" and selected_subject:
        subject_file = os.path.join(essay_folder, selected_subject + ".txt")
        if os.path.exists(subject_file):
            with open(subject_file, "r", encoding="utf-8") as file:
                raw_text = file.read()

    if raw_text:
        st.subheader("Preview of Notes")
        st.text_area("Preview Content:", value=raw_text[:2000], height=300, disabled=True)

        # Rebuild the vectorstore only when the selection actually changed.
        cache_key = (content_type, selected_subject)
        if (st.session_state.vectorstore is None
                or st.session_state.vectorstore_key != cache_key):
            text_chunks = get_chunks(raw_text)
            st.session_state.vectorstore = get_vectorstore(text_chunks)
            st.session_state.vectorstore_key = cache_key
    else:
        st.warning("No content available for the selected subject.")

    # Chat interface.
    st.subheader("Ask Your Question")
    question = st.text_input("Ask a question about your selected subject:")
    if question:
        if st.session_state.vectorstore:
            response = handle_question(question, st.session_state.vectorstore)
            st.subheader("Answer:")
            st.write(response)
        else:
            st.warning("Please load the content for the selected subject before asking a question.")


# Run the app
if __name__ == '__main__':
    main()