# Import necessary libraries import openai import random import time import gradio as gr # Gradio is a library for creating UIs import os # This module provides functions to interact with the operating system # Importing various classes and functions from the langchain package from langchain.embeddings.openai import OpenAIEmbeddings from langchain.vectorstores import DeepLake from langchain.chat_models import ChatOpenAI from langchain.chains import ConversationalRetrievalChain from langchain.document_loaders import TextLoader from langchain.text_splitter import CharacterTextSplitter from langchain.document_loaders import PyPDFDirectoryLoader from langchain.memory import ConversationBufferMemory from langchain.llms import OpenAI # Function to set the OpenAI API key def set_api_key(key): os.environ["OPENAI_API_KEY"] = key # Sets an environment variable with the key return f"Your API Key has been set to: {key}" # Returns a confirmation message # Function to reset the OpenAI API key def reset_api_key(): os.environ["OPENAI_API_KEY"] = "" # Clears the environment variable storing the key return "Your API Key has been reset" # Returns a confirmation message # Function to get the current OpenAI API key def get_api_key(): api_key = os.getenv("OPENAI_API_KEY") # Fetches the value of the environment variable return api_key # Function to set the model (GPT-3.5-turbo or GPT-4) def set_model(model): os.environ["OPENAI_MODEL"] = model # Sets an environment variable with the model return f"{model} selected" # Returns a confirmation message # Function to get the current model def get_model(): model = os.getenv("OPENAI_MODEL") # Fetches the value of the environment variable return model # Function to get file paths of uploaded files def upload_file(files): file_paths = [file.name for file in files] # List comprehension to get all file paths return file_paths # Function to create a Vectorstore def create_vectorstore(files): # Vectorstore is a searchable store of vector representations for text passages. pdf_dir = files.name # Get the file name pdf_loader = PyPDFDirectoryLoader(pdf_dir) # Load the PDFs in the directory pdf_docs = pdf_loader.load_and_split() # Load and split the PDFs into sections text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) # Set up a text splitter texts = text_splitter.split_documents(pdf_docs) # Split the documents into chunks embeddings = OpenAIEmbeddings() # Set up the OpenAI embeddings # Create the Vectorstore from the documents, using the specified path, embeddings, and overwrite if it exists. db = DeepLake.from_documents(texts, dataset_path="./documentation_db", embedding=embeddings, overwrite=True) return "Vectorstore Successfully Created" # Returns a confirmation message # Function to generate a response given a user's message and previous chat history def respond(message, chat_history): # Get embeddings embeddings = OpenAIEmbeddings() # Connect to existing Vectorstore db = DeepLake(dataset_path="./documentation_db", embedding_function=embeddings, read_only=True) # Set retriever settings retriever = db.as_retriever(search_kwargs={"distance_metric":'cos', "fetch_k":10, "maximal_marginal_relevance":True, "k":10}) # Check if chat history is not empty if len(chat_history) != 0: chat_history = [(chat_history[0][0], chat_history[0][1])] # Get model model = get_model() # Create ChatOpenAI and ConversationalRetrievalChain model = ChatOpenAI(model_name=model) qa = ConversationalRetrievalChain.from_llm(model, retriever) # Generate a bot message bot_message = qa({"question": message, "chat_history": chat_history}) # Update chat history chat_history = [(message, bot_message["answer"])] time.sleep(1) # Wait for a second to simulate real-time interaction return "", chat_history # Return updated chat history # Start building the Gradio UI with gr.Blocks() as demo: # Write some HTML for a header gr.Markdown("