import os
import uuid

import chromadb
import gradio as gr
import httpx
from llama_index.core import (
    Document,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.cohere import CohereEmbedding
from llama_index.llms.cohere import Cohere
from llama_index.vector_stores.chroma import ChromaVectorStore

api_key = os.environ.get("API_KEY")
base_url = os.environ.get("BASE_URL")

llm = Cohere(api_key=api_key, model="command")
embedding_model = CohereEmbedding(
    api_key=api_key,
    model_name="embed-multilingual-v3.0",
    input_type="search_document",
    embedding_type="int8",
)

# Populated with a ChatMemoryBuffer once a document or URL has been indexed.
memory = None

# Set global settings
Settings.llm = llm
Settings.embed_model = embedding_model
# Set context window
Settings.context_window = 4096
# Set number of output tokens
Settings.num_output = 512

db_path = ""


def validate_url(url):
    try:
        response = httpx.get(url, timeout=60.0)
        response.raise_for_status()
        text = [Document(text=response.text)]
        option = "web"
        return text, option
    except httpx.RequestError as e:
        raise gr.Error(f"An error occurred while requesting {url}: {str(e)}")
    except httpx.HTTPStatusError as e:
        raise gr.Error(f"Error response {e.response.status_code} while requesting {url}")
    except Exception as e:
        raise gr.Error(f"An unexpected error occurred: {str(e)}")


def extract_web(url):
    # Route the URL through Jina Reader, which returns the page as clean text.
    print("Entered Webpage Extraction")
    prefix_url = "https://r.jina.ai/"
    full_url = prefix_url + url
    print(full_url)
    print("Exited Webpage Extraction")
    return validate_url(full_url)


def extract_doc(path):
    documents = SimpleDirectoryReader(input_files=path).load_data()
    option = "doc"
    return documents, option


def create_col(documents):
    # Create a client and a new collection
    db_path = f"database/{str(uuid.uuid4())[:4]}"
    client = chromadb.PersistentClient(path=db_path)
    chroma_collection = client.get_or_create_collection("quickstart")
    # Create a vector store
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    # Create a storage context
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    # Create an index from the documents and save it to disk.
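    # Note: with a Chroma-backed storage context, from_documents() embeds the
    # documents and writes them straight into the persistent collection, so no
    # separate persist() call should be needed here.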
    VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    return db_path


def infer(message: dict, history: list):
    global db_path
    global memory
    option = ""
    print(f"message: {message}")
    print(f"history: {history}")
    files_list = message["files"]

    if files_list:
        documents, option = extract_doc(files_list)
        db_path = create_col(documents)
        memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
    else:
        if message["text"].startswith(("http://", "https://")):
            documents, option = extract_web(message["text"])
            db_path = create_col(documents)
            memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
        elif len(history) == 0:
            # First message is neither a file nor a URL: nothing to index yet.
            raise gr.Error("Please send a URL or a document first")

    # Load from disk
    load_client = chromadb.PersistentClient(path=db_path)
    # Fetch the collection
    chroma_collection = load_client.get_collection("quickstart")
    # Fetch the vector store
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    # Get the index from the vector store
    index = VectorStoreIndex.from_vector_store(vector_store)

    if option == "web" and len(history) == 0:
        response = "Gotcha! Now ask your question."
    else:
        question = message["text"]
        # The engine condenses the chat history plus the new message into a
        # standalone question, retrieves context for it, and answers with the
        # prompt below.
        chat_engine = CondensePlusContextChatEngine.from_defaults(
            index.as_retriever(),
            memory=memory,
            context_prompt=(
                "You are an assistant for question-answering tasks. "
                "Use the following context to answer the question:\n"
                "{context_str}"
                "\nIf you don't know the answer, just say that you don't know. "
                "Use five sentences maximum and keep the answer concise."
                "\nInstruction: Use the previous chat history, or the context above, to interact and help the user."
            ),
            verbose=True,
        )
        response = chat_engine.chat(question)

    print(type(response))
    print(f"response: {response}")
    return str(response)

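# Gradio UI: the CSS below hides Gradio's footer and centers the page heading.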

css = """
footer {
    display: none !important;
}
h1 {
    text-align: center;
    display: block;
}
"""

title = """<h1>RAG demo</h1>
<h3>Retrieval for web and documents</h3>"""

chatbot = gr.Chatbot(
    placeholder="Please send a URL or a document file first.\nThen ask a question and get an answer.",
    height=800,
)

with gr.Blocks(theme="soft", css=css, fill_height=True) as demo:
    gr.ChatInterface(
        fn=infer,
        title=title,
        multimodal=True,
        chatbot=chatbot,
    )

if __name__ == "__main__":
    demo.queue(api_open=False).launch(show_api=False, share=False)
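
# Usage sketch (assumes this file is saved as app.py and a Cohere key is set):
#   export API_KEY=<your-cohere-api-key>
#   python app.py
# Send a URL or upload a document as the first message, then ask questions
# about its content in follow-up messages.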