File size: 4,665 Bytes
8364e36
5379f04
 
 
b2c2e74
5379f04
 
 
 
 
 
 
 
f77c387
 
5379f04
 
 
a710661
5379f04
 
 
f77c387
5379f04
f77c387
 
 
 
 
 
 
 
5379f04
 
 
 
 
 
f77c387
5379f04
a9460a2
5379f04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2c2e74
5379f04
 
b2c2e74
 
5379f04
 
226eb83
5379f04
 
 
 
 
 
 
 
 
 
 
 
 
b2c2e74
5379f04
f229ceb
9a6b2aa
5379f04
 
8364e36
5379f04
 
 
 
 
 
8364e36
5379f04
 
 
b2c2e74
5379f04
 
a9460a2
5379f04
 
1ebf8b3
a9460a2
5379f04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2c2e74
8364e36
5379f04
 
 
 
 
 
 
 
 
8364e36
5379f04
 
 
 
 
 
 
 
51b1469
 
5379f04
 
8364e36
dd3fe36
 
de693c7
 
 
 
62fd8b9
5379f04
de693c7
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import os
from bs4 import BeautifulSoup
from llama_index.core import Document
from llama_index.core import Settings
from llama_index.core import SimpleDirectoryReader
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.readers.web import SimpleWebPageReader

from llama_index.vector_stores.chroma import ChromaVectorStore

import chromadb
import re
from llama_index.llms.cohere import Cohere
from llama_index.embeddings.cohere import CohereEmbedding

from llama_index.core import PromptTemplate
from llama_index.core.llms import ChatMessage
import gradio as gr
import uuid

# Credentials are taken from the environment; API_KEY is required by both
# the LLM and the embedding model below.
api_key = os.environ.get("API_KEY")
# NOTE(review): base_url is read here but never used anywhere in this file —
# confirm whether it was meant to be passed to the Cohere clients.
base_url = os.environ.get("BASE_URL")

# Cohere chat LLM used for answer generation.
llm = Cohere(
    api_key=api_key, 
    model_name="command")
# Cohere embedding model; input_type="search_query" tunes embeddings for
# retrieval queries, int8 embeddings trade precision for size.
embedding_model = CohereEmbedding(
    api_key=api_key, 
    model_name="embed-multilingual-v3.0",
    input_type="search_query",
    embedding_type="int8",)




# Set Global settings
# Registering on Settings makes these the defaults for every index/query
# built later in this module.
Settings.llm = llm
Settings.embed_model = embedding_model

# Path of the most recently created Chroma database; reassigned inside
# infer() (declared global there) once a source has been ingested.
db_path: str = ""

def extract_web(url):
    """Download a web page and reduce it to its paragraph text.

    Parameters
    ----------
    url : str
        Address of the page to scrape.

    Returns
    -------
    tuple[list[Document], str]
        A one-element list holding a Document with all ``<p>`` text
        (newline-terminated per paragraph), and the source tag ``"web"``.
    """
    web_documents = SimpleWebPageReader().load_data([url])
    html_content = web_documents[0].text
    # Parse the raw HTML and keep only paragraph content.
    soup = BeautifulSoup(html_content, 'html.parser')
    # find_all() is the current bs4 API (findAll is a deprecated alias);
    # join with a trailing "\n" per paragraph reproduces the original
    # string-concatenation output exactly, without the quadratic `+=` loop.
    text_content = "".join(p.text + "\n" for p in soup.find_all('p'))

    # Convert back to Document format
    documents = [Document(text=text_content)]
    option = "web"
    return documents, option

def extract_doc(path):
    """Read local files into llama-index Documents.

    Parameters
    ----------
    path : list
        File paths handed to ``SimpleDirectoryReader`` via ``input_files``.

    Returns
    -------
    tuple[list, str]
        The loaded documents and the source tag ``"doc"``.
    """
    reader = SimpleDirectoryReader(input_files=path)
    return reader.load_data(), "doc"


def create_col(documents):
    """Embed *documents* into a fresh on-disk Chroma collection.

    Parameters
    ----------
    documents : list
        llama-index Document objects to index.

    Returns
    -------
    str
        Path of the newly created Chroma database directory.
    """
    # Use the full UUID for the directory name: the original 4-character
    # prefix collides after only a handful of sessions, which would reopen
    # an existing "quickstart" collection and silently mix unrelated data.
    db_path = f'database/{uuid.uuid4().hex}'
    # Create a client and a new collection
    client = chromadb.PersistentClient(path=db_path)
    chroma_collection = client.get_or_create_collection("quickstart")

    # Create a vector store
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    # Create a storage context
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    # Building the index embeds the documents and persists them through the
    # vector store to disk; the index object itself is not needed here.
    VectorStoreIndex.from_documents(
        documents, storage_context=storage_context
    )
    return db_path

def infer(message: str, history: list):
    """Gradio chat handler: ingest any new source, then answer the query.

    Parameters
    ----------
    message : dict
        Multimodal Gradio payload with ``"text"`` and ``"files"`` keys.
    history : list
        Prior ``(prompt, answer)`` pairs; a file turn appears with the
        prompt as a ``(filepath,)`` tuple.

    Returns
    -------
    str | object
        A status string on first web ingestion, otherwise the query
        engine's response object (stringified by Gradio).

    Raises
    ------
    gr.Error
        When the first turn supplies neither a file nor a URL.
    """
    global db_path
    option = ""
    print(f'message: {message}')
    print(f'history: {history}')
    # Copy so we never mutate Gradio's own message["files"] list in place.
    files_list = list(message["files"])

    for prompt, answer in history:
        # BUG FIX: the original tested `prompt is tuple` — an identity
        # comparison against the type object, which is always False — so
        # file entries in history were never recognised. isinstance() is
        # the correct check; a file turn stores the prompt as (filepath,).
        if isinstance(prompt, tuple):
            files_list.append(prompt[0])
        # NOTE(review): the original also built a ChatMessage list from the
        # text turns here but never used it; that dead accumulator is gone.

    if files_list:
        documents, option = extract_doc(files_list)
        db_path = create_col(documents)
    elif message["text"].startswith(("http://", "https://")):
        documents, option = extract_web(message["text"])
        db_path = create_col(documents)
    elif len(history) == 0:
        # BUG FIX: gr.Error must be *raised* for Gradio to display it; the
        # original merely constructed it and then crashed on an empty
        # db_path below.
        raise gr.Error("Please input an url or upload file at first.")

    # Load from disk
    load_client = chromadb.PersistentClient(path=db_path)

    # Fetch the collection
    chroma_collection = load_client.get_collection("quickstart")

    # Fetch the vector store
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    # Get the index from the vector store
    index = VectorStoreIndex.from_vector_store(
        vector_store
    )

    template = (
        """ You are an assistant for question-answering tasks.
    Use the following context to answer the question.
    If you don't know the answer, just say that you don't know.
    Use five sentences maximum and keep the answer concise.\n
    Question: {query_str} \nContext: {context_str} \nAnswer:"""
    )
    llm_prompt = PromptTemplate(template)
    print(llm_prompt)

    # On the very first web ingestion just acknowledge; otherwise run RAG.
    if option == "web" and len(history) == 0:
        response = "Get the web data! You can ask it."
    else:
        question = message['text']
        query_engine = index.as_query_engine(text_qa_template=llm_prompt)
        response = query_engine.query(question)

    return response
    






# Chat display component shared with the ChatInterface below.
chatbot = gr.Chatbot()

with gr.Blocks(theme="soft") as demo:
    # Multimodal chat UI (text + file uploads), all turns routed to infer().
    gr.ChatInterface(
        fn = infer,
        title = "RAG demo", 
        multimodal = True,
        chatbot=chatbot,
    )

if __name__ == "__main__":
    # queue() serialises concurrent requests; API access and public
    # share links are disabled.
    demo.queue(api_open=False).launch(show_api=False, share=False)