diff --git a/README.md b/README.md
index 66721bf9adfd2b50ad58e8037aee9e9465478046..aae4ba7be15aba9f79cf1b28d1f20c8fc912e7cd 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,6 @@
 ---
-title: Discord Bot
-emoji: 😻
-colorFrom: gray
-colorTo: blue
+title: discord-bot
+app_file: __main__.py
 sdk: gradio
 sdk_version: 4.33.0
-app_file: app.py
-pinned: false
 ---
-
-Check out the configuration reference at https://huggingface.co./docs/hub/spaces-config-reference
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f0bcf77e86222821799de53d32c3d9c3956f310
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,9 @@
+import logging
+
+ROOT_LOG_LEVEL = "INFO"
+
+PRETTY_LOG_FORMAT = (
+    "%(asctime)s.%(msecs)03d [%(levelname)-8s] %(name)+25s - %(message)s"
+)
+logging.basicConfig(level=ROOT_LOG_LEVEL, format=PRETTY_LOG_FORMAT, datefmt="%H:%M:%S")
+logging.captureWarnings(True)
diff --git a/__main__.py b/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..38bc511ff38f51858834c8c11291e55cae212311
--- /dev/null
+++ b/__main__.py
@@ -0,0 +1,7 @@
+import uvicorn
+
+from app._config import settings
+from app.main import app
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=settings.PORT)
diff --git a/__pycache__/__init__.cpython-310.pyc b/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7269e80ce54e7530451c9ae8c4321ee77193d781
Binary files /dev/null and b/__pycache__/__init__.cpython-310.pyc differ
diff --git a/__pycache__/__main__.cpython-310.pyc b/__pycache__/__main__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..13533bd9d32052747d5a89f40c99a2a7e9b5b70c
Binary files /dev/null and b/__pycache__/__main__.cpython-310.pyc differ
diff --git a/__pycache__/_config.cpython-310.pyc b/__pycache__/_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7ad8ecebadcf36e66b88f5023d9c02d4b001e24e
Binary files /dev/null and b/__pycache__/_config.cpython-310.pyc differ
diff --git a/__pycache__/enums.cpython-310.pyc b/__pycache__/enums.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8faa01bbcd94dcea33f75aca2784b778b36e99c8
Binary files /dev/null and b/__pycache__/enums.cpython-310.pyc differ
diff --git a/__pycache__/main.cpython-310.pyc b/__pycache__/main.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0c827aaac46d5be98ffd4737443e23df6dbb8091
Binary files /dev/null and b/__pycache__/main.cpython-310.pyc differ
diff --git a/__pycache__/paths.cpython-310.pyc b/__pycache__/paths.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3941138a8fc8ec7e570b26362928d62b5a909775
Binary files /dev/null and b/__pycache__/paths.cpython-310.pyc differ
diff --git a/_config.py b/_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d0943fb142e22a95e7e947a25e1c44d2145cfd1
--- /dev/null
+++ b/_config.py
@@ -0,0 +1,62 @@
+import os
+from typing import Literal, Optional
+
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+
+class Settings(BaseSettings):
+    ENVIRONMENT: str
+    PORT: int = 8000
+    VECTOR_DATABASE: Literal["weaviate"] = "weaviate"
+
+    OPENAI_API_KEY: Optional[str] = None
+    OPENAI_MODEL: str = "gpt-3.5-turbo"
+
+    WEAVIATE_CLIENT_URL: str = "http://localhost:8080"
+
+    LLM_MODE: Literal["openai", "mock", "local"] = "mock"
+    EMBEDDING_MODE: Literal["openai",
"mock", "local"] = "mock" + + LOCAL_DATA_FOLDER: str = "local_data/test" + + DEFAULT_QUERY_SYSTEM_PROMPT: str = "You can only answer questions about the provided context. If you know the answer but it is not based in the provided context, don't provide the answer, just state the answer is not in the context provided." + + LOCAL_HF_EMBEDDING_MODEL_NAME: str = "BAAI/bge-small-en-v1.5" + + LOCAL_HF_LLM_REPO_ID: str = "TheBloke/Llama-2-7B-Chat-GGUF" + LOCAL_HF_LLM_MODEL_FILE: str = "llama-2-7b-chat.Q4_K_M.gguf" + + # LLM config + LLM_TEMPERATURE: float = Field( + default=0.1, description="The temperature to use for sampling." + ) + LLM_MAX_NEW_TOKENS: int = Field( + default=256, + description="The maximum number of tokens to generate.", + ) + LLM_CONTEXT_WINDOW: int = Field( + default=3900, + description="The maximum number of context tokens for the model.", + ) + + # UI + IS_UI_ENABLED: bool = True + UI_PATH: str = "/" + + # Rerank + IS_RERANK_ENABLED: bool = True + RERANK_TOP_N: int = 3 + RERANK_MODEL_NAME: str = "cross-encoder/ms-marco-MiniLM-L-2-v2" + + class Config: + case_sensitive = True + env_file_encoding = "utf-8" + + +environment = os.environ.get("ENVIRONMENT", "local") +settings = Settings( + ENVIRONMENT=environment, + # ".env.{environment}" takes priority over ".env" + _env_file=[".env", f".env.{environment}"], +) diff --git a/components/__init__.py b/components/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/components/__pycache__/__init__.cpython-310.pyc b/components/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7fc92682bdcd72f50afbbe7a185b7b9bea2f16a9 Binary files /dev/null and b/components/__pycache__/__init__.cpython-310.pyc differ diff --git a/components/embedding/__init__.py b/components/embedding/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/components/embedding/__pycache__/__init__.cpython-310.pyc b/components/embedding/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb8234a4cfd45b184fe4fd34f358b215447d33ba Binary files /dev/null and b/components/embedding/__pycache__/__init__.cpython-310.pyc differ diff --git a/components/embedding/__pycache__/component.cpython-310.pyc b/components/embedding/__pycache__/component.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8453f5b646275ff37ee2269613145147f4a78f80 Binary files /dev/null and b/components/embedding/__pycache__/component.cpython-310.pyc differ diff --git a/components/embedding/component.py b/components/embedding/component.py new file mode 100644 index 0000000000000000000000000000000000000000..2b97c0af9c54a425d8bdcce95060083831df2833 --- /dev/null +++ b/components/embedding/component.py @@ -0,0 +1,38 @@ +import logging + +from llama_index import MockEmbedding +from llama_index.embeddings.base import BaseEmbedding + +from app._config import settings +from app.enums import EmbeddingMode +from app.paths import models_cache_path + +logger = logging.getLogger(__name__) + +MOCK_EMBEDDING_DIM = 1536 + + +class EmbeddingComponent: + embedding_model: BaseEmbedding + + def __init__(self) -> None: + embedding_mode = settings.EMBEDDING_MODE + logger.info("Initializing the embedding model in mode=%s", embedding_mode) + match embedding_mode: + case EmbeddingMode.OPENAI: + from llama_index import OpenAIEmbedding 
+ + self.embedding_model = OpenAIEmbedding(api_key=settings.OPENAI_API_KEY) + + case EmbeddingMode.MOCK: + # Not a random number, is the dimensionality used by + # the default embedding model + self.embedding_model = MockEmbedding(MOCK_EMBEDDING_DIM) + + case EmbeddingMode.LOCAL: + from llama_index.embeddings import HuggingFaceEmbedding + + self.embedding_model = HuggingFaceEmbedding( + model_name=settings.LOCAL_HF_EMBEDDING_MODEL_NAME, + cache_folder=str(models_cache_path), + ) diff --git a/components/ingest/__init__.py b/components/ingest/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/components/ingest/__pycache__/__init__.cpython-310.pyc b/components/ingest/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..63a21bb2db066cfaca1c647e3cb939392fa49981 Binary files /dev/null and b/components/ingest/__pycache__/__init__.cpython-310.pyc differ diff --git a/components/ingest/__pycache__/component.cpython-310.pyc b/components/ingest/__pycache__/component.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..485fb66408c818eeab090691e3b1bacc4f45584c Binary files /dev/null and b/components/ingest/__pycache__/component.cpython-310.pyc differ diff --git a/components/ingest/__pycache__/helpers.cpython-310.pyc b/components/ingest/__pycache__/helpers.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2b226867932f84c19f82ae31623f236c15fded1f Binary files /dev/null and b/components/ingest/__pycache__/helpers.cpython-310.pyc differ diff --git a/components/ingest/component.py b/components/ingest/component.py new file mode 100644 index 0000000000000000000000000000000000000000..1b1b644a1380e0b421fe0f52672e6e9f7f5362e2 --- /dev/null +++ b/components/ingest/component.py @@ -0,0 +1,143 @@ +import abc +import logging +import threading +from pathlib import Path +from typing import Any + +from llama_index import ( + Document, + ServiceContext, + StorageContext, + VectorStoreIndex, + load_index_from_storage, +) +from llama_index.data_structs import IndexDict +from llama_index.indices.base import BaseIndex + +from app.components.ingest.helpers import IngestionHelper +from app.paths import local_data_path + +logger = logging.getLogger(__name__) + + +class BaseIngestComponent(abc.ABC): + def __init__( + self, + storage_context: StorageContext, + service_context: ServiceContext, + *args: Any, + **kwargs: Any, + ) -> None: + logger.debug(f"Initializing base ingest component type={type(self).__name__}") + self.storage_context = storage_context + self.service_context = service_context + + @abc.abstractmethod + def ingest(self, file_name: str, file_data: Path) -> list[Document]: + pass + + @abc.abstractmethod + def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[Document]: + pass + + @abc.abstractmethod + def delete(self, doc_id: str) -> None: + pass + + +class BaseIngestComponentWithIndex(BaseIngestComponent, abc.ABC): + def __init__( + self, + storage_context: StorageContext, + service_context: ServiceContext, + *args: Any, + **kwargs: Any, + ) -> None: + super().__init__(storage_context, service_context, *args, **kwargs) + + self.show_progress = True + self._index_thread_lock = ( + threading.Lock() + ) # Thread lock! 
Not a multiprocessing lock.
+        self._index = self._initialize_index()
+
+    def _initialize_index(self) -> BaseIndex[IndexDict]:
+        """Initialize the index from the storage context."""
+        try:
+            # Load the index with store_nodes_override=True to be able to delete them
+            index = load_index_from_storage(
+                storage_context=self.storage_context,
+                service_context=self.service_context,
+                store_nodes_override=True,  # Force store nodes in index and document stores
+                show_progress=self.show_progress,
+            )
+        except ValueError:
+            # There is no index in the storage context yet, so create a new one
+            logger.info("Creating a new vector store index")
+            index = VectorStoreIndex.from_documents(
+                [],
+                storage_context=self.storage_context,
+                service_context=self.service_context,
+                store_nodes_override=True,  # Force store nodes in index and document stores
+                show_progress=self.show_progress,
+            )
+            index.storage_context.persist(persist_dir=local_data_path)
+        return index
+
+    def _save_index(self) -> None:
+        self._index.storage_context.persist(persist_dir=local_data_path)
+
+    def delete(self, doc_id: str) -> None:
+        with self._index_thread_lock:
+            # Delete the document from the index
+            self._index.delete_ref_doc(doc_id, delete_from_docstore=True)
+
+            # Save the index
+            self._save_index()
+
+
+class SimpleIngestComponent(BaseIngestComponentWithIndex):
+    def __init__(
+        self,
+        storage_context: StorageContext,
+        service_context: ServiceContext,
+        *args: Any,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(storage_context, service_context, *args, **kwargs)
+
+    def ingest(self, file_name: str, file_data: Path) -> list[Document]:
+        logger.info("Ingesting file_name=%s", file_name)
+        documents = IngestionHelper.transform_file_into_documents(file_name, file_data)
+        logger.info(
+            "Transformed file=%s into count=%s documents", file_name, len(documents)
+        )
+        logger.debug("Saving the documents in the index and doc store")
+        return self._save_docs(documents)
+
+    def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[Document]:
+        saved_documents = []
+        for file_name, file_data in files:
+            documents = IngestionHelper.transform_file_into_documents(
+                file_name, file_data
+            )
+            saved_documents.extend(self._save_docs(documents))
+        return saved_documents
+
+    def _save_docs(self, documents: list[Document]) -> list[Document]:
+        logger.debug("Transforming count=%s documents into nodes", len(documents))
+        with self._index_thread_lock:
+            for document in documents:
+                self._index.insert(document, show_progress=True)
+            logger.debug("Persisting the index and nodes")
+            # Persist the index and nodes
+            self._save_index()
+            logger.debug("Persisted the index and nodes")
+        return documents
+
+
+def get_ingestion_component(
+    storage_context: StorageContext,
+    service_context: ServiceContext,
+) -> BaseIngestComponent:
+    return SimpleIngestComponent(storage_context, service_context)
diff --git a/components/ingest/helpers.py b/components/ingest/helpers.py
new file mode 100644
index 0000000000000000000000000000000000000000..1de2f7fa479ed4d902073d252e6726833a5ef9e3
--- /dev/null
+++ b/components/ingest/helpers.py
@@ -0,0 +1,61 @@
+import logging
+from pathlib import Path
+
+from llama_index import Document
+from llama_index.readers import JSONReader, StringIterableReader
+from llama_index.readers.file.base import DEFAULT_FILE_READER_CLS
+
+logger = logging.getLogger(__name__)
+
+# Patch the default file readers to support additional file types
+FILE_READER_CLS = DEFAULT_FILE_READER_CLS.copy()
+FILE_READER_CLS.update(
+    {
+        ".json": JSONReader,
+    }
+)
+
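+# FILE_READER_CLS maps file extensions to reader classes; extensions without an
+# entry fall back to the plain-text reader inside IngestionHelper below.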
+ +class IngestionHelper: + """Helper class to transform a file into a list of documents. + + This class should be used to transform a file into a list of documents. + These methods are thread-safe (and multiprocessing-safe). + """ + + @staticmethod + def transform_file_into_documents( + file_name: str, file_data: Path + ) -> list[Document]: + documents = IngestionHelper._load_file_to_documents(file_name, file_data) + for document in documents: + document.metadata["file_name"] = file_name + IngestionHelper._exclude_metadata(documents) + return documents + + @staticmethod + def _load_file_to_documents(file_name: str, file_data: Path) -> list[Document]: + logger.debug("Transforming file_name=%s into documents", file_name) + extension = Path(file_name).suffix + reader_cls = FILE_READER_CLS.get(extension) + if reader_cls is None: + logger.debug( + "No reader found for extension=%s, using default string reader", + extension, + ) + # Read as a plain text + string_reader = StringIterableReader() + return string_reader.load_data([file_data.read_text()]) + + logger.debug("Specific reader found for extension=%s", extension) + return reader_cls().load_data(file_data) + + @staticmethod + def _exclude_metadata(documents: list[Document]) -> None: + logger.debug("Excluding metadata from count=%s documents", len(documents)) + for document in documents: + document.metadata["doc_id"] = document.doc_id + # We don't want the Embeddings search to receive this metadata + document.excluded_embed_metadata_keys = ["doc_id"] + # We don't want the LLM to receive these metadata in the context + document.excluded_llm_metadata_keys = ["file_name", "doc_id", "page_label"] diff --git a/components/llm/__init__.py b/components/llm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/components/llm/__pycache__/__init__.cpython-310.pyc b/components/llm/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..121da75792c06690a9dcd50e597b8810b305cc1c Binary files /dev/null and b/components/llm/__pycache__/__init__.cpython-310.pyc differ diff --git a/components/llm/__pycache__/component.cpython-310.pyc b/components/llm/__pycache__/component.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c31e2af0599e983402b1eb6c27b056fa2d977422 Binary files /dev/null and b/components/llm/__pycache__/component.cpython-310.pyc differ diff --git a/components/llm/component.py b/components/llm/component.py new file mode 100644 index 0000000000000000000000000000000000000000..43be5ef6f65fa6759ba0578974462ee7b208335d --- /dev/null +++ b/components/llm/component.py @@ -0,0 +1,50 @@ +import logging + +from llama_index.llms import LLM, MockLLM + +from app._config import settings +from app.enums import LLMMode +from app.paths import models_path + +logger = logging.getLogger(__name__) + + +class LLMComponent: + llm: LLM + + def __init__(self) -> None: + llm_mode = settings.LLM_MODE + logger.info(f"Initializing the LLM in mode={llm_mode}") + match settings.LLM_MODE: + case LLMMode.OPENAI: + from llama_index.llms import OpenAI + + self.llm = OpenAI( + api_key=settings.OPENAI_API_KEY, + model=settings.OPENAI_MODEL, + ) + case LLMMode.MOCK: + self.llm = MockLLM() + + case LLMMode.LOCAL: + from llama_index.llms import LlamaCPP + from llama_index.llms.llama_utils import ( + completion_to_prompt, + messages_to_prompt, + ) + + self.llm = LlamaCPP( + model_path=str(models_path / 
settings.LOCAL_HF_LLM_MODEL_FILE), + temperature=settings.LLM_TEMPERATURE, + max_new_tokens=settings.LLM_MAX_NEW_TOKENS, + context_window=settings.LLM_CONTEXT_WINDOW, + generate_kwargs={}, + # set to at least 1 to use GPU + # set to -1 for all gpu + # set to 0 for cpu + model_kwargs={"n_gpu_layers": 0}, + # transform inputs into Llama2 format + messages_to_prompt=messages_to_prompt, + completion_to_prompt=completion_to_prompt, + verbose=True, + ) diff --git a/components/node_store/__init__.py b/components/node_store/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/components/node_store/__pycache__/__init__.cpython-310.pyc b/components/node_store/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7009400ddc603757a7de587aeb38bb609f46b933 Binary files /dev/null and b/components/node_store/__pycache__/__init__.cpython-310.pyc differ diff --git a/components/node_store/__pycache__/component.cpython-310.pyc b/components/node_store/__pycache__/component.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b58ec5be10280c6728503311d893280a108317f Binary files /dev/null and b/components/node_store/__pycache__/component.cpython-310.pyc differ diff --git a/components/node_store/component.py b/components/node_store/component.py new file mode 100644 index 0000000000000000000000000000000000000000..c3b11066d2d6ad48dd80f8bb352426a22859c784 --- /dev/null +++ b/components/node_store/component.py @@ -0,0 +1,31 @@ +import logging + +from llama_index.storage.docstore import BaseDocumentStore, SimpleDocumentStore +from llama_index.storage.index_store import SimpleIndexStore +from llama_index.storage.index_store.types import BaseIndexStore + +from app.paths import local_data_path + +logger = logging.getLogger(__name__) + + +class NodeStoreComponent: + index_store: BaseIndexStore + doc_store: BaseDocumentStore + + def __init__(self) -> None: + try: + self.index_store = SimpleIndexStore.from_persist_dir( + persist_dir=str(local_data_path) + ) + except FileNotFoundError: + logger.debug("Local index store not found, creating a new one") + self.index_store = SimpleIndexStore() + + try: + self.doc_store = SimpleDocumentStore.from_persist_dir( + persist_dir=str(local_data_path) + ) + except FileNotFoundError: + logger.debug("Local document store not found, creating a new one") + self.doc_store = SimpleDocumentStore() diff --git a/components/vector_store/__init__.py b/components/vector_store/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/components/vector_store/__pycache__/__init__.cpython-310.pyc b/components/vector_store/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21ae23f13ca61b8058cf14979b777e20a816f441 Binary files /dev/null and b/components/vector_store/__pycache__/__init__.cpython-310.pyc differ diff --git a/components/vector_store/__pycache__/component.cpython-310.pyc b/components/vector_store/__pycache__/component.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..772dcb8e833efa612426c6b08a7a036e64183bd6 Binary files /dev/null and b/components/vector_store/__pycache__/component.cpython-310.pyc differ diff --git a/components/vector_store/component.py b/components/vector_store/component.py new file mode 100644 index 
0000000000000000000000000000000000000000..bd8141484fcac53ac9ebad57955a3bdb40e1e668 --- /dev/null +++ b/components/vector_store/component.py @@ -0,0 +1,51 @@ +import logging +import typing + +from llama_index import VectorStoreIndex +from llama_index.indices.vector_store import VectorIndexRetriever +from llama_index.vector_stores.types import VectorStore + +from app._config import settings +from app.enums import WEAVIATE_INDEX_NAME, VectorDatabase + +logger = logging.getLogger(__name__) + + +class VectorStoreComponent: + vector_store: VectorStore + + def __init__(self) -> None: + match settings.VECTOR_DATABASE: + case VectorDatabase.WEAVIATE: + import weaviate + from llama_index.vector_stores import WeaviateVectorStore + + client = weaviate.Client(settings.WEAVIATE_CLIENT_URL) + self.vector_store = typing.cast( + VectorStore, + WeaviateVectorStore( + weaviate_client=client, index_name=WEAVIATE_INDEX_NAME + ), + ) + case _: + # Should be unreachable + # The settings validator should have caught this + raise ValueError( + f"Vectorstore database {settings.VECTOR_DATABASE} not supported" + ) + + @staticmethod + def get_retriever( + index: VectorStoreIndex, + doc_ids: list[str] | None = None, + similarity_top_k: int = 2, + ) -> VectorIndexRetriever: + return VectorIndexRetriever( + index=index, + similarity_top_k=similarity_top_k, + doc_ids=doc_ids, + ) + + def close(self) -> None: + if hasattr(self.vector_store.client, "close"): + self.vector_store.client.close() diff --git a/enums.py b/enums.py new file mode 100644 index 0000000000000000000000000000000000000000..b9a4c16ffe3fcb15a3caa856a984f25febcca3e6 --- /dev/null +++ b/enums.py @@ -0,0 +1,39 @@ +from enum import Enum, auto, unique +from pathlib import Path + +PROJECT_ROOT_PATH: Path = Path(__file__).parents[1] + + +@unique +class BaseEnum(str, Enum): + @staticmethod + def _generate_next_value_(name: str, *_): + """ + Automatically generate values for enum. + Enum values are lower-cased enum member names. 
+ """ + return name.lower() + + @classmethod + def get_values(cls) -> list[str]: + # noinspection PyUnresolvedReferences + return [m.value for m in cls] + + +class LLMMode(BaseEnum): + MOCK = auto() + OPENAI = auto() + LOCAL = auto() + + +class EmbeddingMode(BaseEnum): + MOCK = auto() + OPENAI = auto() + LOCAL = auto() + + +class VectorDatabase(BaseEnum): + WEAVIATE = auto() + + +WEAVIATE_INDEX_NAME = "LlamaIndex" diff --git a/main.py b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..69011a3e6bb521f442d86333cb0f32567d633605 --- /dev/null +++ b/main.py @@ -0,0 +1,38 @@ +import logging + +from fastapi import FastAPI + +from app._config import settings +from app.components.embedding.component import EmbeddingComponent +from app.components.llm.component import LLMComponent +from app.components.node_store.component import NodeStoreComponent +from app.components.vector_store.component import VectorStoreComponent +from app.server.chat.router import chat_router +from app.server.chat.service import ChatService +from app.server.embedding.router import embedding_router +from app.server.ingest.service import IngestService + +logger = logging.getLogger(__name__) + +app = FastAPI() +app.include_router(chat_router) +app.include_router(embedding_router) + +if settings.IS_UI_ENABLED: + logger.debug("Importing the UI module") + from app.ui.ui import PrivateGptUi + + llm_component = LLMComponent() + vector_store_component = VectorStoreComponent() + embedding_component = EmbeddingComponent() + node_store_component = NodeStoreComponent() + + ingest_service = IngestService( + llm_component, vector_store_component, embedding_component, node_store_component + ) + chat_service = ChatService( + llm_component, vector_store_component, embedding_component, node_store_component + ) + + ui = PrivateGptUi(ingest_service, chat_service) + ui.mount_in_app(app, settings.UI_PATH) diff --git a/paths.py b/paths.py new file mode 100644 index 0000000000000000000000000000000000000000..852d8ed790738e4a4b067ac228c264520fb4c887 --- /dev/null +++ b/paths.py @@ -0,0 +1,15 @@ +from pathlib import Path + +from app._config import settings +from app.enums import PROJECT_ROOT_PATH + + +def _absolute_or_from_project_root(path: str) -> Path: + if path.startswith("/"): + return Path(path) + return PROJECT_ROOT_PATH / path + + +local_data_path: Path = _absolute_or_from_project_root(settings.LOCAL_DATA_FOLDER) +models_path: Path = PROJECT_ROOT_PATH / "models" +models_cache_path: Path = models_path / "cache" diff --git a/server/__init__.py b/server/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/server/__pycache__/__init__.cpython-310.pyc b/server/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..942d06156152508ca8ac077da3aecbbe5fcadb35 Binary files /dev/null and b/server/__pycache__/__init__.cpython-310.pyc differ diff --git a/server/chat/__init__.py b/server/chat/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/server/chat/__pycache__/__init__.cpython-310.pyc b/server/chat/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb1eff494534ccc0c5c844e078525f44cab8331e Binary files /dev/null and b/server/chat/__pycache__/__init__.cpython-310.pyc differ diff --git a/server/chat/__pycache__/router.cpython-310.pyc 
b/server/chat/__pycache__/router.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..842125b7b6cc98c03097679e3c9547e4ff304987 Binary files /dev/null and b/server/chat/__pycache__/router.cpython-310.pyc differ diff --git a/server/chat/__pycache__/schemas.cpython-310.pyc b/server/chat/__pycache__/schemas.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16059bed8e73891c92ebbb06b84d7b45968d5d5c Binary files /dev/null and b/server/chat/__pycache__/schemas.cpython-310.pyc differ diff --git a/server/chat/__pycache__/service.cpython-310.pyc b/server/chat/__pycache__/service.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9161e10b14700e8f3bea37897fdabf21b0a1fa10 Binary files /dev/null and b/server/chat/__pycache__/service.cpython-310.pyc differ diff --git a/server/chat/__pycache__/utils.cpython-310.pyc b/server/chat/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d37ab77d1d0fc5a0b9b5aa5bebe7df8dc643623b Binary files /dev/null and b/server/chat/__pycache__/utils.cpython-310.pyc differ diff --git a/server/chat/router.py b/server/chat/router.py new file mode 100644 index 0000000000000000000000000000000000000000..d435c3b9056c612210c3c7dc5f0ab39e6d407402 --- /dev/null +++ b/server/chat/router.py @@ -0,0 +1,70 @@ +from fastapi import APIRouter +from llama_index.llms import ChatMessage, MessageRole +from pydantic import BaseModel + +from app.components.embedding.component import EmbeddingComponent +from app.components.llm.component import LLMComponent +from app.components.node_store.component import NodeStoreComponent +from app.components.vector_store.component import VectorStoreComponent +from app.server.chat.service import ChatService +from app.server.chat.utils import OpenAICompletion, OpenAIMessage, to_openai_response + +chat_router = APIRouter() + + +class ChatBody(BaseModel): + messages: list[OpenAIMessage] + include_sources: bool = True + + model_config = { + "json_schema_extra": { + "examples": [ + { + "messages": [ + { + "role": "system", + "content": "You are a rapper. Always answer with a rap.", + }, + { + "role": "user", + "content": "How do you fry an egg?", + }, + ], + "include_sources": True, + } + ] + } + } + + +@chat_router.post( + "/chat", + response_model=None, + responses={200: {"model": OpenAICompletion}}, + tags=["Contextual Completions"], +) +def chat_completion(body: ChatBody) -> OpenAICompletion: + """Given a list of messages comprising a conversation, return a response. + + Optionally include an initial `role: system` message to influence the way + the LLM answers. + + When using `'include_sources': true`, the API will return the source Chunks used + to create the response, which come from the context provided. 
+ """ + llm_component = LLMComponent() + vector_store_component = VectorStoreComponent() + embedding_component = EmbeddingComponent() + node_store_component = NodeStoreComponent() + + chat_service = ChatService( + llm_component, vector_store_component, embedding_component, node_store_component + ) + all_messages = [ + ChatMessage(content=m.content, role=MessageRole(m.role)) for m in body.messages + ] + + completion = chat_service.chat(messages=all_messages) + return to_openai_response( + completion.response, completion.sources if body.include_sources else None + ) diff --git a/server/chat/schemas.py b/server/chat/schemas.py new file mode 100644 index 0000000000000000000000000000000000000000..f6445075d0a5b4592e36f22bc48e880f429a02d0 --- /dev/null +++ b/server/chat/schemas.py @@ -0,0 +1,45 @@ +from typing import Literal + +from llama_index.schema import NodeWithScore +from pydantic import BaseModel, Field + +from app.server.ingest.schemas import IngestedDoc + + +class Chunk(BaseModel): + object: Literal["context.chunk"] + score: float = Field(examples=[0.023]) + document: IngestedDoc + text: str = Field(examples=["Outbound sales increased 20%, driven by new leads."]) + previous_texts: list[str] | None = Field( + default=None, + examples=[["SALES REPORT 2023", "Inbound didn't show major changes."]], + ) + next_texts: list[str] | None = Field( + default=None, + examples=[ + [ + "New leads came from Google Ads campaign.", + "The campaign was run by the Marketing Department", + ] + ], + ) + + @classmethod + def from_node(cls: type["Chunk"], node: NodeWithScore) -> "Chunk": + doc_id = node.node.ref_doc_id if node.node.ref_doc_id is not None else "-" + return cls( + object="context.chunk", + score=node.score or 0.0, + document=IngestedDoc( + object="ingest.document", + doc_id=doc_id, + doc_metadata=node.metadata, + ), + text=node.get_content(), + ) + + +class Completion(BaseModel): + response: str + sources: list[Chunk] | None = None diff --git a/server/chat/service.py b/server/chat/service.py new file mode 100644 index 0000000000000000000000000000000000000000..96a81d310447a4a40abebaeaf2737fadd3dbca84 --- /dev/null +++ b/server/chat/service.py @@ -0,0 +1,122 @@ +from dataclasses import dataclass + +from llama_index import ServiceContext, StorageContext, VectorStoreIndex +from llama_index.chat_engine import ContextChatEngine +from llama_index.chat_engine.types import BaseChatEngine +from llama_index.core.postprocessor import SentenceTransformerRerank +from llama_index.indices.postprocessor import MetadataReplacementPostProcessor +from llama_index.llms import ChatMessage, MessageRole + +from app._config import settings +from app.components.embedding.component import EmbeddingComponent +from app.components.llm.component import LLMComponent +from app.components.node_store.component import NodeStoreComponent +from app.components.vector_store.component import VectorStoreComponent +from app.server.chat.schemas import Chunk, Completion + + +@dataclass +class ChatEngineInput: + system_message: ChatMessage | None = None + last_message: ChatMessage | None = None + chat_history: list[ChatMessage] | None = None + + @classmethod + def from_messages(cls, messages: list[ChatMessage]) -> "ChatEngineInput": + # Detect if there is a system message, extract the last message and chat history + system_message = ( + messages[0] + if len(messages) > 0 and messages[0].role == MessageRole.SYSTEM + else None + ) + last_message = ( + messages[-1] + if len(messages) > 0 and messages[-1].role == MessageRole.USER + else None + 
) + # Remove from messages list the system message and last message, + # if they exist. The rest is the chat history. + if system_message: + messages.pop(0) + if last_message: + messages.pop(-1) + chat_history = messages if len(messages) > 0 else None + + return cls( + system_message=system_message, + last_message=last_message, + chat_history=chat_history, + ) + + +class ChatService: + def __init__( + self, + llm_component: LLMComponent, + vector_store_component: VectorStoreComponent, + embedding_component: EmbeddingComponent, + node_store_component: NodeStoreComponent, + ) -> None: + self.llm_service = llm_component + self.vector_store_component = vector_store_component + self.storage_context = StorageContext.from_defaults( + vector_store=vector_store_component.vector_store, + docstore=node_store_component.doc_store, + index_store=node_store_component.index_store, + ) + self.service_context = ServiceContext.from_defaults( + llm=llm_component.llm, embed_model=embedding_component.embedding_model + ) + self.index = VectorStoreIndex.from_vector_store( + vector_store_component.vector_store, + storage_context=self.storage_context, + service_context=self.service_context, + show_progress=True, + ) + + def _chat_engine(self, system_prompt: str | None = None) -> BaseChatEngine: + vector_index_retriever = self.vector_store_component.get_retriever( + index=self.index + ) + + node_postprocessors = [ + MetadataReplacementPostProcessor(target_metadata_key="window") + ] + if settings.IS_RERANK_ENABLED: + rerank = SentenceTransformerRerank( + top_n=settings.RERANK_TOP_N, model=settings.RERANK_MODEL_NAME + ) + node_postprocessors.append(rerank) + + return ContextChatEngine.from_defaults( + system_prompt=system_prompt, + retriever=vector_index_retriever, + service_context=self.service_context, + node_postprocessors=node_postprocessors, + ) + + def chat(self, messages: list[ChatMessage]): + chat_engine_input = ChatEngineInput.from_messages(messages) + last_message = ( + chat_engine_input.last_message.content + if chat_engine_input.last_message + else None + ) + system_prompt = ( + chat_engine_input.system_message.content + if chat_engine_input.system_message + else None + ) + chat_history = ( + chat_engine_input.chat_history if chat_engine_input.chat_history else None + ) + + chat_engine = self._chat_engine(system_prompt=system_prompt) + wrapped_response = chat_engine.chat( + message=last_message if last_message is not None else "", + chat_history=chat_history, + ) + + sources = [Chunk.from_node(node) for node in wrapped_response.source_nodes] + completion = Completion(response=wrapped_response.response, sources=sources) + return completion diff --git a/server/chat/utils.py b/server/chat/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0fd680a43115495591c805727197bbd5e5b9e4ac --- /dev/null +++ b/server/chat/utils.py @@ -0,0 +1,68 @@ +import time +import uuid +from typing import Literal + +from llama_index.llms import ChatResponse +from pydantic import BaseModel, Field + +from app.server.chat.schemas import Chunk + + +class OpenAIMessage(BaseModel): + """Inference result, with the source of the message. + + Role could be the assistant or system + (providing a default response, not AI generated). 
+ """ + + role: Literal["assistant", "system", "user"] = Field(default="user") + content: str | None + + +class OpenAIChoice(BaseModel): + """Response from AI.""" + + finish_reason: str | None = Field(examples=["stop"]) + message: OpenAIMessage | None = None + sources: list[Chunk] | None = None + index: int = 0 + + +class OpenAICompletion(BaseModel): + """Clone of OpenAI Completion model. + + For more information see: https://platform.openai.com/docs/api-reference/chat/object + """ + + id: str + object: Literal["completion", "completion.chunk"] = Field(default="completion") + created: int = Field(..., examples=[1623340000]) + model: Literal["llm-agriculture"] + choices: list[OpenAIChoice] + + @classmethod + def from_text( + cls, + text: str | None, + finish_reason: str | None = None, + sources: list[Chunk] | None = None, + ) -> "OpenAICompletion": + return OpenAICompletion( + id=str(uuid.uuid4()), + object="completion", + created=int(time.time()), + model="llm-agriculture", + choices=[ + OpenAIChoice( + message=OpenAIMessage(role="assistant", content=text), + finish_reason=finish_reason, + sources=sources, + ) + ], + ) + + +def to_openai_response( + response: str | ChatResponse, sources: list[Chunk] | None = None +) -> OpenAICompletion: + return OpenAICompletion.from_text(response, finish_reason="stop", sources=sources) diff --git a/server/embedding/__init__.py b/server/embedding/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/server/embedding/__pycache__/__init__.cpython-310.pyc b/server/embedding/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b4fedddecfb10665d1e0a8a2591848443db2089 Binary files /dev/null and b/server/embedding/__pycache__/__init__.cpython-310.pyc differ diff --git a/server/embedding/__pycache__/router.cpython-310.pyc b/server/embedding/__pycache__/router.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5aec48b7619666a40616d916115e7afc7ead8c29 Binary files /dev/null and b/server/embedding/__pycache__/router.cpython-310.pyc differ diff --git a/server/embedding/__pycache__/schemas.cpython-310.pyc b/server/embedding/__pycache__/schemas.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d59969cd2bbaef531ed5402c34a24b4ad529188 Binary files /dev/null and b/server/embedding/__pycache__/schemas.cpython-310.pyc differ diff --git a/server/embedding/__pycache__/service.cpython-310.pyc b/server/embedding/__pycache__/service.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc439c802ae1b0add70803bd8e1a8dd2f2a61932 Binary files /dev/null and b/server/embedding/__pycache__/service.cpython-310.pyc differ diff --git a/server/embedding/router.py b/server/embedding/router.py new file mode 100644 index 0000000000000000000000000000000000000000..d7c6ca91775c3016898ed60aa253f117cddea472 --- /dev/null +++ b/server/embedding/router.py @@ -0,0 +1,18 @@ +from fastapi import APIRouter + +from app.components.embedding.component import EmbeddingComponent +from app.server.embedding.schemas import EmbeddingsBody, EmbeddingsResponse +from app.server.embedding.service import EmbeddingsService + +embedding_router = APIRouter() + + +@embedding_router.post("/embedding", tags=["Embeddings"]) +def generate_embeddings(body: EmbeddingsBody) -> EmbeddingsResponse: + embedding_component = EmbeddingComponent() + service = EmbeddingsService(embedding_component) + 
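+    # The embeddings endpoint accepts either a single string or a list of strings,
+    # so normalize the input to a list before embedding.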
input_texts = body.input if isinstance(body.input, list) else [body.input]
+    embeddings = service.embed_texts(input_texts)
+    return EmbeddingsResponse(
+        object="list", model=service.embedding_model.model_name, data=embeddings
+    )
diff --git a/server/embedding/schemas.py b/server/embedding/schemas.py
new file mode 100644
index 0000000000000000000000000000000000000000..661cb734d7f6713a55ce005c8db1b3e9fa81ba2f
--- /dev/null
+++ b/server/embedding/schemas.py
@@ -0,0 +1,19 @@
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+
+class Embedding(BaseModel):
+    index: int
+    object: Literal["embedding"]
+    embedding: list[float] = Field(examples=[[0.1, -0.2]])
+
+
+class EmbeddingsBody(BaseModel):
+    input: str | list[str]
+
+
+class EmbeddingsResponse(BaseModel):
+    object: Literal["list"]
+    model: str
+    data: list[Embedding]
diff --git a/server/embedding/service.py b/server/embedding/service.py
new file mode 100644
index 0000000000000000000000000000000000000000..63f857e47b3a444be566645568a75e43c0d1c443
--- /dev/null
+++ b/server/embedding/service.py
@@ -0,0 +1,18 @@
+from app.components.embedding.component import EmbeddingComponent
+from app.server.embedding.schemas import Embedding
+
+
+class EmbeddingsService:
+    def __init__(self, embedding_component: EmbeddingComponent) -> None:
+        self.embedding_model = embedding_component.embedding_model
+
+    def embed_texts(self, texts: list[str]) -> list[Embedding]:
+        texts_embeddings = self.embedding_model.get_text_embedding_batch(texts)
+        # Use enumerate for the position: list.index() is O(n^2) and returns the
+        # wrong index whenever two texts produce identical embeddings.
+        return [
+            Embedding(
+                index=i,
+                object="embedding",
+                embedding=embedding,
+            )
+            for i, embedding in enumerate(texts_embeddings)
+        ]
diff --git a/server/ingest/__init__.py b/server/ingest/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/server/ingest/__pycache__/__init__.cpython-310.pyc b/server/ingest/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8a84c006289bd4fe719eb4c34ce865f9736f1954
Binary files /dev/null and b/server/ingest/__pycache__/__init__.cpython-310.pyc differ
diff --git a/server/ingest/__pycache__/schemas.cpython-310.pyc b/server/ingest/__pycache__/schemas.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a1ef1bfb2aeccc3a3cadf7070c51d21a2e4bbaa6
Binary files /dev/null and b/server/ingest/__pycache__/schemas.cpython-310.pyc differ
diff --git a/server/ingest/__pycache__/service.cpython-310.pyc b/server/ingest/__pycache__/service.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c076095e4711a604ec35124d5d1d36371c848dd4
Binary files /dev/null and b/server/ingest/__pycache__/service.cpython-310.pyc differ
diff --git a/server/ingest/schemas.py b/server/ingest/schemas.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b4ff7a79f33041b45109110d394f4470bc71832
--- /dev/null
+++ b/server/ingest/schemas.py
@@ -0,0 +1,32 @@
+from typing import Any, Literal
+
+from llama_index import Document
+from pydantic import BaseModel, Field
+
+
+class IngestedDoc(BaseModel):
+    object: Literal["ingest.document"]
+    doc_id: str = Field(examples=["c202d5e6-7b69-4869-81cc-dd574ee8ee11"])
+    doc_metadata: dict[str, Any] | None = Field(
+        examples=[
+            {
+                "page_label": "2",
+                "file_name": "agriculture.pdf",
+            }
+        ]
+    )
+
+    @staticmethod
+    def curate_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
+        """Remove unwanted metadata keys."""
+        for key in ["doc_id",
"window", "original_text"]: + metadata.pop(key, None) + return metadata + + @staticmethod + def from_document(document: Document) -> "IngestedDoc": + return IngestedDoc( + object="ingest.document", + doc_id=document.doc_id, + doc_metadata=IngestedDoc.curate_metadata(document.metadata), + ) diff --git a/server/ingest/service.py b/server/ingest/service.py new file mode 100644 index 0000000000000000000000000000000000000000..55d2cf7afb956f479d14d42820a60b7f275afe3e --- /dev/null +++ b/server/ingest/service.py @@ -0,0 +1,123 @@ +import logging +import tempfile +from pathlib import Path +from typing import AnyStr, BinaryIO + +from llama_index import ServiceContext, StorageContext +from llama_index.node_parser import SentenceWindowNodeParser + +from app.components.embedding.component import EmbeddingComponent +from app.components.ingest.component import get_ingestion_component +from app.components.llm.component import LLMComponent +from app.components.node_store.component import NodeStoreComponent +from app.components.vector_store.component import VectorStoreComponent +from app.server.ingest.schemas import IngestedDoc + +logger = logging.getLogger(__name__) + + +class IngestService: + def __init__( + self, + llm_component: LLMComponent, + vector_store_component: VectorStoreComponent, + embedding_component: EmbeddingComponent, + node_store_component: NodeStoreComponent, + ) -> None: + self.llm_service = llm_component + self.storage_context = StorageContext.from_defaults( + vector_store=vector_store_component.vector_store, + docstore=node_store_component.doc_store, + index_store=node_store_component.index_store, + ) + node_parser = SentenceWindowNodeParser.from_defaults() + self.ingest_service_context = ServiceContext.from_defaults( + llm=self.llm_service.llm, + embed_model=embedding_component.embedding_model, + node_parser=node_parser, + # Embeddings done early in the pipeline of node transformations, right + # after the node parsing + transformations=[node_parser, embedding_component.embedding_model], + ) + + self.ingest_component = get_ingestion_component( + self.storage_context, self.ingest_service_context + ) + + def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]: + logger.debug(f"Got file data of size={len(file_data)} to ingest") + # llama-index mainly supports reading from files, so + # we have to create a tmp file to read for it to work + # delete=False to avoid a Windows 11 permission error. 
+ with tempfile.NamedTemporaryFile(delete=False) as tmp: + try: + path_to_tmp = Path(tmp.name) + if isinstance(file_data, bytes): + path_to_tmp.write_bytes(file_data) + else: + path_to_tmp.write_text(str(file_data)) + return self.ingest_file(file_name, path_to_tmp) + finally: + tmp.close() + path_to_tmp.unlink() + + def ingest_file(self, file_name: str, file_data: Path) -> list[IngestedDoc]: + logger.info(f"Ingesting file_name={file_name}") + documents = self.ingest_component.ingest(file_name, file_data) + logger.info(f"Finished ingestion file_name={file_name}") + return [IngestedDoc.from_document(document) for document in documents] + + def ingest_text(self, file_name: str, text: str) -> list[IngestedDoc]: + logger.debug(f"Ingesting text data with file_name={file_name}") + return self._ingest_data(file_name, text) + + def ingest_bin_data( + self, file_name: str, raw_file_data: BinaryIO + ) -> list[IngestedDoc]: + logger.debug(f"Ingesting binary data with file_name={file_name}") + file_data = raw_file_data.read() + return self._ingest_data(file_name, file_data) + + def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[IngestedDoc]: + logger.info(f"Ingesting file_names={[f[0] for f in files]}") + documents = self.ingest_component.bulk_ingest(files) + logger.info(f"Finished ingestion file_name={[f[0] for f in files]}") + return [IngestedDoc.from_document(document) for document in documents] + + def list_ingested(self) -> list[IngestedDoc]: + ingested_docs = [] + try: + docstore = self.storage_context.docstore + ingested_docs_ids: set[str] = set() + + for node in docstore.docs.values(): + if node.ref_doc_id is not None: + ingested_docs_ids.add(node.ref_doc_id) + + for doc_id in ingested_docs_ids: + ref_doc_info = docstore.get_ref_doc_info(ref_doc_id=doc_id) + doc_metadata = None + if ref_doc_info is not None and ref_doc_info.metadata is not None: + doc_metadata = IngestedDoc.curate_metadata(ref_doc_info.metadata) + ingested_docs.append( + IngestedDoc( + object="ingest.document", + doc_id=doc_id, + doc_metadata=doc_metadata, + ) + ) + except ValueError: + logger.warning("Got an exception when getting list of docs", exc_info=True) + pass + logger.debug(f"Found count={len(ingested_docs)} ingested documents") + return ingested_docs + + def delete(self, doc_id: str) -> None: + """Delete an ingested document. 
+ + :raises ValueError: if the document does not exist + """ + logger.info( + "Deleting the ingested document=%s in the doc and index store", doc_id + ) + self.ingest_component.delete(doc_id) diff --git a/ui/__init__.py b/ui/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8fe4022ab32a761b2377b8bc5db66357d6121e34 --- /dev/null +++ b/ui/__init__.py @@ -0,0 +1 @@ +"""Gradio based UI.""" diff --git a/ui/__pycache__/__init__.cpython-310.pyc b/ui/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3ae84b8e41384b6bb9104a79aab59ffd9e1219b Binary files /dev/null and b/ui/__pycache__/__init__.cpython-310.pyc differ diff --git a/ui/__pycache__/schemas.cpython-310.pyc b/ui/__pycache__/schemas.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc3a84aa3d9de933a238ca6a40b2168405a36b30 Binary files /dev/null and b/ui/__pycache__/schemas.cpython-310.pyc differ diff --git a/ui/__pycache__/ui.cpython-310.pyc b/ui/__pycache__/ui.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5891363ca3cec468f2cbc394840de5c331ab3ea1 Binary files /dev/null and b/ui/__pycache__/ui.cpython-310.pyc differ diff --git a/ui/dodge_ava.jpg b/ui/dodge_ava.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4f194045a2236f635c464ef538435082b8cbbf2f Binary files /dev/null and b/ui/dodge_ava.jpg differ diff --git a/ui/schemas.py b/ui/schemas.py new file mode 100644 index 0000000000000000000000000000000000000000..8cfb3754d328baf14a884faa330ee395963e897f --- /dev/null +++ b/ui/schemas.py @@ -0,0 +1,27 @@ +from pydantic import BaseModel + +from app.server.chat.schemas import Chunk + + +class Source(BaseModel): + file: str + page: str + text: str + + class Config: + frozen = True + + @staticmethod + def curate_sources(sources: list[Chunk]) -> set["Source"]: + curated_sources = set() + + for chunk in sources: + doc_metadata = chunk.document.doc_metadata + + file_name = doc_metadata.get("file_name", "-") if doc_metadata else "-" + page_label = doc_metadata.get("page_label", "-") if doc_metadata else "-" + + source = Source(file=file_name, page=page_label, text=chunk.text) + curated_sources.add(source) + + return curated_sources diff --git a/ui/ui.py b/ui/ui.py new file mode 100644 index 0000000000000000000000000000000000000000..091f2e6737ebac01152eac7f4d24e7199e26cd4b --- /dev/null +++ b/ui/ui.py @@ -0,0 +1,228 @@ +"""This file should be imported only and only if you want to run the UI locally.""" +import itertools +import logging +from pathlib import Path +from typing import Any + +import gradio as gr +from fastapi import FastAPI +from gradio.themes.utils.colors import slate +from llama_index.llms import ChatMessage, MessageRole + +from app._config import settings +from app.components.embedding.component import EmbeddingComponent +from app.components.llm.component import LLMComponent +from app.components.node_store.component import NodeStoreComponent +from app.components.vector_store.component import VectorStoreComponent +from app.enums import PROJECT_ROOT_PATH +from app.server.chat.service import ChatService +from app.server.ingest.service import IngestService +from app.ui.schemas import Source + +logger = logging.getLogger(__name__) + +THIS_DIRECTORY_RELATIVE = Path(__file__).parent.relative_to(PROJECT_ROOT_PATH) +AVATAR_BOT = THIS_DIRECTORY_RELATIVE / "dodge_ava.jpg" + +UI_TAB_TITLE = "Agriculture Chatbot" + +SOURCES_SEPARATOR = "\n\n Sources: \n" + + +class 
PrivateGptUi:
+    def __init__(
+        self,
+        ingest_service: IngestService,
+        chat_service: ChatService,
+    ) -> None:
+        self._ingest_service = ingest_service
+        self._chat_service = chat_service
+
+        # Cache the UI blocks
+        self._ui_block = None
+
+        # Initialize system prompt
+        self._system_prompt = self._get_default_system_prompt()
+
+    def _chat(self, message: str, history: list[list[str]], *_: Any) -> Any:
+        def build_history() -> list[ChatMessage]:
+            history_messages: list[ChatMessage] = list(
+                itertools.chain(
+                    *[
+                        [
+                            ChatMessage(content=interaction[0], role=MessageRole.USER),
+                            ChatMessage(
+                                # Remove the Sources section from the history content
+                                content=interaction[1].split(SOURCES_SEPARATOR)[0],
+                                role=MessageRole.ASSISTANT,
+                            ),
+                        ]
+                        for interaction in history
+                    ]
+                )
+            )
+
+            # Keep only the most recent 20 messages to try to avoid context overflow
+            return history_messages[-20:]
+
+        new_message = ChatMessage(content=message, role=MessageRole.USER)
+        all_messages = [*build_history(), new_message]
+        # If a system prompt is set, add it as a system message
+        if self._system_prompt:
+            all_messages.insert(
+                0,
+                ChatMessage(
+                    content=self._system_prompt,
+                    role=MessageRole.SYSTEM,
+                ),
+            )
+
+        completion = self._chat_service.chat(messages=all_messages)
+        full_response = completion.response
+
+        if completion.sources:
+            full_response += SOURCES_SEPARATOR
+            curated_sources = Source.curate_sources(completion.sources)
+            sources_text = "\n\n\n".join(
+                f"{index}. {source.file} (page {source.page})"
+                for index, source in enumerate(curated_sources, start=1)
+            )
+            full_response += sources_text
+
+        return full_response
+
+    # On initialization, the system prompt is set to this default
+    # prompt based on settings.
+    @staticmethod
+    def _get_default_system_prompt() -> str:
+        return settings.DEFAULT_QUERY_SYSTEM_PROMPT
+
+    def _set_system_prompt(self, system_prompt_input: str) -> None:
+        logger.info(f"Setting system prompt to: {system_prompt_input}")
+        self._system_prompt = system_prompt_input
+
+    def _list_ingested_files(self) -> list[list[str]]:
+        files = set()
+        for ingested_document in self._ingest_service.list_ingested():
+            if ingested_document.doc_metadata is None:
+                # Skip documents without metadata
+                continue
+            file_name = ingested_document.doc_metadata.get(
+                "file_name", "[FILE NAME MISSING]"
+            )
+            files.add(file_name)
+        return [[row] for row in files]
+
+    def _upload_file(self, files: list[str]) -> None:
+        logger.debug("Loading count=%s files", len(files))
+        paths = [Path(file) for file in files]
+        self._ingest_service.bulk_ingest([(str(path.name), path) for path in paths])
+
+    def _build_ui_blocks(self) -> gr.Blocks:
+        logger.debug("Creating the UI blocks")
+        with gr.Blocks(
+            title=UI_TAB_TITLE,
+            theme=gr.themes.Soft(primary_hue=slate),
+            css=".logo { "
+            "display:flex;"
+            "height: 80px;"
+            "border-radius: 8px;"
+            "align-content: center;"
+            "justify-content: center;"
+            "align-items: center;"
+            "}"
+            ".logo img { height: 25% }"
+            ".contain { display: flex !important; flex-direction: column !important; }"
+            "#component-0, #component-3, #component-10, #component-8 { height: 100% !important; }"
+            "#chatbot { flex-grow: 1 !important; overflow: auto !important;}"
+            "#col { height: calc(100vh - 112px - 16px) !important; }",
+        ) as blocks:
+            with gr.Row():
+                gr.HTML(f"