"""List index.
A simple data structure where LlamaIndex iterates through document chunks
in sequence in order to answer a given query.
"""
from typing import Any, Dict, Optional, Sequence, Type
from gpt_index.data_structs.data_structs import IndexList
from gpt_index.indices.base import DOCUMENTS_INPUT, BaseGPTIndex
from gpt_index.indices.query.base import BaseGPTIndexQuery
from gpt_index.indices.query.list.embedding_query import GPTListIndexEmbeddingQuery
from gpt_index.indices.query.list.query import GPTListIndexQuery
from gpt_index.indices.query.schema import QueryMode
from gpt_index.langchain_helpers.chain_wrapper import LLMPredictor
from gpt_index.langchain_helpers.text_splitter import TextSplitter
from gpt_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT
from gpt_index.prompts.prompts import QuestionAnswerPrompt
from gpt_index.schema import BaseDocument

# This query is used to summarize the contents of the index.
GENERATE_TEXT_QUERY = "What is a concise summary of this document?"


class GPTListIndex(BaseGPTIndex[IndexList]):
    """GPT List Index.

    The list index is a simple data structure where nodes are stored in
    a sequence. During index construction, the document texts are
    chunked up, converted to nodes, and stored in a list.

    During query time, the list index iterates through the nodes
    with some optional filter parameters, and synthesizes an
    answer from all the nodes.

    Args:
        text_qa_template (Optional[QuestionAnswerPrompt]): A Question-Answer Prompt
            (see :ref:`Prompt-Templates`).
            NOTE: this is a deprecated field.
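
    Example:
        A minimal usage sketch. ``SimpleDirectoryReader`` and the inherited
        ``query()`` method are assumed to be available as in contemporaneous
        gpt_index releases::

            from gpt_index import SimpleDirectoryReader

            documents = SimpleDirectoryReader("data").load_data()
            index = GPTListIndex(documents)

            # Default mode iterates over every node to synthesize an answer.
            response = index.query(GENERATE_TEXT_QUERY)

            # Embedding mode selects the most similar nodes by embedding first.
            response = index.query(GENERATE_TEXT_QUERY, mode="embedding")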
"""
index_struct_cls = IndexList
def __init__(
self,
documents: Optional[Sequence[DOCUMENTS_INPUT]] = None,
index_struct: Optional[IndexList] = None,
text_qa_template: Optional[QuestionAnswerPrompt] = None,
llm_predictor: Optional[LLMPredictor] = None,
text_splitter: Optional[TextSplitter] = None,
**kwargs: Any,
) -> None:
"""Initialize params."""
self.text_qa_template = text_qa_template or DEFAULT_TEXT_QA_PROMPT
super().__init__(
documents=documents,
index_struct=index_struct,
llm_predictor=llm_predictor,
text_splitter=text_splitter,
**kwargs,
)
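
    # The list index supports two query modes: the default mode scans nodes
    # sequentially, while the embedding mode selects nodes by similarity.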
    @classmethod
    def get_query_map(cls) -> Dict[str, Type[BaseGPTIndexQuery]]:
        """Get query map."""
        return {
            QueryMode.DEFAULT: GPTListIndexQuery,
            QueryMode.EMBEDDING: GPTListIndexEmbeddingQuery,
        }

    def _build_fallback_text_splitter(self) -> TextSplitter:
        # if not specified, use "smart" text splitter to ensure chunks fit in prompt
        return self._prompt_helper.get_text_splitter_given_prompt(
            self.text_qa_template, 1
        )

    def _build_index_from_documents(
        self, documents: Sequence[BaseDocument]
    ) -> IndexList:
        """Build the index from documents.

        Args:
            documents (Sequence[BaseDocument]): A sequence of documents.

        Returns:
            IndexList: The created list index.

        """
        index_struct = IndexList()
        for d in documents:
            nodes = self._get_nodes_from_document(d)
            for n in nodes:
                index_struct.add_node(n)
        return index_struct

    def _insert(self, document: BaseDocument, **insert_kwargs: Any) -> None:
        """Insert a document."""
        nodes = self._get_nodes_from_document(document)
        for n in nodes:
            self._index_struct.add_node(n)
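
    # Each node records the id of its source document (``ref_doc_id``), so
    # deleting a document reduces to filtering the node list.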
    def _delete(self, doc_id: str, **delete_kwargs: Any) -> None:
        """Delete a document."""
        cur_nodes = self._index_struct.nodes
        nodes_to_keep = [n for n in cur_nodes if n.ref_doc_id != doc_id]
        self._index_struct.nodes = nodes_to_keep

    def _preprocess_query(self, mode: QueryMode, query_kwargs: Any) -> None:
        """Preprocess query."""
        super()._preprocess_query(mode, query_kwargs)
        if "text_qa_template" not in query_kwargs:
            query_kwargs["text_qa_template"] = self.text_qa_template