Master-Thesis-Prakhar
/

GraphRAG

Model card Files Files and versions Community

Prakhar Bhandari committed on Apr 25, 2024

Commit

c8025cd

1 Parent(s): 2d9bc04

Modular v1.0

Browse files

Files changed (13) hide show

kg_builder/.DS_Store +0 -0
kg_builder/README.md +0 -0
kg_builder/requirements.txt +10 -0
kg_builder/src/.DS_Store +0 -0
kg_builder/src/__init__.py +0 -0
kg_builder/src/__pycache__/api_connections.cpython-39.pyc +0 -0
kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc +0 -0
kg_builder/src/__pycache__/query_graph.cpython-39.pyc +0 -0
kg_builder/src/api_connections.py +16 -0
kg_builder/src/knowledge_graph_builder.py +138 -0
kg_builder/src/main.py +33 -0
kg_builder/src/query_graph.py +23 -0
kg_creation.ipynb +0 -473

kg_builder/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

kg_builder/README.md ADDED Viewed

File without changes

kg_builder/requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+numpy
+pandas
+requests
+openai
+neo4j
+wikipedia
+tiktoken
+langchain
+langchain_openai
+tqdm

kg_builder/src/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

kg_builder/src/__init__.py ADDED Viewed

File without changes

kg_builder/src/__pycache__/api_connections.cpython-39.pyc ADDED Viewed

Binary file (507 Bytes). View file

kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc ADDED Viewed

Binary file (7.19 kB). View file

kg_builder/src/__pycache__/query_graph.cpython-39.pyc ADDED Viewed

Binary file (829 Bytes). View file

kg_builder/src/api_connections.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from langchain_community.graphs import Neo4jGraph
+import os
+# Neo4j connection setup
+url = "neo4j+s://2f409740.databases.neo4j.io"
+username = "neo4j"
+password = "oe7A9ugxhxcuEtwci8khPIt2TTdz_am9AYDx1r9e9Tw"
+graph = Neo4jGraph(
+    url=url,
+    username=username,
+    password=password
+)
+# OpenAI API key setup
+os.environ["OPENAI_API_KEY"] = "sk-proj-hceIL56CC2zfjAvAlMjbT3BlbkFJyHKX2wbiQxsG9yy8dGJN"

kg_builder/src/knowledge_graph_builder.py ADDED Viewed

	@@ -0,0 +1,138 @@

+# Add to knowledge_graph_builder.py
+from api_connections import graph
+from langchain_community.graphs.graph_document import (
+    Node as BaseNode,
+    Relationship as BaseRelationship,
+    GraphDocument,
+)
+from langchain.schema import Document
+from typing import List, Dict, Any, Optional
+from langchain.pydantic_v1 import Field, BaseModel
+# One key/value pair describing a single property of a node or relationship.
+# Both fields are required strings.
+# NOTE(review): presumably modelled as an explicit pair rather than a dict so
+# it can be described in the structured-output schema — confirm.
+class Property(BaseModel):
+    """A single property consisting of key and value"""
+    key: str = Field(..., description="key")
+    value: str = Field(..., description="value")
+# Extraction-side node: subclasses BaseNode and redeclares `properties` as an
+# optional list of Property pairs that defaults to None.
+# map_to_base_node() converts instances of this class into plain BaseNode objects.
+class Node(BaseNode):
+    properties: Optional[List[Property]] = Field(
+        None, description="List of node properties")
+# Extraction-side relationship: subclasses BaseRelationship and redeclares
+# `properties` as an optional list of Property pairs that defaults to None.
+# map_to_base_relationship() converts instances into plain BaseRelationship objects.
+class Relationship(BaseRelationship):
+    properties: Optional[List[Property]] = Field(
+        None, description="List of relationship properties"
+    )
+# Container for one extraction result: the output type handed to
+# create_structured_output_chain() in get_extraction_chain().
+# Both fields are required (no default).
+class KnowledgeGraph(BaseModel):
+    """Generate a knowledge graph with entities and relationships."""
+    nodes: List[Node] = Field(..., description="List of nodes in the knowledge graph")
+    rels: List[Relationship] = Field(..., description="List of relationships in the knowledge graph")
+def format_property_key(s: str) -> str:
+    words = s.split()
+    if not words:
+        return s
+    first_word = words[0].lower()
+    capitalized_words = [word.capitalize() for word in words[1:]]
+    return "".join([first_word] + capitalized_words)
+def props_to_dict(props) -> dict:
+    """Convert properties to a dictionary."""
+    properties = {}
+    if not props:
+      return properties
+    for p in props:
+        properties[format_property_key(p.key)] = p.value
+    return properties
+def map_to_base_node(node: Node) -> BaseNode:
+    """Map the KnowledgeGraph Node to the base Node."""
+    properties = props_to_dict(node.properties) if node.properties else {}
+    properties["name"] = node.id.title()  # Assuming nodes have an 'id' attribute for this operation
+    return BaseNode(
+        id=node.id.title(), type=node.type.capitalize(), properties=properties
+    )
+def map_to_base_relationship(rel: Relationship) -> BaseRelationship:
+    """Map the KnowledgeGraph Relationship to the base Relationship."""
+    source = map_to_base_node(rel.source)
+    target = map_to_base_node(rel.target)
+    properties = props_to_dict(rel.properties) if rel.properties else {}
+    return BaseRelationship(
+        source=source, target=target, type=rel.type, properties=properties
+    )
+import os
+from langchain.chains.openai_functions import (
+    create_openai_fn_chain,
+    create_structured_output_runnable,
+    create_structured_output_chain,
+)
+from langchain_openai import ChatOpenAI
+from langchain.prompts import ChatPromptTemplate
+# Setting the OpenAI API key for usage in LLM calls
+os.environ["OPENAI_API_KEY"] = "sk-proj-hceIL56CC2zfjAvAlMjbT3BlbkFJyHKX2wbiQxsG9yy8dGJN"
+llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)
+def get_extraction_chain(
+    allowed_nodes: Optional[List[str]] = None,
+    allowed_rels: Optional[List[str]] = None
+    ):
+    prompt = ChatPromptTemplate.from_messages(
+        [(
+          "system",
+        f"""# Knowledge Graph Instructions for GPT-4
+## 1. Overview
+You are a sophisticated algorithm tailored for parsing Wikipedia pages to construct a knowledge graph about chemotherapy and related cancer treatments.
+- **Nodes** symbolize entities such as medical conditions, drugs, symptoms, treatments, and associated medical concepts.
+- The goal is to create a precise and comprehensible knowledge graph, serving as a reliable resource for medical practitioners and scholarly research.
+## 2. Labeling Nodes
+- **Consistency**: Utilize uniform labels for node types to maintain clarity.
+  - For instance, consistently label drugs as **"Drug"**, symptoms as **"Symptom"**, and treatments as **"Treatment"**.
+- **Node IDs**: Apply descriptive, legible identifiers for node IDs, sourced directly from the text.
+{'- **Allowed Node Labels:**' + ", ".join(['Drug', 'Symptom', 'Treatment', 'MedicalCondition', 'ResearchStudy']) if allowed_nodes else ""}
+{'- **Allowed Relationship Types**:' + ", ".join(['Treats', 'Causes', 'Researches', 'Recommends']) if allowed_rels else ""}
+## 3. Handling Numerical Data and Dates
+- Integrate numerical data and dates as attributes of the corresponding nodes.
+- **No Isolated Nodes for Dates/Numbers**: Directly associate dates and numerical figures as attributes with pertinent nodes.
+- **Property Format**: Follow a straightforward key-value pattern for properties, with keys in camelCase, for example, `approvedYear`, `dosageAmount`.
+## 4. Coreference Resolution
+- **Entity Consistency**: Guarantee uniform identification of each entity across the graph.
+  - For example, if "Methotrexate" and "MTX" reference the same medication, uniformly apply "Methotrexate" as the node ID.
+## 5. Relationship Naming Conventions
+- **Clarity and Standardization**: Utilize clear and standardized relationship names, preferring uppercase with underscores for readability.
+  - For instance, use "HAS_SIDE_EFFECT" instead of "HASSIDEEFFECT", use "CAN_RESULT_FROM" instead of "CANRESULTFROM" etc. You keep making the same mistakes of storing the relationships without the "_" in between the words. Any further similar errors will lead to termination.
+- **Relevance and Specificity**: Choose relationship names that accurately reflect the connection between nodes, such as "INHIBITS" or "ACTIVATES" for interactions between substances.
+## 6. Strict Compliance
+Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
+        """),
+            ("human", "Use the given format to extract information from the following input: {input}"),
+            ("human", "Tip: Precision in the node and relationship creation is vital for the integrity of the knowledge graph."),
+        ])
+    return create_structured_output_chain(KnowledgeGraph, llm, prompt)
+def extract_and_store_graph(
+    document: Document,
+    nodes:Optional[List[str]] = None,
+    rels:Optional[List[str]]=None) -> None:
+    # Extract graph data using OpenAI functions
+    extract_chain = get_extraction_chain(nodes, rels)
+    data = extract_chain.invoke(document.page_content)['function']
+    # Construct a graph document
+    graph_document = GraphDocument(
+      nodes = [map_to_base_node(node) for node in data.nodes],
+      relationships = [map_to_base_relationship(rel) for rel in data.rels],
+      source = document
+    )
+    # Store information into a graph
+    graph.add_graph_documents([graph_document])

kg_builder/src/main.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from knowledge_graph_builder import extract_and_store_graph
+from query_graph import query_knowledge_graph
+from langchain_community.document_loaders import WikipediaLoader
+from langchain.text_splitter import TokenTextSplitter
+from tqdm import tqdm
+def main():
+    print("Starting the script...")
+    # Take Wikipedia article name as input
+    article_name = input("Enter the Wikipedia article name: ")  # Corrected to proper input usage
+    print(f"Loading documents for: {article_name}")
+    # Load and process the Wikipedia article
+    raw_documents = WikipediaLoader(query=article_name).load()
+    text_splitter = TokenTextSplitter(chunk_size=4096, chunk_overlap=96)
+    documents = text_splitter.split_documents(raw_documents[:5])  # Only process the first 5 documents
+    print("Building the knowledge graph...")
+    # Build the knowledge graph from the documents
+    for i, d in tqdm(enumerate(documents), total=len(documents)):
+        extract_and_store_graph(d)
+    print("Graph construction complete. Please enter your query.")
+    # Take a query related to the graph
+    user_query = input("Enter your query related to the graph: ")
+    print(f"Querying the graph with: {user_query}")
+    # Query the graph and print the answer
+    answer = query_knowledge_graph(user_query)
+    print("Answer to your query:", answer)
+if __name__ == "__main__":
+    main()

kg_builder/src/query_graph.py ADDED Viewed

	@@ -0,0 +1,23 @@

+from langchain.chains import GraphCypherQAChain
+from langchain_openai import ChatOpenAI
+from api_connections import graph  # Importing 'graph' from 'api_connections.py'
+def query_knowledge_graph(query):
+    print("Refreshing the graph schema...")
+    # Refresh the graph schema before querying
+    graph.refresh_schema()
+    print("Setting up the Cypher QA Chain...")
+    # Setup the Cypher QA Chain with specific LLM configurations
+    cypher_chain = GraphCypherQAChain.from_llm(
+        graph=graph,
+        cypher_llm=ChatOpenAI(temperature=0, model="gpt-4"),
+        qa_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k"),
+        #verbose=True
+    )
+    print(f"Executing the query: {query}")
+    # Execute the query and return results
+    result = cypher_chain.invoke({"query": query})
+    print("Query executed. Processing results...")
+    return result

kg_creation.ipynb DELETED Viewed

@@ -1,473 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "os.environ['OPENAI_API_KEY'] = \"sk-proj-k8uMlsAJbdAuSWWnvaHyT3BlbkFJyQB8yMQavFuQDVmc4sNps\"\n",
-    "\n",
-    "import logging\n",
-    "import sys\n",
-    "\n",
-    "logging.basicConfig(\n",
-    "    stream=sys.stdout, level=logging.INFO\n",
-    ")  # logging.DEBUG for more verbose output\n",
-    "\n",
-    "\n",
-    "# define LLM\n",
-    "from llama_index.llms.openai import OpenAI\n",
-    "from llama_index.core import Settings\n",
-    "\n",
-    "Settings.llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo-0125\")\n",
-    "Settings.chunk_size = 512"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Requirement already satisfied: langchain in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (0.1.16)\n",
-      "Requirement already satisfied: neo4j in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (5.19.0)\n",
-      "Requirement already satisfied: openai in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (1.23.2)\n",
-      "Requirement already satisfied: wikipedia in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (1.4.0)\n",
-      "Requirement already satisfied: tiktoken in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (0.6.0)\n",
-      "Requirement already satisfied: langchain_openai in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (0.1.3)\n",
-      "Requirement already satisfied: PyYAML>=5.3 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (6.0.1)\n",
-      "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (2.0.29)\n",
-      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (3.9.5)\n",
-      "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (4.0.3)\n",
-      "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (0.6.4)\n",
-      "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (1.33)\n",
-      "Requirement already satisfied: langchain-community<0.1,>=0.0.32 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (0.0.34)\n",
-      "Requirement already satisfied: langchain-core<0.2.0,>=0.1.42 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (0.1.45)\n",
-      "Requirement already satisfied: langchain-text-splitters<0.1,>=0.0.1 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (0.0.1)\n",
-      "Requirement already satisfied: langsmith<0.2.0,>=0.1.17 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (0.1.49)\n",
-      "Requirement already satisfied: numpy<2,>=1 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (1.26.4)\n",
-      "Requirement already satisfied: pydantic<3,>=1 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (2.7.0)\n",
-      "Requirement already satisfied: requests<3,>=2 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (2.31.0)\n",
-      "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain) (8.2.3)\n",
-      "Requirement already satisfied: pytz in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from neo4j) (2024.1)\n",
-      "Requirement already satisfied: anyio<5,>=3.5.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from openai) (4.3.0)\n",
-      "Requirement already satisfied: distro<2,>=1.7.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from openai) (1.9.0)\n",
-      "Requirement already satisfied: httpx<1,>=0.23.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from openai) (0.27.0)\n",
-      "Requirement already satisfied: sniffio in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from openai) (1.3.1)\n",
-      "Requirement already satisfied: tqdm>4 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from openai) (4.66.2)\n",
-      "Requirement already satisfied: typing-extensions<5,>=4.7 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from openai) (4.11.0)\n",
-      "Requirement already satisfied: beautifulsoup4 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from wikipedia) (4.12.3)\n",
-      "Requirement already satisfied: regex>=2022.1.18 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from tiktoken) (2024.4.16)\n",
-      "Requirement already satisfied: aiosignal>=1.1.2 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n",
-      "Requirement already satisfied: attrs>=17.3.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)\n",
-      "Requirement already satisfied: frozenlist>=1.1.1 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)\n",
-      "Requirement already satisfied: multidict<7.0,>=4.5 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.5)\n",
-      "Requirement already satisfied: yarl<2.0,>=1.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n",
-      "Requirement already satisfied: idna>=2.8 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from anyio<5,>=3.5.0->openai) (3.7)\n",
-      "Requirement already satisfied: exceptiongroup>=1.0.2 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from anyio<5,>=3.5.0->openai) (1.2.1)\n",
-      "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain) (3.21.1)\n",
-      "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain) (0.9.0)\n",
-      "Requirement already satisfied: certifi in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from httpx<1,>=0.23.0->openai) (2024.2.2)\n",
-      "Requirement already satisfied: httpcore==1.* in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from httpx<1,>=0.23.0->openai) (1.0.5)\n",
-      "Requirement already satisfied: h11<0.15,>=0.13 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n",
-      "Requirement already satisfied: jsonpointer>=1.9 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from jsonpatch<2.0,>=1.33->langchain) (2.4)\n",
-      "Requirement already satisfied: packaging<24.0,>=23.2 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langchain-core<0.2.0,>=0.1.42->langchain) (23.2)\n",
-      "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (3.10.1)\n",
-      "Requirement already satisfied: annotated-types>=0.4.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from pydantic<3,>=1->langchain) (0.6.0)\n",
-      "Requirement already satisfied: pydantic-core==2.18.1 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from pydantic<3,>=1->langchain) (2.18.1)\n",
-      "Requirement already satisfied: charset-normalizer<4,>=2 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from requests<3,>=2->langchain) (3.3.2)\n",
-      "Requirement already satisfied: urllib3<3,>=1.21.1 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from requests<3,>=2->langchain) (2.2.1)\n",
-      "Requirement already satisfied: greenlet!=0.4.17 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)\n",
-      "Requirement already satisfied: soupsieve>1.2 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from beautifulsoup4->wikipedia) (2.5)\n",
-      "Requirement already satisfied: mypy-extensions>=0.3.0 in /local/home/pbhandari/miniconda3/envs/graph_rag/lib/python3.9/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain) (1.0.0)\n"
-     ]
-    }
-   ],
-   "source": [
-    "!pip install langchain neo4j openai wikipedia tiktoken langchain_openai"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from langchain.graphs import Neo4jGraph\n",
-    "\n",
-    "url = \"neo4j+s://2f409740.databases.neo4j.io\"\n",
-    "username =\"neo4j\"\n",
-    "password = \"oe7A9ugxhxcuEtwci8khPIt2TTdz_am9AYDx1r9e9Tpw\"\n",
-    "graph = Neo4jGraph(\n",
-    "    url=url,\n",
-    "    username=username,\n",
-    "    password=password\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from langchain_community.graphs.graph_document import (\n",
-    "    Node as BaseNode,\n",
-    "    Relationship as BaseRelationship,\n",
-    "    GraphDocument,\n",
-    ")\n",
-    "from langchain.schema import Document\n",
-    "from typing import List, Dict, Any, Optional\n",
-    "from langchain.pydantic_v1 import Field, BaseModel\n",
-    "\n",
-    "class Property(BaseModel):\n",
-    "  \"\"\"A single property consisting of key and value\"\"\"\n",
-    "  key: str = Field(..., description=\"key\")\n",
-    "  value: str = Field(..., description=\"value\")\n",
-    "\n",
-    "class Node(BaseNode):\n",
-    "    properties: Optional[List[Property]] = Field(\n",
-    "        None, description=\"List of node properties\")\n",
-    "\n",
-    "class Relationship(BaseRelationship):\n",
-    "    properties: Optional[List[Property]] = Field(\n",
-    "        None, description=\"List of relationship properties\"\n",
-    "    )\n",
-    "\n",
-    "class KnowledgeGraph(BaseModel):\n",
-    "    \"\"\"Generate a knowledge graph with entities and relationships.\"\"\"\n",
-    "    nodes: List[Node] = Field(\n",
-    "        ..., description=\"List of nodes in the knowledge graph\")\n",
-    "    rels: List[Relationship] = Field(\n",
-    "        ..., description=\"List of relationships in the knowledge graph\"\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def format_property_key(s: str) -> str:\n",
-    "    words = s.split()\n",
-    "    if not words:\n",
-    "        return s\n",
-    "    first_word = words[0].lower()\n",
-    "    capitalized_words = [word.capitalize() for word in words[1:]]\n",
-    "    return \"\".join([first_word] + capitalized_words)\n",
-    "\n",
-    "def props_to_dict(props) -> dict:\n",
-    "    \"\"\"Convert properties to a dictionary.\"\"\"\n",
-    "    properties = {}\n",
-    "    if not props:\n",
-    "      return properties\n",
-    "    for p in props:\n",
-    "        properties[format_property_key(p.key)] = p.value\n",
-    "    return properties\n",
-    "\n",
-    "def map_to_base_node(node: Node) -> BaseNode:\n",
-    "    \"\"\"Map the KnowledgeGraph Node to the base Node.\"\"\"\n",
-    "    properties = props_to_dict(node.properties) if node.properties else {}\n",
-    "    # Add name property for better Cypher statement generation\n",
-    "    properties[\"name\"] = node.id.title()\n",
-    "    return BaseNode(\n",
-    "        id=node.id.title(), type=node.type.capitalize(), properties=properties\n",
-    "    )\n",
-    "\n",
-    "\n",
-    "def map_to_base_relationship(rel: Relationship) -> BaseRelationship:\n",
-    "    \"\"\"Map the KnowledgeGraph Relationship to the base Relationship.\"\"\"\n",
-    "    source = map_to_base_node(rel.source)\n",
-    "    target = map_to_base_node(rel.target)\n",
-    "    properties = props_to_dict(rel.properties) if rel.properties else {}\n",
-    "    return BaseRelationship(\n",
-    "        source=source, target=target, type=rel.type, properties=properties\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "from langchain.chains.openai_functions import (\n",
-    "    create_openai_fn_chain,\n",
-    "    create_structured_output_chain,\n",
-    ")\n",
-    "from langchain_openai import ChatOpenAI\n",
-    "from langchain.prompts import ChatPromptTemplate\n",
-    "\n",
-    "os.environ[\"OPENAI_API_KEY\"] = \"sk-proj-k8uMlsAJbdAuSWWnvaHyT3BlbkFJyQB8yMQavFuQDVmc4sNs\"\n",
-    "llm = ChatOpenAI(model=\"gpt-3.5-turbo-16k\", temperature=0)\n",
-    "\n",
-    "def get_extraction_chain(\n",
-    "    allowed_nodes: Optional[List[str]] = None,\n",
-    "    allowed_rels: Optional[List[str]] = None\n",
-    "    ):\n",
-    "    prompt = ChatPromptTemplate.from_messages(\n",
-    "        [(\n",
-    "          \"system\",\n",
-    "        f\"\"\"# Knowledge Graph Instructions for GPT-4\n",
-    "## 1. Overview\n",
-    "You are a sophisticated algorithm tailored for parsing Wikipedia pages to construct a knowledge graph about chemotherapy and related cancer treatments.\n",
-    "- **Nodes** symbolize entities such as medical conditions, drugs, symptoms, treatments, and associated medical concepts.\n",
-    "- The goal is to create a precise and comprehensible knowledge graph, serving as a reliable resource for medical practitioners and scholarly research.\n",
-    "\n",
-    "## 2. Labeling Nodes\n",
-    "- **Consistency**: Utilize uniform labels for node types to maintain clarity.\n",
-    "  - For instance, consistently label drugs as **\"Drug\"**, symptoms as **\"Symptom\"**, and treatments as **\"Treatment\"**.\n",
-    "- **Node IDs**: Apply descriptive, legible identifiers for node IDs, sourced directly from the text.\n",
-    "\n",
-    "{'- **Allowed Node Labels:**' + \", \".join(['Drug', 'Symptom', 'Treatment', 'MedicalCondition', 'ResearchStudy']) if allowed_nodes else \"\"}\n",
-    "{'- **Allowed Relationship Types**:' + \", \".join(['Treats', 'Causes', 'Researches', 'Recommends']) if allowed_rels else \"\"}\n",
-    "\n",
-    "## 3. Handling Numerical Data and Dates\n",
-    "- Integrate numerical data and dates as attributes of the corresponding nodes.\n",
-    "- **No Isolated Nodes for Dates/Numbers**: Directly associate dates and numerical figures as attributes with pertinent nodes.\n",
-    "- **Property Format**: Follow a straightforward key-value pattern for properties, with keys in camelCase, for example, `approvedYear`, `dosageAmount`.\n",
-    "\n",
-    "## 4. Coreference Resolution\n",
-    "- **Entity Consistency**: Guarantee uniform identification of each entity across the graph.\n",
-    "  - For example, if \"Methotrexate\" and \"MTX\" reference the same medication, uniformly apply \"Methotrexate\" as the node ID.\n",
-    "\n",
-    "## 5. Relationship Naming Conventions\n",
-    "- **Clarity and Standardization**: Utilize clear and standardized relationship names, preferring uppercase with underscores for readability.\n",
-    "  - For instance, use \"HAS_SIDE_EFFECT\" instead of \"HASSIDEEFFECT\", use \"CAN_RESULT_FROM\" instead of \"CANRESULTFROM\" etc.\n",
-    "- **Relevance and Specificity**: Choose relationship names that accurately reflect the connection between nodes, such as \"INHIBITS\" or \"ACTIVATES\" for interactions between substances.\n",
-    "\n",
-    "## 6. Strict Compliance\n",
-    "Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.\n",
-    "        \"\"\"),\n",
-    "            (\"human\", \"Use the given format to extract information from the following input: {input}\"),\n",
-    "            (\"human\", \"Tip: Precision in the node and relationship creation is vital for the integrity of the knowledge graph.\"),\n",
-    "        ])\n",
-    "    return create_structured_output_chain(KnowledgeGraph, llm, prompt, verbose=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def extract_and_store_graph(\n",
-    "    document: Document,\n",
-    "    nodes:Optional[List[str]] = None,\n",
-    "    rels:Optional[List[str]]=None) -> None:\n",
-    "    # Extract graph data using OpenAI functions\n",
-    "    extract_chain = get_extraction_chain(nodes, rels)\n",
-    "    data = extract_chain.invoke(document.page_content)['function']\n",
-    "    # Construct a graph document\n",
-    "    graph_document = GraphDocument(\n",
-    "      nodes = [map_to_base_node(node) for node in data.nodes],\n",
-    "      relationships = [map_to_base_relationship(rel) for rel in data.rels],\n",
-    "      source = document\n",
-    "    )\n",
-    "    # Store information into a graph\n",
-    "    graph.add_graph_documents([graph_document])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from langchain.document_loaders import WikipediaLoader\n",
-    "from langchain.text_splitter import TokenTextSplitter\n",
-    "\n",
-    "# Read the wikipedia article\n",
-    "raw_documents = WikipediaLoader(query=\"Chemotherapy\").load()\n",
-    "# Define chunking strategy\n",
-    "text_splitter = TokenTextSplitter(chunk_size=4096, chunk_overlap=96)\n",
-    "\n",
-    "# Only take the first the raw_documents\n",
-    "documents = text_splitter.split_documents(raw_documents[:5])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "  0%|          | 0/5 [00:00<?, ?it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 20%|██        | 1/5 [01:11<04:45, 71.44s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 40%|████      | 2/5 [01:25<01:53, 37.82s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 60%|██████    | 3/5 [01:33<00:48, 24.24s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 80%|████████  | 4/5 [01:49<00:20, 20.99s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 5/5 [01:52<00:00, 22.58s/it]\n"
-     ]
-    }
-   ],
-   "source": [
-    "from tqdm import tqdm\n",
-    "\n",
-    "for i, d in tqdm(enumerate(documents), total=len(documents)):\n",
-    "    extract_and_store_graph(d)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Query the knowledge graph in a RAG application\n",
-    "from langchain.chains import GraphCypherQAChain\n",
-    "\n",
-    "graph.refresh_schema()\n",
-    "\n",
-    "cypher_chain = GraphCypherQAChain.from_llm(\n",
-    "    graph=graph,\n",
-    "    cypher_llm=ChatOpenAI(temperature=0, model=\"gpt-4\"),\n",
-    "    qa_llm=ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-16k\"),\n",
-    "    validate_cypher=True, # Validate relationship directions\n",
-    "    verbose=True\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\n",
-      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
-      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "Generated Cypher:\n",
-      "\u001b[32;1m\u001b[1;3mMATCH (c:Condition {name: \"Cancer\"})-[:CANRESULTFROM]->(t:Treatment) RETURN t.name\u001b[0m\n",
-      "Full Context:\n",
-      "\u001b[32;1m\u001b[1;3m[]\u001b[0m\n",
-      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "\n",
-      "\u001b[1m> Finished chain.\u001b[0m\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'query': 'What are the different treatment strategies for cancer?',\n",
-       " 'result': \"I'm sorry, but I don't have the information to answer that question.\"}"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "cypher_chain.invoke({\"query\": \"What are the different treatment strategies for cancer?\"})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "my_project_env",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.19"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}