Knowledge Graph Building with LLM¶

                                   ┌─────────────────────────┐
                                   │                         │
                                   │      Knowledge Graph    │
                                   │      on NebulaGraph     │
                                   │                         │
                                   │                .───.    │
                                   │           ┌──▶(     )   │
                                   │           │    `───'    │
┌────────────────────┐             │  .───.    │             │
│ Data Sources       │             │ (     )───┘             │
│                    │   Extract   │  `───'                  │
│ Database, Wikipedia│━━With LLM━━━▶    │         .───.      │
│ CSV, JSON Files    │             │    └───────▶(     )     │
│ Web APIs...        │             │              `───'      │
└────────────────────┘             │                ▲        │
                                   │                │        │
                                   │                │  .───. │
                                   │                └─(     )│
                                   │                   `───' │
                                   │                         │
                                   │                         │
                                   └─────────────────────────┘

1. Preparation¶

1.1 Prepare for LLM¶

In [ ]:
# Only For OpenAI

import os
os.environ['OPENAI_API_KEY'] = "INSERT OPENAI KEY"

import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO) # logging.DEBUG for more verbose output

from llama_index import (
    KnowledgeGraphIndex,
    LLMPredictor,
    ServiceContext,
    SimpleDirectoryReader,
)
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore


from langchain import OpenAI
from IPython.display import Markdown, display


# define LLM (kept in `llm` so later cells, e.g. KnowledgeGraphQueryEngine
# and NebulaGraphQAChain, can reuse it)
llm = OpenAI(temperature=0, model_name="text-davinci-002")
llm_predictor = LLMPredictor(llm=llm)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size_limit=512)
In [ ]:
# Only For Azure OpenAI

import os
import json
import openai
from langchain.llms import AzureOpenAI
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    KnowledgeGraphIndex,
    LLMPredictor,
    ServiceContext
)

from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore

import logging
import sys

from IPython.display import Markdown, display

logging.basicConfig(stream=sys.stdout, level=logging.INFO) # logging.DEBUG for more verbose output

openai.api_type = "azure"
openai.api_base = "INSERT AZURE API BASE"
openai.api_version = "2022-12-01"
os.environ["OPENAI_API_KEY"] = "INSERT OPENAI KEY"
openai.api_key = os.getenv("OPENAI_API_KEY")

# define LLM
llm = AzureOpenAI(
    deployment_name="INSERT DEPLOYMENT NAME",
    temperature=0,
    openai_api_version=openai.api_version,
    model_kwargs={
        "api_key": openai.api_key,
        "api_base": openai.api_base,
        "api_type": openai.api_type,
        "api_version": openai.api_version,
    }
)
llm_predictor = LLMPredictor(llm=llm)

# You need to deploy your own embedding model as well as your own chat completion model
embedding_llm = LangchainEmbedding(
    OpenAIEmbeddings(
        model="text-embedding-ada-002",
        deployment="INSERT DEPLOYMENT NAME",
        openai_api_key=openai.api_key,
        openai_api_base=openai.api_base,
        openai_api_type=openai.api_type,
        openai_api_version=openai.api_version,
    ),
    embed_batch_size=1,
)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embedding_llm,
)
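
Whichever path you took, a quick smoke test helps confirm the credentials before we start extracting triplets. This is a hypothetical check, not part of the build itself; both setup cells above define llm, so it works for either:

In [ ]:
# Sanity check: call the underlying LangChain LLM directly.
# If the API key, base, or deployment name is wrong, this fails fast here
# rather than midway through knowledge graph extraction.
print(llm("Reply with a single word: pong"))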

1.2 Prepare for NebulaGraph as Graph Store¶

❗Access the NebulaGraph console to create the space and graph schema (the :sleep 10 statements give schema changes time to take effect):

CREATE SPACE guardians(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);
:sleep 10;
USE guardians;
CREATE TAG entity(name string);
CREATE EDGE relationship(relationship string);
:sleep 10;
CREATE TAG INDEX entity_index ON entity(name(256));
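
If you prefer to stay inside the notebook, the same DDL can be run programmatically. Below is a minimal sketch assuming the nebula3-python client is installed (pip install nebula3-python); it is an alternative to the console, not a required step:

In [ ]:
import time

from nebula3.Config import Config
from nebula3.gclient.net import ConnectionPool

# Connect to the local NebulaGraph instance.
pool = ConnectionPool()
pool.init([("127.0.0.1", 9669)], Config())

with pool.session_context("root", "nebula") as session:
    session.execute(
        "CREATE SPACE IF NOT EXISTS guardians"
        "(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);"
    )
    time.sleep(10)  # wait for the space to come online (replaces console :sleep)
    session.execute("USE guardians;")
    session.execute("CREATE TAG IF NOT EXISTS entity(name string);")
    session.execute("CREATE EDGE IF NOT EXISTS relationship(relationship string);")
    time.sleep(10)
    session.execute("CREATE TAG INDEX IF NOT EXISTS entity_index ON entity(name(256));")
pool.close()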
In [ ]:
os.environ['NEBULA_USER'] = "root"
os.environ['NEBULA_PASSWORD'] = "nebula" # default password
os.environ['NEBULA_ADDRESS'] = "127.0.0.1:9669" # assuming NebulaGraph is installed locally

space_name = "guardians"
edge_types, rel_prop_names = ["relationship"], ["relationship"] # defaults; can be omitted when creating from an empty KG
tags = ["entity"] # default; can be omitted when creating from an empty KG

graph_store = NebulaGraphStore(
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
)
storage_context = StorageContext.from_defaults(graph_store=graph_store)

2. Build the Knowledge Graph¶

2.1 Preprocess Data¶

We will download and preprocess data from: https://en.wikipedia.org/wiki/Guardians_of_the_Galaxy_Vol._3

In [ ]:
from llama_index import download_loader

WikipediaReader = download_loader("WikipediaReader")

loader = WikipediaReader()

documents = loader.load_data(pages=['Guardians of the Galaxy Vol. 3'], auto_suggest=False)
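
A quick look at what the loader returned (a hypothetical inspection step, not required for the build):

In [ ]:
# Confirm we fetched the right page before spending LLM calls on it.
print(f"Loaded {len(documents)} document(s)")
print(documents[0].get_text()[:300])  # the first few sentences of the article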

2.2 Extract Triplets and Save to NebulaGraph¶

We will persist the index to both disk and NebulaGraph, so that we don't need to run the triplet extraction again when reusing it.

In [ ]:
kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=10,
    service_context=service_context,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    include_embeddings=True,
)

Let's persist the storage context from memory to disk:

In [ ]:
kg_index.storage_context.persist(persist_dir='./storage_graph')

The following files are generated; docstore.json holds the chunked documents, while the large index_store.json holds the knowledge graph index, including the extracted triplets and their embeddings:

In [ ]:
!ls -l storage_graph
total 9120
-rw-r--r--@ 1 weyl  staff    66922 Jul 12 20:26 docstore.json
-rw-r--r--@ 1 weyl  staff  4594860 Jul 12 20:26 index_store.json
-rw-r--r--@ 1 weyl  staff       51 Jul 12 20:26 vector_store.json

2.3 Inspect the Graph we built¶

We will leverage the NebulaGraph Jupyter extension; remember to install it before the next step:

$ pip install ipython-ngql
In [ ]:
%load_ext ngql
%ngql --address 127.0.0.1 --port 9669 --user root --password nebula
%ngql USE guardians

We can query an arbitrary 30 edges:

In [ ]:
%ngql MATCH ()-[e]->() RETURN e LIMIT 30
INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)
Out[ ]:
e
0 ("Adam Warlock")-[:relationship@98688268702526...
1 ("Alan F. Horn")-[:relationship@-3866030880391...
2 ("Alan F. Horn")-[:relationship@-3866030880391...
3 ("Bakalova")-[:relationship@-78310709996010382...
4 ("Bakalova")-[:relationship@-18287293525973127...
5 ("Bautista")-[:relationship@262829015229588616...
6 ("Bautista")-[:relationship@264209192087427643...
7 ("Chris Pratt")-[:relationship@-53886203992796...
8 ("Christopher Fairbank")-[:relationship@704429...
9 ("Cooper")-[:relationship@2642091920874276436{...
10 ("Daniela Melchior")-[:relationship@5794733688...
11 ("Dave Bautista")-[:relationship@-538862039927...
12 ("Debicki")-[:relationship@2682825685616935037...
13 ("Diesel")-[:relationship@2642091920874276436{...
14 ("Disney")-[:relationship@-7269035608107002438...
15 ("Disney")-[:relationship@4594936970614874383{...
16 ("Drax")-[:relationship@1274897091364343563{re...
17 ("Elizabeth Debicki")-[:relationship@704429536...
18 ("Gamora")-[:relationship@2108090488737331578{...
19 ("Gamora")-[:relationship@4452575226635738814{...
20 ("Gamora")-[:relationship@7254563908946132317{...
21 ("George MacKay")-[:relationship@2027380399406...
22 ("Gillan")-[:relationship@-1827525784919523442...
23 ("Gillan")-[:relationship@1278621438198917644{...
24 ("Gillan")-[:relationship@2642091920874276436{...
25 ("Gillan")-[:relationship@7823655194542812825{...
26 ("Gregg Henry")-[:relationship@704429536949728...
27 ("Guardians cast")-[:relationship@-64051353433...
28 ("Guardians of the Galaxy")-[:relationship@790...
29 ("Guardians of the Galaxy Vol. 3")-[:relations...

And draw it:

In [ ]:
%ng_draw
nebulagraph_draw.html
Out[ ]:

NL2Cypher¶

Now we have a Knowledge Graph built on top of Wikipedia. With the NebulaGraph LLM tooling, we can query the KG in natural language (NL2Cypher).

First, let's use LlamaIndex:

In [ ]:
from llama_index.query_engine import KnowledgeGraphQueryEngine

from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore

nl2kg_query_engine = KnowledgeGraphQueryEngine(
    storage_context=storage_context,
    service_context=service_context,
    llm=llm,
    verbose=True,
)

We can see that KnowledgeGraphQueryEngine generates the graph query, executes it against NebulaGraph, and finally has the LLM synthesize the answer, all in one go!

In [ ]:
response = nl2kg_query_engine.query(
    "Tell me about Peter Quill?",
)
display(Markdown(f"<b>{response}</b>"))
Graph Store Query: MATCH (p:`entity`)-[:relationship]->(e:`entity`) WHERE p.`entity`.`name` == 'Peter Quill' RETURN e.`entity`.`name`;
Graph Store Response: {'e.entity.name': ['Guardians of the Galaxy']}
Final Response:
Peter Quill is a character from the Marvel Comics series Guardians of the Galaxy.

💡 Apart from the end-to-end KGQA, we can also ask for just the NL2Cypher step with generate_query.

In [ ]:
graph_query = nl2kg_query_engine.generate_query(
    "Tell me about Peter Quill?",
)
graph_query = graph_query.replace("WHERE", "\n  WHERE").replace("RETURN", "\nRETURN")

display(
    Markdown(
        f"""
```cypher
{graph_query}
```
"""
    )
)
MATCH (p:`entity`)-[:relationship]->(e:`entity`) 
  WHERE p.`entity`.`name` == 'Peter Quill' 
RETURN e.`entity`.`name`;

Then, of course, we can run the generated query ourselves!

In [ ]:
%%ngql
MATCH (p:`entity`)-[:relationship]->(e:`entity`)
  WHERE p.`entity`.`name` == 'Peter Quill'
RETURN e.`entity`.`name`;
INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)
Out[ ]:
e.entity.name
0 Guardians of the Galaxy

Or we can change the RETURN clause to the whole path, so that we can draw it!

In [ ]:
%%ngql
MATCH path_0=(p:`entity`)-[:relationship]->(e:`entity`)
  WHERE p.`entity`.`name` == 'Peter Quill'
RETURN path_0;
INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)
Out[ ]:
path_0
0 ("Peter Quill" :entity{name: "Peter Quill"})-[...
In [ ]:
%ng_draw
nebulagraph_draw.html
Out[ ]:
In [ ]:
!mv nebulagraph_draw.html nebulagraph_draw_nl2cypher.html

NL2Cypher with LangChain¶

Alternatively, we can do this via LangChain's NebulaGraphQAChain; see the docs.

In [ ]:
from langchain.chat_models import ChatOpenAI  # optional alternative chat LLM
from langchain.chains import NebulaGraphQAChain
from langchain.graphs import NebulaGraph

graph = NebulaGraph(
    space=space_name,
    username="root",
    password="nebula",
    address="127.0.0.1",
    port=9669,
    session_pool_size=30,
)

chain = NebulaGraphQAChain.from_llm(
    llm, graph=graph, verbose=True
)
In [ ]:
chain.run(
    "Tell me about Peter Quill?",
)

> Entering new NebulaGraphQAChain chain...
Generated nGQL:
MATCH (p:`entity`)-[e:relationship]->(m:`entity`) WHERE p.`entity`.`name` == 'Peter Quill' RETURN p.`entity`.`name`, e.relationship, m.`entity`.`name`;
Full Context:
{'p.entity.name': ['Peter Quill'], 'e.relationship': ['is leader of'], 'm.entity.name': ['Guardians of the Galaxy']}

> Finished chain.
Out[ ]:
' Peter Quill is the leader of the Guardians of the Galaxy.'

Graph RAG¶

Apart from the NL2Cypher fashion of exploiting the KG in QA, we can also, especially for complex tasks, use it in the Retrieval-Augmented Generation (RAG) way: retrieve relevant triplets from the graph first, then have the LLM synthesize an answer from them.

In [ ]:
from llama_index import load_index_from_storage

storage_context_graph = StorageContext.from_defaults(persist_dir='./storage_graph', graph_store=graph_store)
kg_index_new = load_index_from_storage(
    storage_context=storage_context_graph,
    service_context=service_context,
    max_triplets_per_chunk=10,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    include_embeddings=True,
)
INFO:llama_index.indices.loading:Loading all indices.
In [ ]:
kg_rag_query_engine = kg_index_new.as_query_engine(
    include_text=False,  # return only triplets, not the source text chunks
    retriever_mode="keyword",  # match question keywords against graph entities
    response_mode="tree_summarize",
)
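
Since the index was built with include_embeddings=True, the retriever can also match triplets by embedding similarity. A sketch of an alternative configuration (hypothetical, not used in the run below):

In [ ]:
# Hybrid retrieval: combine keyword matching with embedding similarity
# over the stored triplets; possible because include_embeddings=True above.
kg_rag_hybrid_engine = kg_index_new.as_query_engine(
    include_text=False,
    retriever_mode="hybrid",
    response_mode="tree_summarize",
)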
In [ ]:
response = kg_rag_query_engine.query(
    "Tell me about Peter Quill?"
)
display(Markdown(f"<b>{response}</b>"))
INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me about Peter Quill?
INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['biography', 'Peter Quill', 'Peter', 'Quill', 'information']
INFO:llama_index.indices.knowledge_graph.retriever:> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`
Peter Quill ['is leader of', 'Guardians of the Galaxy', 'released in', '2014']
Peter Quill ['portrays', 'Peter Quill']
Peter Quill ['is leader of', 'Guardians of the Galaxy', 'reprised role from', 'Guardians of the Galaxy']
Peter Quill ['is leader of', 'Guardians of the Galaxy']
Peter Quill ['is leader of', 'Guardians of the Galaxy', 'directed', 'Guardians of the Galaxy']
Peter Quill ['is leader of', 'Guardians of the Galaxy', 'wrote', 'Guardians of the Galaxy']
Peter Quill ['is leader of', 'Guardians of the Galaxy', 'sequel to', 'Guardians of the Galaxy']
Quill ['speaks', ' fuck ']
Peter Quill is the leader of the Guardians of the Galaxy, a superhero team released in 2014. He portrays the character of Peter Quill and reprised his role from the Guardians of the Galaxy. He was also the director and writer of the Guardians of the Galaxy and its sequel. Quill is known to speak with profanity.
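
To see exactly what the retriever hands to answer synthesis, we can also call it directly (a hypothetical inspection step; as_retriever accepts the same retriever settings as the query engine):

In [ ]:
# Inspect the raw KG context retrieved for the question, before the LLM
# summarizes it into an answer.
kg_retriever = kg_index_new.as_retriever(
    include_text=False,
    retriever_mode="keyword",
)
for node_with_score in kg_retriever.retrieve("Tell me about Peter Quill?"):
    print(node_with_score.node.get_text())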
In [ ]:
%%ngql
MATCH path0=(p:`entity`)-[*1..2]-() WHERE p.`entity`.`name` == 'Peter Quill'
RETURN path0;
INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)
Out[ ]:
path0
0 ("Peter Quill" :entity{name: "Peter Quill"})-[...
1 ("Peter Quill" :entity{name: "Peter Quill"})<-...
2 ("Peter Quill" :entity{name: "Peter Quill"})-[...
3 ("Peter Quill" :entity{name: "Peter Quill"})-[...
4 ("Peter Quill" :entity{name: "Peter Quill"})-[...
5 ("Peter Quill" :entity{name: "Peter Quill"})-[...
6 ("Peter Quill" :entity{name: "Peter Quill"})-[...
7 ("Peter Quill" :entity{name: "Peter Quill"})-[...
8 ("Peter Quill" :entity{name: "Peter Quill"})-[...
9 ("Peter Quill" :entity{name: "Peter Quill"})-[...
10 ("Peter Quill" :entity{name: "Peter Quill"})-[...
In [ ]:
%ng_draw
nebulagraph_draw.html
Out[ ]:
In [ ]:
!mv nebulagraph_draw.html nebulagraph_draw_rag.html