%%capture
# Install LlamaIndex core, the OpenAI LLM integration, the Kùzu graph-store
# backend, and pyvis (used later to visualize the knowledge graph).
%pip install llama-index
%pip install llama-index-llms-openai
%pip install llama-index-graph-stores-kuzu
%pip install pyvis
Kuzu Graph Store (LlamaIndex docs)
Kùzu Graph Store
This notebook walks through configuring Kùzu
to be the backend for graph storage in LlamaIndex.
# My OpenAI Key — paste your key into the empty string before running.
import os

os.environ["OPENAI_API_KEY"] = ""
Prepare for Kùzu
# Clean up all the directories used in this notebook so each run starts
# from empty Kùzu databases; ignore_errors makes this a no-op on first run.
import shutil

shutil.rmtree("./test1", ignore_errors=True)
shutil.rmtree("./test2", ignore_errors=True)
shutil.rmtree("./test3", ignore_errors=True)
import kuzu

# Create (or open) an on-disk Kùzu database in ./test1.
db = kuzu.Database("test1")
Using Knowledge Graph with KuzuGraphStore
from llama_index.graph_stores.kuzu import KuzuGraphStore

# Wrap the Kùzu database in LlamaIndex's graph-store interface.
graph_store = KuzuGraphStore(db)
Building the Knowledge Graph
from llama_index.core import SimpleDirectoryReader, KnowledgeGraphIndex
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from IPython.display import Markdown, display
import kuzu
# Download the Paul Graham essay used as the example corpus.
!curl -LJO https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 75042 100 75042 0 0 276k 0 --:--:-- --:--:-- --:--:-- 276k
!mkdir data
!mv paul_graham_essay.txt data/
!ls -ltra data/
total 84
-rw-r--r-- 1 root root 75042 Aug 21 13:49 paul_graham_essay.txt
drwxr-xr-x 1 root root 4096 Aug 21 13:50 ..
drwxr-xr-x 2 root root 4096 Aug 21 13:50 .
# Load every file in data/ as a list of LlamaIndex Document objects.
documents = SimpleDirectoryReader(
    "data/"
).load_data()
# define LLM
# temperature=0 keeps triplet extraction deterministic; chunk_size controls
# how much text each extraction call sees.
llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
Settings.llm = llm
Settings.chunk_size = 512
from llama_index.core import StorageContext

# Route all graph persistence through the Kùzu-backed graph store.
storage_context = StorageContext.from_defaults(graph_store=graph_store)
# NOTE: can take a while!
# Extract up to 2 (subject, relation, object) triplets per chunk with the LLM
# and persist them into the Kùzu graph store.
index = KnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=2,
    storage_context=storage_context,
)

# # To reload from an existing graph store without recomputing each time, use:
# index = KnowledgeGraphIndex(nodes=[], storage_context=storage_context)
Querying the Knowledge Graph
First, we can query and send only the triplets to the LLM.
# Query the graph sending only the retrieved triplets (no source text) to the LLM.
query_engine = index.as_query_engine(
    include_text=False, response_mode="tree_summarize"
)
response = query_engine.query(
    "Tell me more about Interleaf",
)
display(Markdown(f"<b>{response}</b>"))
Interleaf was involved in making software and added a scripting language. Additionally, it was also associated with a reason for existence and eventually faced challenges due to Moore’s law.
For more detailed answers, we can also send the text from which the retrieved triplets were extracted.
# Same query, but also include the source text the triplets were extracted
# from, which yields a more detailed answer.
query_engine = index.as_query_engine(
    include_text=True, response_mode="tree_summarize"
)
response = query_engine.query(
    "Tell me more about Interleaf",
)
display(Markdown(f"<b>{response}</b>"))
Interleaf was a company that made software for creating documents. They added a scripting language inspired by Emacs, which was a dialect of Lisp. The software they created had a specific purpose, which was to allow users to build their own online stores. Despite their impressive technology and smart employees, Interleaf ultimately faced challenges and was impacted by Moore’s Law, leading to its eventual decline.
Query with embeddings
# NOTE: can take a while!
# Rebuild the index in a fresh database (test2), this time also storing
# triplet embeddings so hybrid (keyword + embedding) retrieval works.
db = kuzu.Database("test2")
graph_store = KuzuGraphStore(db)
storage_context = StorageContext.from_defaults(graph_store=graph_store)
new_index = KnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=2,
    storage_context=storage_context,
    include_embeddings=True,
)
# query using the top-k (here 5) triplets plus keywords (duplicate triplets are removed)
# NOTE: query new_index — it was built with include_embeddings=True, which
# embedding_mode="hybrid" requires; the earlier `index` has no embeddings.
query_engine = new_index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
)
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf",
)
display(Markdown(f"<b>{response}</b>"))
The author worked at Interleaf, a company that made software for creating documents. Inspired by Emacs, Interleaf added a scripting language that was a dialect of Lisp. The author was hired as a Lisp hacker to write things in this scripting language. However, the author found it challenging to work with the software at Interleaf due to his lack of understanding of C and his reluctance to learn it. Despite this, the author managed to learn some valuable lessons at Interleaf, mostly about what not to do.
Visualizing the Graph
## create graph
from pyvis.network import Network

# IPython.core.display is deprecated; IPython.display is the supported path.
from IPython.display import display, HTML

# Export the knowledge graph as a NetworkX graph and render it with pyvis.
g = index.get_networkx_graph()
net = Network(notebook=True, cdn_resources="in_line", directed=True)
net.from_nx(g)
net.show("kuzugraph_draw.html")
# NOTE(review): HTML("kuzugraph_draw.html") renders the literal string, not the
# file; use HTML(filename="kuzugraph_draw.html") to embed the saved page.
display(HTML("kuzugraph_draw.html"))
kuzugraph_draw.html
[Optional] Try building the graph and manually add triplets!
from llama_index.core.node_parser import SentenceSplitter

# Split the documents into sentence-based nodes so triplets can be attached
# to individual nodes manually below.
node_parser = SentenceSplitter()
nodes = node_parser.get_nodes_from_documents(documents)
# initialize an empty database
db = kuzu.Database("test3")
graph_store = KuzuGraphStore(db)
storage_context = StorageContext.from_defaults(graph_store=graph_store)
# Start from an empty index (no documents); triplets are inserted manually.
index = KnowledgeGraphIndex(
    [],
    storage_context=storage_context,
)
# add keyword mappings and nodes manually
# add triplets (subject, relationship, object)

# for node 0
node_0_tups = [
    ("author", "worked on", "writing"),
    ("author", "worked on", "programming"),
]
for tup in node_0_tups:
    index.upsert_triplet_and_node(tup, nodes[0])
# for node 1
node_1_tups = [
    ("Interleaf", "made software for", "creating documents"),
    ("Interleaf", "added", "scripting language"),
    ("software", "generate", "web sites"),
]
for tup in node_1_tups:
    index.upsert_triplet_and_node(tup, nodes[1])
# Query the manually-built graph, sending only triplets to the LLM.
query_engine = index.as_query_engine(
    include_text=False, response_mode="tree_summarize"
)
response = query_engine.query(
    "Tell me more about Interleaf",
)
str(response)
'Interleaf was involved in creating documents and also added a scripting language to its software.'