General Kuzu Demo

kuzu
yfiles
Author
Published

March 31, 2024

Download and install

Install the Kùzu Python API with pip.

%%capture
!pip install kuzu==0.6.0

Dataset

In this demo notebook, we use the following graph database that consists of User and City nodes, Follows relationships between Users, and LivesIn relationships between Users and Cities.

running-example.png

We first create this test dataset in pandas and export it as CSV.

import pandas as pd

# User nodes: one record per person, with a name and an age.
user_data = [
    {"name": person, "age": years}
    for person, years in (
        ("Adam", 30),
        ("Karissa", 40),
        ("Zhang", 50),
        ("Noura", 25),
    )
]

# Follows relationships between users, each tagged with a `since` year.
follows_data = [
    {"from": follower, "to": followee, "since": year}
    for follower, followee, year in (
        ("Adam", "Karissa", 2020),
        ("Adam", "Zhang", 2020),
        ("Karissa", "Zhang", 2021),
        ("Zhang", "Noura", 2022),
    )
]

# City nodes: one record per city, with a name and a population.
city_data = [
    {"name": city, "population": inhabitants}
    for city, inhabitants in (
        ("Waterloo", 150000),
        ("Kitchener", 200000),
        ("Guelph", 75000),
    )
]

# LivesIn relationships from a user to a city (no extra properties).
lives_in_data = [
    {"from": resident, "to": city}
    for resident, city in (
        ("Adam", "Waterloo"),
        ("Karissa", "Waterloo"),
        ("Zhang", "Kitchener"),
        ("Noura", "Guelph"),
    )
]

# One DataFrame per table; column order follows the key order of the records.
user_df, follows_df, city_df, lives_in_df = map(
    pd.DataFrame,
    (user_data, follows_data, city_data, lives_in_data),
)
user_df
name age
0 Adam 30
1 Karissa 40
2 Zhang 50
3 Noura 25
follows_df
from to since
0 Adam Karissa 2020
1 Adam Zhang 2020
2 Karissa Zhang 2021
3 Zhang Noura 2022
city_df
name population
0 Waterloo 150000
1 Kitchener 200000
2 Guelph 75000
lives_in_df
from to
0 Adam Waterloo
1 Karissa Waterloo
2 Zhang Kitchener
3 Noura Guelph
# Write every table to a CSV file without a header row or index column;
# these are the files the COPY statements later in the notebook read.
for csv_path, frame in (
    ("user.csv", user_df),
    ("follows.csv", follows_df),
    ("city.csv", city_df),
    ("lives_in.csv", lives_in_df),
):
    frame.to_csv(csv_path, header=False, index=False)

Using Kùzu

Import Kùzu.

import kuzu

Create an empty database and connect to it with the Python API.

import shutil

# Start from a clean slate: drop any database directory left by an earlier run
# (ignore_errors=True makes this a no-op when the directory does not exist).
DB_PATH = "./test"
shutil.rmtree(DB_PATH, ignore_errors=True)

# Open the on-disk database with a 1024**3 (1 GiB, assuming the size is in
# bytes) buffer pool, then create the connection used by all queries below.
db = kuzu.Database(DB_PATH, buffer_pool_size=1024 ** 3)
conn = kuzu.Connection(db)

Create schemas in Kùzu.

# Node tables come first: the relationship tables below name User and City in
# their FROM/TO clauses. Both node tables use `name` as the primary key.
conn.execute("CREATE NODE TABLE User(name STRING, age INT64, PRIMARY KEY (name))")
conn.execute("CREATE NODE TABLE City(name STRING, population INT64, PRIMARY KEY (name))")
# Follows carries one property (`since`); LivesIn has no properties.
conn.execute("CREATE REL TABLE Follows(FROM User TO User, since INT64)")
conn.execute("CREATE REL TABLE LivesIn(FROM User TO City)")
<kuzu.query_result.QueryResult at 0x7abff7547be0>

Load data from CSV files into Kùzu.

# The CSV files were written without a header row, in the same column order
# as the table definitions. Node tables are loaded before relationship tables,
# whose from/to columns hold user and city names (presumably resolved against
# the node primary keys — confirm in the Kùzu COPY FROM docs).
conn.execute('COPY User FROM "user.csv";')
conn.execute('COPY City FROM "city.csv";')
conn.execute('COPY Follows FROM "follows.csv";')
conn.execute('COPY LivesIn FROM "lives_in.csv";')
<kuzu.query_result.QueryResult at 0x7ac02a08a3e0>

Execute a simple query and iterate through the results.

results = conn.execute('MATCH (u:User) RETURN u.name, u.age;')
# Pull rows one at a time; each row prints as a list holding the returned
# columns in RETURN order (name, age).
while results.has_next():
    print(results.get_next())
# Explicitly close the result once every row has been consumed.
results.close()
['Adam', 30]
['Karissa', 40]
['Zhang', 50]
['Noura', 25]

Alternatively, the Python API can output results as a pandas DataFrame.

results = conn.execute('MATCH (a:User) - [f:Follows] -> (b:User) RETURN a.name, f.since, b.name;')
results.get_as_df()
a.name f.since b.name
0 Adam 2020 Karissa
1 Adam 2020 Zhang
2 Karissa 2021 Zhang
3 Zhang 2022 Noura

The Python API can also output results in Apache Arrow format.

results = conn.execute('MATCH (u:User) RETURN u.name, u.age;')
results.get_as_arrow(chunk_size=100)
pyarrow.Table
u.name: string
u.age: int64
----
u.name: [["Adam","Karissa","Zhang","Noura"]]
u.age: [[30,40,50,25]]
%%capture
!pip install yfiles_jupyter_graphs
from yfiles_jupyter_graphs import GraphWidget
from google.colab import output
# Enable Colab's custom widget manager (presumably required for the
# third-party yFiles widget to render in Colab — confirm).
output.enable_custom_widget_manager()
# Return whole nodes and relationships (a, f, b) rather than single
# properties so the result can be converted into a NetworkX graph.
result = conn.execute('MATCH (a:User) - [f:Follows] -> (b:User) RETURN a,f,b')
# Bare expression: the notebook displays the widget as the cell output.
GraphWidget(graph = result.get_as_networkx())
# Keep a handle on a widget so the cells below can restyle and re-show it.
g = GraphWidget(graph = result.get_as_networkx())
g.show()
# Switch the layout to 'Hierarchic'.
g.set_graph_layout('Hierarchic')
g.show()
# Label nodes with their `name` property.
g.node_label_mapping='name'
g.show()
# Label edges with their `_label` property ("Follows" for the edges here).
g.edge_label_mapping = '_label'
g.show()
# GraphWidget has no `show_cypher` method: calling
# g.show_cypher("MATCH ... LIMIT 3") raises
# "AttributeError: 'GraphWidget' object has no attribute 'show_cypher'".
# Instead, run the Cypher query through the Kùzu connection and display the
# result in a separate widget, leaving `g` untouched for the cells below.
limited = conn.execute("MATCH (a:User) - [f:Follows] -> (b:User) RETURN a,f,b LIMIT 3")
GraphWidget(graph=limited.get_as_networkx()).show()
# Color edges by their `_label` property: "Follows" edges are blue, any other
# edge is black.
g.set_edge_color_mapping(lambda edge : "blue" if edge["properties"]["_label"] == "Follows" else "black")
g.show()
# Per-table node styles, keyed by node table name. The "label" entry names the
# node property whose value is shown as the node's caption.
styles = {
    "User": {"color":"#6C7400", "shape":"ellipse", "label":"name"}
  }


def _node_table(node):
    """Return the key used to look up a node's entry in ``styles``.

    The edge mappings in this notebook read the table name from the
    ``_label`` property, so the same key is used for nodes; ``label`` is kept
    as a fallback for graphs that do not carry ``_label``.
    NOTE(review): confirm that nodes exported by ``get_as_networkx`` expose
    the table name under ``_label``.
    """
    properties = node["properties"]
    return properties.get("_label", properties.get("label"))


# Apply the matching style; nodes without an entry in `styles` get no override.
g.set_node_styles_mapping(lambda node: styles.get(_node_table(node), {}))
g.show()

# Caption each node with the property named by its style ("name" for User);
# nodes without a style fall back to their "label" property.
g.set_node_label_mapping(
    lambda node: node["properties"][styles.get(_node_table(node), {"label": "label"})["label"]]
)
g.show()
print(g.node_type_mapping.__doc__)
The default type mapping for nodes.

        Provides the mapped node color to distinguish different node types

        Parameters
        ----------
        index: int (optional)
        node: typing.Dict

        Notes
        -----
        This is the default value for the `node_type_mapping` property.
        Can be 'overwritten' by setting the property
        with a function of the same signature.

        If the given mapping function has only one parameter (that is not typed as int),
        then it will be called with the element (typing.Dict) as first parameter.

        Example
        -------
        .. code::

           from yfiles_jupyter_graphs import GraphWidget
           w = GraphWidget()
           def custom_node_type_mapping(node: typing.Dict):
           ...
           w.set_node_type_mapping(custom_node_type_mapping)

        Returns
        -------
        type: None

        
g.get_node_type_mapping()
yfiles_jupyter_graphs.widget.GraphWidget.default_node_type_mapping
def default_node_type_mapping(index: int, node: TDict)
The default type mapping for nodes.

Provides the mapped node color to distinguish different node types

Parameters
----------
index: int (optional)
node: typing.Dict

Notes
-----
This is the default value for the `node_type_mapping` property.
Can be 'overwritten' by setting the property
with a function of the same signature.

If the given mapping function has only one parameter (that is not typed as int),
then it will be called with the element (typing.Dict) as first parameter.

Example
-------
.. code::

   from yfiles_jupyter_graphs import GraphWidget
   w = GraphWidget()
   def custom_node_type_mapping(node: typing.Dict):
   ...
   w.set_node_type_mapping(custom_node_type_mapping)

Returns
-------
type: None

Let’s set a custom type mapping.

from typing import Dict
def custom_node_type_mapping(node: Dict):
    """Use the node's internal id offset as its type.

    Reads ``node['properties']['_id']['offset']`` and returns it unchanged.
    """
    internal_id = node['properties']['_id']
    return internal_id['offset']
g.set_node_type_mapping(custom_node_type_mapping)
g.get_node_type_mapping()
custom_node_type_mapping
def custom_node_type_mapping(node: Dict)
assign type accordingly
display(g)
# Palette used by custom_node_color_mapping; entries are CSS color values.
colors = ["#17bebb", "#ffc914", "#0b7189", "#ff6c00", '#76b041']
def custom_node_color_mapping(node: Dict):
    """Pick a palette color from the node's internal id offset.

    Parameters
    ----------
    node: typing.Dict
        Widget node dict; ``node['properties']['_id']['offset']`` must be an int.

    Returns
    -------
    color: str
        The entry of ``colors`` selected by the offset.

    Notes
    -----
    The offset is wrapped with modulo so that graphs with more nodes than
    palette entries cycle through ``colors`` instead of raising IndexError.
    Offsets already inside the palette map to the same color as before.
    """
    return colors[node['properties']['_id']['offset'] % len(colors)]
g.set_node_color_mapping(custom_node_color_mapping)
g.get_node_type_mapping()
custom_node_type_mapping
def custom_node_type_mapping(node: Dict)
assign type accordingly
display(g)

If a node color mapping is deleted, the color mapping reverts back to the default mapping.

g.del_node_color_mapping()
g.get_node_color_mapping()
yfiles_jupyter_graphs.widget.GraphWidget.default_node_color_mapping
def default_node_color_mapping(index: int, node: TDict)
The default color mapping for nodes.

Provides constant value of '#15AFAC' for all nodes, or different colors per label/type when importing a Neo4j
graph.

Parameters
----------
index: int (optional)
node: typing.Dict

Notes
-----
This is the default value for the `node_color_mapping` property.
Can be 'overwritten' by setting the property
with a function of the same signature.

If the given mapping function has only one parameter (that is not typed as int),
then it will be called with the element (typing.Dict) as first parameter.

Example
-------
.. code::

   from yfiles_jupyter_graphs import GraphWidget
   w = GraphWidget()
   def custom_node_color_mapping(node: typing.Dict):
   ...
   w.set_node_color_mapping(custom_node_color_mapping)

Returns
-------
color: str
    css color value

References
----------
css color value <https://developer.mozilla.org/en-US/docs/Web/CSS/color_value>

yFiles docs Fill api <https://docs.yworks.com/yfileshtml/#/api/Fill>
g.show()