%%capture
!pip install kuzu==0.6.0
General Kuzu Demo
kuzu
yfiles
Download and install
Install the Kùzu Python API with pip.
Dataset
In this demo notebook, we use the following graph database that consists of User and City nodes, Follows relationships between Users, and LivesIn relationships between Users and Cities.
We first create this test dataset in pandas and export it as CSV.
import pandas as pd
# Raw records for the demo graph. Each list becomes one DataFrame / CSV file.

# User nodes, identified by name.
user_data = [
    {"name": "Adam", "age": 30},
    {"name": "Karissa", "age": 40},
    {"name": "Zhang", "age": 50},
    {"name": "Noura", "age": 25},
]

# Follows relationships between users ("from" follows "to").
follows_data = [
    {"from": "Adam", "to": "Karissa", "since": 2020},
    {"from": "Adam", "to": "Zhang", "since": 2020},
    {"from": "Karissa", "to": "Zhang", "since": 2021},
    {"from": "Zhang", "to": "Noura", "since": 2022},
]

# City nodes, identified by name.
city_data = [
    {"name": "Waterloo", "population": 150000},
    {"name": "Kitchener", "population": 200000},
    {"name": "Guelph", "population": 75000},
]

# LivesIn relationships from a user to a city.
lives_in_data = [
    {"from": "Adam", "to": "Waterloo"},
    {"from": "Karissa", "to": "Waterloo"},
    {"from": "Zhang", "to": "Kitchener"},
    {"from": "Noura", "to": "Guelph"},
]

# One DataFrame per table; column order follows the dict key order above.
user_df = pd.DataFrame(user_data)
follows_df = pd.DataFrame(follows_data)
city_df = pd.DataFrame(city_data)
lives_in_df = pd.DataFrame(lives_in_data)
user_df
| | name | age |
|---|---|---|
| 0 | Adam | 30 |
| 1 | Karissa | 40 |
| 2 | Zhang | 50 |
| 3 | Noura | 25 |
follows_df
| | from | to | since |
|---|---|---|---|
| 0 | Adam | Karissa | 2020 |
| 1 | Adam | Zhang | 2020 |
| 2 | Karissa | Zhang | 2021 |
| 3 | Zhang | Noura | 2022 |
city_df
| | name | population |
|---|---|---|
| 0 | Waterloo | 150000 |
| 1 | Kitchener | 200000 |
| 2 | Guelph | 75000 |
lives_in_df
| | from | to |
|---|---|---|
| 0 | Adam | Waterloo |
| 1 | Karissa | Waterloo |
| 2 | Zhang | Kitchener |
| 3 | Noura | Guelph |
"user.csv", header=False, index=False)
user_df.to_csv("follows.csv", header=False, index=False)
follows_df.to_csv("city.csv", header=False, index=False)
city_df.to_csv("lives_in.csv", header=False, index=False) lives_in_df.to_csv(
Using Kùzu
Import Kùzu.
import kuzu
Create an empty database and connect to it with Python API.
import shutil
"./test", ignore_errors=True) shutil.rmtree(
= kuzu.Database('./test', buffer_pool_size=1024**3)
db = kuzu.Connection(db) conn
Create schemas in Kùzu.
"CREATE NODE TABLE User(name STRING, age INT64, PRIMARY KEY (name))")
conn.execute("CREATE NODE TABLE City(name STRING, population INT64, PRIMARY KEY (name))")
conn.execute("CREATE REL TABLE Follows(FROM User TO User, since INT64)")
conn.execute("CREATE REL TABLE LivesIn(FROM User TO City)") conn.execute(
<kuzu.query_result.QueryResult at 0x7abff7547be0>
Load data from CSV files into Kùzu.
# Load the node tables before the relationship tables that refer to them.
conn.execute('COPY User FROM "user.csv";')
conn.execute('COPY City FROM "city.csv";')
conn.execute('COPY Follows FROM "follows.csv";')
conn.execute('COPY LivesIn FROM "lives_in.csv";')
<kuzu.query_result.QueryResult at 0x7ac02a08a3e0>
Execute a simple query and iterate through the results.
# Run the query, then fetch and print rows one at a time until none remain.
results = conn.execute('MATCH (u:User) RETURN u.name, u.age;')
while results.has_next():
    print(results.get_next())
# Close the result once every row has been consumed.
results.close()
['Adam', 30]
['Karissa', 40]
['Zhang', 50]
['Noura', 25]
Alternatively, the Python API can also output results as a Pandas dataframe.
# Return each Follows edge with its endpoints and show it as a DataFrame.
results = conn.execute('MATCH (a:User) - [f:Follows] -> (b:User) RETURN a.name, f.since, b.name;')
results.get_as_df()
| | a.name | f.since | b.name |
|---|---|---|---|
| 0 | Adam | 2020 | Karissa |
| 1 | Adam | 2020 | Zhang |
| 2 | Karissa | 2021 | Zhang |
| 3 | Zhang | 2022 | Noura |
The Python API can also output results in Apache Arrow format.
# Same user query as before, this time materialised as a pyarrow Table.
results = conn.execute('MATCH (u:User) RETURN u.name, u.age;')
results.get_as_arrow(chunk_size=100)
pyarrow.Table
u.name: string
u.age: int64
----
u.name: [["Adam","Karissa","Zhang","Noura"]]
u.age: [[30,40,50,25]]
%%capture
!pip install yfiles_jupyter_graphs
from yfiles_jupyter_graphs import GraphWidget
from google.colab import output
# Enable Colab's custom widget manager so the graph widget can render.
output.enable_custom_widget_manager()

# Fetch the Follows subgraph and hand it to the widget as a networkx graph.
result = conn.execute('MATCH (a:User) - [f:Follows] -> (b:User) RETURN a,f,b')
GraphWidget(graph=result.get_as_networkx())
# Keep a handle on the widget so later cells can configure it.
g = GraphWidget(graph=result.get_as_networkx())
g.show()
# Select the 'Hierarchic' layout and show the widget again.
g.set_graph_layout('Hierarchic')
g.show()
# Use the 'name' property as the node label.
g.node_label_mapping = 'name'
g.show()
# Use the '_label' property as the edge label.
g.edge_label_mapping = '_label'
g.show()
"MATCH (a:User) - [f:Follows] -> (b:User) RETURN a,f,b LIMIT 3") g.show_cypher(
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-26-97da18912208> in <cell line: 1>() ----> 1 g.show_cypher("MATCH (a:User) - [f:Follows] -> (b:User) RETURN a,f,b LIMIT 3") AttributeError: 'GraphWidget' object has no attribute 'show_cypher'
# Colour edges whose '_label' is "Follows" blue and every other edge black.
g.set_edge_color_mapping(
    lambda edge: "blue" if edge["properties"]["_label"] == "Follows" else "black"
)
g.show()
# Per-label node styles; labels without an entry get an empty style dict.
styles = {
    "User": {"color": "#6C7400", "shape": "ellipse", "label": "name"},
}
g.set_node_styles_mapping(lambda node: styles.get(node["properties"]["label"], {}))
g.show()
# Look up which property to show as the label from `styles`; labels without
# a style entry fall back to the node's own "label" property.
g.set_node_label_mapping(
    lambda node: node["properties"][
        styles.get(node["properties"]["label"], {"label": "label"})["label"]
    ]
)
g.show()
print(g.node_type_mapping.__doc__)
The default type mapping for nodes.
Provides the mapped node color to distinguish different node types
Parameters
----------
index: int (optional)
node: typing.Dict
Notes
-----
This is the default value for the `node_type_mapping` property.
Can be 'overwritten' by setting the property
with a function of the same signature.
If the given mapping function has only one parameter (that is not typed as int),
then it will be called with the element (typing.Dict) as first parameter.
Example
-------
.. code::
from yfiles_jupyter_graphs import GraphWidget
w = GraphWidget()
def custom_node_type_mapping(node: typing.Dict):
...
w.set_node_type_mapping(custom_node_type_mapping)
Returns
-------
type: None
g.get_node_type_mapping()
yfiles_jupyter_graphs.widget.GraphWidget.default_node_type_mapping
def default_node_type_mapping(index: int, node: TDict)
The default type mapping for nodes. Provides the mapped node color to distinguish different node types Parameters ---------- index: int (optional) node: typing.Dict Notes ----- This is the default value for the `node_type_mapping` property. Can be 'overwritten' by setting the property with a function of the same signature. If the given mapping function has only one parameter (that is not typed as int), then it will be called with the element (typing.Dict) as first parameter. Example ------- .. code:: from yfiles_jupyter_graphs import GraphWidget w = GraphWidget() def custom_node_type_mapping(node: typing.Dict): ... w.set_node_type_mapping(custom_node_type_mapping) Returns ------- type: None
Let’s set the new type mapping.
from typing import Dict
def custom_node_type_mapping(node: Dict):
    """assign type accordingly"""
    # The node's type is the offset stored under its '_id' property.
    return node['properties']['_id']['offset']
# Install the custom mapping, then read it back to confirm it is active.
g.set_node_type_mapping(custom_node_type_mapping)
g.get_node_type_mapping()
custom_node_type_mapping
def custom_node_type_mapping(node: Dict)
assign type accordingly
display(g)
# Palette indexed by node offset.
colors = ["#17bebb", "#ffc914", "#0b7189", "#ff6c00", '#76b041']


def custom_node_color_mapping(node: Dict):
    """Assign a palette color based on the node's ``_id`` offset.

    The offset found at ``node['properties']['_id']['offset']`` selects an
    entry of ``colors``. Offsets at or beyond the palette size wrap around,
    so graphs with more nodes than colors do not raise ``IndexError``.
    """
    return colors[node['properties']['_id']['offset'] % len(colors)]
# Install the custom color mapping; the type mapping read back here is the
# one set earlier.
g.set_node_color_mapping(custom_node_color_mapping)
g.get_node_type_mapping()
custom_node_type_mapping
def custom_node_type_mapping(node: Dict)
assign type accordingly
display(g)
If a node color mapping is deleted, the color mapping reverts back to the default mapping.
# Drop the custom color mapping, then read the mapping back.
g.del_node_color_mapping()
g.get_node_color_mapping()
yfiles_jupyter_graphs.widget.GraphWidget.default_node_color_mapping
def default_node_color_mapping(index: int, node: TDict)
The default color mapping for nodes. Provides constant value of '#15AFAC' for all nodes, or different colors per label/type when importing a Neo4j graph. Parameters ---------- index: int (optional) node: typing.Dict Notes ----- This is the default value for the `node_color_mapping` property. Can be 'overwritten' by setting the property with a function of the same signature. If the given mapping function has only one parameter (that is not typed as int), then it will be called with the element (typing.Dict) as first parameter. Example ------- .. code:: from yfiles_jupyter_graphs import GraphWidget w = GraphWidget() def custom_node_color_mapping(node: typing.Dict): ... w.set_node_color_mapping(custom_node_color_mapping) Returns ------- color: str css color value References ---------- css color value <https://developer.mozilla.org/en-US/docs/Web/CSS/color_value> yFiles docs Fill api <https://docs.yworks.com/yfileshtml/#/api/Fill>
g.show()