!pip install py2neo==4.1.3 pandas matplotlib sklearn
from py2neo import Graph
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
pd.set_option('display.float_format', lambda x: '%.3f' % x)
# Change the line of code below to use the IP Address, Bolt Port, and Password of your Sandbox.
# graph = Graph("<Bolt URL>", auth=("neo4j", "<Password>"))
graph = Graph("bolt://18.207.236.58:35753", auth=("neo4j", "<password>"))
# Sample output printed by the db schema check (`CALL db.schema()`):
# <class 'list'>
# 1
# ========================================
# nodes
# ----------------------------------------
# (_-39:Venue {constraints: ['CONSTRAINT ON ( venue:Venue ) ASSERT venue.name IS UNIQUE'], indexes: [], name: 'Venue'})
# (_-37:Article {constraints: ['CONSTRAINT ON ( article:Article ) ASSERT article.index IS UNIQUE'], indexes: [], name: 'Article'})
# (_-38:Author {constraints: ['CONSTRAINT ON ( author:Author ) ASSERT author.name IS UNIQUE'], indexes: [], name: 'Author'})
# ----------------------------------------
# relationships
# ----------------------------------------
# (Article)-[:VENUE {}]->(Venue)
# (Article)-[:AUTHOR {}]->(Author)
# (Article)-[:CITED {}]->(Article)
# ----------------------------------------
# ========================================
import functools

# Query results do not have to be fetched with .data(); the author notes that
# alternatives such as .to_series / to_dataframes also exist. Python-native
# dicts and lists are preferred here, so the rows are fetched as plain data
# first and post-processed afterwards.
labels = graph.run("CALL db.labels()").data()
print(labels)
print("--"*20)
# Flatten the row dicts into a single list of their values (the label names).
# A comprehension is used instead of reduce(+) so that an empty result yields
# [] rather than raising TypeError, and so the flattening stays linear.
labels = [value for row in labels for value in row.values()]
print(labels)

# Count the nodes carrying each label; one query is issued per label.
result_dict = {}
for label in labels:
    query = (
        "\n"
        f"MATCH (:{label})\n"
        "RETURN count(*) as count\n"
    )
    query_data = graph.run(query).data()
    # The query returns a single row whose 'count' column holds the total.
    result_dict[label] = query_data[0]['count']
print("--"*20)
for label, count in result_dict.items():
    print(f"{label:10s} : {count:7d}")
print("--"*20)
# Sample output printed by the node-count and relationship-count code:
# [{'label': 'Article'}, {'label': 'Author'}, {'label': 'Venue'}]
# ----------------------------------------
# ['Article', 'Author', 'Venue']
# ----------------------------------------
# Article : 51956
# Author : 80299
# Venue : 4
# {'VENUE': 51956, 'AUTHOR': 140575, 'CITED': 28706}
!pip install py2neo==4.1.3 pandas matplotlib sklearn
from py2neo import Graph
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
pd.set_option('display.float_format', lambda x: '%.3f' % x)
# Change the line of code below to use the IP Address, Bolt Port, and Password of your Sandbox.
# graph = Graph("<Bolt URL>", auth=("neo4j", "<Password>"))
graph = Graph("bolt://18.207.236.58:35753", auth=("neo4j", "<password>"))
## set environment done.
## db schema check
# Fetch the rows returned by db.schema() as plain Python data and show the
# container type and row count before dumping the first row.
db_schema = graph.run("CALL db.schema()").data()
print(type(db_schema))
print(len(db_schema))

# Separator lines reused while printing.
heavy_rule = '==' * 20
light_rule = '--' * 20

print(heavy_rule)
# Each key of the first row is printed as a heading, followed by the items
# stored under it, one per line.
for section, members in db_schema[0].items():
    print(section)
    print(light_rule)
    for member in members:
        print(member)
    print(light_rule)
print(heavy_rule)
## nodes
import functools

# Query results do not have to be fetched with .data(); the author notes that
# alternatives such as .to_series / to_dataframes also exist. Python-native
# dicts and lists are preferred here, so the rows are fetched as plain data
# first and post-processed afterwards.
labels = graph.run("CALL db.labels()").data()
print(labels)
print("--"*20)
# Flatten the row dicts into a single list of their values (the label names).
# A comprehension is used instead of reduce(+) so that an empty result yields
# [] rather than raising TypeError, and so the flattening stays linear.
labels = [value for row in labels for value in row.values()]
print(labels)

# Count the nodes carrying each label; one query is issued per label.
result_dict = {}
for label in labels:
    query = (
        "\n"
        f"MATCH (:{label})\n"
        "RETURN count(*) as count\n"
    )
    query_data = graph.run(query).data()
    # The query returns a single row whose 'count' column holds the total.
    result_dict[label] = query_data[0]['count']
print("--"*20)
for label, count in result_dict.items():
    print(f"{label:10s} : {count:7d}")
print("--"*20)
## relationship
# Pull the 'relationshipType' column out of every row returned by
# db.relationshipTypes().
type_rows = graph.run("CALL db.relationshipTypes()").data()
rel_labels = [row['relationshipType'] for row in type_rows]
rel_labels

# Tally the relationships of each type; one query is issued per type.
rel_result_dict = {}
for rel_type in rel_labels:
    cypher = (
        "\n"
        f"MATCH ()-[rel:{rel_type}]->()\n"
        "RETURN count(*) as count\n"
    )
    rows = graph.run(cypher).data()
    # The single result row carries the total under the 'count' key.
    rel_result_dict[rel_type] = rows[0]['count']
print(rel_result_dict)
# "Leave a comment" (blog page footer text, not part of the script)