forked from RTXteam/RTX-KG2
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcreate_indexes_constraints.py
executable file
·121 lines (105 loc) · 4.46 KB
/
create_indexes_constraints.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python3
''' Creates Neo4j index and constraints for KG2
Usage: create_indexes_constraints.py [--configFile=<RTXConfiguration-JSON-file>] [--user=<Neo4j Username>] [--password=<Neo4j Password>]
'''
import argparse
import json
import neo4j
import getpass
import sys
__author__ = 'Erica Wood'
__copyright__ = 'Oregon State University'
__credits__ = ['Stephen Ramsey', 'Erica Wood']
__license__ = 'MIT'
__version__ = '0.1.0'
__maintainer__ = ''
__email__ = ''
__status__ = 'Prototype'
def run_query(query):
    """
    Run a single Cypher statement in its own Neo4j session.

    Uses the module-level ``driver`` object, which the script entry point
    creates before any query is issued.

    :param query: a cypher statement as a string to run
    :return: the object returned by ``session.run`` for the statement
    """
    session = driver.session()
    try:
        result = session.run(query)
    finally:
        # Close the session even when the statement fails, so that a bad
        # query does not leak the session
        session.close()
    # NOTE(review): the result is handed back after its session has been
    # closed (unchanged from before); whether its records can still be read
    # at that point depends on the neo4j driver version -- confirm.
    return result
def node_labels():
    """
    Collect one node label per distinct label combination found in Neo4j.

    :return: a list holding the first label of every non-empty
             ``labels(n)`` value returned by the database
    """
    # Each returned row is a dictionary whose key is "labels(n)" and whose
    # value is the list of labels for one distinct label combination
    labels = "MATCH (n) RETURN distinct labels(n)"
    data = run_query(labels).data()
    label_list = []
    # Iterate through the list of dictionaries to create a list
    # of node labels
    for row in data:
        for value in row.values():
            # labels(n) is an empty list for a node that has no label;
            # there is nothing to index for it, and value[0] would raise
            # IndexError, so skip it
            if value:
                label_list.append(value[0])
    return label_list
def create_index(label_list, property_name):
    """
    Issue one ``CREATE INDEX`` statement per node label.

    :param label_list: a list of the node labels in Neo4j
    :param property_name: the node property to index under each label
    """
    for node_label in label_list:
        # The label is wrapped in backticks; the property name is
        # inserted as given
        statement = "".join(("CREATE INDEX ON :`", node_label, "` (", property_name, ")"))
        run_query(statement)
def constraint(label_list):
    """
    Create a uniqueness constraint on the ``id`` property of ``Base`` nodes.

    :param label_list: a list of the node labels in Neo4j (part of the
                       signature, but not read by this function)
    """
    # A single constraint on the Base label is issued, whatever
    # label_list contains
    run_query("CREATE CONSTRAINT ON (n:Base) ASSERT n.id IS UNIQUE")
if __name__ == '__main__':
    # Script entry point: resolve the Neo4j credentials, connect over bolt,
    # then create the node-property indexes and the uniqueness constraint.
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--configFile", type=str, help="RTXConfiguration JSON file containing the password", required=False, default=None)
    parser.add_argument("-u", "--user", type=str, help="Neo4j Username", default=None, required=False)
    parser.add_argument("-p", "--password", help="Neo4j Password", type=str, default=None, required=False)
    arguments = parser.parse_args()
    config_file_name = arguments.configFile

    # Reject inconsistent option combinations. Exit with a non-zero status
    # so that a calling shell script can tell the run failed.
    if arguments.password is not None and config_file_name is not None:
        print("Not allowed to specify both the configFile and password command-line options", file=sys.stderr)
        sys.exit(1)
    if arguments.user is None and config_file_name is None:
        print("Must specify a username on the command-line or via the RTXConfiguration config file", file=sys.stderr)
        sys.exit(1)
    if arguments.user is not None and config_file_name is not None:
        print("Cannot specify the username on both the command-line and the RTXConfiguration config file", file=sys.stderr)
        sys.exit(1)

    neo4j_password = None
    neo4j_user = None
    if config_file_name is not None:
        print(config_file_name)
        # Read the credentials from the KG2 -> neo4j section of the config
        # file; the "with" block closes the file handle once it is parsed
        with open(config_file_name, 'r') as config_file:
            config_data = json.load(config_file)
        config_data_kg2_neo4j = config_data['KG2']['neo4j']
        neo4j_user = config_data_kg2_neo4j['username']
        neo4j_password = config_data_kg2_neo4j['password']

    # Password precedence: config file, then command line, then a prompt
    if neo4j_password is None:
        neo4j_password = arguments.password
    if neo4j_password is None:
        neo4j_password = getpass.getpass("Please enter the Neo4j database password: ")
    if arguments.user is not None:
        neo4j_user = arguments.user

    bolt = 'bolt://127.0.0.1:7687'
    # "driver" must stay a module-level name: run_query() looks it up
    driver = neo4j.GraphDatabase.driver(bolt, auth=(neo4j_user, neo4j_password))
    try:
        node_label_list = node_labels() + ['Base']

        print("NOTE: If you are running create_indexes_constraints.py standalone and not via tsv-to-neo4j.sh, please make sure to re-set the read-only status of" +
              " the Neo4j database to TRUE", file=sys.stderr)

        # Create Indexes on Node Properties
        for indexed_property in ("category",
                                 "category_label",
                                 "deprecated",
                                 "full_name",
                                 "name",
                                 "provided_by",
                                 "replaced_by",
                                 "update_date"):
            create_index(node_label_list, indexed_property)

        constraint(node_label_list)
    finally:
        # Release the driver even if one of the statements above failed
        driver.close()