-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMySQLOperator.py
executable file
·596 lines (541 loc) · 23.1 KB
/
MySQLOperator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
from typing import List
import mysql.connector.pooling
import GlobalConstants
from MySQLPool import MySQLPool
from models.RepoInfo import RepoInfo
from models.BlobInfo import BlobInfo
from ConfigOperator import ConfigOperator
from models.CommitInfo import CommitInfo
class MySQLOperator(object):
def __init__(
self,
#mysql_pool: mysql.connector.pooling.MySQLConnectionPool
mysql_pool: MySQLPool
) -> None:
self.mysql_pool = mysql_pool
# """
# define the tablenames
# """
# if repoInfo is not None:
# self.repoInfo = repoInfo
# self.tablename_dict = {}
# for rel_table in GlobalConstants.REL_TABLES:
# self.tablename_dict[
# rel_table
# ] = "{repo_id}{separator}{rel_table}".format(
# repo_id=repoInfo.repo_id,
# separator=GlobalConstants.SEPARATOR,
# rel_table=rel_table,
# )
def form_tablename(self, repoInfo: RepoInfo, rel_table: str):
"""
get the tablename for a target repo
"""
tablename = "{repo_id}{separator}{rel_table}".format(
repo_id=repoInfo.repo_id,
separator=GlobalConstants.SEPARATOR,
rel_table=rel_table,
)
return tablename
def create_table(self, rel_table: str, tablename: str):
"""
Function: create the mysql tables used for running the program
"""
def read_template(template_filepath: str):
with open(template_filepath, "r") as f:
return f.read()
template_filepath = "sql_templates/{rel_table}.sql".format(
rel_table=rel_table
)
sql = read_template(template_filepath=template_filepath).format(
tablename=tablename
)
self.mysql_pool.execute(sql, commit=True)
def create_repositories_table(self):
"""
Function: create repositories table
"""
with open("sql_templates/repositories.sql", "r") as f:
sql = f.read()
self.mysql_pool.execute(sql, commit=True)
def init_repositories_table(self, repoInfos: List[RepoInfo]):
"""
Function: insert all the repositories into repositories table
params:
- repoInfos: a list of RepoInfo objects
"""
sql = "insert ignore into repositories (ownername, reponame) values (%s, %s)"
args = []
for repoInfo in repoInfos:
ownername = repoInfo.ownername
reponame = repoInfo.reponame
args.append((ownername, reponame))
self.mysql_pool.executemany(sql, args=args, commit=True)
def update_handled_repository(self, repoInfo: RepoInfo):
"""
Function: insert all the repositories into repositories table
params:
- repoInfos: a list of RepoInfo objects
"""
ownername = repoInfo.ownername
reponame = repoInfo.reponame
"""
self.cursor.execute(
"update repositories set handled = 1 where ownername=%s and reponame=%s",
(ownername, reponame),
)
"""
sql = "update repositories set handled = 1 where ownername=%s and reponame=%s"
self.mysql_pool.execute(sql, args=(ownername, reponame), commit=True)
def init_steps_table(self, repoInfo: RepoInfo):
"""
Function: initialize the handled_repositories table
params:
- RepoInfo object
"""
steps_tablename = "{repo_id}{separator}steps".format(
repo_id=repoInfo.repo_id,
separator=GlobalConstants.SEPARATOR,
)
steps = GlobalConstants.STEPS
sql = "insert ignore into `{steps_tablename}` (step_id, step_name, handled) values (%s, %s, %s)".format(steps_tablename=steps_tablename)
args = []
for step_id, step_name in steps.items():
args.append((step_id, step_name, 0))
self.mysql_pool.executemany(sql, args=args, commit=True)
def get_supported_blobs(self, repoInfo: RepoInfo, langs: List[str]) -> List:
"""
Function: get the paths of supported languages' blobs for a target repo
params:
- repoInfo: the object of RepoInfo
- langs: the list of supported languages
return:
- a list of tuples: (blob_sha, lang)
"""
bcr_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blob_commit_relations")
lang_str = ",".join(langs)
sql = "select distinct blob_sha, lang from `{bcr_tablename}` where lang in ('{lang_str}')".format(bcr_tablename=bcr_tablename, lang_str=lang_str)
res = self.mysql_pool.execute(sql)
return res
def truncate_table(self, rel_table: str, repoInfo: RepoInfo):
"""
Function: truncate a table
params:
- rel_table: the relative tablename waiting for truncate
- repoInfo: the repository that the rel_table belongs to
"""
tablename = self.form_tablename(repoInfo=repoInfo, rel_table=rel_table)
sql = "truncate table `{tablename}`".format(tablename=tablename)
self.mysql_pool.execute(sql, commit=True)
def delete_all_tables_4_project(self, repoInfo: RepoInfo):
"""
Function: delete all the tables for a target repo
params:
- RepoInfo object
"""
for rel_tablename in GlobalConstants.REL_TABLES:
tablename = "{id}{separator}{rel_tablename}".format(
id=repoInfo.repo_id,
rel_tablename=rel_tablename,
separator=GlobalConstants.SEPARATOR,
)
sql = "drop table if exists `{tablename}`".format(tablename=tablename)
self.cursor.execute(sql)
print("Finish deleting all mysql tables of {id}".format(id=repoInfo.repo_id))
def get_blob_sha_id_dict(self):
"""
Function: get the blob sha and id relation dict
"""
sha_id_dict = {}
self.cursor.execute(
"select id, sha from `{blob_tablename}`".format(
blob_tablename=self.tablename_dict["blobs"]
)
)
items = self.cursor.fetchall()
for item in items:
id = item["id"]
sha = item["sha"]
sha_id_dict[sha] = id
return sha_id_dict
def get_commit_sha_id_dict(self):
"""
Function: get the commit sha and id relation dict
"""
sha_id_dict = {}
self.cursor.execute(
"select id, sha from `{commit_tablename}`".format(
commit_tablename=self.tablename_dict["commits"]
)
)
items = self.cursor.fetchall()
for item in items:
id = item["id"]
sha = item["sha"]
sha_id_dict[sha] = id
return sha_id_dict
def get_commit_ids(self) -> List[int]:
"""
Function get the list of commit ids
return:
- commit id list
"""
self.cursor.execute(
"select id from `{commit_tablename}` order by id asc".format(
commit_tablename=self.tablename_dict["commits"]
)
)
result = self.cursor.fetchall()
result = [id["id"] for id in result]
return result
"""
query commit id according to commit sha (from commit_shas table)
"""
tablename = self.form_tablename(repoInfo=repoInfo, rel_table="commit_shas")
sql = "select id from `{commit_shas_tablename}` where sha='{sha}'".format(commit_shas_tablename=tablename, sha=sha)
res = self.mysql_pool.execute(sql)
return res[0][0]
def get_filepath_sha_id_dict(self):
"""
Function: get the filepath sha and id relation dict
"""
sha_id_dict = {}
self.cursor.execute(
"select id, sha from `{filepath_tablename}`".format(
filepath_tablename=self.tablename_dict["filepaths"]
)
)
items = self.cursor.fetchall()
for item in items:
id = item["id"]
sha = item["sha"]
sha_id_dict[sha] = id
return sha_id_dict
def get_filepath_id_dict(self):
"""
Function: get the filepath and id relation dict
"""
path_id_dict = {}
self.cursor.execute(
"select id, filepath from `{filepath_tablename}`".format(
filepath_tablename=self.tablename_dict["filepaths"]
)
)
items = self.cursor.fetchall()
for item in items:
id = item["id"]
filepath = item["filepath"]
path_id_dict[filepath] = id
return path_id_dict
"""
def get_method_id_func_id_and_blob_id_dict(self):
"""
"""
Function: get method id and function id relationship, as well as get method id and blob id relationship
return:
- {method_id: (function_id, blob_id)}
"""
"""
result = {}
self.cursor.execute(
"select function_id, blob_id, id as method_id from `{blob_method_tablename}`".format(
blob_method_tablename=self.tablename_dict["blob_methods"]
)
)
items = self.cursor.fetchall()
for item in items:
function_id = item["function_id"]
blob_id = item["blob_id"]
method_id = item["method_id"]
result[method_id] = (function_id, blob_id)
return result
"""
def get_function_id_interface_id_and_blob_sha_dict(self):
"""
Function: get function id and interface id relationship, as well as get function id and blob sha relationship
return:
- {function_id: (interface_id, blob_sha)}
"""
result = {}
bf_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blob_functions")
sql = "select interface_id, blob_sha, id as function_id from `{bf_tablename}`".format(bf_tablename=bf_tablename)
items = mysqlOp.mysql_pool.execute(sql)
for item in items:
function_id = item["function_id"]
blob_sha = item["blob_sha"]
interface_id = item["interface_id"]
result[function_id] = (interface_id, blob_sha)
return result
def get_fp_lineno_function_id_dict(self, repoInfo: RepoInfo, commit_sha: str) -> dict:
"""
Function: get the dict: filepath -> lineno -> function_id
params:
- commit_sha: str
return:
- {filepath_sha: {lineno: function_id}}
"""
result = {}
bf_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blob_functions")
bcr_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blob_commit_relations")
sql = "select bf.id, bcr.filepath_sha, bf.start, bf.end from `{bf_tablename}` bf, `{bcr_tablename}` bcr where bf.blob_sha=bcr.blob_sha and bcr.commit_sha='{commit_sha}'".format(
bf_tablename=bf_tablename,
bcr_tablename=bcr_tablename,
commit_sha=commit_sha
)
res = self.mysql_pool.execute(sql)
for item in res:
function_id = item[0]
filepath_sha = item[1]
start = item[2]
end = item[3]
result.setdefault(filepath_sha, {})
for lineno in range(start, end + 1):
result[filepath_sha][lineno] = function_id
return result
def get_function_id_name_dict(self, repoInfo: RepoInfo) -> dict:
"""
Function: get the function id -> name dict
return:
- {function_id: function_name}
"""
result = {}
bf_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blob_functions")
sql = "select id, function_name from `{bf_tablename}`".format(bf_tablename=bf_tablename)
items = self.mysql_pool.execute(sql)
for item in items:
id = item[0]
function_name = item[1]
result[id] = function_name
return result
def get_commit_id_by_sha(self, repoInfo: RepoInfo, sha: str) -> int:
"""
query commit id according to commit sha (from commit_shas table)
"""
tablename = self.form_tablename(repoInfo=repoInfo, rel_table="commit_shas")
sql = "select id from `{commit_shas_tablename}` where sha='{sha}'".format(commit_shas_tablename=tablename, sha=sha)
res = self.mysql_pool.execute(sql)
return res[0][0]
"""
def get_blob_ids_by_commit_id(self, commit_id: int) -> List[int]:
"""
"""
Function: get all the commit related blob ids
params:
- commit_id: the id of the commit
return:
- a list of blob ids
"""
"""
self.cursor.execute(
"select blob_id from `{blob_commit_relation_tablename}` where commit_id=%s".format(
blob_commit_relation_tablename=self.tablename_dict[
"blob_commit_relations"
]
),
(commit_id,),
)
blob_ids = self.cursor.fetchall()
blob_ids = [item["blob_id"] for item in blob_ids]
return blob_ids
"""
def get_blob_shas_by_commit_sha(self, commit_sha: str) -> List[str]:
"""
Function: get all the commit related blob shas
params:
- commit_sha: the sha of the commit
return:
- a list of blob shas
"""
bcr_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blob_commit_relations")
sql = "select blob_sha from `{bcr_tablename}` where commit_sha=%s".format(bcr_tablename=bcr_tablename)
blob_shas = mysqlOp.mysql_pool.execute(sql, args=(commit_sha,))
blob_shas = [item["blob_sha"] for item in blob_shas]
return blob_shas
def get_repo_id_by_names(self, repoInfo: RepoInfo) -> int:
"""
Function: get the id of repository through ownername and reponame
params:
- RepoInfo object
return:
- id of repository
"""
res = self.mysql_pool.execute(
"select id from repositories where ownername=%s and reponame=%s",
(repoInfo.ownername, repoInfo.reponame),
)
if len(res) == 0:
return None
else:
return res[0][0]
def get_commit_shas(self, repoInfo: RepoInfo) -> List[str]:
"""
Function: get the commits' shas of the target repository.
params:
- repoInfo: the object of RepoInfo
return: the List of commit shas
"""
commits_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="commits")
res = self.mysql_pool.execute("select sha from `{commits_tablename}`".format(commits_tablename=commits_tablename))
commit_shas = [sha[0] for sha in res]
return commit_shas
def insert_blob_sha(self, blob_sha: str, repoInfo: RepoInfo):
"""
insert shas into blobs table
"""
tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blob_shas")
sql = "insert ignore into `{blobs_tablename}` (sha) values ('{blob_sha}')".format(blobs_tablename=tablename, blob_sha=blob_sha)
self.mysql_pool.execute(sql, commit=True)
def insert_filepaths(self, filepaths, repoInfo: RepoInfo):
"""
insert filepath item
params:
- filepaths: [(filepath, sha)]
- repoInfo: the object of RepoInfo
"""
filepaths_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="filepaths")
sql = "insert ignore into `{filepaths_tablename}` (filepath, sha) values (%s, %s)".format(filepaths_tablename=filepaths_tablename)
self.mysql_pool.executemany(sql, args=filepaths, commit=True)
def insert_blob_commit_relations(self, blob_commit_relations, repoInfo: RepoInfo):
"""
insert blob commit relation item
params:
- blob_commit_relations: [(blob_sha, commit_sha, filepath_sha, lang)]
- repoInfo: the object of RepoInfo
"""
bcr_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blob_commit_relations")
sql = "insert ignore into `{bcr_tablename}` (blob_sha, commit_sha, filepath_sha, lang) values (%s, %s, %s, %s)".format(bcr_tablename=bcr_tablename)
self.mysql_pool.executemany(sql, args=blob_commit_relations, commit=True)
def insert_blob_shas(self, blob_shas: List[str], repoInfo: RepoInfo):
"""
insert shas into blobs table
params:
- blob_shas: [(blob_sha,)]
return: None
"""
tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blobs")
sql = "insert ignore into `{blobs_tablename}` (sha) values (%s)".format(blobs_tablename=tablename)
args = [(blob_sha,) for blob_sha in blob_shas]
self.mysql_pool.executemany(sql, args=args, commit=True)
def insert_commit_shas(self, commit_shas: List[str], repoInfo: RepoInfo):
"""
insert the extracted commit shas
"""
tablename = self.form_tablename(repoInfo=repoInfo, rel_table="commits")
sql = "insert ignore into `{commit_shas_tablename}` (sha) values (%s)".format(commit_shas_tablename=tablename)
args = [(commit_sha,) for commit_sha in commit_shas]
self.mysql_pool.executemany(sql, args=args, commit=True)
def insert_commit_sha(self, commit_sha: str, repoInfo: RepoInfo):
"""
insert a extracted commit sha
"""
tablename = self.form_tablename(repoInfo=repoInfo, rel_table="commits")
sql = "insert ignore into `{commit_shas_tablename}` (sha) values ('{commit_sha}')".format(commit_shas_tablename=tablename, commit_sha=commit_sha)
self.mysql_pool.execute(sql, commit=True)
def insert_parent_shas(self, commit_sha: str, parent_shas: List[str], repoInfo: RepoInfo):
"""
insert commit and parent relations
"""
tablename = self.form_tablename(repoInfo=repoInfo, rel_table="commit_relations")
sql = "insert ignore into `{commit_relation_tablename}` (commit_sha, parent_sha) values (%s, %s)".format(commit_relation_tablename=tablename)
if len(parent_shas) == 0:
args = [(commit_sha, None)]
else:
args = [(commit_sha, parent_sha) for parent_sha in parent_shas]
self.mysql_pool.executemany(sql, args=args, commit=True)
def is_commit_exist(self, commit_sha: str, repoInfo: RepoInfo):
"""
Function: check whether a commit sha exists in table commit_shas
params:
- commit_sha: str
- repoInfo: the object of RepoInfo
return:
- True: exist
- False: not exist
"""
tablename = self.form_tablename(repoInfo=repoInfo, rel_table="commits")
sql = "select * from `{commit_shas_tablename}` where sha='{commit_sha}'".format(commit_shas_tablename=tablename, commit_sha=commit_sha)
res = self.mysql_pool.execute(sql)
if len(res) > 0:
return True
else:
return False
def step_is_handled(self, step_id: str, repoInfo: RepoInfo) -> bool:
"""
Function: whether the step is already handled
params:
- step_id: the id of the step
- repoInfo: the RepoInfo object
return:
- bool
"""
tablename = self.form_tablename(repoInfo=repoInfo, rel_table="steps")
sql = "select handled from `{steps_tablename}` where step_id='{step_id}'".format(steps_tablename=tablename, step_id=step_id)
res = self.mysql_pool.execute(sql)
if res[0][0] == 1:
result = True
else:
result = False
return result
def update_step(self, step_id: int, repoInfo: RepoInfo):
"""
Function: Update the step table for already handled
params:
- step_id: the id of the step
- repoInfo: Repo object
"""
tablename = self.form_tablename(repoInfo=repoInfo, rel_table="steps")
sql = "update `{steps_tablename}` set handled=1 where step_id='{step_id}'".format(steps_tablename=tablename, step_id=step_id)
self.mysql_pool.execute(sql, commit=True)
def insert_blob_changes(self, repoInfo: RepoInfo, blob_changes: List):
tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blob_changes")
sql = "insert into `{blob_changes_tablename}` (a_commit_sha, a_blob_sha, a_filepath, a_filetype, b_commit_sha, b_blob_sha, b_filepath, b_filetype, change_type, change_content) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)".format(blob_changes_tablename=tablename)
args = [
(change["a_commit_sha"], change["a_blob_sha"], change["a_filepath"], change["a_filetype"], change["b_commit_sha"], change["b_blob_sha"], change["b_filepath"], change["b_filetype"], change["change_type"], change["change_content"])
for change in blob_changes
]
self.mysql_pool.executemany(sql, args=args, commit=True)
def get_blob_shas(self, repoInfo: RepoInfo) -> List[str]:
"""
Function: get the blob' shas of the target repository.
params:
- repoInfo: the RepoInfo object
return: the List of blob shas
"""
blobs_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blobs")
res = self.mysql_pool.execute("select sha from `{blobs_tablename}`".format(blobs_tablename=blobs_tablename))
blob_shas = [sha[0] for sha in res]
return blob_shas
def insert_blob_functions(self, repoInfo: RepoInfo, args: List):
"""
Function: insert items of `blob_functions` table
params:
- repoInfo: the object of RepoInfo
- args: (id, blob_sha, startlineno, endlineno, function_name)
"""
bf_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blob_functions")
sql = "insert into `{bf_tablename}` (id, blob_sha, start, end, function_name) values (%s, %s, %s, %s, %s)".format(bf_tablename=bf_tablename)
self.mysql_pool.executemany(sql, args=args, commit=True)
def get_max_interface_id(self, repoInfo: RepoInfo) -> int:
"""
Function: get the max interface id
params:
- repoInfo: the object of RepoInfo
return:
- int
"""
bf_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="blob_functions")
sql = "select max(interface_id) as max_interface_id from `{bf_tablename}`".format(bf_tablename=bf_tablename)
res = self.mysql_pool.execute(sql)
if res is not None:
return res[0]
else:
return 0
def insert_function_relations(self, repoInfo: RepoInfo, changes: List):
"""
Function: insert function relations
params:
- repoInfo: the object of RepoInfo
"""
fr_tablename = self.form_tablename(repoInfo=repoInfo, rel_table="function_relations")
sql = "insert into `{fr_tablename}` (function_id_1, function_id_2, `change`) values (%s, %s, %s)".format(fr_tablename=fr_tablename)
self.mysql_pool.executemany(sql, args=changes, commit=True)