-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDiffHandler.py
79 lines (76 loc) · 2.95 KB
/
DiffHandler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import pandas as pd
import numpy as np
from difflib import SequenceMatcher
class DiffHandler:
    """Line-level diff helper built on ``difflib.SequenceMatcher``."""

    def extract_diff(self, old_content: list, new_content: list):
        """
        Function: compare two contents line by line and describe every line
        as one record, in the order of the SequenceMatcher opcodes.
        Line numbers are 1-based. For each opcode:
        - equal:   one record per line with both line numbers and no content
                   (these records give the old -> new line number relationship)
        - insert:  one record per new line (line_new + content_new)
        - delete:  one record per old line (line_old + content_old)
        - replace: the old lines first (as for delete), then the new lines
                   (as for insert)
        params:
        - old_content: a list of strings
        - new_content: a list of strings
        return:
        - [dict: {
            line_old,
            line_new,
            content_old,
            content_new
        }]
        raises:
        - Exception: if an opcode tag other than equal/insert/delete/replace
          is encountered
        """

        def record(old_no=None, new_no=None, old_text=None, new_text=None):
            # Single place that fixes the shape (and key order) of a record.
            return {
                "line_old": old_no,
                "line_new": new_no,
                "content_old": old_text,
                "content_new": new_text
            }

        def removed(start, stop):
            # Records for old lines at 0-based indices [start, stop).
            return [
                record(old_no=idx + 1, old_text=old_content[idx])
                for idx in range(start, stop)
            ]

        def added(start, stop):
            # Records for new lines at 0-based indices [start, stop).
            return [
                record(new_no=idx + 1, new_text=new_content[idx])
                for idx in range(start, stop)
            ]

        entries = []
        matcher = SequenceMatcher(None, old_content, new_content)
        for op, a_lo, a_hi, b_lo, b_hi in matcher.get_opcodes():
            if op == "equal":
                # Unchanged lines: pair the numbers up, content is omitted.
                entries.extend(
                    record(old_no=a_lo + step + 1, new_no=b_lo + step + 1)
                    for step in range(a_hi - a_lo)
                )
            elif op == "insert":
                entries.extend(added(b_lo, b_hi))
            elif op == "delete":
                entries.extend(removed(a_lo, a_hi))
            elif op == "replace":
                # Old side is emitted before the new side.
                entries.extend(removed(a_lo, a_hi))
                entries.extend(added(b_lo, b_hi))
            else:
                raise Exception("Function extract_diff Error: type error!")
        return entries