-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutilities.py
87 lines (74 loc) · 2.57 KB
/
utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import shutil
from typing import Iterable
def merge_files(filenames: list, outfile: str):
    """Concatenate several text files into ``outfile``, line by line.

    Files are appended in the order given. A path that does not exist is
    reported on stdout and skipped. The number of lines copied from each
    file, and the overall total, are printed as the merge proceeds.

    If a source file does not end with a newline, one is inserted before the
    next line that gets written, so the last record of one file is never
    fused with the first record of the next. Nothing is appended after the
    final line of the merged output.

    Args:
        filenames: Paths of the UTF-8 text files to merge, in output order.
        outfile: Path of the merged file; it is created or overwritten.
    """
    total = 0
    # True while the output is empty or the last line written ended in "\n".
    at_line_start = True
    with open(outfile, "w", encoding="utf-8") as out:
        for fn in filenames:
            if not os.path.exists(fn):
                print(fn + " does not exist.")
                continue
            count = 0
            with open(fn, "r", encoding="utf-8") as src:
                for line in src:
                    if not at_line_start:
                        # Previous file ended without a newline: terminate
                        # its last line before starting this one.
                        out.write("\n")
                    out.write(line)
                    at_line_start = line.endswith("\n")
                    count += 1
            total += count
            print("#lines in " + fn + ": " + str(count))
    print("total lines: " + str(total))
def extract_lines(lines: Iterable[int], infile: str, outfile: str):
    """Copy the selected lines of ``infile`` to ``outfile``.

    Lines are numbered from 1. Selected lines are written in the order they
    occur in ``infile``, each at most once, regardless of the order or
    repetition of the numbers in ``lines``.

    Args:
        lines: 1-based line numbers to keep (list, set, range, generator...).
        infile: Path of the UTF-8 text file to read.
        outfile: Path of the file to write; it is created or overwritten.

    Raises:
        TypeError: If ``lines`` is a string; an int line number can never be
            looked up in a ``str``.
    """
    if isinstance(lines, str):
        raise TypeError("lines must be an iterable of int line numbers, not str")
    # Build the lookup once. Testing against a list would rescan it for every
    # input line, and a generator would be exhausted by the first test.
    if isinstance(lines, (set, frozenset, dict, range)):
        wanted = lines
    else:
        wanted = frozenset(lines)
    with open(infile, "r", encoding="utf-8") as src, open(
        outfile, "w", encoding="utf-8"
    ) as dst:
        for lineno, text in enumerate(src, start=1):
            if lineno in wanted:
                dst.write(text)
# [left,right)
def get_partof_file(infile: str, outfile: str, left: int, right: int):
    """Copy the lines with index in ``[left, right)`` from ``infile``.

    Line indices are 0-based and the range is half-open: the line at index
    ``left`` is copied, the line at index ``right`` is not. If the file has
    fewer than ``right`` lines, copying stops at end of file. An empty range
    (``left >= right``) produces an empty ``outfile``.

    Args:
        infile: Path of the UTF-8 text file to read.
        outfile: Path of the file to write; it is created or overwritten.
        left: Index of the first line to copy.
        right: Index one past the last line to copy.
    """
    with open(infile, "r", encoding="utf-8") as src, open(
        outfile, "w", encoding="utf-8",
    ) as dst:
        # Iterating the file ends at EOF, so a `right` far beyond the file
        # length costs nothing extra.
        for index, line in enumerate(src):
            if index >= right:
                break
            if index >= left:
                dst.write(line)
def get_lines(fn: str):
    """Return the number of lines in the UTF-8 text file ``fn``.

    A final line without a trailing newline still counts as a line; an empty
    file yields 0.
    """
    with open(fn, "r", encoding="utf-8") as handle:
        # Stream the file so only one line is held in memory at a time.
        return sum(1 for _line in handle)
if __name__ == "__main__":
    # Earlier one-off jobs, kept commented out for reference.
    # dir_onto = "dbpedia\\ontology"
    # dir_info = "dbpedia\\infobox"
    # fns = [
    #     "instance-types_lang=en_specific.ttl",
    #     "instance-types_lang=en_transitive.ttl",
    #     "mappingbased-literals_lang=en.ttl",
    #     "mappingbased-objects_lang=en.ttl",
    #     "specific-mappingbased-properties_lang=en.ttl",
    # ]
    # tot = 0
    # for pid in range(10):
    #     fn_rules = os.path.join("test_v8_e3", "p" + str(pid), "rules.dlp")
    #     if os.path.exists(fn_rules):
    #         tot += get_lines(fn_rules)
    #     else:
    #         print(fn_rules+" does not exist.")
    # print(tot)
    # fin = os.path.join("test_v8", "data", "facts_unlimit.dlp")
    # fout = os.path.join("test_v8", "data", "facts_unlimited.dlp")
    # get_partof_file(fin, fout, 43613730, 56155009)

    # Move data files to the same dir: every entry of test_v8_e1/data whose
    # name is not already present in data/ is moved there; entries whose
    # name already exists in data/ are left where they are.
    # A set makes each name lookup constant-time.
    data_files = set(os.listdir("data"))
    src_dir = os.path.join("test_v8_e1", "data")
    cnt = 0
    for fn in os.listdir(src_dir):
        if fn not in data_files:
            src = os.path.join(src_dir, fn)
            dst = os.path.join("data", fn)
            shutil.move(src, dst)
            cnt += 1
    # Number of entries that were moved.
    print(cnt)