-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_pdf_and_epub.py
129 lines (103 loc) · 4.56 KB
/
parse_pdf_and_epub.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from PyPDF2 import PdfReader
import ebookatty
import os
# import shutil
import pathlib
"""
Scan directory for pdf and epub files, extract title and author from metadata and replace its filename with that.
When metadata is present, the program proposes for each file its renaming which the user can confirm or not.
In case of confirmation, the program sets the pattern >title by author.pdf< (or .epub)
Example: >messyname.epub< is being renamed to >Robinson Crusoe by Daniel Dafoe.epub<
In case it should not be renamed as proposed, the program asks to rename it manually which you can skip as well.
Hereby, you can check for each file if and how you want it to be renamed.
"""
# Root directory that is scanned (including subdirectories) when the script is
# run directly. This is a placeholder: replace it with a real path before running.
folder = r"C:\path\to\your\ebook\folder"
def main(folder):
    """Propose a metadata-based rename for every file found under *folder*.

    Each file is passed through parse() -> rename() -> save(). Files without
    usable metadata, files the user declines to rename, and files that raise
    an error are collected and reported by terminate() at the end.

    Args:
        folder: Root directory to scan; subdirectories are included.
    """
    untouchedlist = []
    for file_old in collect(folder):
        # collect() descends into subdirectories, so the rename target must be
        # the file's own directory. Using the root folder for every file would
        # move files out of their subfolders instead of renaming them in place.
        file_folder = os.path.dirname(file_old)
        try:
            file_new = parse(file_old, file_folder)
            if file_new is None:
                # Unsupported file type or no usable metadata: leave untouched.
                untouchedlist.append(file_old)
                continue
            file_new = rename(file_new, file_folder)
            save(file_old, file_new, file_folder, untouchedlist)
        except Exception as e:
            # Per-file boundary: one broken file must not abort the whole run.
            print("Datei", file_old, "liefert Fehler\n", e, "\n")
            untouchedlist.append("Fehler: " + str(e) + "-" + file_old)
    terminate(untouchedlist)
def collect(folder):
    """Return the paths of all files below *folder*, subdirectories included.

    No filtering by extension happens here; every file that os.walk() reports
    is returned, joined with the directory it was found in.
    """
    found = []
    for current_dir, _subdirs, names in os.walk(folder):
        for name in names:
            found.append(os.path.join(current_dir, name))
    return found
def parse(file, file_folder):
    """Build the proposed new path for *file* from its embedded metadata.

    Args:
        file: Path of the existing PDF or EPUB file.
        file_folder: Directory the proposed path is placed in.

    Returns:
        ``file_folder\\<title> by <author><ext>`` when both title and author
        are non-blank strings, ``file_folder\\<title><ext>`` when only the
        title is usable (and is not the placeholder "untitled"), or ``None``
        when the file is neither PDF nor EPUB or has no usable title.
        The title/author text is not sanitised here.
    """
    ext = pathlib.Path(file).suffix
    kind = ext.lower()  # treat ".PDF" / ".EPUB" like their lowercase forms

    if kind == ".pdf":
        meta = PdfReader(file).metadata
        if meta is None:
            # PDF without a document-information dictionary: nothing to use.
            return None
        title = meta.title
        author = meta.author
    elif kind == ".epub":
        meta = ebookatty.metadata.epub.Epub(file)
        # NOTE(review): presumably `metadata` is a dict-like mapping, so a
        # missing key raises here - confirm against ebookatty.
        title = meta.metadata["title"]
        author = meta.metadata["author"]
    else:
        # Neither PDF nor EPUB.
        return None

    # Blank strings count as missing so we never propose names such as
    # " by .pdf" or a bare ".pdf".
    has_title = isinstance(title, str) and title.strip() != ""
    has_author = isinstance(author, str) and author.strip() != ""

    if not has_title:
        return None
    if has_author:
        return file_folder + "\\" + title + " by " + author + ext
    if title == "untitled":
        # Placeholder title with no author carries no information.
        return None
    return file_folder + "\\" + title + ext
# Characters that are not allowed in file names (plus newline) and the text
# each one is replaced with.
_UNSAFE_FILENAME_CHARS = str.maketrans({
    "\n": " - ",
    "\\": "-",
    "/": "-",
    "?": "-",
    "\"": "!",
    "*": " ! ",
    "<": " ! ",
    ">": " ! ",
    ":": " - ",
    "|": "-",
})


def rename(filename, file_folder):
    """Return *filename* with characters that are illegal in file names replaced.

    Args:
        filename: Proposed path of the form ``file_folder + "\\" + name``.
        file_folder: Directory the sanitised name is placed in.

    Returns:
        ``file_folder + "\\" + sanitised_name``.
    """
    prefix = file_folder + "\\"
    if filename.startswith(prefix):
        # Strip only the known folder prefix. Splitting on the last backslash
        # would drop everything before a backslash inside the title itself.
        name = filename[len(prefix):]
    else:
        # Path is not under file_folder: fall back to the last path component.
        name = filename.split("\\")[-1]
    return prefix + name.translate(_UNSAFE_FILENAME_CHARS)
def save(file_old, file_new, file_folder, untouchedlist):
    """Ask the user to confirm the proposed rename and perform it.

    If the proposal is declined, the user may type a name manually. That name
    is used as typed (no extension is added) inside *file_folder*. Entering
    "n" or nothing at all skips the file.

    Args:
        file_old: Current path of the file.
        file_new: Proposed new path.
        file_folder: Directory used for a manually entered name.
        untouchedlist: List that receives *file_old* when the file is skipped.

    Raises:
        OSError: Propagated from os.rename() when the rename fails.
    """
    old_name = file_old.split("\\")[-1]
    new_name = file_new.split("\\")[-1]
    answer = input("\nRename\n" + '\033[1m' + old_name + '\033[0m' + "\nto\n" + '\033[1m' + new_name + '\033[0m' + "?" +
                   "\npress y for yes, n for no\n")
    # Tolerate "Y" and stray whitespace so a clear "yes" is not treated as "no".
    if answer.strip().lower() == "y":
        os.rename(file_old, file_new)
        return

    man_filename = input("Type new file name (or press \"n\" for skipping the file)\n").strip()
    # An empty entry would make the target the bare folder path and fail in
    # os.rename(); treat it like an explicit skip instead.
    if man_filename == "" or man_filename.lower() == "n":
        untouchedlist.append(file_old)
        return
    os.rename(file_old, file_folder + "\\" + man_filename)
def terminate(untouched):
    """Print the closing message and list the entries that were not renamed.

    Args:
        untouched: Entries (paths or error descriptions) to report; nothing
            beyond the closing message is printed when it is empty.
    """
    print("\nDONE!\n")
    if untouched:
        print("Not renamed were:")
        # Iterate directly: zip() over a single list would wrap each entry in
        # a 1-tuple and print "('name',)" instead of the name.
        for entry in untouched:
            print(entry)
# Script entry point: process the directory configured in the module-level
# `folder` variable. main() calls collect() once, then parse() -> rename() ->
# save() for each file, and finally terminate() to report what was skipped.
if __name__ == '__main__':
    main(folder=folder)