-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
226 lines (183 loc) · 6.13 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import ebooklib, os, shutil, argparse
from ebooklib import epub
from bs4 import BeautifulSoup
from fpdf import FPDF
from pdf2image import convert_from_path
from PIL import Image
from Book import *
def _first_dc_value(book, field, fallback):
    """Return the first Dublin Core value stored under *field*, or *fallback*."""
    entries = book.get_metadata('DC', field)
    # get_metadata yields a list of (value, attributes) tuples; it is empty
    # when the EPUB does not declare the field at all.
    if entries and entries[0][0]:
        return entries[0][0]
    return fallback


def epub_to_obj(file_path):
    """Read an EPUB file and convert it into a ``Book`` object.

    Args:
        file_path: Path of the ``.epub`` file to read.

    Returns:
        A ``Book`` whose ``title`` and ``author`` come from the EPUB's Dublin
        Core metadata and whose ``chapters`` hold one ``Book.Chapter`` per
        document item, in the order ebooklib yields them.

    EPUBs that omit the title or creator no longer abort with ``IndexError``;
    the placeholders ``'Untitled'`` / ``'Unknown'`` are used instead.
    """
    obj = Book()
    book = epub.read_epub(file_path)
    obj.title = _first_dc_value(book, 'title', 'Untitled')
    obj.author = _first_dc_value(book, 'creator', 'Unknown')

    for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        heading, content = parse_epub_chapter(item.get_content())
        obj.chapters.append(Book.Chapter(heading, content))
    return obj
def parse_epub_chapter(raw_ch):
    """Split the markup of one EPUB document into heading and body text.

    Args:
        raw_ch: Raw HTML/XHTML of a single EPUB document item.

    Returns:
        A ``(heading, content)`` tuple of strings. ``heading`` is the first
        text node found directly inside an ``<h2>`` followed by a space, or
        ``''`` when there is none. ``content`` is every text node whose parent
        tag is not in the skip list, each followed by a space.
    """
    # Parent tags whose text is not body text; 'h2' is excluded because it
    # is reported separately as the heading.
    skipped_parents = ['[document]', 'noscript', 'header', 'html', 'meta',
                       'head', 'input', 'script', 'h2']
    soup = BeautifulSoup(raw_ch, 'html.parser')
    text_nodes = soup.find_all(text=True)

    # heading: only the first <h2> text node is used
    first_h2_text = next(
        (node for node in text_nodes if node.parent.name == 'h2'), None)
    heading = '' if first_h2_text is None else '{} '.format(first_h2_text)

    # content: remaining text nodes, space-terminated and concatenated
    content = ''.join(
        '{} '.format(node)
        for node in text_nodes
        if node.parent.name not in skipped_parents
    )
    return heading, content
def obj_to_html(obj):
    """Write a book object to ``product.html`` in the current directory.

    Args:
        obj: Object with string attributes ``title`` and ``author`` and an
            iterable ``chapters`` whose items have string attributes
            ``heading`` and ``content``.

    The document layout is: a ``<title>``, ``<h1 id="title">``,
    ``<h1 id="author">`` and one ``<div><h2>heading</h2><p>content</p></div>``
    per chapter.

    All text is HTML-escaped so that ``<``, ``>`` and ``&`` in the book cannot
    corrupt the markup. The file is written as pure ASCII with non-ASCII
    characters emitted as numeric character references, so it reads back the
    same under any ASCII-compatible encoding.
    """
    from html import escape  # local import keeps this block self-contained

    def text(value):
        # quote=False: the values are only used as element text, never
        # inside attribute values.
        return escape(value, quote=False)

    parts = [
        '<!DOCTYPE html>',
        '<html>',
        '<head>',
        '<title>' + text(obj.title) + '</title>',
        '</head>',
        '<body>',
        '<h1 id="title">' + text(obj.title) + '</h1>',
        '<h1 id="author">' + text(obj.author) + '</h1>',
    ]
    for ch in obj.chapters:
        parts.append('<div>')
        parts.append('<h2>' + text(ch.heading) + '</h2>')
        parts.append('<p>' + text(ch.content) + '</p>')
        parts.append('</div>')
    parts.append('</body>')
    parts.append('</html>')

    # The markup is fully built before the file is opened, so a bad attribute
    # on obj cannot leave a half-written file; `with` closes the handle even
    # if the write itself fails.
    with open('product.html', 'w', encoding='ascii',
              errors='xmlcharrefreplace') as f:
        f.write(''.join(parts))
def html_to_obj(path='product.html'):
    """Rebuild a ``Book`` from the HTML file produced by ``obj_to_html``.

    Args:
        path: HTML file to read; defaults to ``'product.html'``.

    Returns:
        A ``Book`` with ``title`` and ``author`` taken from the
        ``<h1 id="title">`` / ``<h1 id="author">`` elements and one
        ``Book.Chapter`` per ``<div>``, built from its ``<h2>`` and ``<p>``.

    Raises:
        AttributeError: If one of the expected elements is missing.
    """
    # `with` guarantees the file handle is released (it used to leak).
    with open(path, 'r') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    obj = Book()
    # get_text() always returns a str. Tag.string is None for an element with
    # no children or with more than one child, e.g. the empty <h2></h2>
    # written for a chapter that has no heading.
    obj.title = soup.find('h1', id='title').get_text()
    obj.author = soup.find('h1', id='author').get_text()

    for section in soup.find_all('div'):
        heading = section.find('h2').get_text()
        content = section.find('p').get_text()
        obj.chapters.append(Book.Chapter(heading, content))
    return obj
# Typography settings used by TINY_BOOK when laying out the PDF.

# Font families handed to FPDF.set_font.
title_type = 'Arial'
body_type = 'Times'

# Font sizes handed to FPDF.set_font (cover, chapter heading, body/footer).
title_size = 17.5
heading_size = 15
body_size = 8

# Single-value grey levels handed to FPDF.set_text_color.
black = 0
grey = 128
class TINY_BOOK(FPDF):
    """FPDF document providing the cover, footer and chapter layout of the book."""

    def print_cover_page(self, title: str, author: str):
        """Add a page showing the upper-cased title in black and author in grey.

        Args:
            title: Book title; printed first.
            author: Author name; printed below the title.
        """
        self.add_page()
        self.ln(5)
        self.set_text_color(black)
        self.set_font(title_type, 'B', title_size)

        # title
        title_line = title.upper()
        self.multi_cell(0, h=8, txt=title_line, align='L')

        # author, in grey; the colour is reset afterwards so following
        # text is black again
        self.set_text_color(grey)
        author_line = author.upper()
        self.multi_cell(0, h=8, txt=author_line, align='L')
        self.set_text_color(black)

    def footer(self):
        """Print the centred page number in bold grey body font.

        Overrides the FPDF ``footer`` hook.
        """
        # A negative y is measured from the bottom of the page: 7 units up.
        self.set_y(-7)
        self.set_font(body_type, 'B', body_size)
        self.set_text_color(grey)
        page_label = str(self.page_no())
        self.cell(0, h=0, txt=page_label, align='C')

    def chapter_title(self, heading):
        """Print *heading* centred, in the regular body font at heading size."""
        self.set_text_color(black)
        self.set_font(body_type, '', heading_size)
        # vertical gap above the heading
        self.ln(5)
        self.multi_cell(0, h=5, txt=heading, align='C')

    def chapter_body(self, content):
        """Print *content* as justified text in the regular body font."""
        self.set_text_color(black)
        self.set_font(body_type, '', body_size)
        self.multi_cell(0, 4, content, align='J')

    def print_chapter(self, heading, content):
        """Start a new page and print one chapter: heading first, then body."""
        self.add_page()
        self.chapter_title(heading)
        self.chapter_body(content)
def obj_to_pdf(obj, out_name):
    """Render a book object into a 75 x 150 PDF file.

    Args:
        obj: Book-like object exposing ``title``, ``author`` and ``chapters``
            (each chapter with ``heading`` and ``content``).
        out_name: Path of the PDF file to write.
    """
    doc = TINY_BOOK(format=(75, 150))
    doc.set_title(obj.title)
    doc.set_author(obj.author)
    doc.set_margins(left=8, top=10)
    doc.set_auto_page_break(auto=True, margin=11)

    # cover first, then one fresh page per chapter
    doc.print_cover_page(obj.title, obj.author)
    for section in obj.chapters:
        doc.print_chapter(section.heading, section.content)

    # An odd page count gets one extra 'back page' so the total is even.
    page_count_is_odd = doc.page_no() % 2 != 0
    if page_count_is_odd:
        doc.print_cover_page('back page', '...')

    doc.output(out_name, 'F')
def tiny_name(obj):
    """Derive a PDF file name from the book's title.

    The title is case-folded, stripped of leading and trailing spaces, and
    every remaining space is replaced by a hyphen before ``.pdf`` is appended.

    Args:
        obj: Object with a string ``title`` attribute.

    Returns:
        The generated file name, e.g. ``'moby-dick.pdf'``.
    """
    folded = obj.title.casefold()
    trimmed = folded.strip(' ')
    # Splitting on a single space and re-joining with '-' is the same as
    # replacing each space individually (runs of spaces become runs of '-').
    slug = '-'.join(trimmed.split(' '))
    return '{}.pdf'.format(slug)
def pdf_to_jpeg(filename: str):
    """Render every page of a PDF into image files in ``<filename>_images``.

    Any existing output directory is deleted and recreated. Each page is saved
    in PNG format (despite the function's name) under its zero-based page
    index, with no file extension.

    Args:
        filename: Path of the PDF to convert.
    """
    # second argument is passed through to pdf2image (600)
    rendered_pages = convert_from_path(filename, 600)

    out_dir = '{}_images'.format(filename)
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)

    for index, page_image in enumerate(rendered_pages):
        target = '{}/{}'.format(out_dir, index)
        page_image.save(target, 'PNG')
def make_booklet(dir_path: str, out_path: str = 'out.pdf',
                 blank_path: str = 'blank.jpg'):
    """Impose single-page images two-up in booklet order and save as one file.

    Args:
        dir_path: Directory holding one image per page, each named by its
            integer page index (as written by ``pdf_to_jpeg``). Entries whose
            names are not plain integers are ignored.
        out_path: File to write; defaults to ``'out.pdf'``.
        blank_path: Image used for padding pages; defaults to ``'blank.jpg'``.
            It is only opened when padding is actually required.

    Raises:
        ValueError: If ``dir_path`` contains no page images.

    The page count is padded to a multiple of four: an odd count first gets a
    blank appended, then a count of ``4k + 2`` gets a blank after the first
    page and another before the last page. Output side ``i`` pairs page ``i``
    with page ``N - 1 - i``; even sides put the later page on the left, odd
    sides put the earlier page on the left.
    """
    # Sort numerically but keep the real file names, and skip stray entries
    # (hidden files, thumbnails) that would make int() fail.
    names = sorted(
        (name for name in os.listdir(dir_path) if name.isdecimal()), key=int)
    if not names:
        raise ValueError('no page images found in {!r}'.format(dir_path))
    images = [Image.open(os.path.join(dir_path, name)) for name in names]

    width, height = images[0].size

    if len(images) % 4 != 0:
        with Image.open(blank_path) as blank_file:
            blank = blank_file.copy()  # in-memory copy; file closes here
        if len(images) % 2 == 1:
            # Without this, the middle page of an odd-length book would never
            # be paired and would silently disappear from the output.
            images.append(blank.copy())
        if len(images) % 4 == 2:
            images.insert(1, blank.copy())
            images.insert(-1, blank.copy())

    total = len(images)
    sheet_size = (width * 2, height)
    booklet = []
    for i in range(total // 2):
        front, back = images[i], images[total - 1 - i]
        # alternate which page sits on the left half of the side
        left, right = (back, front) if i % 2 == 0 else (front, back)
        side = Image.new('RGB', sheet_size)
        side.paste(left, (0, 0))
        side.paste(right, (width, 0))
        booklet.append(side)

    booklet[0].save(out_path, save_all=True, append_images=booklet[1:])
def main(filename):
    """Run the whole pipeline: EPUB -> HTML -> PDF -> page images -> booklet.

    Args:
        filename: Path of the book without its extension; ``.epub`` is
            appended for the input and ``.pdf`` for the intermediate PDF.
    """
    epub_path = filename + '.epub'
    pdf_path = filename + '.pdf'
    images_dir = pdf_path + '_images'

    print('Converting epub...')
    book = epub_to_obj(epub_path)
    obj_to_html(book)
    print('Succesfully converted epub to html')

    print('Converting html...')
    book = html_to_obj()
    obj_to_pdf(book, pdf_path)
    print('Succesfully converted html to pdf')

    print('making booklet')
    pdf_to_jpeg(pdf_path)
    make_booklet(images_dir)
    print('Done making booklet, ready to print')
def parse_cli_args(argv=None):
    """Parse the command line and return the argparse namespace.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace whose ``filename`` is the book path without the ``.epub``
        extension, which is the form ``main`` expects.
    """
    parser = argparse.ArgumentParser(
        description='Convert an EPUB into a printable booklet PDF.')
    parser.add_argument(
        'filename',
        help="path of the EPUB to convert, with or without the '.epub' extension")
    args = parser.parse_args(argv)
    # main() appends '.epub' itself, so drop the extension if the user typed it.
    if args.filename.lower().endswith('.epub'):
        args.filename = args.filename[:-len('.epub')]
    return args


if __name__ == '__main__':
    # The parser previously declared no arguments and the result was indexed
    # like a list (args[1]), which fails on an argparse Namespace.
    main(parse_cli_args().filename)