-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_bible.py
241 lines (190 loc) · 7.48 KB
/
generate_bible.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import json
import os
from pathlib import Path
from urllib.parse import quote
def load_bible_data(json_path):
    """Read the UTF-8 JSON file at *json_path* and return the parsed data."""
    with open(json_path, mode='r', encoding='utf-8') as source:
        raw_text = source.read()
    return json.loads(raw_text)
def _hebrew_numeral_letters(num):
    """Return the bare Hebrew letters (no punctuation) for a positive integer."""
    ones = "אבגדהוזחט"      # 1..9
    tens = "יכלמנסעפצ"      # 10..90
    hundreds = "קרשת"       # 100..400

    hundred_count, remainder = divmod(num, 100)
    # 400 (tav) is the largest single-letter value, so larger hundreds are
    # written additively: 500 = tav + qof, 900 = tav + tav + qof, and so on.
    letters = "ת" * (hundred_count // 4)
    if hundred_count % 4:
        letters += hundreds[hundred_count % 4 - 1]

    # 15 and 16 are written 9+6 and 9+7 instead of 10+5 and 10+6.
    if remainder == 15:
        return letters + "טו"
    if remainder == 16:
        return letters + "טז"

    ten_count, one_count = divmod(remainder, 10)
    if ten_count:
        letters += tens[ten_count - 1]
    if one_count:
        letters += ones[one_count - 1]
    return letters


def number_to_hebrew_letters(num):
    """Convert a number to a Hebrew numeral with quotation marks placed.

    A single-letter numeral is followed by an apostrophe (``'``); a numeral
    of two or more letters gets a double quote (``"``) before its last
    letter. For values of 1000 and above, the thousands are written first,
    followed by an apostrophe, and then the remainder.

    Args:
        num: The integer to convert.

    Returns:
        The numeral as a string. Numbers below 1 have no letters and yield a
        lone apostrophe, which is what this function has always returned for
        them.
    """
    if num < 1:
        return "'"

    thousands, remainder = divmod(num, 1000)
    prefix = _hebrew_numeral_letters(thousands) + "'" if thousands else ""

    letters = _hebrew_numeral_letters(remainder)
    if len(letters) >= 2:
        # The double quote always sits before the final letter, no matter
        # how many letters precede it.
        letters = letters[:-1] + '"' + letters[-1]
    elif letters:
        letters += "'"
    return prefix + letters
def get_navigation_urls(bible_data, current_book, current_chapter):
    """Return the ``(prev_url, next_url)`` pair for one chapter page.

    Books are taken in the iteration order of *bible_data*. Going back from
    a book's first chapter leads to the last chapter of the preceding book;
    going forward from its last chapter leads to chapter 1 of the following
    book. A direction with nothing to link to gets ``"#"``.
    """
    def chapter_url(book, number):
        return f"/tanakh/{quote(book)}/chapter_{number}"

    book_order = list(bible_data)
    position = book_order.index(current_book)
    chapter_count = len(bible_data[current_book])

    # Link to the previous chapter, crossing into the preceding book if needed.
    if current_chapter > 1:
        prev_url = chapter_url(current_book, current_chapter - 1)
    elif position > 0:
        earlier_book = book_order[position - 1]
        prev_url = chapter_url(earlier_book, len(bible_data[earlier_book]))
    else:
        prev_url = "#"

    # Link to the next chapter, crossing into the following book if needed.
    if current_chapter < chapter_count:
        next_url = chapter_url(current_book, current_chapter + 1)
    elif position < len(book_order) - 1:
        next_url = chapter_url(book_order[position + 1], 1)
    else:
        next_url = "#"

    return prev_url, next_url
def generate_chapter_html(book_name, chapter_num, verses, template, bible_data):
    """Fill *template* with the content of one chapter and return the HTML.

    Each verse becomes a ``<p class="verse">`` element that carries its
    1-based number in ``data-verse`` and shows the Hebrew numeral in a
    ``verse-number`` span. The placeholders ``{{BOOK_NAME}}``,
    ``{{CHAPTER_NUM}}``, ``{{CHAPTER_NUM_HEB}}``, ``{{CONTENT}}``,
    ``{{PREV_CHAPTER_URL}}`` and ``{{NEXT_CHAPTER_URL}}`` are then replaced
    in that order.
    """
    # Build one paragraph per verse.
    verse_paragraphs = []
    for verse_number, verse_text in enumerate(verses, start=1):
        verse_paragraphs.append(
            f'<p class="verse" data-verse="{verse_number}">'
            f'<span class="verse-number">{number_to_hebrew_letters(verse_number)}</span>'
            f'{verse_text}</p>'
        )
    chapter_content = '\n'.join(verse_paragraphs)

    # Links to the neighbouring chapters.
    prev_url, next_url = get_navigation_urls(bible_data, book_name, chapter_num)

    # Substitute the placeholders one after another.
    substitutions = (
        ('{{BOOK_NAME}}', book_name),
        ('{{CHAPTER_NUM}}', str(chapter_num)),
        ('{{CHAPTER_NUM_HEB}}', number_to_hebrew_letters(chapter_num)),
        ('{{CONTENT}}', chapter_content),
        ('{{PREV_CHAPTER_URL}}', prev_url),
        ('{{NEXT_CHAPTER_URL}}', next_url),
    )
    page = template
    for placeholder, replacement in substitutions:
        page = page.replace(placeholder, replacement)
    return page
def create_directory_structure(output_dir):
    """Make sure *output_dir* exists, creating missing parent directories."""
    target = Path(output_dir)
    target.mkdir(parents=True, exist_ok=True)
def generate_sitemap(bible_data, output_dir):
    """Write ``sitemap.xml`` into *output_dir*.

    The sitemap lists the home page, then for every book its main page
    followed by one entry per chapter.
    """
    def url_entry(location, priority):
        # One <url> element; every entry uses a monthly change frequency.
        return (
            "\n<url>\n"
            f"<loc>{location}</loc>\n"
            "<changefreq>monthly</changefreq>\n"
            f"<priority>{priority}</priority>\n"
            "</url>"
        )

    base_url = 'https://dorpascal.com/tanakh'
    parts = [
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
        url_entry(base_url, '1.0'),  # home page
    ]

    for book_name, chapters in bible_data.items():
        book_url = f'{base_url}/{quote(book_name)}'
        parts.append(url_entry(book_url, '0.4'))  # book main page
        for chapter_num in range(1, len(chapters) + 1):
            parts.append(url_entry(f'{book_url}/chapter_{chapter_num}', '0.3'))

    parts.append('</urlset>')

    # Write the assembled document to disk.
    sitemap_path = os.path.join(output_dir, 'sitemap.xml')
    with open(sitemap_path, 'w', encoding='utf-8') as out:
        out.write('\n'.join(parts))
    print(f'Generated sitemap.xml at {sitemap_path}')
def generate_robots_txt(output_dir):
    """Write ``robots.txt`` into *output_dir*.

    The file allows every user agent, points at the sitemap, and sets a
    crawl delay of 5 for a few named bots.
    """
    throttled_bots = ('AhrefsBot', 'Baiduspider', 'SemrushBot')

    lines = [
        'User-agent: *',
        'Allow: /',
        'Sitemap: https://dorpascal.com/sitemap.xml',
        '# Crawl-delay for specific bots',
    ]
    for bot in throttled_bots:
        lines.append(f'User-agent: {bot}')
        lines.append('Crawl-delay: 5')
    robots_content = '\n'.join(lines)

    robots_path = os.path.join(output_dir, 'robots.txt')
    with open(robots_path, 'w', encoding='utf-8') as out:
        out.write(robots_content)
    print(f'Generated robots.txt at {robots_path}')
def generate_book_index_pages(bible_data, template_dir, output_dir):
    """Generate an ``index.html`` page for each book.

    The template is read from ``book_template.html`` inside *template_dir*.
    For every book, ``{{BOOK_NAME}}`` is replaced by the book name and
    ``{{CHAPTER_LIST}}`` by one ``<li>`` link per chapter; the result is
    written to ``<output_dir>/<book_name>/index.html``.

    Args:
        bible_data: Mapping of book name to its sequence of chapters.
        template_dir: Directory that contains ``book_template.html``.
        output_dir: Root directory of the generated site. It and the
            per-book directories are created when missing.
    """
    # Load book index template
    with open(os.path.join(template_dir, 'book_template.html'), 'r', encoding='utf-8') as f:
        book_template = f.read()

    for book_name, chapters in bible_data.items():
        # Generate chapter list (chapter numbers are 1-based)
        chapter_links = '\n'.join(
            f'<li><a href="/tanakh/{quote(book_name)}/chapter_{chapter_num}">'
            f'פרק {number_to_hebrew_letters(chapter_num)}</a></li>'
            for chapter_num in range(1, len(chapters) + 1)
        )

        # Replace placeholders
        content = book_template.replace('{{BOOK_NAME}}', book_name)
        content = content.replace('{{CHAPTER_LIST}}', chapter_links)

        # Save the file. parents=True lets this function be called on its
        # own, before anything else has created output_dir.
        book_dir = os.path.join(output_dir, book_name)
        Path(book_dir).mkdir(parents=True, exist_ok=True)
        index_path = os.path.join(book_dir, 'index.html')
        with open(index_path, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f'Generated index page for {book_name}')
def main():
    """Build the static site under ``./docs``.

    Reads ``template.html`` from the working directory and the Bible data
    from ``./docs/assets/full_bible.json``, writes one HTML file per
    chapter, then the book index pages, ``sitemap.xml`` and ``robots.txt``.
    """
    output_dir = './docs'

    # Chapter page template
    with open('template.html', 'r', encoding='utf-8') as template_file:
        template = template_file.read()

    # Bible data
    bible_data = load_bible_data('./docs/assets/full_bible.json')

    # Output root
    create_directory_structure(output_dir)

    # One HTML file per chapter, grouped in a directory per book
    for book_name, chapters in bible_data.items():
        book_dir = os.path.join(output_dir, book_name)
        Path(book_dir).mkdir(exist_ok=True)

        for chapter_num, verses in enumerate(chapters, start=1):
            page = generate_chapter_html(
                book_name, chapter_num, verses, template, bible_data
            )
            chapter_path = os.path.join(book_dir, f'chapter_{chapter_num}.html')
            with open(chapter_path, 'w', encoding='utf-8') as chapter_file:
                chapter_file.write(page)
            print(f'Generated {book_name} Chapter {chapter_num}')

    # Book index pages; their template lives in the working directory
    generate_book_index_pages(bible_data, '.', output_dir)

    # Crawler-facing files
    generate_sitemap(bible_data, output_dir)
    generate_robots_txt(output_dir)

    # Summary
    print('\nGeneration complete! Generated files:')
    print(f'- HTML chapters in {output_dir}/')
    print('- Book index pages')
    print('- sitemap.xml')
    print('- robots.txt')
# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()