-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchat_ki.py
143 lines (111 loc) · 4.27 KB
/
chat_ki.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import requests
from bs4 import BeautifulSoup
import edge_tts
import os
import asyncio
import concurrent.futures
import regex as re
import ki
# Settings for the novel being converted ("KI" = Keyboard Immortal).
chapters = ki.chapters
novel = "KI"
base_url = "https://novelbin.com/b/keyboard-immortal-novel/"
location = novel.lower()  # name of the per-novel output sub-folder

# Number of chapters combined into each generated audio file, per novel.
chapters_lot = {"KI": 2, "SS": 3}

# FriendlyName of the edge-tts voice to use, per novel.
voice_list = {
    "KI": "Microsoft Andrew Online (Natural) - English (United States)",
    "SS": "Microsoft Steffan Online (Natural) - English (United States)",
}

# Size of the semaphore that caps simultaneous audio generations.
batch = 2
def fetch_chapter(chapter, timeout=30):
    """Scrape the text of one chapter from the novel site.

    Args:
        chapter: Chapter slug; it is lower-cased and appended to ``base_url``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The concatenated text of the page's ``<p>`` elements with backslashes
        removed, or one of two sentinel strings: ``"done"`` when the server
        answers 404, ``"continue"`` when the request or parsing raises.
    """
    try:
        url = f"{base_url}{chapter.lower()}"
        # Without a timeout a stalled connection would block this worker
        # thread (and everything waiting on its result) indefinitely.
        response = requests.get(url, timeout=timeout)
        if response.status_code == 404:
            return "done"

        soup = BeautifulSoup(response.content, "html.parser")
        kept = []
        for tag in soup.find_all("p"):
            paragraph = tag.get_text()
            # Drop the translator credit line.
            if paragraph == " Translator: Pika\n":
                continue
            # NOTE(review): presumably "1." marks the start of trailing
            # notes that should not be read aloud - confirm against the site.
            if "1." in paragraph:
                break
            kept.append(paragraph)
        return "".join(kept).replace("\\", "")
    except Exception as exc:
        # Deliberately best-effort: keep the sentinel contract callers rely
        # on, but report the failure instead of discarding it silently.
        print(f"failed to fetch {chapter!r}: {exc}")
        return "continue"
def scrape_chapters_in_threads(chapters_to_convert):
    """Fetch every chapter in *chapters_to_convert* on a thread pool.

    Returns a list holding the ``fetch_chapter`` result for each input
    chapter, in the same order as the input.
    """
    with concurrent.futures.ThreadPoolExecutor() as pool:
        return list(pool.map(fetch_chapter, chapters_to_convert))
async def generate_audio(chapter_text, voice, output_file, semaphore):
    """Synthesize *chapter_text* with *voice* and save it to *output_file*.

    The work happens while holding *semaphore*, so the number of
    simultaneous generations is bounded by the semaphore the caller passes.
    """
    async with semaphore:
        speech = edge_tts.Communicate(
            chapter_text,
            voice,
            pitch="+5Hz",
            rate="-5%",
        )
        await speech.save(output_file)
def _latest_complete_chapter(chapter_names, lot_size):
    """Return the last chapter number that is a multiple of *lot_size*.

    Names are scanned from the end of the list; the number is taken from the
    text after the first "-" with any ":" removed. Returns 0 when no name
    qualifies, so the caller simply has no batches to process.
    """
    for name in reversed(chapter_names):
        number = int(name.split("-")[1].replace(":", ""))
        if number % lot_size == 0:
            return number
    return 0


def _compose_batch_text(chapter_names, chapter_texts):
    """Join each chapter title and body into the text for one audio file.

    Returns None when any chapter came back as a sentinel ("done" for a 404,
    "continue" for a failed fetch) so that no truncated file is produced.
    """
    parts = []
    for name, body in zip(chapter_names, chapter_texts):
        if body in ("done", "continue"):
            print(body, name)
            return None
        title = name.replace("-", " ")
        parts.append(f"{title} .{body}")
    return "".join(parts)


async def tts() -> None:
    """Convert scraped chapters to speech, one MP3 per batch of chapters.

    Chapters are grouped ``chapters_lot[novel]`` at a time. A batch whose MP3
    already exists is skipped. A batch with a missing or failed chapter is
    skipped as well, so it is retried on the next run instead of leaving a
    truncated file under the full range name. Audio generations run
    concurrently, bounded by a semaphore of size ``batch``.
    """
    voices = await edge_tts.list_voices()
    lot_size = int(chapters_lot[novel])
    last_chapter = _latest_complete_chapter(chapters, lot_size)

    folder_path = f"{os.getcwd()}/output_file/{location}"
    os.makedirs(folder_path, exist_ok=True)

    semaphore = asyncio.Semaphore(batch)
    voice = get_voice(novel, voices)  # same for every batch, so look it up once
    loop = asyncio.get_running_loop()

    tasks = []
    for start in range(0, last_chapter, lot_size):
        output_file = f"{folder_path}/{novel}-{start + 1}-{start + lot_size}.mp3"
        if os.path.exists(output_file):
            continue

        batch_names = chapters[start:start + lot_size]
        if len(batch_names) < lot_size:
            # The chapter list is shorter than its numbering implies.
            print(f"not enough chapters for {output_file}")
            break

        # Scraping blocks; run it off the event loop so audio tasks that are
        # already scheduled keep making progress in the meantime.
        chapter_texts = await loop.run_in_executor(
            None, scrape_chapters_in_threads, batch_names
        )

        text = _compose_batch_text(batch_names, chapter_texts)
        if text is None:
            continue
        print(text)

        tasks.append(
            asyncio.create_task(generate_audio(text, voice, output_file, semaphore))
        )

    # Wait for all audio generation tasks to complete.
    await asyncio.gather(*tasks)
def get_voice(novel, voices):
    """Return the ShortName of the voice configured for *novel*.

    The configured FriendlyName is looked up in ``voice_list`` and matched
    against the entries of *voices*; the first match wins. Returns None when
    no entry matches.
    """
    wanted = voice_list[novel]
    matches = (
        entry["ShortName"]
        for entry in voices
        if entry["FriendlyName"] == wanted
    )
    return next(matches, None)
# Script entry point: start an event loop and run the tts() coroutine to
# completion. NOTE: there is no __main__ guard, so this also runs whenever
# the module is imported.
asyncio.run(tts())