import base64
import math
import os
import shutil
from tkinter import Label, Entry, Tk, Button, filedialog, Listbox, END, IntVar, Radiobutton
import cv2
import moviepy.editor as mpe
import openai
import requests
from gtts import gTTS
from moviepy.video.VideoClip import TextClip
from moviepy.video.tools.subtitles import SubtitlesClip
from pydub import AudioSegment
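# Note: the calls below assume the pre-1.0 openai package (openai.ChatCompletion) and
# moviepy 1.x (moviepy.editor). TextClip also needs an ImageMagick install, and
# pydub/moviepy expect ffmpeg to be available on the PATH.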
# setup
root = Tk()
root.geometry('400x400')

data_dir = "./data"
output_dir = "./out"

openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key is None:
    raise Exception("Missing OpenAI API Key")
openai.api_key = openai_api_key

stability_engine_id = "stable-diffusion-v1-5"  # "stable-diffusion-v1-5" or "stable-diffusion-512-v2-1"
stability_api_host = os.getenv('API_HOST', 'https://api.stability.ai')
stability_api_key = os.getenv("STABILITY_API_KEY")
if stability_api_key is None:
    raise Exception("Missing Stability API key")

# variables
scenes = []
images = []
num_scenes = 0
language = 'English'
# functions
def generate():
    # get user input
    global num_scenes
    global language
    num_scenes = entry1.get()  # for some reason 4 is fine but 5 is not (it doesn't have two spaces between the scenes)
    subject = entry2.get()
    if v.get() == 0:
        language = "English"
    else:
        language = "Chinese"
    # set up directories
    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.mkdir(data_dir)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.mkdir(output_dir)
    print(f'Generating {num_scenes} scenes about {subject} in {language}')
    generate_scenes(subject)
    if language != 'English':
        print(f'Translating story into {language}')
        translate_scenes()
    print("Generating Images...")
    generate_images()
    print("Generating Audio...")
    generate_audio()
    print("Generating Video...")
    create_video()
    print("Done!")
    root.destroy()
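# generate_scenes asks gpt-3.5-turbo for alternating "Scene N: ..." / "Prompt N: ..."
# blocks separated by blank lines, splits the reply on '\n\n', and strips the
# "Scene N: " / "Prompt N: " labels; even-indexed chunks become scene captions and
# odd-indexed chunks become image prompts.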
def generate_scenes(subject):
    prompt = f'Generate a manga story line with {num_scenes} scenes. The manga will have 4 characters, and be about ' \
             f'{subject}. For each scene, give me both a description of the scene in natural language and a prompt I ' \
             f'could use on a text-to-image AI to generate the scene. When writing the image prompt, only use ' \
             f'keywords. Make it specific. Make sure to make a prompt that matches the scene and is in anime style. ' \
             f'Do not use periods in the prompt, instead separating the keywords with commas. No need for "joiner ' \
             f'words" that make the sentence readable. Add ", anime style" to the end of the prompt. Separate each ' \
             f'scene and its corresponding prompt with exactly two new lines. Also separate each prompt and the next ' \
             f'scene with exactly two new lines. Answer exactly in the form of "Scene 1: [content] \\n\\n Prompt: ' \
             f'[content] \\n\\n Scene 2: [content] \\n\\n Prompt 2: [content]" and so on, replacing [content] with ' \
             f'the text you generate. Do not include the quotation marks. Do not include new lines after the last line.'
    print(f'Prompt: {prompt}')
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )
    print(f'GPT Response: {response}')
    sentences = response['choices'][0]['message']['content'].split('\n\n')
    sentences = [sentence.strip() for sentence in sentences]
    sentences = [sentence[sentence.index(': ') + 2:] for sentence in sentences]
    for i in range(len(sentences)):
        if i % 2 == 0:
            scenes.append(sentences[i])
        else:
            images.append(sentences[i])
    print(f'Scenes: {scenes}')
    print(f'Image prompts: {images}')
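# translate_scenes rewrites each caption in place with a per-scene gpt-3.5-turbo
# translation request; it is only called when a non-English language is selected.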
def translate_scenes():
    for i, scene in enumerate(scenes):
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": f'Translate the following scene into {language}: {scene}'}]
        )
        translation = response['choices'][0]['message']['content']
        print(f'Translation: {translation}')
        scenes[i] = translation
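# generate_images posts each prompt to the Stability AI v1 text-to-image REST
# endpoint and saves the first base64 artifact of each response as
# data/frame_XXXX.png, the naming that create_video expects.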
def generate_images():
    for i, prompt in enumerate(images):
        response = requests.post(
            f"{stability_api_host}/v1/generation/{stability_engine_id}/text-to-image",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {stability_api_key}"
            },
            json={
                "text_prompts": [{"text": prompt}],
                "clip_guidance_preset": "FAST_BLUE",  # not sure what this is yet, need to do further research
                "height": 512,
                "width": 512,
                "samples": 1,
                "steps": 10,
                # "height": 576,  # high-res params
                # "width": 1024,
                # "samples": 1,
                # "steps": 50,
                "style_preset": "anime"
            },
        )
        if response.status_code != 200:
            raise Exception("Non-200 response: " + str(response.text))
        print(f'StabilityAI Response: {response}')
        data = response.json()
        with open(os.path.join(data_dir, f"frame_{i:04}.png"), "wb") as f:
            f.write(base64.b64decode(data["artifacts"][0]["base64"]))
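# generate_audio narrates each caption with gTTS (the 'en' or 'zh-CN' voice) and
# saves one mp3 per scene alongside the frames.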
def generate_audio():
    for i, caption in enumerate(scenes):
        voice = "en"
        if language == 'Chinese':
            voice = 'zh-CN'
        voiceover = gTTS(text=caption, lang=voice, slow=False)
        voiceover.save(os.path.join(data_dir, f'audio_{i:04}.mp3'))
        print(f'Generated audio for scene {i}')
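# create_video works in three passes: (1) turn each still frame into an mp4 whose
# length matches its narration (pydub reports audio length in milliseconds, hence
# len(audio) / 1000 * fps frames), (2) mux in the audio and burn the caption in as a
# bottom-aligned subtitle, (3) concatenate the per-scene clips into out/final.mp4.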
def create_video():
    num_frames = len(scenes)
    # params
    fps = 30
    # generate scene video clips
    for i in range(num_frames):
        frame_path = os.path.join(data_dir, f"frame_{i:04}.png")
        frame = cv2.imread(frame_path)
        height, width, channels = frame.shape
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        output_path = os.path.join(data_dir, f"scene_{i:04}.mp4")
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        audio_path = os.path.join(data_dir, f'audio_{i:04}.mp3')
        audio = AudioSegment.from_mp3(audio_path)
        audio_frames = math.ceil(len(audio) / 1000 * fps)
        for j in range(audio_frames):
            out.write(frame)
        out.release()
    # add audio to scene clips
    for i in range(num_frames):
        scene_path = os.path.join(data_dir, f"scene_{i:04}.mp4")
        audio_path = os.path.join(data_dir, f'audio_{i:04}.mp3')
        video = mpe.VideoFileClip(scene_path)
        audio = mpe.AudioFileClip(audio_path)
        video = video.set_audio(audio)
        # add subtitles to scene clips
        subs = [((0, video.duration), scenes[i])]
        generator = lambda txt: TextClip(txt, align='center', method='caption', size=(video.w, None)).set_position(
            ('center', 'bottom')).set_duration(video.duration)
        subtitles = SubtitlesClip(subs, generator)
        video = mpe.CompositeVideoClip([video, subtitles])
        # write to a new filename so we do not overwrite the file this clip is still reading from
        video.write_videofile(os.path.join(data_dir, f"scene_sub_{i:04}.mp4"))
    # concatenate scene clips
    clips = []
    for i in range(num_frames):
        scene_path = os.path.join(data_dir, f"scene_sub_{i:04}.mp4")
        clip = mpe.VideoFileClip(scene_path)
        clips.append(clip)
    # export final video
    final_clip = mpe.concatenate_videoclips(clips)
    final_clip.write_videofile(os.path.join(output_dir, "final.mp4"))
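# validate_entry is wired to entry1 via Tk's validatecommand with the '%S'
# substitution, so it receives just the text being inserted and rejects any
# non-digit keystroke.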
def validate_entry(text):
    return text.isdigit()
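# open_file_dialog copies the selected files into a fresh user_data/ folder and
# lists them in the listbox; the copied files are not otherwise used by the
# generation pipeline.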
def open_file_dialog():
    filenames = filedialog.askopenfilenames(initialdir="/", title="Select Files")
    # make a clean user_data folder
    if os.path.exists("user_data/"):
        shutil.rmtree("user_data/")
    os.mkdir("user_data/")
    # copy the files to the "user_data" folder and show them in the listbox
    for filename in filenames:
        shutil.copy(filename, "user_data/")
        listbox.insert(END, filename)
# gui
label1 = Label(root, text="Enter number of scenes:")
label1.grid(row=0, column=0, padx=10, pady=10)
entry1 = Entry(root, validate="key", validatecommand=(root.register(validate_entry), '%S'))
entry1.grid(row=0, column=1, padx=10, pady=10)
label2 = Label(root, text="Enter subject of Anime:")
label2.grid(row=1, column=0, padx=10, pady=10)
entry2 = Entry(root, validate="key")
entry2.grid(row=1, column=1, padx=10, pady=10)
label3 = Label(root, text="Language:")
label3.grid(row=2, column=0, padx=10, pady=10)
v = IntVar()
rb1 = Radiobutton(root, text="English", variable=v, value=0)
rb1.grid(row=2, column=1, padx=10, pady=10)
rb2 = Radiobutton(root, text="Chinese", variable=v, value=1)
rb2.grid(row=2, column=2, padx=10, pady=10)
listbox = Listbox(root)
listbox.grid(row=3, column=0)
button = Button(root, text="Upload Files", command=open_file_dialog)
button.grid(row=4, column=0, columnspan=2, padx=10, pady=10, sticky="W")
button = Button(root, text="Send", command=generate)
button.grid(row=5, column=0, columnspan=2, padx=10, pady=10, sticky="W")
# main
if __name__ == '__main__':
    root.mainloop()
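# Typical run, assuming both API keys are exported in the shell:
#   export OPENAI_API_KEY=...
#   export STABILITY_API_KEY=...
#   python main.py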