-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathavatar.py
95 lines (82 loc) · 3.84 KB
/
avatar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from config import *
from image import generate_image
import humanize
import datetime as dt
from argparse import ArgumentParser
import shutil
import os
from animate_face import animate_face
import subprocess, platform
# Appearance phrase that main() splices into the image-generation prompt when
# it has to create the avatar picture itself.
avatar_description = (
    "Young asian man, with short brunette hair, "
    "slightly smiling"
)
def _format_elapsed(seconds):
    """Return a human-readable duration for *seconds* of elapsed wall time."""
    # Whole seconds are enough resolution for the per-stage report.
    return humanize.naturaldelta(dt.timedelta(seconds=int(seconds)))


def _print_stage(title):
    """Print the separator banner that introduces a pipeline stage."""
    print("-----------------------------------------")
    print(title)


def _run_ffmpeg(arguments):
    """Run ffmpeg with *arguments* (a list of strings) and return its exit code.

    Failures are reported but not raised, so the remaining stages still run
    just as they did when the exit status was ignored. Returns None when the
    ffmpeg executable cannot be started at all.
    """
    try:
        # An argument list without a shell keeps paths containing spaces or
        # shell metacharacters intact on every platform.
        status = subprocess.call(["ffmpeg", *arguments])
    except FileNotFoundError:
        print("warning: ffmpeg executable not found on PATH")
        return None
    if status != 0:
        print("warning: ffmpeg exited with status", status)
    return status


def main():
    """Build an animated, pitch-shifted talking avatar and report stage timings.

    Command-line options:
        --image    path to an avatar picture; when left at the default
                   (``imgfile``) a new avatar is generated instead.
        --path_id  name of the working directory under ``temp`` and prefix of
                   the result file; defaults to the current Unix timestamp.
        --pitch    voice pitch factor, 1.0 keeps the original pitch and larger
                   values raise it; must be greater than 0.

    Stages: obtain the avatar image, extract the audio track of ``driverfile``
    with ffmpeg, animate the face with ``animate_face``, shift the pitch of the
    extracted audio, and mux animation and audio into
    ``results/<path_id>_animated.mp4``.

    ``imgfile``, ``audiofile``, ``driverfile`` and ``animatedfile`` are not
    defined in this file; presumably they come from ``from config import *``.
    """
    # `time` is not imported explicitly at file level in the visible source,
    # so bring it into scope here instead of relying on a star import.
    import time

    parser = ArgumentParser()
    parser.add_argument("--image", default=imgfile, help="path to avatar file")
    parser.add_argument("--path_id", default=str(int(time.time())), help="set the path id to use")
    parser.add_argument(
        "--pitch",
        type=float,
        default=1.0,
        help="change pitch of voice, 1.0 is original, higher number is higher pitch",
    )
    args = parser.parse_args()
    if args.pitch <= 0:
        # The pitch filter divides by this value (atempo=1/pitch).
        parser.error("--pitch must be greater than 0")

    tstart = time.time()

    ## SET PATH
    path_id = args.path_id
    path = os.path.join("temp", path_id)
    os.makedirs(path, exist_ok=True)

    ## GENERATE AVATAR IMAGE
    timage = "None"
    if args.image == imgfile:
        _print_stage("generating avatar image")
        t1 = time.time()
        # Implicit concatenation (rather than a backslash continuation inside
        # the literal) guarantees source indentation never leaks into the prompt.
        prompt = (
            "hyperrealistic digital avatar, centered, "
            f"{avatar_description}, rim lighting, studio lighting, looking at the camera"
        )
        generate_image(path_id, imgfile, prompt)
        timage = _format_elapsed(time.time() - t1)
        print("\ngenerating avatar:", timage)
    else:
        # A user-supplied picture is copied into the working directory under
        # the file name the later stages are given.
        shutil.copyfile(args.image, os.path.join(path, imgfile))

    ## EXTRACT SPEECH FROM MP4
    _print_stage("extracting speech from mp4")
    t2 = time.time()
    wavoutfile = os.path.join(path, audiofile)
    # 16-bit PCM, 44.1 kHz, mono.
    _run_ffmpeg([
        "-i", driverfile,
        "-acodec", "pcm_s16le",
        "-ar", "44100",
        "-ac", "1",
        wavoutfile,
    ])
    tspeech = _format_elapsed(time.time() - t2)
    print("\nextracting speech:", tspeech)

    ## ANIMATE AVATAR IMAGE
    _print_stage("animating face with driver")
    t3 = time.time()
    # audiofile determines the length of the driver movie to trim
    # driver movie is imposed on the image file to produce the animated file
    animate_face(path_id, audiofile, driverfile, imgfile, animatedfile)
    tanimate = _format_elapsed(time.time() - t3)
    print("\nanimating face:", tanimate)

    ## CHANGING THE PITCH OF THE VOICE
    _print_stage("changing pitch of voice")
    t4 = time.time()
    wavpitchedfile = os.path.join(path, "pitched.wav")
    # Resampling at rate*pitch shifts the pitch but also changes the speed;
    # atempo=1/pitch restores the original duration.
    pitch_filter = "asetrate=44100*{0},aresample=44100,atempo=1/{0}".format(args.pitch)
    _run_ffmpeg(["-i", wavoutfile, "-af", pitch_filter, wavpitchedfile])
    tpitch = _format_elapsed(time.time() - t4)
    print("\nchanging pitch:", tpitch)

    ## COMBINING ANIMATION WITH SPEECH
    _print_stage("combining animation with speech")
    t5 = time.time()
    animatedoutfile = os.path.join(path, animatedfile)
    # ffmpeg does not create missing output directories.
    os.makedirs("results", exist_ok=True)
    finaloutfile = os.path.join("results", path_id + "_animated.mp4")
    # Take video from the animation and audio from the pitched wav; stop at
    # the shorter of the two streams.
    _run_ffmpeg([
        "-i", animatedoutfile,
        "-i", wavpitchedfile,
        "-c:v", "copy",
        "-map", "0:v:0",
        "-map", "1:a:0",
        "-shortest",
        finaloutfile,
    ])
    tcombi = _format_elapsed(time.time() - t5)
    print("\ncombining animation with speech:", tcombi)

    print("done")
    print("Overall timing")
    print("--------------")
    print("generating avatar image:", timage)
    print("extracting speech from mp4:", tspeech)
    print("animating face:", tanimate)
    print("changing pitch of voice:", tpitch)
    print("combining animation with speech:", tcombi)
    print("total time:", humanize.naturaldelta(minimum_unit="microseconds", value=dt.timedelta(seconds=int(time.time() - tstart))))
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()