-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathformat_dataset.py
137 lines (116 loc) · 5.28 KB
/
format_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import argparse
import logging
import os
from shutil import copyfile
# Module-level logger shared by all formatters below; its handler, format and
# level are only configured in the __main__ block (logging.basicConfig).
_logger = logging.getLogger(__name__)
'''
This script modifies speaker data sets to match the required format.
Each speaker data set must have the following layout:
    /speaker_name
        metadata.csv
        /wavs
            wav_file_name_1.wav
            wav_file_name_2.wav
            ...
metadata.csv must be formatted as follows (pipe "|" separator):
    wav_file_name_1|text_1
    wav_file_name_2|text_2
    ...
'''
def format_LJ_speech(lj_args):
    ''' Format LJ data set

    Only metadata.csv needs to be modified: the file is rewritten in place so
    that each pipe-separated line keeps only its first and third fields,
    i.e. "<field 0>|<field 2>". Blank lines are dropped.

    :param lj_args: parsed arguments; only ``data_set_dir`` is read, the
        directory that contains metadata.csv
    :raises FileNotFoundError: if metadata.csv does not exist
    :raises IndexError: if a non-blank line has fewer than three fields
        (e.g. when the file has already been formatted)
    '''
    _logger.info('Formatting LJ Speech')
    metadata = os.path.join(lj_args.data_set_dir, 'metadata.csv')
    # explicit check instead of ``assert``: assertions are stripped under -O
    # and the former assertion message (the logger call's result) was None
    if not os.path.isfile(metadata):
        message = f'There is no such file {metadata}'
        _logger.error(message)
        raise FileNotFoundError(message)

    # read metadata lines, ignoring blank ones that would otherwise break
    # the field indexing below
    with open(metadata, 'r', encoding='utf-8') as f:
        rows = [line.strip().split(sep='|') for line in f if line.strip()]

    # build everything before reopening the file for writing, so a malformed
    # line cannot leave a truncated metadata.csv behind
    new_lines = [f'{row[0]}|{row[2]}\n' for row in rows]
    with open(metadata, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)
    _logger.info('Done!')
def format_BC2013(bc2013_args):
    ''' Format BC2013 data set

    Only metadata.csv needs to be modified: the file is rewritten in place so
    that each pipe-separated line keeps only its first two fields,
    i.e. "<field 0>|<field 1>". Blank lines are dropped.

    :param bc2013_args: parsed arguments; only ``data_set_dir`` is read, the
        directory that contains metadata.csv
    :raises FileNotFoundError: if metadata.csv does not exist
    :raises IndexError: if a non-blank line has fewer than two fields
    '''
    _logger.info('Formatting BC2013 Speech')
    metadata = os.path.join(bc2013_args.data_set_dir, 'metadata.csv')
    # explicit check instead of ``assert``: assertions are stripped under -O
    # and the former assertion message (the logger call's result) was None
    if not os.path.isfile(metadata):
        message = f'There is no such file {metadata}'
        _logger.error(message)
        raise FileNotFoundError(message)

    # read metadata lines, ignoring blank ones that would otherwise break
    # the field indexing below
    with open(metadata, 'r', encoding='utf-8') as f:
        rows = [line.strip().split(sep='|') for line in f if line.strip()]

    # build everything before reopening the file for writing, so a malformed
    # line cannot leave a truncated metadata.csv behind
    new_lines = [f'{row[0]}|{row[1]}\n' for row in rows]
    with open(metadata, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)
    _logger.info('Done!')
def format_ESD(esd_args):
    ''' Format ESD data set

    Speaker directories found in ``data_set_dir`` are sorted by name and the
    first ten are skipped; every remaining one is processed as an English
    speaker (presumably ESD speakers 0011-0020 -- TODO confirm against the
    data set layout). For each processed speaker the function creates
    ``<data_set_dir>/english/<speaker>/`` containing:

    * ``metadata.csv`` with lines "<speaker>_<utterance id>|<text>", built
      from the tab-separated transcript ``<speaker>.txt`` of the FIRST
      processed speaker, which is read once and reused for all the others;
    * ``wavs/`` with a copy of every ``.wav`` file found anywhere under the
      speaker directory.

    Only ``language == 'english'`` is implemented; any other value is logged
    as an error and nothing is written.

    :param esd_args: parsed arguments; ``data_set_dir`` and ``language``
        are read
    :raises FileNotFoundError: if the transcript of the first processed
        speaker does not exist
    '''
    _logger.info(f'Formatting ESD -- Language = {esd_args.language}')

    # Directories created by this script itself live next to the speaker
    # directories; they must not be mistaken for speakers on a second run
    # (walking them used to end in shutil.SameFileError).
    output_dir_names = ('english', 'mandarin')
    speakers = sorted(
        name for name in os.listdir(esd_args.data_set_dir)
        if name not in output_dir_names
        and os.path.isdir(os.path.join(esd_args.data_set_dir, name))
    )

    if esd_args.language == 'mandarin':
        _logger.error('"mandarin" not implemented')
        return
    if esd_args.language != 'english':
        _logger.error(f'"language" must be either "english" or "mandarin", not "{esd_args.language}"')
        return

    transcript = None  # filled from the first processed speaker, then reused
    for speaker in speakers[10:]:
        _logger.info(f'Speaker -- {speaker}')
        speaker_dir = os.path.join(esd_args.data_set_dir, speaker)
        spk_out_dir = os.path.join(esd_args.data_set_dir, esd_args.language, speaker)
        os.makedirs(spk_out_dir, exist_ok=True)

        # read metadata lines (first processed speaker only)
        if transcript is None:
            metadata = os.path.join(speaker_dir, f'{speaker}.txt')
            # explicit check instead of ``assert``: assertions are stripped
            # under -O and the former assertion message was None
            if not os.path.isfile(metadata):
                message = f'There is no such file {metadata}'
                _logger.error(message)
                raise FileNotFoundError(message)
            with open(metadata, 'r', encoding='utf-8') as f:
                # blank lines would break the field indexing below
                transcript = [line.strip().split(sep='\t') for line in f if line.strip()]

        # create new metadata.csv: swap the speaker prefix of the utterance
        # name ("<prefix>_<id>") for the current speaker
        spk_metadata_lines = [
            f'{speaker}_{row[0].strip().split(sep="_")[1]}|{row[1]}\n'
            for row in transcript
        ]
        with open(os.path.join(spk_out_dir, 'metadata.csv'), 'w', encoding='utf-8') as f:
            f.writelines(spk_metadata_lines)

        # copy all audio files to /wavs directory (sub-directories flattened)
        wavs_dir = os.path.join(spk_out_dir, 'wavs')
        os.makedirs(wavs_dir, exist_ok=True)
        for root, _, files in os.walk(speaker_dir):
            for wav_file in files:
                if wav_file.endswith('.wav'):
                    copyfile(os.path.join(root, wav_file), os.path.join(wavs_dir, wav_file))
    _logger.info('Done!')
if __name__ == '__main__':
    # Command line entry point. Usage:
    #   format_dataset.py -dd <data_set_dir> {LJ,BC2013,ESD} [-lg <language>]
    # Each sub-command stores its formatter in ``args.func``.
    parser = argparse.ArgumentParser(description='script to format speakers data sets')
    subparsers = parser.add_subparsers(help='commands for targeting a specific data set')
    # required: every formatter joins paths onto data_set_dir, so a missing
    # value used to fail later with a TypeError inside os.path.join
    parser.add_argument('-dd', '--data_set_dir', type=str, required=True,
                        help='path to the directory containing speakers data sets to format')

    parser_LJ = subparsers.add_parser('LJ', help='format LJ data set')
    parser_LJ.set_defaults(func=format_LJ_speech)

    parser_BC2013 = subparsers.add_parser('BC2013', help='format BC2013 data set')
    parser_BC2013.set_defaults(func=format_BC2013)

    parser_ESD = subparsers.add_parser('ESD', help='format emotional speech dataset from Zhou et al.')
    parser_ESD.set_defaults(func=format_ESD)
    parser_ESD.add_argument('-lg', '--language', type=str,
                            help='either english or mandarin')

    args = parser.parse_args()
    # without a sub-command no ``func`` default is set; report a usage error
    # instead of crashing with AttributeError on ``args.func``
    if not hasattr(args, 'func'):
        parser.error('a data set command is required: LJ, BC2013 or ESD')

    # set logger config
    logging.basicConfig(
        handlers=[
            logging.StreamHandler(),
        ],
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=logging.INFO
    )

    # run args
    args.func(args)