1
1
from functools import partial
2
+ from multiprocessing import Pool
2
3
from pathlib import Path
3
4
from typing import Optional
4
- from multiprocessing import Pool
5
+
5
6
import click
6
7
from loguru import logger
7
8
from tqdm import tqdm
@@ -26,12 +27,28 @@ def process_one(file, input_dir):
26
27
)
27
28
28
29
30
def process_one_accurate(file, input_dir):
    """Measure one audio file by loading it through torchaudio.

    The file is loaded with ``torchaudio.load`` (``sox`` backend) and the
    size of the last dimension of the loaded tensor is taken as the length.

    Args:
        file: Path of the file to load; must support ``relative_to``.
        input_dir: Directory that the returned path is made relative to.

    Returns:
        A tuple ``(length, sample_rate, length / sample_rate, relative_path)``
        on success, or ``None`` if anything raised while loading or
        measuring (the error is logged as a warning).
    """
    # Function-scope import: torchaudio is only imported when this runs.
    import torchaudio

    try:
        waveform, sample_rate = torchaudio.load(str(file), backend="sox")
        num_samples = waveform.size(-1)
        duration = num_samples / sample_rate
        # Kept inside the try so a path outside input_dir is also reported
        # as a warning rather than propagating.
        relative_path = file.relative_to(input_dir)
        return num_samples, sample_rate, duration, relative_path
    except Exception as e:
        logger.warning(f"Error reading { file } : { e } ")
        return None
39
+
40
+
29
41
@click .command ()
30
42
@click .argument ("input_dir" , type = click .Path (exists = True , file_okay = False ))
31
43
@click .option ("--recursive/--no-recursive" , default = True , help = "Search recursively" )
32
44
@click .option (
33
45
"--visualize/--no-visualize" , default = False , help = "Visualize the distribution"
34
46
)
47
+ @click .option (
48
+ "--accurate/--no-accurate" ,
49
+ default = False ,
50
+ help = "Use accurate mode for duration calculation" ,
51
+ )
35
52
@click .option (
36
53
"-l" , "--long-threshold" , default = None , type = float , help = "Threshold for long files"
37
54
)
@@ -53,6 +70,7 @@ def length(
53
70
input_dir : str ,
54
71
recursive : bool ,
55
72
visualize : bool ,
73
+ accurate : bool ,
56
74
long_threshold : Optional [float ],
57
75
short_threshold : Optional [float ],
58
76
num_workers : int ,
@@ -67,7 +85,9 @@ def length(
67
85
logger .info (f"Found { len (files )} files, calculating length" )
68
86
69
87
infos = []
70
- process_one_partial = partial (process_one , input_dir = input_dir )
88
+ process_one_partial = partial (
89
+ process_one_accurate if accurate else process_one , input_dir = input_dir
90
+ )
71
91
72
92
with Pool (processes = num_workers ) as executor :
73
93
for res in tqdm (
0 commit comments