Skip to content

Commit 7096695

Browse files
committed
Optimize accurate probing
1 parent 80a9dcc commit 7096695

File tree

1 file changed

+22
-2
lines changed

1 file changed

+22
-2
lines changed

fish_audio_preprocess/cli/length.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from functools import partial
2+
from multiprocessing import Pool
23
from pathlib import Path
34
from typing import Optional
4-
from multiprocessing import Pool
5+
56
import click
67
from loguru import logger
78
from tqdm import tqdm
@@ -26,12 +27,28 @@ def process_one(file, input_dir):
2627
)
2728

2829

30+
def process_one_accurate(file, input_dir):
    """Measure the length of one audio file by loading it with torchaudio.

    Args:
        file: Path of the audio file to load; it is passed to
            ``torchaudio.load`` as a string.
        input_dir: Directory that the returned path is made relative to.

    Returns:
        A tuple ``(num_frames, sample_rate, duration, relative_path)``, where
        ``num_frames`` is the size of the last dimension of the loaded
        tensor and ``duration`` is ``num_frames / sample_rate``. Returns
        ``None`` if loading or any of the computations raises; the error is
        logged as a warning instead of being propagated.
    """
    # Imported inside the function rather than at module level, so the
    # dependency is only needed when this function is actually called.
    import torchaudio

    try:
        waveform, sample_rate = torchaudio.load(str(file), backend="sox")
        num_frames = waveform.size(-1)
        duration = waveform.size(-1) / sample_rate
        relative_path = file.relative_to(input_dir)
    except Exception as e:
        # Best-effort: a failing file is reported and signalled with None.
        logger.warning(f"Error reading {file}: {e}")
        return None

    return num_frames, sample_rate, duration, relative_path
39+
40+
2941
@click.command()
3042
@click.argument("input_dir", type=click.Path(exists=True, file_okay=False))
3143
@click.option("--recursive/--no-recursive", default=True, help="Search recursively")
3244
@click.option(
3345
"--visualize/--no-visualize", default=False, help="Visualize the distribution"
3446
)
47+
@click.option(
48+
"--accurate/--no-accurate",
49+
default=False,
50+
help="Use accurate mode for duration calculation",
51+
)
3552
@click.option(
3653
"-l", "--long-threshold", default=None, type=float, help="Threshold for long files"
3754
)
@@ -53,6 +70,7 @@ def length(
5370
input_dir: str,
5471
recursive: bool,
5572
visualize: bool,
73+
accurate: bool,
5674
long_threshold: Optional[float],
5775
short_threshold: Optional[float],
5876
num_workers: int,
@@ -67,7 +85,9 @@ def length(
6785
logger.info(f"Found {len(files)} files, calculating length")
6886

6987
infos = []
70-
process_one_partial = partial(process_one, input_dir=input_dir)
88+
process_one_partial = partial(
89+
process_one_accurate if accurate else process_one, input_dir=input_dir
90+
)
7191

7292
with Pool(processes=num_workers) as executor:
7393
for res in tqdm(

0 commit comments

Comments
 (0)