Skip to content

Commit

Permalink
Remove empty files in the Vocal Sound audio scenario (#3313)
Browse files Browse the repository at this point in the history
  • Loading branch information
ImKeTT authored Feb 5, 2025
1 parent fae4660 commit a4f5e39
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from tqdm import tqdm
from helm.common.media_object import MediaObject, MultimediaObject
from helm.common.general import ensure_file_downloaded
from helm.common.audio_utils import remove_audio_clips_with_zero_frames


class VocalSoundScenario(Scenario):
Expand Down Expand Up @@ -49,10 +50,9 @@ def get_instances(self, output_path: str) -> List[Instance]:
down_loading_path = os.path.join(output_path, "download")
ensure_file_downloaded(VocalSoundScenario.DOWNLOADING_URL, down_loading_path, unpack=True)
wav_save_dir = os.path.join(down_loading_path, "audio_16k")
remove_audio_clips_with_zero_frames(wav_save_dir)
for file_name in tqdm(os.listdir(wav_save_dir)):
local_audio_path = os.path.join(wav_save_dir, file_name)
assert os.path.exists(local_audio_path), f"Audio file does not exist at path: {local_audio_path}"

answer = file_name.split("_")[-1].split(".")[0]
input = Input(
multimedia_content=MultimediaObject([MediaObject(content_type="audio/wav", location=local_audio_path)])
Expand Down
14 changes: 14 additions & 0 deletions src/helm/common/audio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,17 @@ def use_ffmpeg_to_extract_audio_from_video(input_video_path: str, output_audio_p
subprocess.run(["ffmpeg", "-i", input_video_path, "-q:a", "0", "-map", "a", output_audio_path], check=True)
except (subprocess.CalledProcessError, FileNotFoundError):
raise ValueError("Please install ffmpeg using `bash install-shelm-extras.sh` first to extract audio files.")


def remove_audio_clips_with_zero_frames(audio_directory: str) -> None:
    """Delete every ``.wav`` file in ``audio_directory`` that has zero frames.

    Only direct entries of ``audio_directory`` whose name ends with ``.wav``
    (case-sensitive) are inspected; all other entries are left untouched.
    Files that cannot be opened as audio (a ``RuntimeError`` from
    ``sf.SoundFile``) are reported and skipped rather than deleted.

    Args:
        audio_directory: Path of the directory to scan. It is not searched
            recursively.

    Raises:
        OSError: If the directory cannot be listed or an empty file cannot be
            removed.
    """
    for filename in os.listdir(audio_directory):
        if not filename.endswith(".wav"):
            continue

        file_path = os.path.join(audio_directory, filename)
        try:
            # Only the open/inspect step lives in the try body so that errors
            # from the deletion below are never mistaken for an invalid file.
            with sf.SoundFile(file_path) as audio_file:
                has_zero_frames = len(audio_file) == 0
        except RuntimeError:
            print(f"Skipping invalid audio file: {filename}")
            continue

        # Delete only after the handle has been closed: removing a file that
        # is still open fails on Windows.
        if has_zero_frames:
            print(f"Removing empty file: {filename}")
            os.remove(file_path)

0 comments on commit a4f5e39

Please sign in to comment.