Skip to content

Commit

Permalink
Merge pull request #5 from IDRnD/brnch-update
Browse files Browse the repository at this point in the history
[upd] Update lib version and add download warning
  • Loading branch information
AntonOkhotnikov authored Aug 22, 2023
2 parents e0f066c + ebad9c3 commit 0f537d8
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 13 deletions.
2 changes: 2 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ cd VoxTube/examples
python3 load_example.py ../resources/meta/UC-9GWCoQoMr_ey6AMhClStQ.json <DATASET_ROOT>

# example of downloading the whole dataset in N parallel jobs
# WARNING: running too many parallel jobs may trigger HTTP Error 429
# (Too Many Requests); if that happens, decrease the -j parameter
python3 load_all_examples.py -r <DATASET_ROOT> -j N
```

Expand Down
20 changes: 8 additions & 12 deletions examples/load_all_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@
import argparse
import multiprocessing as mp
import os
import subprocess as sp
from functools import partial
from pathlib import Path

from tqdm import tqdm

from load_example import download_process_and_cut_channel_videos

def load_json(json_path, dataset_root, load_script_path):

def load_json(json_path, dataset_root):
try:
status = sp.run(["python3", load_script_path, str(json_path), dataset_root])
return status
download_process_and_cut_channel_videos(
json_path,
dataset_root
)
except Exception as e:
print(f'Error while loading channel {json_path}')
print(f'Exception: {str(e)}')
Expand All @@ -23,20 +26,13 @@ def main(dataset_root, nj=1):
fwd = os.path.dirname(os.path.realpath(__file__))
meta_path = Path(f'{fwd}/../resources/meta')
json_paths = sorted(list(meta_path.glob('*.json')))
path_to_download_script = f'{fwd}/load_example.py'

# Run downloading
load_job = partial(
load_json,
dataset_root=dataset_root,
load_script_path=path_to_download_script
dataset_root=dataset_root
)

# with mp.Pool(nj) as pool:
# _ = pool.imap(
# load_job, tqdm(json_paths, total=len(json_paths))
# )

with tqdm(total=len(json_paths)) as pb:
with mp.Pool(nj) as pool:
for _ in pool.imap(load_job, json_paths):
Expand Down
2 changes: 1 addition & 1 deletion examples/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
yt-dlp==2023.3.3
yt-dlp==2023.7.6
soundfile==0.12.1
tqdm==4.64.1

0 comments on commit 0f537d8

Please sign in to comment.