From b6dc21a4af93606c870ad947678ddb7556b0aa85 Mon Sep 17 00:00:00 2001 From: Nutto55 <44169425+Nutto55@users.noreply.github.com> Date: Mon, 5 Sep 2022 21:26:52 +0700 Subject: [PATCH] - Update download audio file using stream segment logic --- package-rfcx/rfcx/_audio.py | 133 +++++++++++++++++++++--------------- package-rfcx/rfcx/client.py | 4 +- 2 files changed, 80 insertions(+), 57 deletions(-) diff --git a/package-rfcx/rfcx/_audio.py b/package-rfcx/rfcx/_audio.py index bc12520..b0b9f33 100644 --- a/package-rfcx/rfcx/_audio.py +++ b/package-rfcx/rfcx/_audio.py @@ -1,67 +1,45 @@ -import datetime -import requests +"""RFCx audio segment information and download""" import shutil import os import concurrent.futures +import requests from rfcx._api_rfcx import stream_segments + def __save_file(url, local_path, token): """ Download the file from `url` and save it locally under `local_path` """ headers = { - "Authorization": "Bearer " + token, + 'Authorization': 'Bearer ' + token, 'Content-Type': 'application/json' } response = requests.get(url, headers=headers, stream=True) - if (response.status_code == 200): + if response.status_code == 200: with open(local_path, 'wb') as out_file: response.raw.decode_content = True shutil.copyfileobj(response.raw, out_file) - print('Saved {}'.format(local_path)) + print(f'Saved {local_path}') else: - print("Can not download", url) + print('Can not download', url) reason = response.json() - print("Reason:", response.status_code, reason["message"]) + print('Reason:', response.status_code, reason['message']) + def __local_audio_file_path(path, audio_name, audio_extension): - """ Create string for the name and the path """ - return path + '/' + audio_name + "." + audio_extension + """ Create string for the name and the path """ + return path + '/' + audio_name + '.' + audio_extension + def __generate_date_in_isoformat(date): """ Generate date in iso format ending with `Z` """ return date.replace(microsecond=0).isoformat() + 'Z' -def download_file(token, dest_path, stream_id, start_time, end_time, gain=1, file_ext='wav'): - """ Prepare `url` and `local_path` and save it using function `__save_file` - Args: - dest_path: Audio save path. - stream_id: Stream id to get the segment. - start_time: Minimum timestamp to get the audio. - end_time: Maximum timestamp to get the audio. (Should not more than 15 min range) - gain: (optional, default = 1) Input channel tone loudness - file_ext: (optional, default = 'wav') Extension for saving audio files. - - Returns: - None. - - Raises: - TypeError: if missing required arguements. - """ - start = __iso_to_rfcx_custom_format(__generate_date_in_isoformat(start_time)) - end = __iso_to_rfcx_custom_format(__generate_date_in_isoformat(end_time)) - audio_name = "{stream_id}_t{start_time}.{end_time}_g{gain}_f{file_ext}".format(stream_id=stream_id, - start_time=start, - end_time=end, - gain=gain, - file_ext=file_ext) - url = "https://media-api.rfcx.org/internal/assets/streams/" + audio_name + "." + file_ext - local_path = __local_audio_file_path(dest_path, audio_name, file_ext) - __save_file(url, local_path, token) def __iso_to_rfcx_custom_format(time): """Convert RFCx iso format to RFCx custom format""" return time.replace('-', '').replace(':', '').replace('.', '') + def __get_all_segments(token, stream_id, start, end): """Get all audio segment in the `start` and `end` time range""" all_segments = [] @@ -70,7 +48,12 @@ def __get_all_segments(token, stream_id, start, end): while not empty_segment: # No data will return empty array from server - segments = stream_segments(token, stream_id, start, end, limit=1000, offset=offset) + segments = stream_segments(token, + stream_id, + start, + end, + limit=1000, + offset=offset) if segments: all_segments.extend(segments) offset = offset + 1000 @@ -79,22 +62,58 @@ def __get_all_segments(token, stream_id, start, end): return all_segments -def __segment_download(save_path, gain, file_ext, segment, token): + +def __segment_download(save_path, stream_id, gain, file_ext, segment, token): """Download audio using the core api(v2)""" - stream_id = segment['stream']['id'] start = __iso_to_rfcx_custom_format(segment['start']) end = __iso_to_rfcx_custom_format(segment['end']) - custom_time_range = start + '.' + end - rfcx_audio_format = "{stream_id}_t{time}_rfull_g{gain}_f{file_ext}".format(stream_id=stream_id, - time=custom_time_range, - gain=gain, - file_ext=file_ext) - audio_name = "{}_{}_{}_gain{}".format(stream_id, start, segment['id'], gain) - url = "https://media-api.rfcx.org/internal/assets/streams/" + rfcx_audio_format + "." + file_ext + time = start + '.' + end + rfcx_audio_format = f'{stream_id}_t{time}_rfull_g{gain}_f{file_ext}' + audio_name = f'{stream_id}_{start}_{segment["id"]}_gain{gain}' + url = 'https://media-api.rfcx.org/internal/assets/streams/' + rfcx_audio_format + '.' + file_ext local_path = __local_audio_file_path(save_path, audio_name, file_ext) __save_file(url, local_path, token) -def download_file_segments(token, dest_path, stream, min_date, max_date, gain=1, file_ext='wav', parallel=True): + +def download_audio_file(token, + dest_path, + stream_id, + start_time, + end_time, + gain=1, + file_ext='wav'): + """ Prepare `url` and `local_path` and save it using function `__save_file` + Args: + dest_path: Audio save path. + stream_id: Stream id to get the segment. + start_time: Minimum timestamp to get the audio. + end_time: Maximum timestamp to get the audio. (Should not more than 15 min range) + gain: (optional, default = 1) Input channel tone loudness. + file_ext: (optional, default = 'wav') Extension for saving audio files. + + Returns: + None. + + Raises: + TypeError: if missing required arguements. + """ + start = __iso_to_rfcx_custom_format( + __generate_date_in_isoformat(start_time)) + end = __iso_to_rfcx_custom_format(__generate_date_in_isoformat(end_time)) + audio_name = f'{stream_id}_t{start}.{end}_g{gain}_f{file_ext}' + url = 'https://media-api.rfcx.org/internal/assets/streams/' + audio_name + '.' + file_ext + local_path = __local_audio_file_path(dest_path, audio_name, file_ext) + __save_file(url, local_path, token) + + +def download_audio_files(token, + dest_path, + stream, + min_date, + max_date, + gain=1, + file_ext='wav', + parallel=True): """ Download RFCx audio on specific time range using `stream_segments` to get audio segments information and save it using function `__save_file` Args: @@ -103,9 +122,9 @@ def download_file_segments(token, dest_path, stream, min_date, max_date, gain=1, stream: Identifies a stream/site min_date: Minimum timestamp to get the audio. max_date: Maximum timestamp to get the audio. - gain: (optional, default= 1) Input channel tone loudness + gain: (optional, default= 1) Input channel tone loudness. file_ext: (optional, default= 'wav') Extension for saving audio file. - parallel: (optional, default= True) Enable to parallel download audio from RFCx + parallel: (optional, default= True) Enable to parallel download audio from RFCx. Returns: None. @@ -123,17 +142,21 @@ def download_file_segments(token, dest_path, stream, min_date, max_date, gain=1, segments = __get_all_segments(token, stream, start, end) if segments: - print("Downloading {} audio from {}".format(len(segments), stream)) - if(parallel): - with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor: + print(f'Downloading {len(segments)} audio from {stream}') + if parallel: + with concurrent.futures.ThreadPoolExecutor( + max_workers=100) as executor: futures = [] for segment in segments: - futures.append(executor.submit(__segment_download, save_path, gain, file_ext, segment, token)) + futures.append( + executor.submit(__segment_download, save_path, stream, + gain, file_ext, segment, token)) futures, _ = concurrent.futures.wait(futures) else: for segment in segments: - __segment_download(save_path, gain, file_ext, segment, token) - print("Finish download on {}".format(stream)) + __segment_download(save_path, stream, gain, file_ext, segment, + token) + print(f'Finish download on {stream}') else: - print("No data found on {} - {} at {}".format(start[:-10], end[:-10], stream)) + print(f'No data found on {start[:-10]} - {end[:-10]} at {stream}') diff --git a/package-rfcx/rfcx/client.py b/package-rfcx/rfcx/client.py index f56b7b7..8e97a6e 100644 --- a/package-rfcx/rfcx/client.py +++ b/package-rfcx/rfcx/client.py @@ -64,7 +64,7 @@ def download_audio_file(self, print("end_time is not type datetime") return - return audio.download_file(self.credentials.token, dest_path, stream, + return audio.download_audio_file(self.credentials.token, dest_path, stream, start_time, end_time, gain, file_ext) def download_audio_files(self, @@ -114,7 +114,7 @@ def download_audio_files(self, if not os.path.exists(dest_path): os.makedirs(dest_path) - return audio.download_file_segments(self.credentials.token, dest_path, + return audio.download_audio_files(self.credentials.token, dest_path, stream, min_date, max_date, gain, file_ext, parallel)