- Update download audio file using stream segment logic

rfcx · Sep 5, 2022 · b6dc21a · b6dc21a
1 parent 894bb11
commit b6dc21a
Show file tree

Hide file tree

Showing 2 changed files with 80 additions and 57 deletions.
diff --git a/package-rfcx/rfcx/_audio.py b/package-rfcx/rfcx/_audio.py
@@ -1,67 +1,45 @@
-import datetime
-import requests
+"""RFCx audio segment information and download"""
 import shutil
 import os
 import concurrent.futures
+import requests
 from rfcx._api_rfcx import stream_segments
 
+
 def __save_file(url, local_path, token):
     """ Download the file from `url` and save it locally under `local_path` """
     headers = {
-        "Authorization": "Bearer " + token,
+        'Authorization': 'Bearer ' + token,
         'Content-Type': 'application/json'
     }
     response = requests.get(url, headers=headers, stream=True)
 
-    if (response.status_code == 200):
+    if response.status_code == 200:
         with open(local_path, 'wb') as out_file:
             response.raw.decode_content = True
             shutil.copyfileobj(response.raw, out_file)
-            print('Saved {}'.format(local_path))
+            print(f'Saved {local_path}')
     else:
-        print("Can not download", url)
+        print('Can not download', url)
         reason = response.json()
-        print("Reason:", response.status_code, reason["message"])
+        print('Reason:', response.status_code, reason['message'])
+
 
 def __local_audio_file_path(path, audio_name, audio_extension):
-    """ Create string for the name and the path """    
-    return path + '/' + audio_name + "." + audio_extension
+    """ Create string for the name and the path """
+    return path + '/' + audio_name + '.' + audio_extension
+
 
 def __generate_date_in_isoformat(date):
     """ Generate date in iso format ending with `Z` """
     return date.replace(microsecond=0).isoformat() + 'Z'
 
-def download_file(token, dest_path, stream_id, start_time, end_time, gain=1, file_ext='wav'):
-    """ Prepare `url` and `local_path` and save it using function `__save_file` 
-        Args:
-            dest_path: Audio save path.
-            stream_id: Stream id to get the segment.
-            start_time: Minimum timestamp to get the audio.
-            end_time: Maximum timestamp to get the audio. (Should not more than 15 min range)
-            gain: (optional, default = 1) Input channel tone loudness
-            file_ext: (optional, default = 'wav') Extension for saving audio files.
-
-        Returns:
-            None.
-
-        Raises:
-            TypeError: if missing required arguements.
-    """
-    start = __iso_to_rfcx_custom_format(__generate_date_in_isoformat(start_time))
-    end = __iso_to_rfcx_custom_format(__generate_date_in_isoformat(end_time))
-    audio_name = "{stream_id}_t{start_time}.{end_time}_g{gain}_f{file_ext}".format(stream_id=stream_id,
-                                                                                    start_time=start,
-                                                                                    end_time=end,
-                                                                                    gain=gain,
-                                                                                    file_ext=file_ext)
-    url = "https://media-api.rfcx.org/internal/assets/streams/" + audio_name + "." + file_ext
-    local_path = __local_audio_file_path(dest_path, audio_name, file_ext)
-    __save_file(url, local_path, token)
 
 def __iso_to_rfcx_custom_format(time):
     """Convert RFCx iso format to RFCx custom format"""
     return time.replace('-', '').replace(':', '').replace('.', '')
 
+
 def __get_all_segments(token, stream_id, start, end):
     """Get all audio segment in the `start` and `end` time range"""
     all_segments = []
@@ -70,7 +48,12 @@ def __get_all_segments(token, stream_id, start, end):
 
     while not empty_segment:
         # No data will return empty array from server
-        segments = stream_segments(token, stream_id, start, end, limit=1000, offset=offset)
+        segments = stream_segments(token,
+                                   stream_id,
+                                   start,
+                                   end,
+                                   limit=1000,
+                                   offset=offset)
         if segments:
             all_segments.extend(segments)
             offset = offset + 1000
@@ -79,22 +62,58 @@ def __get_all_segments(token, stream_id, start, end):
 
     return all_segments
 
-def __segment_download(save_path, gain, file_ext, segment, token):
+
+def __segment_download(save_path, stream_id, gain, file_ext, segment, token):
     """Download audio using the core api(v2)"""
-    stream_id = segment['stream']['id']
     start = __iso_to_rfcx_custom_format(segment['start'])
     end = __iso_to_rfcx_custom_format(segment['end'])
-    custom_time_range = start + '.' + end
-    rfcx_audio_format = "{stream_id}_t{time}_rfull_g{gain}_f{file_ext}".format(stream_id=stream_id,
-                                                                                                time=custom_time_range,
-                                                                                                gain=gain,
-                                                                                                file_ext=file_ext)
-    audio_name = "{}_{}_{}_gain{}".format(stream_id, start, segment['id'], gain)
-    url = "https://media-api.rfcx.org/internal/assets/streams/" + rfcx_audio_format + "." + file_ext
+    time = start + '.' + end
+    rfcx_audio_format = f'{stream_id}_t{time}_rfull_g{gain}_f{file_ext}'
+    audio_name = f'{stream_id}_{start}_{segment["id"]}_gain{gain}'
+    url = 'https://media-api.rfcx.org/internal/assets/streams/' + rfcx_audio_format + '.' + file_ext
     local_path = __local_audio_file_path(save_path, audio_name, file_ext)
     __save_file(url, local_path, token)
 
-def download_file_segments(token, dest_path, stream, min_date, max_date, gain=1, file_ext='wav', parallel=True):
+
+def download_audio_file(token,
+                        dest_path,
+                        stream_id,
+                        start_time,
+                        end_time,
+                        gain=1,
+                        file_ext='wav'):
+    """ Prepare `url` and `local_path` and save it using function `__save_file`
+        Args:
+            dest_path: Audio save path.
+            stream_id: Stream id to get the segment.
+            start_time: Minimum timestamp to get the audio.
+            end_time: Maximum timestamp to get the audio. (Should not be more than a 15 min range)
+            gain: (optional, default = 1) Input channel tone loudness.
+            file_ext: (optional, default = 'wav') Extension for saving audio files.
+
+        Returns:
+            None.
+
+        Raises:
+            TypeError: if missing required arguments.
+    """
+    start = __iso_to_rfcx_custom_format(
+        __generate_date_in_isoformat(start_time))
+    end = __iso_to_rfcx_custom_format(__generate_date_in_isoformat(end_time))
+    audio_name = f'{stream_id}_t{start}.{end}_g{gain}_f{file_ext}'
+    url = 'https://media-api.rfcx.org/internal/assets/streams/' + audio_name + '.' + file_ext
+    local_path = __local_audio_file_path(dest_path, audio_name, file_ext)
+    __save_file(url, local_path, token)
+
+
+def download_audio_files(token,
+                         dest_path,
+                         stream,
+                         min_date,
+                         max_date,
+                         gain=1,
+                         file_ext='wav',
+                         parallel=True):
     """ Download RFCx audio on specific time range using `stream_segments` to get audio segments information
         and save it using function `__save_file`
         Args:
@@ -103,9 +122,9 @@ def download_file_segments(token, dest_path, stream, min_date, max_date, gain=1,
             stream: Identifies a stream/site
             min_date: Minimum timestamp to get the audio.
             max_date: Maximum timestamp to get the audio.
-            gain: (optional, default= 1) Input channel tone loudness
+            gain: (optional, default= 1) Input channel tone loudness.
             file_ext: (optional, default= 'wav') Extension for saving audio file.
-            parallel: (optional, default= True) Enable to parallel download audio from RFCx
+            parallel: (optional, default= True) Enable parallel download of audio from RFCx.
 
         Returns:
             None.
@@ -123,17 +142,21 @@ def download_file_segments(token, dest_path, stream, min_date, max_date, gain=1,
     segments = __get_all_segments(token, stream, start, end)
 
     if segments:
-        print("Downloading {} audio from {}".format(len(segments), stream))
-        if(parallel):
-            with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
+        print(f'Downloading {len(segments)} audio from {stream}')
+        if parallel:
+            with concurrent.futures.ThreadPoolExecutor(
+                    max_workers=100) as executor:
                 futures = []
                 for segment in segments:
-                    futures.append(executor.submit(__segment_download, save_path, gain, file_ext, segment, token))
+                    futures.append(
+                        executor.submit(__segment_download, save_path, stream,
+                                        gain, file_ext, segment, token))
 
                 futures, _ = concurrent.futures.wait(futures)
         else:
             for segment in segments:
-                __segment_download(save_path, gain, file_ext, segment, token)
-        print("Finish download on {}".format(stream))
+                __segment_download(save_path, stream, gain, file_ext, segment,
+                                   token)
+        print(f'Finish download on {stream}')
     else:
-        print("No data found on {} - {} at {}".format(start[:-10], end[:-10], stream))
+        print(f'No data found on {start[:-10]} - {end[:-10]} at {stream}')
diff --git a/package-rfcx/rfcx/client.py b/package-rfcx/rfcx/client.py
@@ -64,7 +64,7 @@ def download_audio_file(self,
             print("end_time is not type datetime")
             return
 
-        return audio.download_file(self.credentials.token, dest_path, stream,
+        return audio.download_audio_file(self.credentials.token, dest_path, stream,
                                    start_time, end_time, gain, file_ext)
 
     def download_audio_files(self,
@@ -114,7 +114,7 @@ def download_audio_files(self,
         if not os.path.exists(dest_path):
             os.makedirs(dest_path)
 
-        return audio.download_file_segments(self.credentials.token, dest_path,
+        return audio.download_audio_files(self.credentials.token, dest_path,
                                             stream, min_date, max_date, gain,
                                             file_ext, parallel)