From 3337c2504a2a4e7969c8f2858910165450396404 Mon Sep 17 00:00:00 2001 From: Danilo Pejovic Date: Mon, 4 Mar 2024 11:45:08 +0100 Subject: [PATCH 1/8] Adding mic recording to sdk --- Dockerfile | 5 ++-- rae_sdk/rae_sdk/robot/audio.py | 46 +++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index f0368ef..25e7663 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,9 +24,10 @@ RUN apt-get update && apt-get -y install --no-install-recommends \ git \ htop \ libsndfile1-dev \ - libsndfile1 + libsndfile1 \ + ffmpeg -RUN pip3 install openai +RUN pip3 install openai ffmpeg-python ENV WS=/ws RUN mkdir -p $WS/src diff --git a/rae_sdk/rae_sdk/robot/audio.py b/rae_sdk/rae_sdk/robot/audio.py index e31dbcd..a570be0 100644 --- a/rae_sdk/rae_sdk/robot/audio.py +++ b/rae_sdk/rae_sdk/robot/audio.py @@ -2,6 +2,8 @@ import random import logging as log from ament_index_python import get_package_share_directory +import base64 +import ffmpeg from rae_msgs.srv import PlayAudio @@ -33,11 +35,53 @@ def __init__(self, ros_interface): log.info("Audio Controller ready") - def play_audio_file(self, audio_file_path): + def play_audio_file(self, audio_file_path, delete_after_play=False): + """ + Play an audio file and optionally delete it after playback. + + Args: + audio_file_path (str): Path to the audio file. + delete_after_play (bool, optional): Whether to delete the audio file after playback. Defaults to False. + + Returns: + res: Response from the service call. + """ req = PlayAudio.Request() req.file_location = audio_file_path res = self._ros_interface.call_async_srv('/play_audio', req) + + # Delete the file if requested + if delete_after_play: + try: + os.remove(audio_file_path) + except OSError as e: + print(f"Error deleting file: {e}") + return res + + def save_recorded_sound(self, audio_data, output_file="/app/mic_recording.wav"): + """ + Decode the Base64 audio data and save it as a WAV file. + + Args: + audio_data (str): Base64 encoded audio data. + output_file (str, optional): Path to save the WAV file. Defaults to "/app/output.wav". + """ + # Decode Base64 data + binary_data = base64.b64decode(audio_data) + + # Convert WebM to WAV using ffmpeg + output, _ = ( + ffmpeg.input('pipe:', format='webm') + .output('pipe:', format='wav') + .run(input=binary_data, capture_stdout=True, capture_stderr=True) + ) + + # Write the output to the specified WAV file + with open(output_file, 'wb') as wave_file: + wave_file.write(output) + + def honk(self): horn_path = os.path.join(self._assets_path, 'sfx', 'horn.mp3') From 8ed3aacd5c9ec9a956f41c50e37632698c85d95f Mon Sep 17 00:00:00 2001 From: Danilo Pejovic Date: Mon, 4 Mar 2024 18:47:59 +0100 Subject: [PATCH 2/8] Improving sound quality a bit, reverting changes to play audio files --- rae_hw/src/peripherals/speakers.cpp | 2 +- rae_sdk/rae_sdk/robot/audio.py | 23 +---------------------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/rae_hw/src/peripherals/speakers.cpp b/rae_hw/src/peripherals/speakers.cpp index 09ca7fb..d3b4585 100644 --- a/rae_hw/src/peripherals/speakers.cpp +++ b/rae_hw/src/peripherals/speakers.cpp @@ -133,7 +133,7 @@ void SpeakersNode::play_wav(const char* wav_file) { int32_t* buffer_wav = new int32_t[BUFFER_SIZE * sfinfo.channels]; // Use int32_t for 32-bit format sf_count_t readCount; - const float gain = 64.0f; // Adjust this factor for desired gain + const float gain = 16.0f; // Adjust this factor for desired gain while((readCount = sf_readf_int(file, buffer_wav, BUFFER_SIZE)) > 0) { // Apply gain to the samples diff --git a/rae_sdk/rae_sdk/robot/audio.py b/rae_sdk/rae_sdk/robot/audio.py index a570be0..bef72fd 100644 --- a/rae_sdk/rae_sdk/robot/audio.py +++ b/rae_sdk/rae_sdk/robot/audio.py @@ -4,7 +4,6 @@ from ament_index_python import get_package_share_directory import base64 import ffmpeg - from rae_msgs.srv import PlayAudio @@ -35,28 +34,10 @@ def __init__(self, ros_interface): log.info("Audio Controller ready") - def play_audio_file(self, audio_file_path, delete_after_play=False): - """ - Play an audio file and optionally delete it after playback. - - Args: - audio_file_path (str): Path to the audio file. - delete_after_play (bool, optional): Whether to delete the audio file after playback. Defaults to False. - - Returns: - res: Response from the service call. - """ + def play_audio_file(self, audio_file_path): req = PlayAudio.Request() req.file_location = audio_file_path res = self._ros_interface.call_async_srv('/play_audio', req) - - # Delete the file if requested - if delete_after_play: - try: - os.remove(audio_file_path) - except OSError as e: - print(f"Error deleting file: {e}") - return res def save_recorded_sound(self, audio_data, output_file="/app/mic_recording.wav"): @@ -81,8 +62,6 @@ def save_recorded_sound(self, audio_data, output_file="/app/mic_recording.wav"): with open(output_file, 'wb') as wave_file: wave_file.write(output) - - def honk(self): horn_path = os.path.join(self._assets_path, 'sfx', 'horn.mp3') res = self.play_audio_file(horn_path) From 2a9182a8de3ecfe6ec813f0f03c7164e3cfad0cf Mon Sep 17 00:00:00 2001 From: Danilo Pejovic <115164734+danilo-pejovic@users.noreply.github.com> Date: Thu, 7 Mar 2024 11:04:44 +0100 Subject: [PATCH 3/8] Remove double installation of ffmpeg --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 25e7663..20bcb6a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,8 +24,7 @@ RUN apt-get update && apt-get -y install --no-install-recommends \ git \ htop \ libsndfile1-dev \ - libsndfile1 \ - ffmpeg + libsndfile1 RUN pip3 install openai ffmpeg-python From a01991ef527694a6ddbd32864e1b530e024610ee Mon Sep 17 00:00:00 2001 From: Danilo Pejovic Date: Thu, 7 Mar 2024 11:20:13 +0100 Subject: [PATCH 4/8] Fixing formatting --- rae_sdk/rae_sdk/robot/audio.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rae_sdk/rae_sdk/robot/audio.py b/rae_sdk/rae_sdk/robot/audio.py index bef72fd..604e787 100644 --- a/rae_sdk/rae_sdk/robot/audio.py +++ b/rae_sdk/rae_sdk/robot/audio.py @@ -43,10 +43,13 @@ def play_audio_file(self, audio_file_path): def save_recorded_sound(self, audio_data, output_file="/app/mic_recording.wav"): """ Decode the Base64 audio data and save it as a WAV file. - - Args: + + Attributes + ---------- audio_data (str): Base64 encoded audio data. output_file (str, optional): Path to save the WAV file. Defaults to "/app/output.wav". + + """ # Decode Base64 data binary_data = base64.b64decode(audio_data) From 550635e1e0ce86ccbdf3ce29cf9e472777ad210d Mon Sep 17 00:00:00 2001 From: Danilo Pejovic Date: Thu, 7 Mar 2024 11:35:44 +0100 Subject: [PATCH 5/8] Changing default path for saving files --- rae_sdk/rae_sdk/robot/audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rae_sdk/rae_sdk/robot/audio.py b/rae_sdk/rae_sdk/robot/audio.py index 604e787..90bf796 100644 --- a/rae_sdk/rae_sdk/robot/audio.py +++ b/rae_sdk/rae_sdk/robot/audio.py @@ -40,7 +40,7 @@ def play_audio_file(self, audio_file_path): res = self._ros_interface.call_async_srv('/play_audio', req) return res - def save_recorded_sound(self, audio_data, output_file="/app/mic_recording.wav"): + def save_recorded_sound(self, audio_data, output_file="/tmp/mic_recording.wav"): """ Decode the Base64 audio data and save it as a WAV file. From b8d7e478b8841282f2fc7c3f6850e0e0abfbb594 Mon Sep 17 00:00:00 2001 From: Danilo Pejovic Date: Thu, 7 Mar 2024 13:27:27 +0100 Subject: [PATCH 6/8] Adding gain/volume as optional argument to service call --- rae_hw/include/rae_hw/peripherals/speakers.hpp | 2 +- rae_hw/src/peripherals/speakers.cpp | 8 ++++---- rae_msgs/srv/PlayAudio.srv | 1 + rae_sdk/rae_sdk/robot/audio.py | 5 +++-- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/rae_hw/include/rae_hw/peripherals/speakers.hpp b/rae_hw/include/rae_hw/peripherals/speakers.hpp index 8ee4314..6a26670 100644 --- a/rae_hw/include/rae_hw/peripherals/speakers.hpp +++ b/rae_hw/include/rae_hw/peripherals/speakers.hpp @@ -30,7 +30,7 @@ class SpeakersNode : public rclcpp_lifecycle::LifecycleNode { private: void play_mp3(const char*); - void play_wav(const char*); + void play_wav(const char*, const float); rclcpp::Service::SharedPtr play_audio_service_; void play_audio_service_callback(const std::shared_ptr request, diff --git a/rae_hw/src/peripherals/speakers.cpp b/rae_hw/src/peripherals/speakers.cpp index d3b4585..938f8d0 100644 --- a/rae_hw/src/peripherals/speakers.cpp +++ b/rae_hw/src/peripherals/speakers.cpp @@ -47,11 +47,13 @@ CallbackReturn SpeakersNode::on_shutdown(const rclcpp_lifecycle::State& /*previo void SpeakersNode::play_audio_service_callback(const std::shared_ptr request, const std::shared_ptr response) { const std::string& file_location = request->file_location; + const float gain = request->gain; + // Check if the file ends with ".wav" if(file_location.size() >= 4 && file_location.substr(file_location.size() - 4) == ".wav") { // Call the play_wav function - play_wav(file_location.c_str()); + play_wav(file_location.c_str(), gain); response->success = true; return; } @@ -110,7 +112,7 @@ void SpeakersNode::play_mp3(const char* mp3_file) { return; } -void SpeakersNode::play_wav(const char* wav_file) { +void SpeakersNode::play_wav(const char* wav_file, const float gain) { // Open WAV file SF_INFO sfinfo; SNDFILE* file = sf_open(wav_file, SFM_READ, &sfinfo); @@ -133,8 +135,6 @@ void SpeakersNode::play_wav(const char* wav_file) { int32_t* buffer_wav = new int32_t[BUFFER_SIZE * sfinfo.channels]; // Use int32_t for 32-bit format sf_count_t readCount; - const float gain = 16.0f; // Adjust this factor for desired gain - while((readCount = sf_readf_int(file, buffer_wav, BUFFER_SIZE)) > 0) { // Apply gain to the samples for(int i = 0; i < readCount * sfinfo.channels; ++i) { diff --git a/rae_msgs/srv/PlayAudio.srv b/rae_msgs/srv/PlayAudio.srv index d5f7da2..fd76c89 100644 --- a/rae_msgs/srv/PlayAudio.srv +++ b/rae_msgs/srv/PlayAudio.srv @@ -1,3 +1,4 @@ string file_location +float32 gain 16.0 --- bool success diff --git a/rae_sdk/rae_sdk/robot/audio.py b/rae_sdk/rae_sdk/robot/audio.py index 90bf796..86c2194 100644 --- a/rae_sdk/rae_sdk/robot/audio.py +++ b/rae_sdk/rae_sdk/robot/audio.py @@ -34,13 +34,14 @@ def __init__(self, ros_interface): log.info("Audio Controller ready") - def play_audio_file(self, audio_file_path): + def play_audio_file(self, audio_file_path, gain = 1.0): req = PlayAudio.Request() req.file_location = audio_file_path + req.gain = gain res = self._ros_interface.call_async_srv('/play_audio', req) return res - def save_recorded_sound(self, audio_data, output_file="/tmp/mic_recording.wav"): + def save_recorded_sound(self, audio_data, output_file="/app/mic_recording.wav"): """ Decode the Base64 audio data and save it as a WAV file. From f9bc54ebc69fff4c5ad65c7bc84565131e24da28 Mon Sep 17 00:00:00 2001 From: Danilo Pejovic Date: Thu, 7 Mar 2024 13:44:09 +0100 Subject: [PATCH 7/8] Formatting --- rae_hw/src/peripherals/speakers.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/rae_hw/src/peripherals/speakers.cpp b/rae_hw/src/peripherals/speakers.cpp index 938f8d0..7d92500 100644 --- a/rae_hw/src/peripherals/speakers.cpp +++ b/rae_hw/src/peripherals/speakers.cpp @@ -49,7 +49,6 @@ void SpeakersNode::play_audio_service_callback(const std::shared_ptrfile_location; const float gain = request->gain; - // Check if the file ends with ".wav" if(file_location.size() >= 4 && file_location.substr(file_location.size() - 4) == ".wav") { // Call the play_wav function From 71869b8c79bc491bda9f92c589c35dc6d8bb3744 Mon Sep 17 00:00:00 2001 From: Danilo Pejovic <115164734+danilo-pejovic@users.noreply.github.com> Date: Thu, 7 Mar 2024 13:57:49 +0100 Subject: [PATCH 8/8] Setting default gain to 1.0 --- rae_msgs/srv/PlayAudio.srv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rae_msgs/srv/PlayAudio.srv b/rae_msgs/srv/PlayAudio.srv index fd76c89..ca2bef7 100644 --- a/rae_msgs/srv/PlayAudio.srv +++ b/rae_msgs/srv/PlayAudio.srv @@ -1,4 +1,4 @@ string file_location -float32 gain 16.0 +float32 gain 1.0 --- bool success