imported

akscf · Jan 2, 2024 · 1cf4b0c · 1cf4b0c
1 parent ca0059b
commit 1cf4b0c
Show file tree

Hide file tree

Showing 7 changed files with 408 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -1 +1,16 @@
-# mod_piper_tts
+<p>
+  FreeSWITCH TTS module that provides a simple frontend for <a href="https://github.com/rhasspy/piper" target="_blank">piper</a>.<br>
+</p>
+
+### Usage example
+```XML
+<extension name="tts-test">
+    <condition field="destination_number" expression="^(3333)$">
+        <action application="answer"/>
+        <action application="speak" data="piper|en|Hello world!"/>
+        <action application="sleep" data="1000"/>
+        <action application="hangup"/>
+    </condition>
+</extension>
+
+```
diff --git a/sources/Makefile.am b/sources/Makefile.am
@@ -0,0 +1,11 @@
+
+include $(top_srcdir)/build/modmake.rulesam
+MODNAME=mod_piper_tts
+
+mod_LTLIBRARIES = mod_piper_tts.la
+mod_piper_tts_la_SOURCES  = mod_piper_tts.c utils.c
+mod_piper_tts_la_CFLAGS   = $(AM_CFLAGS) -I. -Wno-unused-variable -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-label -Wno-declaration-after-statement
+mod_piper_tts_la_LIBADD   = $(switch_builddir)/libfreeswitch.la
+mod_piper_tts_la_LDFLAGS  = -avoid-version -module -no-undefined -shared
+
+$(am_mod_piper_tts_la_OBJECTS): mod_piper_tts.h
diff --git a/sources/conf/autoload_configs/piper_tts.conf.xml b/sources/conf/autoload_configs/piper_tts.conf.xml
@@ -0,0 +1,15 @@
+<configuration name="piper_tts.conf" description="">
+    <settings>
+        <param name="cache-path" value="/tmp/piper-tts-cache" />
+        <param name="cache-disable" value="false" />
+        <param name="piper-bin" value="/opt/piper/lib/piper" />
+        <param name="piper-opts" value="" />
+        <param name="voice-name-as-language-code" value="true" />
+    </settings>
+
+    <models>
+        <model language="en" model="/opt/piper/models/en_US-lessac-medium.onnx" />
+        <model language="ru" model="/opt/piper/models/ru_RU-irina-medium.onnx" />
+    </models>
+
+</configuration>
diff --git a/sources/conf/dialplan/example.xml b/sources/conf/dialplan/example.xml
@@ -0,0 +1,7 @@
+<extension name="google-tts">
+ <condition field="destination_number" expression="^(3111)$">
+  <action application="answer"/>
+  <action application="speak" data="piper|en|Hello world!"/>
+  <action application="hangup"/>
+ </condition>
+</extension>
diff --git a/sources/mod_piper_tts.c b/sources/mod_piper_tts.c
@@ -0,0 +1,281 @@
+/**
+ * (C)2023 aks
+ * https://akscf.me/
+ * https://github.com/akscf/
+ **/
+#include "mod_piper_tts.h"
+
+globals_t globals; /* module-wide state; zeroed and filled in by mod_piper_tts_load */
+
+SWITCH_MODULE_LOAD_FUNCTION(mod_piper_tts_load); /* forward declaration of the load hook defined below */
+SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_piper_tts_shutdown); /* forward declaration of the shutdown hook defined below */
+SWITCH_MODULE_DEFINITION(mod_piper_tts, mod_piper_tts_load, mod_piper_tts_shutdown, NULL); /* registers the load/shutdown hooks; the last argument (presumably the runtime hook) is NULL */
+
+// ------------------------------------------------------------------------------------------------------------------------------------------------------------
+/**
+ * Speech-interface "open" callback.
+ *
+ * Allocates the per-handle context from the handle's memory pool, records the
+ * requested voice / sample rate / channel count and resolves the piper model
+ * for the handle's language. The language is the voice name when the
+ * "voice-name-as-language-code" setting is on and a voice was given,
+ * otherwise "en". When caching is disabled a unique (UUID based) output file
+ * name is prepared up front.
+ *
+ * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when no model is
+ *         registered for the language.
+ */
+static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) {
+    switch_memory_pool_t *pool = sh->memory_pool;
+    tts_ctx_t *ctx = switch_core_alloc(pool, sizeof(tts_ctx_t));
+
+    ctx->pool = pool;
+    ctx->fhnd = switch_core_alloc(pool, sizeof(switch_file_handle_t));
+    ctx->voice = switch_core_strdup(pool, voice);
+    if(globals.fl_voice_as_lang_code && voice) {
+        ctx->language = switch_core_strdup(pool, voice);
+    } else {
+        ctx->language = "en";
+    }
+    ctx->channels = channels;
+    ctx->samplerate = samplerate;
+
+    /* publish the context before any failure path so speech_close() can find it */
+    sh->private_info = ctx;
+
+    if(ctx->language) {
+        ctx->model_info = piper_lookup_model(ctx->language);
+        if(!ctx->model_info) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't find model for language: '%s'\n", ctx->language);
+            return SWITCH_STATUS_FALSE;
+        }
+    }
+
+    if(globals.fl_cache_disabled) {
+        char uuid_str[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 };
+
+        /* no cache: every handle writes to its own throw-away file */
+        switch_uuid_str(uuid_str, sizeof(uuid_str));
+        ctx->dst_fname = switch_core_sprintf(pool, "%s%s%s.%s", globals.cache_path, SWITCH_PATH_SEPARATOR, uuid_str, PIPER_FILE_ENCODING);
+    }
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/**
+ * Speech-interface "close" callback.
+ *
+ * Closes the audio file if it is still open and, when caching is disabled,
+ * removes the handle's output file from disk.
+ *
+ * @return always SWITCH_STATUS_SUCCESS.
+ */
+static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) {
+    tts_ctx_t *ctx = (tts_ctx_t *) sh->private_info;
+
+    assert(ctx != NULL);
+
+    if(switch_test_flag(ctx->fhnd, SWITCH_FILE_OPEN)) {
+        switch_core_file_close(ctx->fhnd);
+    }
+
+    /* with the cache off the file belongs to this handle only, so drop it */
+    if(ctx->dst_fname && globals.fl_cache_disabled) {
+        unlink(ctx->dst_fname);
+    }
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/**
+ * Speech-interface "feed" callback.
+ *
+ * Makes sure a rendering of `text` exists on disk (running the external piper
+ * binary through the shell when needed) and opens it on the context's file
+ * handle so that speech_read_tts() can stream it.
+ *
+ * File selection:
+ *  - cache enabled:  the name is the MD5 of "<model path>|<text>", recomputed
+ *                    on every feed, so different texts or models never share
+ *                    a file and a handle can be fed more than once;
+ *  - cache disabled: the per-handle file prepared in speech_open() is removed
+ *                    and synthesized again on every feed.
+ *
+ * @param sh    speech handle; sh->private_info must hold the tts_ctx_t.
+ * @param text  text to synthesize; NULL is rejected.
+ * @param flags unused.
+ * @return SWITCH_STATUS_SUCCESS when the audio file is open for reading,
+ *         SWITCH_STATUS_FALSE on any failure (no model, piper failed,
+ *         output missing, file could not be opened).
+ */
+static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;
+    switch_status_t status = SWITCH_STATUS_SUCCESS;
+    char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 };
+    switch_bool_t synthesize = SWITCH_FALSE;
+
+    assert(tts_ctx != NULL);
+
+    if(!text) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Nothing to synthesize (text is NULL)\n");
+        return SWITCH_STATUS_FALSE;
+    }
+
+    /* the handle may be fed several times; release whatever the previous feed left open */
+    if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) {
+        switch_core_file_close(tts_ctx->fhnd);
+    }
+
+    /* the model is part of the cache key, so it has to be known before the file name is built */
+    if(!tts_ctx->model_info) {
+        if(tts_ctx->language) {
+            tts_ctx->model_info = piper_lookup_model(tts_ctx->language);
+        }
+        if(!tts_ctx->model_info) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't find model for language: '%s'\n", (tts_ctx->language ? tts_ctx->language : ""));
+            return SWITCH_STATUS_FALSE;
+        }
+    }
+
+    if(globals.fl_cache_disabled && tts_ctx->dst_fname) {
+        /* scratch file of this handle: remove the previous rendering so stale audio is never replayed */
+        unlink(tts_ctx->dst_fname);
+        synthesize = SWITCH_TRUE;
+    } else {
+        char *cache_key = switch_mprintf("%s|%s", tts_ctx->model_info->model, text);
+
+        if(!cache_key) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "mem fail\n");
+            return SWITCH_STATUS_FALSE;
+        }
+        switch_md5_string(digest, (void *) cache_key, strlen(cache_key));
+        switch_safe_free(cache_key);
+
+        tts_ctx->dst_fname = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s", globals.cache_path, SWITCH_PATH_SEPARATOR, digest, PIPER_FILE_ENCODING);
+        synthesize = (switch_file_exists(tts_ctx->dst_fname, tts_ctx->pool) != SWITCH_STATUS_SUCCESS ? SWITCH_TRUE : SWITCH_FALSE);
+    }
+
+    if(synthesize) {
+        /* the text comes from the dialplan/caller side, so it is shell-quoted before being piped into piper */
+        char *textq = switch_util_quote_shell_arg(text);
+        char *cmd = NULL;
+
+        if(textq) {
+            cmd = switch_mprintf("echo %s | %s %s --model '%s' --output_file '%s'", textq, globals.piper_bin, (globals.piper_opts ? globals.piper_opts : ""), tts_ctx->model_info->model, tts_ctx->dst_fname);
+        }
+
+        if(!cmd) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "mem fail\n");
+            status = SWITCH_STATUS_FALSE;
+        } else if(switch_system(cmd, SWITCH_TRUE) != 0) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to execute command: %s\n", cmd);
+            status = SWITCH_STATUS_FALSE;
+        }
+
+        switch_safe_free(textq);
+        switch_safe_free(cmd);
+
+        if(status != SWITCH_STATUS_SUCCESS) {
+            return status;
+        }
+
+        if(switch_file_exists(tts_ctx->dst_fname, tts_ctx->pool) != SWITCH_STATUS_SUCCESS) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "File not found: %s\n", tts_ctx->dst_fname);
+            return SWITCH_STATUS_FALSE;
+        }
+    }
+
+    if(switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_fname, tts_ctx->channels, tts_ctx->samplerate, (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), NULL) != SWITCH_STATUS_SUCCESS) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't open file: %s\n", tts_ctx->dst_fname);
+        return SWITCH_STATUS_FALSE;
+    }
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/**
+ * Speech-interface "read" callback: copies the next chunk of 16-bit samples
+ * from the opened audio file into `data`.
+ *
+ * @param sh       speech handle; sh->private_info must hold the tts_ctx_t.
+ * @param data     destination buffer.
+ * @param data_len in: capacity of `data` in bytes; out: number of bytes read.
+ * @param flags    unused.
+ * @return SWITCH_STATUS_SUCCESS when data was read,
+ *         SWITCH_STATUS_BREAK when the read succeeded but produced no data
+ *         (the file is closed),
+ *         SWITCH_STATUS_FALSE when no file is attached or the read failed
+ *         (the file is closed in the latter case).
+ */
+static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;
+    /* the file API counts samples, the speech API counts bytes */
+    size_t len = (*data_len / sizeof(int16_t));
+
+    assert(tts_ctx != NULL);
+
+    if(tts_ctx->fhnd->file_interface == NULL) {
+        return SWITCH_STATUS_FALSE;
+    }
+
+    if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) {
+        switch_core_file_close(tts_ctx->fhnd);
+        return SWITCH_STATUS_FALSE;
+    }
+
+    *data_len = (len * sizeof(int16_t));
+    /* test the byte count itself; comparing the pointer to 0 could never be true */
+    if(*data_len == 0) {
+        switch_core_file_close(tts_ctx->fhnd);
+        return SWITCH_STATUS_BREAK;
+    }
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/**
+ * Speech-interface "flush" callback: closes the audio file attached to the
+ * handle, if there is one.
+ */
+static void speech_flush_tts(switch_speech_handle_t *sh) {
+    tts_ctx_t *ctx = (tts_ctx_t *) sh->private_info;
+    switch_file_handle_t *fh = NULL;
+
+    assert(ctx != NULL);
+
+    fh = ctx->fhnd;
+    if(fh == NULL || fh->file_interface == NULL) {
+        /* nothing attached, nothing to close */
+        return;
+    }
+
+    switch_core_file_close(fh);
+}
+
+/**
+ * Speech-interface text-parameter callback.
+ *
+ * Recognized parameters (case-insensitive):
+ *  - "lang":  replaces the context language and drops the model resolved
+ *             earlier, so the next synthesis looks the model up again for
+ *             the new language;
+ *  - "voice": replaces the context voice.
+ * Anything else, and NULL names or values, are ignored.
+ */
+static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) {
+    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;
+
+    assert(tts_ctx != NULL);
+
+    if(!param || !val) {
+        return;
+    }
+
+    if(strcasecmp(param, "lang") == 0) {
+        tts_ctx->language = switch_core_strdup(sh->memory_pool, val);
+        /* the model chosen in speech_open() belongs to the old language; force a fresh lookup */
+        tts_ctx->model_info = NULL;
+    } else if(strcasecmp(param, "voice") == 0) {
+        tts_ctx->voice = switch_core_strdup(sh->memory_pool, val);
+    }
+}
+
+/** Speech-interface numeric-parameter callback; no numeric parameters are supported, so this is a no-op. */
+static void speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) {
+    (void) sh;
+    (void) param;
+    (void) val;
+}
+
+/** Speech-interface float-parameter callback; no float parameters are supported, so this is a no-op. */
+static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) {
+    (void) sh;
+    (void) param;
+    (void) val;
+}
+
+// ---------------------------------------------------------------------------------------------------------------------------------------------
+// main
+// ---------------------------------------------------------------------------------------------------------------------------------------------
+#define CONFIG_NAME "piper_tts.conf"
+/**
+ * Module load hook.
+ *
+ * Resets the module globals, reads CONFIG_NAME, registers one model per
+ * <model language="..." model="..."/> entry, makes sure the cache directory
+ * exists and publishes the "piper" speech interface.
+ *
+ * Settings read from <settings>: cache-path, cache-disable, piper-bin,
+ * piper-opts, voice-name-as-language-code. Empty values are treated as
+ * "not set".
+ *
+ * @return SWITCH_STATUS_SUCCESS on success,
+ *         SWITCH_STATUS_GENERR when the configuration cannot be opened or a
+ *         model record cannot be allocated,
+ *         SWITCH_STATUS_FALSE when piper-bin is not configured.
+ *         On failure the model table is destroyed again.
+ */
+SWITCH_MODULE_LOAD_FUNCTION(mod_piper_tts_load) {
+    switch_status_t status = SWITCH_STATUS_SUCCESS;
+    switch_xml_t cfg = NULL, xml = NULL, settings = NULL, param = NULL, xmodels = NULL, xmodel = NULL;
+    switch_speech_interface_t *speech_interface = NULL;
+
+    memset(&globals, 0, sizeof(globals));
+    switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool);
+    switch_core_hash_init(&globals.models);
+
+    if((xml = switch_xml_open_cfg(CONFIG_NAME, &cfg, NULL)) == NULL) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't open configuration file: %s\n", CONFIG_NAME);
+        switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+
+    if((settings = switch_xml_child(cfg, "settings"))) {
+        for(param = switch_xml_child(settings, "param"); param; param = param->next) {
+            const char *var = switch_xml_attr_soft(param, "name");
+            const char *val = switch_xml_attr_soft(param, "value");
+
+            /* switch_xml_attr_soft() yields "" rather than NULL, so test for emptiness, not for NULL */
+            if(zstr(var) || zstr(val)) {
+                continue;
+            }
+
+            if(!strcasecmp(var, "cache-path")) {
+                globals.cache_path = switch_core_strdup(pool, val);
+            } else if(!strcasecmp(var, "piper-bin")) {
+                globals.piper_bin = switch_core_strdup(pool, val);
+            } else if(!strcasecmp(var, "piper-opts")) {
+                globals.piper_opts = switch_core_strdup(pool, val);
+            } else if(!strcasecmp(var, "voice-name-as-language-code")) {
+                globals.fl_voice_as_lang_code = switch_true(val);
+            } else if(!strcasecmp(var, "cache-disable")) {
+                globals.fl_cache_disabled = switch_true(val);
+            }
+        }
+    }
+
+    if((xmodels = switch_xml_child(cfg, "models"))) {
+        for(xmodel = switch_xml_child(xmodels, "model"); xmodel; xmodel = xmodel->next) {
+            const char *lang = switch_xml_attr_soft(xmodel, "language");
+            const char *model = switch_xml_attr_soft(xmodel, "model");
+            piper_model_info_t *model_info = NULL;
+
+            /* an entry without a language or a model path cannot be used */
+            if(zstr(lang) || zstr(model)) {
+                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring model entry without language or model\n");
+                continue;
+            }
+
+            if(switch_core_hash_find(globals.models, lang)) {
+                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Language '%s' already registered\n", lang);
+                continue;
+            }
+
+            if((model_info = switch_core_alloc(pool, sizeof(piper_model_info_t))) == NULL) {
+                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "mem fail\n");
+                switch_goto_status(SWITCH_STATUS_GENERR, out);
+            }
+            model_info->lang = switch_core_strdup(pool, lang);
+            model_info->model = switch_core_strdup(pool, model);
+
+            switch_core_hash_insert(globals.models, model_info->lang, model_info);
+        }
+    }
+
+    if(zstr(globals.piper_bin)) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "piper-bin - not defined\n");
+        switch_goto_status(SWITCH_STATUS_FALSE, out);
+    }
+
+    globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir;
+    globals.cache_path = (zstr(globals.cache_path) ? "/tmp/piper-tts-cache" : globals.cache_path);
+
+    if(switch_directory_exists(globals.cache_path, NULL) != SWITCH_STATUS_SUCCESS) {
+        /* best effort: the module still loads, but synthesis will fail until the directory exists */
+        if(switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, NULL) != SWITCH_STATUS_SUCCESS) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Couldn't create cache directory: %s\n", globals.cache_path);
+        }
+    }
+
+    *module_interface = switch_loadable_module_create_module_interface(pool, modname);
+    speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE);
+    speech_interface->interface_name = "piper";
+
+    speech_interface->speech_open = speech_open;
+    speech_interface->speech_close = speech_close;
+    speech_interface->speech_feed_tts = speech_feed_tts;
+    speech_interface->speech_read_tts = speech_read_tts;
+    speech_interface->speech_flush_tts = speech_flush_tts;
+
+    speech_interface->speech_text_param_tts = speech_text_param_tts;
+    speech_interface->speech_numeric_param_tts = speech_numeric_param_tts;
+    speech_interface->speech_float_param_tts = speech_float_param_tts;
+
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "PiperTTS-%s\n", VERSION);
+out:
+    if(xml) {
+        switch_xml_free(xml);
+    }
+    if(status != SWITCH_STATUS_SUCCESS) {
+        if(globals.models) { switch_core_hash_destroy(&globals.models); }
+    }
+    return status;
+}
+
+/**
+ * Module shutdown hook: releases the model table if it was created.
+ *
+ * @return always SWITCH_STATUS_SUCCESS.
+ */
+SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_piper_tts_shutdown) {
+    if(globals.models != NULL) {
+        switch_core_hash_destroy(&globals.models);
+    }
+    return SWITCH_STATUS_SUCCESS;
+}
diff --git a/sources/mod_piper_tts.h b/sources/mod_piper_tts.h
@@ -0,0 +1,54 @@
+/**
+ * (C)2023 aks
+ * https://akscf.me/
+ * https://github.com/akscf/
+ **/
+#ifndef MOD_PIPER_TTS_H
+#define MOD_PIPER_TTS_H
+
+#include <switch.h>
+#include <switch_stun.h>
+#include <stdint.h>
+#include <string.h>
+
+#ifndef true
+#define true SWITCH_TRUE
+#endif
+#ifndef false
+#define false SWITCH_FALSE
+#endif
+
+#define VERSION                 "1.0" /* module version, printed in the load banner */
+#define PIPER_FILE_ENCODING     "wav" /* file extension given to the synthesized/cached audio files */
+
+typedef struct { /* module-wide state; a single instance named `globals` is filled at module load */
+    switch_mutex_t          *mutex; /* nested mutex created at module load */
+    switch_hash_t           *models; /* language code -> piper_model_info_t, built from the <models> config section */
+    const char              *tmp_path; /* set to SWITCH_GLOBAL_dirs.temp_dir at module load */
+    const char              *cache_path; /* "cache-path" setting; falls back to "/tmp/piper-tts-cache" */
+    const char              *piper_bin; /* "piper-bin" setting: the piper executable; loading fails without it */
+    const char              *piper_opts; /* "piper-opts" setting: extra text placed after the binary on the command line */
+    uint8_t                 fl_cache_disabled; /* "cache-disable" setting: use a per-handle file that is deleted on close */
+    uint8_t                 fl_voice_as_lang_code; /* "voice-name-as-language-code" setting: use the voice name as the language, otherwise "en" */
+} globals_t;
+
+typedef struct { /* one configured piper model, taken from a <model> config entry */
+    char                    *lang; /* value of the "language" attribute; also the key in globals.models */
+    char                    *model; /* value of the "model" attribute; passed to piper as --model */
+} piper_model_info_t;
+
+typedef struct { /* per speech-handle context, stored in switch_speech_handle_t.private_info */
+    piper_model_info_t      *model_info; /* model resolved from `language` */
+    switch_memory_pool_t    *pool; /* the speech handle's memory pool */
+    switch_file_handle_t    *fhnd; /* handle of the audio file being played back */
+    char                    *language; /* language code used for the model lookup */
+    char                    *voice; /* voice name passed to the open callback or set via the "voice" parameter */
+    char                    *dst_fname; /* path of the audio file for this handle */
+    uint32_t                samplerate; /* sample rate requested when the handle was opened */
+    uint32_t                channels; /* channel count requested when the handle was opened */
+} tts_ctx_t;
+
+piper_model_info_t *piper_lookup_model(const char *lang); /* defined outside this header (presumably utils.c - confirm); callers treat a NULL result as "no model for this language" */
+
+
+#endif