Skip to content

Commit

Permalink
imported
Browse files Browse the repository at this point in the history
  • Loading branch information
aks committed Jan 2, 2024
1 parent ca0059b commit 1cf4b0c
Show file tree
Hide file tree
Showing 7 changed files with 408 additions and 1 deletion.
17 changes: 16 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,16 @@
# mod_piper_tts
<p>
FreeSWITCH TTS module that provides a simple frontend for <a href="https://github.com/rhasspy/piper" target="_blank">piper</a>.<br>
</p>

### Usage example
```XML
<extension name="tts-test">
<condition field="destination_number" expression="^(3333)$">
<action application="answer"/>
<action application="speak" data="piper|en|Hello world!"/>
<action application="sleep" data="1000"/>
<action application="hangup"/>
</condition>
</extension>

```
11 changes: 11 additions & 0 deletions sources/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

include $(top_srcdir)/build/modmake.rulesam
MODNAME=mod_piper_tts

mod_LTLIBRARIES = mod_piper_tts.la
mod_piper_tts_la_SOURCES = mod_piper_tts.c utils.c
mod_piper_tts_la_CFLAGS = $(AM_CFLAGS) -I. -Wno-unused-variable -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-label -Wno-declaration-after-statement
mod_piper_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la
mod_piper_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared

$(am_mod_piper_tts_la_OBJECTS): mod_piper_tts.h
15 changes: 15 additions & 0 deletions sources/conf/autoload_configs/piper_tts.conf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<configuration name="piper_tts.conf" description="">
<settings>
<param name="cache-path" value="/tmp/piper-tts-cache" />
<param name="cache-disable" value="false" />
<param name="piper-bin" value="/opt/piper/lib/piper" />
<param name="piper-opts" value="" />
<param name="voice-name-as-language-code" value="true" />
</settings>

<models>
<model language="en" model="/opt/piper/models/en_US-lessac-medium.onnx" />
<model language="ru" model="/opt/piper/models/ru_RU-irina-medium.onnx" />
</models>

</configuration>
7 changes: 7 additions & 0 deletions sources/conf/dialplan/example.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<extension name="piper-tts">
<condition field="destination_number" expression="^(3111)$">
<action application="answer"/>
<action application="speak" data="piper|en|Hello world!"/>
<action application="hangup"/>
</condition>
</extension>
281 changes: 281 additions & 0 deletions sources/mod_piper_tts.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
/**
* (C)2023 aks
* https://akscf.me/
* https://github.com/akscf/
**/
#include "mod_piper_tts.h"

globals_t globals;

SWITCH_MODULE_LOAD_FUNCTION(mod_piper_tts_load);
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_piper_tts_shutdown);
SWITCH_MODULE_DEFINITION(mod_piper_tts, mod_piper_tts_load, mod_piper_tts_shutdown, NULL);

// ------------------------------------------------------------------------------------------------------------------------------------------------------------
/**
 * Speech-interface "open" callback.
 *
 * Allocates the per-handle TTS context from the handle's memory pool, records
 * the requested voice / sample rate / channel count, picks the language
 * (the voice name when "voice-name-as-language-code" is on and a voice was
 * given, otherwise "en") and resolves the model registered for that language.
 * When caching is disabled, a unique (UUID based) output file name inside the
 * cache directory is chosen here.
 *
 * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when no model is
 *         registered for the selected language.
 */
static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) {
    char uuid_str[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 };
    tts_ctx_t *ctx = switch_core_alloc(sh->memory_pool, sizeof(tts_ctx_t));

    ctx->pool = sh->memory_pool;
    ctx->fhnd = switch_core_alloc(ctx->pool, sizeof(switch_file_handle_t));
    ctx->voice = switch_core_strdup(ctx->pool, voice);
    if(globals.fl_voice_as_lang_code && voice) {
        ctx->language = switch_core_strdup(sh->memory_pool, voice);
    } else {
        ctx->language = "en";
    }
    ctx->channels = channels;
    ctx->samplerate = samplerate;

    /* the context is attached before validation, so it is set even when open fails */
    sh->private_info = ctx;

    if(ctx->language) {
        ctx->model_info = piper_lookup_model(ctx->language);
        if(ctx->model_info == NULL) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't find model for language: '%s'\n", ctx->language);
            return SWITCH_STATUS_FALSE;
        }
    }

    if(globals.fl_cache_disabled) {
        /* no caching: every handle synthesizes into its own uniquely named file */
        switch_uuid_str((char *)uuid_str, sizeof(uuid_str));
        ctx->dst_fname = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s", globals.cache_path, SWITCH_PATH_SEPARATOR, uuid_str, PIPER_FILE_ENCODING);
    }

    return SWITCH_STATUS_SUCCESS;
}

/**
 * Speech-interface "close" callback.
 *
 * Closes the audio file if it is still open and, when caching is disabled,
 * deletes the synthesized file belonging to this handle.
 *
 * @return always SWITCH_STATUS_SUCCESS.
 */
static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) {
    tts_ctx_t *ctx = (tts_ctx_t *) sh->private_info;

    assert(ctx != NULL);

    if(switch_test_flag(ctx->fhnd, SWITCH_FILE_OPEN)) {
        switch_core_file_close(ctx->fhnd);
    }

    /* cached files are kept for reuse; only uncached output is removed */
    if(ctx->dst_fname && globals.fl_cache_disabled) {
        unlink(ctx->dst_fname);
    }

    return SWITCH_STATUS_SUCCESS;
}

/**
 * Speech-interface "feed" callback: makes the audio for `text` ready to be
 * read through speech_read_tts().
 *
 * With caching enabled the output file name is derived from an MD5 digest of
 * "<language>|<text>" and is recomputed on every call, so that feeding a
 * different text (or the same text in another language) through the same
 * handle never replays a previously selected file. An existing file is reused
 * as-is; otherwise piper is executed through the shell to produce it.
 *
 * With caching disabled the per-handle file chosen in speech_open() is removed
 * first, which forces a fresh synthesis for every feed.
 *
 * @param sh    speech handle whose private_info holds the tts_ctx_t
 * @param text  text to synthesize (must not be NULL)
 * @param flags unused
 * @return SWITCH_STATUS_SUCCESS when the audio file has been opened for
 *         reading, SWITCH_STATUS_FALSE on any failure (no model, piper
 *         failure, missing or unreadable output file).
 */
static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) {
    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;
    switch_status_t status = SWITCH_STATUS_SUCCESS;
    char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 };

    assert(tts_ctx != NULL);

    if(!text) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "No text to synthesize\n");
        switch_goto_status(SWITCH_STATUS_FALSE, out);
    }

    /* a previous feed may have left its file open; release it before opening the next one */
    if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) {
        switch_core_file_close(tts_ctx->fhnd);
    }

    if(globals.fl_cache_disabled && tts_ctx->dst_fname) {
        /* drop the output of an earlier feed so that it cannot be mistaken for a cache hit */
        unlink(tts_ctx->dst_fname);
    } else {
        /* the cache key covers the language as well as the text */
        char *key = switch_mprintf("%s|%s", (tts_ctx->language ? tts_ctx->language : ""), text);

        if(!key) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "mem fail\n");
            switch_goto_status(SWITCH_STATUS_FALSE, out);
        }
        switch_md5_string(digest, (void *) key, strlen(key));
        switch_safe_free(key);

        tts_ctx->dst_fname = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s", globals.cache_path, SWITCH_PATH_SEPARATOR, digest, PIPER_FILE_ENCODING);
    }

    if(switch_file_exists(tts_ctx->dst_fname, tts_ctx->pool) != SWITCH_STATUS_SUCCESS) {
        char *cmd = NULL;
        char *textq = NULL;

        if(!tts_ctx->model_info) {
            if(tts_ctx->language) {
                tts_ctx->model_info = piper_lookup_model(tts_ctx->language);
            }
            if(!tts_ctx->model_info) {
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't find model for language: '%s'\n", tts_ctx->language);
                switch_goto_status(SWITCH_STATUS_FALSE, out);
            }
        }

        /* the text is shell-quoted because the command line is handed to the shell */
        textq = switch_util_quote_shell_arg(text);
        cmd = switch_mprintf("echo %s | %s %s --model '%s' --output_file '%s'", textq, globals.piper_bin, (globals.piper_opts ? globals.piper_opts : ""), tts_ctx->model_info->model, tts_ctx->dst_fname);

        if(switch_system(cmd, SWITCH_TRUE) != 0) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to execute command: %s\n", cmd);
            status = SWITCH_STATUS_FALSE;
        }

        switch_safe_free(textq);
        switch_safe_free(cmd);

        if(status != SWITCH_STATUS_SUCCESS) {
            goto out;
        }

        if(switch_file_exists(tts_ctx->dst_fname, tts_ctx->pool) != SWITCH_STATUS_SUCCESS) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "File not found: %s\n", tts_ctx->dst_fname);
            switch_goto_status(SWITCH_STATUS_FALSE, out);
        }
    }

    if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_fname, tts_ctx->channels, tts_ctx->samplerate, (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), NULL)) != SWITCH_STATUS_SUCCESS) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't open file: %s\n", tts_ctx->dst_fname);
        switch_goto_status(SWITCH_STATUS_FALSE, out);
    }

out:
    return status;
}

/**
 * Speech-interface "read" callback: copies the next chunk of synthesized
 * audio into the caller's buffer.
 *
 * @param sh        speech handle whose private_info holds the tts_ctx_t
 * @param data      destination buffer
 * @param data_len  in: buffer capacity in bytes; out: number of bytes
 *                  written (left untouched when the read itself fails)
 * @param flags     unused
 * @return SWITCH_STATUS_SUCCESS when data was read,
 *         SWITCH_STATUS_BREAK when the read succeeded but yielded no samples
 *         (the file is closed),
 *         SWITCH_STATUS_FALSE when no file is available or the read failed.
 */
static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) {
    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;
    /* the file is opened with SWITCH_FILE_DATA_SHORT, so the read length is counted in 16-bit samples */
    size_t len = (*data_len / sizeof(int16_t));

    assert(tts_ctx != NULL);

    if(tts_ctx->fhnd->file_interface == NULL) {
        return SWITCH_STATUS_FALSE;
    }

    if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) {
        switch_core_file_close(tts_ctx->fhnd);
        return SWITCH_STATUS_FALSE;
    }

    *data_len = (len * sizeof(int16_t));

    /* test the sample count, not the data_len pointer, to detect the end of the audio */
    if(len == 0) {
        switch_core_file_close(tts_ctx->fhnd);
        return SWITCH_STATUS_BREAK;
    }

    return SWITCH_STATUS_SUCCESS;
}

/**
 * Speech-interface "flush" callback: closes the audio file of this handle
 * when a file handle exists and has a file interface attached.
 */
static void speech_flush_tts(switch_speech_handle_t *sh) {
    tts_ctx_t *ctx = (tts_ctx_t *) sh->private_info;

    assert(ctx != NULL);

    if(ctx->fhnd == NULL) {
        return;
    }
    if(ctx->fhnd->file_interface == NULL) {
        return;
    }

    switch_core_file_close(ctx->fhnd);
}

/**
 * Speech-interface text-parameter callback.
 *
 * Recognised parameters (case-insensitive):
 *   "lang"  - replaces the language of this handle; the model resolved for
 *             the previous language is dropped so that the next feed looks
 *             up the model for the new language instead of reusing the old one
 *   "voice" - replaces the stored voice name
 *
 * Unknown parameters and NULL names/values are ignored.
 */
static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) {
    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;

    assert(tts_ctx != NULL);

    if(!param || !val) {
        return;
    }

    if(strcasecmp(param, "lang") == 0) {
        tts_ctx->language = switch_core_strdup(sh->memory_pool, val);
        /* speech_feed_tts() only resolves a model when none is set */
        tts_ctx->model_info = NULL;
    } else if(strcasecmp(param, "voice") == 0) {
        tts_ctx->voice = switch_core_strdup(sh->memory_pool, val);
    }
}

/* Speech-interface numeric-parameter callback. Currently a no-op: the value is ignored. */
static void speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) {
}

/* Speech-interface float-parameter callback. Currently a no-op: the value is ignored. */
static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) {
}

// ---------------------------------------------------------------------------------------------------------------------------------------------
// main
// ---------------------------------------------------------------------------------------------------------------------------------------------
#define CONFIG_NAME "piper_tts.conf"
SWITCH_MODULE_LOAD_FUNCTION(mod_piper_tts_load) {
switch_status_t status = SWITCH_STATUS_SUCCESS;
switch_xml_t cfg, xml, settings, param, xmodels, xmodel;
switch_speech_interface_t *speech_interface;

memset(&globals, 0, sizeof(globals));
switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool);
switch_core_hash_init(&globals.models);

if((xml = switch_xml_open_cfg(CONFIG_NAME, &cfg, NULL)) == NULL) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't open configuration file: %s\n", CONFIG_NAME);
switch_goto_status(SWITCH_STATUS_GENERR, out);
}

if((settings = switch_xml_child(cfg, "settings"))) {
for(param = switch_xml_child(settings, "param"); param; param = param->next) {
char *var = (char *) switch_xml_attr_soft(param, "name");
char *val = (char *) switch_xml_attr_soft(param, "value");

if(!strcasecmp(var, "cache-path")) {
if(val) globals.cache_path = switch_core_strdup(pool, val);
} else if(!strcasecmp(var, "piper-bin")) {
if(val) globals.piper_bin = switch_core_strdup(pool, val);
} else if(!strcasecmp(var, "piper-opts")) {
if(val) globals.piper_opts = switch_core_strdup(pool, val);
} else if(!strcasecmp(var, "voice-name-as-language-code")) {
if(val) globals.fl_voice_as_lang_code = switch_true(val);
} else if(!strcasecmp(var, "cache-disable")) {
if(val) globals.fl_cache_disabled = switch_true(val);
}
}
}

if((xmodels = switch_xml_child(cfg, "models"))) {
for(xmodel = switch_xml_child(xmodels, "model"); xmodel; xmodel = xmodel->next) {
char *lang = (char *) switch_xml_attr_soft(xmodel, "language");
char *model = (char *) switch_xml_attr_soft(xmodel, "model");
piper_model_info_t *model_info = NULL;

if(!lang || !model) { continue; }

if(switch_core_hash_find(globals.models, lang)) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Language '%s' already registered\n", lang);
continue;
}

if((model_info = switch_core_alloc(pool, sizeof(piper_model_info_t))) == NULL) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "mem fail\n");
switch_goto_status(SWITCH_STATUS_GENERR, out);
}
model_info->lang = switch_core_strdup(pool, lang);
model_info->model = switch_core_strdup(pool, model);

switch_core_hash_insert(globals.models, model_info->lang, model_info);
}
}

if(!globals.piper_bin) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "piper-bin - not defined\n");
switch_goto_status(SWITCH_STATUS_FALSE, out);
}

globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir;
globals.cache_path = (globals.cache_path == NULL ? "/tmp/piper-tts-cache" : globals.cache_path);

if(switch_directory_exists(globals.cache_path, NULL) != SWITCH_STATUS_SUCCESS) {
switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, NULL);
}

*module_interface = switch_loadable_module_create_module_interface(pool, modname);
speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE);
speech_interface->interface_name = "piper";

speech_interface->speech_open = speech_open;
speech_interface->speech_close = speech_close;
speech_interface->speech_feed_tts = speech_feed_tts;
speech_interface->speech_read_tts = speech_read_tts;
speech_interface->speech_flush_tts = speech_flush_tts;

speech_interface->speech_text_param_tts = speech_text_param_tts;
speech_interface->speech_numeric_param_tts = speech_numeric_param_tts;
speech_interface->speech_float_param_tts = speech_float_param_tts;

switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "PiperTTS-%s\n", VERSION);
out:
if(xml) {
switch_xml_free(xml);
}
if(status != SWITCH_STATUS_SUCCESS) {
if(globals.models) { switch_core_hash_destroy(&globals.models); }
}
return status;
}

/**
 * Module shutdown entry point: destroys the language-to-model table if it
 * exists.
 *
 * @return always SWITCH_STATUS_SUCCESS.
 */
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_piper_tts_shutdown) {
    if(!globals.models) {
        return SWITCH_STATUS_SUCCESS;
    }

    switch_core_hash_destroy(&globals.models);
    return SWITCH_STATUS_SUCCESS;
}
54 changes: 54 additions & 0 deletions sources/mod_piper_tts.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/**
* (C)2023 aks
* https://akscf.me/
* https://github.com/akscf/
**/
#ifndef MOD_PIPER_TTS_H
#define MOD_PIPER_TTS_H

#include <switch.h>
#include <switch_stun.h>
#include <stdint.h>
#include <string.h>

#ifndef true
#define true SWITCH_TRUE
#endif
#ifndef false
#define false SWITCH_FALSE
#endif

#define VERSION "1.0"
#define PIPER_FILE_ENCODING "wav"

/* Module-wide state: zeroed and then filled in by the module load function. */
typedef struct {
/* nested mutex created at module load */
switch_mutex_t *mutex;
/* hash of configured models: language string -> piper_model_info_t */
switch_hash_t *models;
/* FreeSWITCH temp directory (SWITCH_GLOBAL_dirs.temp_dir) */
const char *tmp_path;
/* directory for synthesized files ("cache-path", default "/tmp/piper-tts-cache") */
const char *cache_path;
/* piper executable ("piper-bin"); loading fails when it is not set */
const char *piper_bin;
/* extra command-line options passed to piper ("piper-opts"), may be NULL */
const char *piper_opts;
/* "cache-disable": use a per-handle file that is deleted on close */
uint8_t fl_cache_disabled;
/* "voice-name-as-language-code": use the voice name as the language */
uint8_t fl_voice_as_lang_code;
} globals_t;

typedef struct {
char *lang;
char *model;
} piper_model_info_t;

/* Per speech-handle context, stored in switch_speech_handle_t.private_info. */
typedef struct {
/* model resolved for `language` via piper_lookup_model(), NULL when unresolved */
piper_model_info_t *model_info;
/* memory pool of the owning speech handle */
switch_memory_pool_t *pool;
/* file handle used to read the synthesized audio */
switch_file_handle_t *fhnd;
/* language used to select the model */
char *language;
/* voice name given at open or through the "voice" text parameter */
char *voice;
/* path of the synthesized audio file */
char *dst_fname;
/* sample rate requested at open; passed to switch_core_file_open() */
uint32_t samplerate;
/* channel count requested at open; passed to switch_core_file_open() */
uint32_t channels;
} tts_ctx_t;

piper_model_info_t *piper_lookup_model(const char *lang);


#endif
Loading

0 comments on commit 1cf4b0c

Please sign in to comment.