-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
aks
committed
Jan 2, 2024
1 parent
ca0059b
commit 1cf4b0c
Showing
7 changed files
with
408 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,16 @@ | ||
# mod_piper_tts | ||
<p> | ||
A FreeSWITCH TTS module that provides a simple front end for <a href="https://github.com/rhasspy/piper" target="_blank">piper</a>.<br> | ||
</p> | ||
|
||
### Usage example | ||
```XML | ||
<extension name="tts-test"> | ||
<condition field="destination_number" expression="^(3333)$"> | ||
<action application="answer"/> | ||
<action application="speak" data="piper|en|Hello world!"/> | ||
<action application="sleep" data="1000"/> | ||
<action application="hangup"/> | ||
</condition> | ||
</extension> | ||
|
||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
|
||
include $(top_srcdir)/build/modmake.rulesam | ||
MODNAME=mod_piper_tts | ||
|
||
mod_LTLIBRARIES = mod_piper_tts.la | ||
mod_piper_tts_la_SOURCES = mod_piper_tts.c utils.c | ||
mod_piper_tts_la_CFLAGS = $(AM_CFLAGS) -I. -Wno-unused-variable -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-label -Wno-declaration-after-statement | ||
mod_piper_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la | ||
mod_piper_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared | ||
|
||
$(am_mod_piper_tts_la_OBJECTS): mod_piper_tts.h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
<configuration name="piper_tts.conf" description=""> | ||
<settings> | ||
<param name="cache-path" value="/tmp/piper-tts-cache" /> | ||
<param name="cache-disable" value="false" /> | ||
<param name="piper-bin" value="/opt/piper/lib/piper" /> | ||
<param name="piper-opts" value="" /> | ||
<param name="voice-name-as-language-code" value="true" /> | ||
</settings> | ||
|
||
<models> | ||
<model language="en" model="/opt/piper/models/en_US-lessac-medium.onnx" /> | ||
<model language="ru" model="/opt/piper/models/ru_RU-irina-medium.onnx" /> | ||
</models> | ||
|
||
</configuration> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
<extension name="google-tts"> | ||
<condition field="destination_number" expression="^(3111)$"> | ||
<action application="answer"/> | ||
<action application="speak" data="piper|en|Hello world!"/> | ||
<action application="hangup"/> | ||
</condition> | ||
</extension> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,281 @@ | ||
/** | ||
* (C)2023 aks | ||
* https://akscf.me/ | ||
* https://github.com/akscf/ | ||
**/ | ||
#include "mod_piper_tts.h" | ||
|
||
/* Module-wide state: zeroed and populated by mod_piper_tts_load, read by the speech callbacks. */
globals_t globals; | ||
|
||
/* Forward declarations of the load/shutdown entry points, followed by the module definition. */
/* The last argument of SWITCH_MODULE_DEFINITION is NULL -- presumably the runtime hook, i.e. none is registered; confirm against the FreeSWITCH macro. */
SWITCH_MODULE_LOAD_FUNCTION(mod_piper_tts_load); | ||
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_piper_tts_shutdown); | ||
SWITCH_MODULE_DEFINITION(mod_piper_tts, mod_piper_tts_load, mod_piper_tts_shutdown, NULL); | ||
|
||
// ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ||
/**
 * Speech-interface "open" callback: builds the per-handle TTS context.
 *
 * The context and everything it owns are allocated from the handle's memory
 * pool. The language is the voice name when the voice-as-language option is
 * enabled and a voice was supplied; otherwise it is "en". When caching is
 * disabled, a unique (UUID-based) output file name is chosen up front.
 *
 * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when no model is
 *         registered for the selected language.
 **/
static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) {
    switch_memory_pool_t *mpool = sh->memory_pool;
    tts_ctx_t *ctx = switch_core_alloc(mpool, sizeof(tts_ctx_t));

    ctx->pool = mpool;
    ctx->fhnd = switch_core_alloc(mpool, sizeof(switch_file_handle_t));
    ctx->voice = switch_core_strdup(mpool, voice);
    if(globals.fl_voice_as_lang_code && voice) {
        ctx->language = switch_core_strdup(mpool, voice);
    } else {
        ctx->language = "en";
    }
    ctx->channels = channels;
    ctx->samplerate = samplerate;

    // publish the context before any failure path so the other callbacks can see it
    sh->private_info = ctx;

    if(ctx->language) {
        ctx->model_info = piper_lookup_model(ctx->language);
        if(ctx->model_info == NULL) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't find model for language: '%s'\n", ctx->language);
            return SWITCH_STATUS_FALSE;
        }
    }

    if(globals.fl_cache_disabled) {
        char uuid_buf[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 };

        switch_uuid_str(uuid_buf, sizeof(uuid_buf));
        ctx->dst_fname = switch_core_sprintf(mpool, "%s%s%s.%s", globals.cache_path, SWITCH_PATH_SEPARATOR, uuid_buf, PIPER_FILE_ENCODING);
    }

    return SWITCH_STATUS_SUCCESS;
}
|
||
/**
 * Speech-interface "close" callback.
 *
 * Closes the audio file if it is still open and, when caching is disabled,
 * removes the generated file.
 *
 * @return always SWITCH_STATUS_SUCCESS.
 **/
static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) {
    tts_ctx_t *ctx = (tts_ctx_t *) sh->private_info;
    assert(ctx != NULL);

    if(switch_test_flag(ctx->fhnd, SWITCH_FILE_OPEN)) {
        switch_core_file_close(ctx->fhnd);
    }

    // with caching disabled the output file belongs to this handle only
    if(ctx->dst_fname && globals.fl_cache_disabled) {
        unlink(ctx->dst_fname);
    }

    return SWITCH_STATUS_SUCCESS;
}
|
||
/**
 * Speech-interface "feed" callback: makes the audio for `text` available for
 * reading through tts_ctx->fhnd.
 *
 * With caching enabled, the output file name is derived from an MD5 digest of
 * the model (or language) and the text, and is recomputed on every call, so a
 * handle that is fed several different texts never replays the audio of an
 * earlier one and the same text in two languages does not share a cache entry.
 * With caching disabled, the per-handle file is removed before each synthesis
 * for the same reason.
 *
 * If the file does not exist yet, piper is executed through the shell with the
 * text piped to its standard input.
 *
 * @param sh    speech handle; private_info must hold the tts_ctx_t
 * @param text  text to synthesize (must be non-empty)
 * @param flags unused
 * @return SWITCH_STATUS_SUCCESS when the audio file was opened,
 *         SWITCH_STATUS_FALSE on any failure.
 **/
static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) {
    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;
    switch_status_t status = SWITCH_STATUS_SUCCESS;
    char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 };
    const char *cache_id = NULL;
    char *cmd = NULL;
    char *textq = NULL;

    assert(tts_ctx != NULL);

    if(zstr(text)) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "No text to synthesize\n");
        return SWITCH_STATUS_FALSE;
    }

    // the handle may be fed more than once; drop the audio of a previous feed
    if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) {
        switch_core_file_close(tts_ctx->fhnd);
    }

    if(!tts_ctx->model_info && tts_ctx->language) {
        tts_ctx->model_info = piper_lookup_model(tts_ctx->language);
    }

    if(globals.fl_cache_disabled) {
        if(tts_ctx->dst_fname) {
            // stale output of a previous feed must not be replayed
            unlink(tts_ctx->dst_fname);
        } else {
            char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 };

            switch_uuid_str((char *) name_uuid, sizeof(name_uuid));
            tts_ctx->dst_fname = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s", globals.cache_path, SWITCH_PATH_SEPARATOR, name_uuid, PIPER_FILE_ENCODING);
        }
    } else {
        char *cache_key = NULL;

        // key on the model actually used (falling back to the language), not on the text alone
        if(tts_ctx->model_info && tts_ctx->model_info->model) {
            cache_id = tts_ctx->model_info->model;
        } else {
            cache_id = (tts_ctx->language ? tts_ctx->language : "");
        }

        cache_key = switch_mprintf("%s|%s", cache_id, text);
        if(!cache_key) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "mem fail\n");
            return SWITCH_STATUS_FALSE;
        }
        switch_md5_string(digest, (void *) cache_key, strlen(cache_key));
        switch_safe_free(cache_key);

        // pool-allocated: released together with the speech handle
        tts_ctx->dst_fname = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s", globals.cache_path, SWITCH_PATH_SEPARATOR, digest, PIPER_FILE_ENCODING);
    }

    if(switch_file_exists(tts_ctx->dst_fname, tts_ctx->pool) != SWITCH_STATUS_SUCCESS) {
        if(!tts_ctx->model_info) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't find model for language: '%s'\n", (tts_ctx->language ? tts_ctx->language : ""));
            return SWITCH_STATUS_FALSE;
        }

        textq = switch_util_quote_shell_arg(text);
        if(textq) {
            cmd = switch_mprintf("echo %s | %s %s --model '%s' --output_file '%s'", textq, globals.piper_bin, (globals.piper_opts ? globals.piper_opts : ""), tts_ctx->model_info->model, tts_ctx->dst_fname);
        }

        if(!textq || !cmd) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "mem fail\n");
            status = SWITCH_STATUS_FALSE;
        } else if(switch_system(cmd, SWITCH_TRUE) != 0) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to execute command: %s\n", cmd);
            status = SWITCH_STATUS_FALSE;
        }

        switch_safe_free(textq);
        switch_safe_free(cmd);

        if(status != SWITCH_STATUS_SUCCESS) {
            return status;
        }

        if(switch_file_exists(tts_ctx->dst_fname, tts_ctx->pool) != SWITCH_STATUS_SUCCESS) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "File not found: %s\n", tts_ctx->dst_fname);
            return SWITCH_STATUS_FALSE;
        }
    }

    if(switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_fname, tts_ctx->channels, tts_ctx->samplerate, (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), NULL) != SWITCH_STATUS_SUCCESS) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't open file: %s\n", tts_ctx->dst_fname);
        return SWITCH_STATUS_FALSE;
    }

    return SWITCH_STATUS_SUCCESS;
}
|
||
/**
 * Speech-interface "read" callback: copies the next chunk of generated audio
 * into `data`.
 *
 * @param sh        speech handle; private_info must hold the tts_ctx_t
 * @param data      destination buffer
 * @param data_len  in: capacity of `data` in bytes; out: number of bytes written
 * @param flags     unused
 * @return SWITCH_STATUS_SUCCESS when data was read,
 *         SWITCH_STATUS_BREAK when the read succeeded but produced no data,
 *         SWITCH_STATUS_FALSE when no file is open or the read failed.
 *         The file is closed on BREAK and on a failed read.
 **/
static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) {
    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;
    size_t len = (*data_len / sizeof(int16_t)); // the file is opened with SWITCH_FILE_DATA_SHORT, so lengths are counted in 16-bit samples

    assert(tts_ctx != NULL);

    if(tts_ctx->fhnd->file_interface == NULL) {
        return SWITCH_STATUS_FALSE;
    }

    if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) {
        switch_core_file_close(tts_ctx->fhnd);
        return SWITCH_STATUS_FALSE;
    }

    *data_len = (len * sizeof(int16_t));
    // test the byte count itself; comparing the pointer to 0 made this branch unreachable
    if(*data_len == 0) {
        switch_core_file_close(tts_ctx->fhnd);
        return SWITCH_STATUS_BREAK;
    }

    return SWITCH_STATUS_SUCCESS;
}
|
||
/**
 * Speech-interface "flush" callback: closes the audio file if the context
 * currently has one attached.
 **/
static void speech_flush_tts(switch_speech_handle_t *sh) {
    tts_ctx_t *ctx = (tts_ctx_t *) sh->private_info;
    assert(ctx != NULL);

    if(ctx->fhnd == NULL) {
        return;
    }
    if(ctx->fhnd->file_interface == NULL) {
        return;
    }

    switch_core_file_close(ctx->fhnd);
}
|
||
/**
 * Speech-interface text-parameter callback.
 *
 * Recognized parameters (case-insensitive):
 *   "lang"  - switches the language and re-resolves the model for it, so the
 *             change actually affects the next synthesis;
 *   "voice" - stores the voice name.
 * Missing or empty names/values and unknown parameters are ignored.
 **/
static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) {
    tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info;

    assert(tts_ctx != NULL);

    if(zstr(param) || zstr(val)) {
        return;
    }

    if(strcasecmp(param, "lang") == 0) {
        tts_ctx->language = switch_core_strdup(sh->memory_pool, val);
        // the model was resolved for the previous language; refresh it.
        // A NULL result is tolerated here: speech_feed_tts retries the lookup and reports the failure.
        tts_ctx->model_info = piper_lookup_model(tts_ctx->language);
        if(!tts_ctx->model_info) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Couldn't find model for language: '%s'\n", tts_ctx->language);
        }
    } else if(strcasecmp(param, "voice") == 0) {
        tts_ctx->voice = switch_core_strdup(sh->memory_pool, val);
    }
}
|
||
/* Speech-interface numeric-parameter callback: accepted and ignored, no numeric parameters are handled. */
static void speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) {
    (void) sh;
    (void) param;
    (void) val;
}
|
||
/* Speech-interface float-parameter callback: accepted and ignored, no float parameters are handled. */
static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) {
    (void) sh;
    (void) param;
    (void) val;
}
|
||
// --------------------------------------------------------------------------------------------------------------------------------------------- | ||
// main | ||
// --------------------------------------------------------------------------------------------------------------------------------------------- | ||
#define CONFIG_NAME "piper_tts.conf" | ||
SWITCH_MODULE_LOAD_FUNCTION(mod_piper_tts_load) { | ||
switch_status_t status = SWITCH_STATUS_SUCCESS; | ||
switch_xml_t cfg, xml, settings, param, xmodels, xmodel; | ||
switch_speech_interface_t *speech_interface; | ||
|
||
memset(&globals, 0, sizeof(globals)); | ||
switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool); | ||
switch_core_hash_init(&globals.models); | ||
|
||
if((xml = switch_xml_open_cfg(CONFIG_NAME, &cfg, NULL)) == NULL) { | ||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't open configuration file: %s\n", CONFIG_NAME); | ||
switch_goto_status(SWITCH_STATUS_GENERR, out); | ||
} | ||
|
||
if((settings = switch_xml_child(cfg, "settings"))) { | ||
for(param = switch_xml_child(settings, "param"); param; param = param->next) { | ||
char *var = (char *) switch_xml_attr_soft(param, "name"); | ||
char *val = (char *) switch_xml_attr_soft(param, "value"); | ||
|
||
if(!strcasecmp(var, "cache-path")) { | ||
if(val) globals.cache_path = switch_core_strdup(pool, val); | ||
} else if(!strcasecmp(var, "piper-bin")) { | ||
if(val) globals.piper_bin = switch_core_strdup(pool, val); | ||
} else if(!strcasecmp(var, "piper-opts")) { | ||
if(val) globals.piper_opts = switch_core_strdup(pool, val); | ||
} else if(!strcasecmp(var, "voice-name-as-language-code")) { | ||
if(val) globals.fl_voice_as_lang_code = switch_true(val); | ||
} else if(!strcasecmp(var, "cache-disable")) { | ||
if(val) globals.fl_cache_disabled = switch_true(val); | ||
} | ||
} | ||
} | ||
|
||
if((xmodels = switch_xml_child(cfg, "models"))) { | ||
for(xmodel = switch_xml_child(xmodels, "model"); xmodel; xmodel = xmodel->next) { | ||
char *lang = (char *) switch_xml_attr_soft(xmodel, "language"); | ||
char *model = (char *) switch_xml_attr_soft(xmodel, "model"); | ||
piper_model_info_t *model_info = NULL; | ||
|
||
if(!lang || !model) { continue; } | ||
|
||
if(switch_core_hash_find(globals.models, lang)) { | ||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Language '%s' already registered\n", lang); | ||
continue; | ||
} | ||
|
||
if((model_info = switch_core_alloc(pool, sizeof(piper_model_info_t))) == NULL) { | ||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "mem fail\n"); | ||
switch_goto_status(SWITCH_STATUS_GENERR, out); | ||
} | ||
model_info->lang = switch_core_strdup(pool, lang); | ||
model_info->model = switch_core_strdup(pool, model); | ||
|
||
switch_core_hash_insert(globals.models, model_info->lang, model_info); | ||
} | ||
} | ||
|
||
if(!globals.piper_bin) { | ||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "piper-bin - not defined\n"); | ||
switch_goto_status(SWITCH_STATUS_FALSE, out); | ||
} | ||
|
||
globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir; | ||
globals.cache_path = (globals.cache_path == NULL ? "/tmp/piper-tts-cache" : globals.cache_path); | ||
|
||
if(switch_directory_exists(globals.cache_path, NULL) != SWITCH_STATUS_SUCCESS) { | ||
switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, NULL); | ||
} | ||
|
||
*module_interface = switch_loadable_module_create_module_interface(pool, modname); | ||
speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE); | ||
speech_interface->interface_name = "piper"; | ||
|
||
speech_interface->speech_open = speech_open; | ||
speech_interface->speech_close = speech_close; | ||
speech_interface->speech_feed_tts = speech_feed_tts; | ||
speech_interface->speech_read_tts = speech_read_tts; | ||
speech_interface->speech_flush_tts = speech_flush_tts; | ||
|
||
speech_interface->speech_text_param_tts = speech_text_param_tts; | ||
speech_interface->speech_numeric_param_tts = speech_numeric_param_tts; | ||
speech_interface->speech_float_param_tts = speech_float_param_tts; | ||
|
||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "PiperTTS-%s\n", VERSION); | ||
out: | ||
if(xml) { | ||
switch_xml_free(xml); | ||
} | ||
if(status != SWITCH_STATUS_SUCCESS) { | ||
if(globals.models) { switch_core_hash_destroy(&globals.models); } | ||
} | ||
return status; | ||
} | ||
|
||
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_piper_tts_shutdown) { | ||
|
||
if(globals.models) { | ||
switch_core_hash_destroy(&globals.models); | ||
} | ||
|
||
return SWITCH_STATUS_SUCCESS; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/** | ||
* (C)2023 aks | ||
* https://akscf.me/ | ||
* https://github.com/akscf/ | ||
**/ | ||
#ifndef MOD_PIPER_TTS_H | ||
#define MOD_PIPER_TTS_H | ||
|
||
#include <switch.h> | ||
#include <switch_stun.h> | ||
#include <stdint.h> | ||
#include <string.h> | ||
|
||
#ifndef true | ||
#define true SWITCH_TRUE | ||
#endif | ||
#ifndef false | ||
#define false SWITCH_FALSE | ||
#endif | ||
|
||
#define VERSION "1.0" | ||
#define PIPER_FILE_ENCODING "wav" | ||
|
||
/* Module-wide configuration and state, filled in by the module load function. */
typedef struct { | ||
/* initialized at load time; NOTE(review): no lock/unlock is visible in the module source shown here */
switch_mutex_t *mutex; | ||
/* hash: language code -> piper_model_info_t, built from the <models> config section */
switch_hash_t *models; | ||
/* set from SWITCH_GLOBAL_dirs.temp_dir at load time */
const char *tmp_path; | ||
/* "cache-path" setting; defaults to "/tmp/piper-tts-cache" */
const char *cache_path; | ||
/* "piper-bin" setting: the piper executable; loading fails when it is not set */
const char *piper_bin; | ||
/* "piper-opts" setting: extra text inserted into the piper command line (may be NULL) */
const char *piper_opts; | ||
/* "cache-disable" setting: when true, output goes to a per-handle file that is unlinked on close */
uint8_t fl_cache_disabled; | ||
/* "voice-name-as-language-code" setting: when true, the voice name given at open is used as the language */
uint8_t fl_voice_as_lang_code; | ||
} globals_t; | ||
|
||
/* One <model> entry from the configuration. */
typedef struct { | ||
/* language code; also the key under which the entry is stored in globals.models */
char *lang; | ||
/* value of the "model" attribute; passed to piper as --model */
char *model; | ||
} piper_model_info_t; | ||
|
||
/* Per-speech-handle context, created in speech_open and stored in sh->private_info. */
typedef struct { | ||
/* model resolved for `language`; NULL until a lookup succeeds */
piper_model_info_t *model_info; | ||
/* the speech handle's memory pool (the context itself is allocated from it) */
switch_memory_pool_t *pool; | ||
/* file handle used to read the generated audio back */
switch_file_handle_t *fhnd; | ||
/* language code used for the model lookup */
char *language; | ||
/* voice name as given at open or via the "voice" text parameter */
char *voice; | ||
/* path of the audio file piper writes and the module reads */
char *dst_fname; | ||
/* sample rate requested at open; passed to switch_core_file_open */
uint32_t samplerate; | ||
/* channel count requested at open; passed to switch_core_file_open */
uint32_t channels; | ||
} tts_ctx_t; | ||
|
||
/* Looks up the model registered for `lang`; callers treat a NULL result as "no model for this language". */
/* NOTE(review): the definition is not in this header -- presumably in utils.c, which the Makefile lists as a source; confirm. */
piper_model_info_t *piper_lookup_model(const char *lang); | ||
|
||
|
||
#endif |
Oops, something went wrong.