Skip to content

Commit

Permalink
A native output module based on piper TTS.
Browse files Browse the repository at this point in the history
  • Loading branch information
net-ddavies authored and sthibaul committed Mar 4, 2025
1 parent b3a0fc2 commit 6511762
Show file tree
Hide file tree
Showing 7 changed files with 1,416 additions and 9 deletions.
18 changes: 10 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,22 +75,24 @@ jobs:
libao-dev libasound2-dev libaudio-dev libpulse-dev libpipewire-0.3-dev libxau-dev libsndfile1-dev
libdotconf-dev libglib2.0-dev libltdl-dev systemd
gettext help2man texinfo texlive texlive-plain-generic
librubberband-dev
pulseaudio ${{ matrix.deps }}
- name: install piper
run: |
wget https://github.com/rhasspy/piper-phonemize/releases/download/v1.0.0/libpiper_phonemize-amd64.tar.gz
echo "698358c5362250f315f400468199c40bfff41b56b4e079d4b93bfa67019e2cd28a88d385dd4de79ea24b923272826cf4e4191b81a7b33d3164cb5615b8c66be9 libpiper_phonemize-amd64.tar.gz" | sha512sum --check
tar -C /opt -xvf libpiper_phonemize-amd64.tar.gz
wget https://github.com/rhasspy/piper/releases/download/v1.2.0/piper_amd64.tar.gz
echo "e255e804b7ba1f837ea76d5c3693abf32514dd4ca30239d80d2b5131b5886c76a00a741d405b762658ae4444b90a8a857f7d65f1c25da250d14d705cbc580cc1 piper_amd64.tar.gz" | sha512sum --check
tar -C /opt -xvf piper_amd64.tar.gz
wget https://github.com/rhasspy/piper-phonemize/releases/download/2023.11.14-4/piper-phonemize_linux_x86_64.tar.gz
echo "22ae85dba1c07e328b33e8311ae6e67ad1ea2a57068d08238c452c69b0b1761eb39b064bf7e51f7413317a8200f6f92c23426a5a575abbb4dbdde86c81825601 piper-phonemize_linux_x86_64.tar.gz" | sha512sum --check
tar -C /opt -xvf piper-phonemize_linux_x86_64.tar.gz
wget https://github.com/rhasspy/piper/archive/refs/tags/2023.11.14-2.tar.gz
echo "2156c8d384aafc977a41eea181855b5e3fddab1eca353528d919d421958e4c10dbcf6526d03433e4246ac847a2fe4a7c43a57d2cb5666993ad0f28ed9fb68fe6 2023.11.14-2.tar.gz" | sha512sum --check
tar -C /opt -xvf 2023.11.14-2.tar.gz
- name: autoconf
run: ./build.sh
- name: configure
run: ${{ matrix.configenv }}
CFLAGS="${{ matrix.configflags }}"
CXXFLAGS="${{ matrix.configflags }}"
./configure --prefix=/tmp/foobar ${{ matrix.config }}
CXXFLAGS="${{ matrix.configflags }} -I/opt/piper_phonemize/include"
LDFLAGS="-L/opt/piper_phonemize/lib"
./configure --prefix=/tmp/foobar --with-piper=/opt/piper-2023.11.14-2 ${{ matrix.config }}
- name: Store the config
if: ${{ always() }}
uses: actions/upload-artifact@v4
Expand Down
5 changes: 5 additions & 0 deletions config/modules/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ dist_moduleconf_DATA += kali.conf
dist_moduleconforig_DATA += kali.conf
endif

# Add cxxpiper.conf to both module configuration data lists only when the
# piper_support conditional is set.
if piper_support
dist_moduleconf_DATA += cxxpiper.conf
dist_moduleconforig_DATA += cxxpiper.conf
endif

if ibmtts_support
dist_moduleconf_DATA += ibmtts.conf
dist_moduleconforig_DATA += ibmtts.conf
Expand Down
71 changes: 71 additions & 0 deletions config/modules/cxxpiper.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#
# Configuration for cxxpiper speech dispatcher output module.
#

Debug 0

# Piper doesn't have voices in the speech-dispatcher sense. Piper has a
# "model" and the model's "configuration". A model/config may be
# single speaker or multi-speaker. Single speaker models produce a
# single speech dispatcher voice. The speech dispatcher voice name
# can be listed with 'spd-say -o cxxpiper -L', but it is
# not needed as the voice will be the default, and only, voice available.
#
# Piper multi-speaker models produce a discrete speech dispatcher
# voice for each speaker the model supports. The configuration
# file for a multi-speaker model contains a "speaker_id_map" object
# that lists an integer speaker id and string mnemonic for each
# speaker supported by the model. Since speech dispatcher has no
# notion of speaker id, speaker selection details are hidden from the
# user by instead exposing voices of the form
# <model-name>~<speaker-id>~<mnemonic> (e.g. "clean100~2~5393"), with the output module
# mapping between "voice names" and the current model's speakers.

# NB: It is unclear whether onnx models may allow different languages within
# the same multi-speaker model. Regardless, if there is sufficient memory, it might be possible to load multiple
# models and have cxxpiper select between them, while presenting the union of
# the speakers and languages of each model.
# For now all directives with language fields require the language code, but ignore it.

# ModelPath and ConfigPath are required. There should be exactly one of each of them.
ModelPath "/usr/share/piper/voices/clean100.onnx"
ConfigPath "/usr/share/piper/voices/clean100.onnx.json"

# For single-speaker models, DefaultVoice is ignored, and logged as such, with
# a warning. For multi-speaker models, DefaultVoice is optional. If it is
# not specified, the first speaker of the multi-speaker model becomes
# the default speaker for the lifetime of the cxxpiper output module
# and also for future runtimes unless this configuration is changed.
# When specified along with a multi-speaker model, the argument is a string that matches one of the
# "voices" listed by spd-say -o cxxpiper -L . Note that piper's
# notion of "speaker" appears to the user as the "voice" concept of
# speech dispatcher. This is pretty much invisible to the user, but
# note that it means that the strings listed in the .json
# configuration file in the speaker_id_map object are not the same as
# the voices listed by spd-say (i.e. the speaker ids are substrings of
# the listed voices). We could also match the substrings, but we
# don't, at least for now; only the full "voice" string is
# recognized. If the voice string can't be matched against the voices
# found when the model is loaded, then the first speaker becomes the
# default for the lifetime of the output module. A warning is logged
# if the string can't be matched and the voice name of the first
# speaker (index 0) is included in the warning message.
DefaultVoice "clean100~2~5393"

# AddVoice (optional) is reused from the generic output module. It maps voice types to voice names within a language code.
# It does not do anything useful for single-speaker models and is ignored. For
# multi-speaker models the language code is required, but ignored, at least for now.
AddVoice "en_US" "MALE1" "clean100~33~8419"
AddVoice "en_US" "FEMALE1" "clean100~25~4137"

# Sound Icons are configured and work like espeak.
SoundIconFolder "/usr/share/sounds/sound-icons/"
SoundIconVolume 0

# Piper uses ESpeak NG sometimes, depending on the model. Piper distributes this and distros
# may provide it. Default is "/usr/share/espeak-ng-data/".
# It should probably be considered required, but if a model doesn't use espeak
# it might work to omit it.
ESpeakNGDataDirPath "/usr/share/espeak-ng-data/"

# End of cxxpiper.conf
69 changes: 69 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,75 @@ AM_CONDITIONAL([kali_support], [test $with_kali != no])
AM_CONDITIONAL([kali_shim], [test $with_kali = shim])
AS_IF([test $with_kali != no], [output_modules="${output_modules} kali"])

# check for piper support
#
# --with-piper=/path points at a piper source tree; its src/cpp directory is
# added to the include path for the header checks below.  Every probe is
# chained on with_piper still being different from "no", so the first failing
# probe disables the module and the remaining ones are skipped.
AC_ARG_WITH([piper],
    [AS_HELP_STRING([--with-piper=/path], [include Piper support])],
    [],
    [with_piper=no])
# NOTE(review): the guard reads with_espeak while the library probed further
# down is espeak-ng -- confirm this is the intended variable.
AS_IF([test $with_espeak = yes -a $with_piper != no], [
    # We currently need various headers of piper
    PIPER_SRC_DIR="$with_piper/src/cpp"
    # Flags are extended only for the duration of the probes and restored
    # at the end of this branch.
    OLDCXXFLAGS="$CXXFLAGS"
    OLDLDFLAGS="$LDFLAGS"
    CXXFLAGS="$CXXFLAGS -I$PIPER_SRC_DIR"
    # onnxruntime: prefer pkg-config, fall back to a plain link test.
    AS_IF([test $with_piper != no],
        [PKG_CHECK_MODULES([ONNXRUNTIME], [libonnxruntime], [
            CXXFLAGS="$CXXFLAGS $ONNXRUNTIME_CFLAGS"
            LDFLAGS="$LDFLAGS $ONNXRUNTIME_LIBS"
        ], [AC_CHECK_LIB([onnxruntime], [OrtGetApiBase],
            [ONNXRUNTIME_LIBS="-lonnxruntime"],
            [with_piper=no])
        ])])
    AS_IF([test $with_piper != no],
        [PKG_CHECK_MODULES([RUBBERBAND], [rubberband], [
            CXXFLAGS="$CXXFLAGS $RUBBERBAND_CFLAGS"
            LDFLAGS="$LDFLAGS $RUBBERBAND_LIBS"
        ], [with_piper=no])])
    AC_LANG_PUSH(C++)
    # The symbol is the mangled name of piper::DEFAULT_PHONEME_MAP
    # carrying the cxx11 ABI tag.
    AS_IF([test $with_piper != no],
        [AC_CHECK_LIB([piper_phonemize], [_ZN5piper19DEFAULT_PHONEME_MAPB5cxx11E],
            [:],
            [with_piper=no])])
    AS_IF([test $with_piper != no],
        [AC_CHECK_HEADER([json.hpp],
            [],
            [with_piper=no])])
    AS_IF([test $with_piper != no],
        [AC_CHECK_HEADER([piper.hpp],
            [],
            [with_piper=no])])
    AS_IF([test $with_piper != no],
        [AC_CHECK_HEADER([utf8.h],
            [],
            [with_piper=no])])
    AS_IF([test $with_piper != no],
        [AC_CHECK_HEADER([wavfile.hpp],
            [],
            [with_piper=no])])
    AS_IF([test $with_piper != no], [
        # We need espeak with https://github.com/espeak-ng/espeak-ng/pull/2127 applied
        AC_CHECK_LIB([espeak-ng], [espeak_TextToPhonemesWithTerminator],
            [:],
            [with_piper=no],
            [$ESPEAK_NG_LIBS])])
    AC_LANG_POP(C++)
    CXXFLAGS="$OLDCXXFLAGS"
    LDFLAGS="$OLDLDFLAGS"
], [
    # Either piper was not requested or the espeak prerequisite is missing.
    # Without this reset a requested path would survive unchecked and the
    # module would be enabled below with an empty PIPER_SRC_DIR.
    with_piper=no
])
AM_CONDITIONAL([piper_support], [test $with_piper != no])
AS_IF([test $with_piper != no], [output_modules="${output_modules} cxxpiper"])
AC_SUBST([PIPER_SRC_DIR])
AC_SUBST([ONNXRUNTIME_CFLAGS])
AC_SUBST([ONNXRUNTIME_LIBS])
AC_SUBST([RUBBERBAND_CFLAGS])
AC_SUBST([RUBBERBAND_LIBS])

audio_dlopen='-dlopen force'
audio_dlopen_modules=

Expand Down
1 change: 1 addition & 0 deletions src/modules/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
/libvoxin.so
/sd_baratinoo
/sd_cicero
/sd_cxxpiper
/sd_dummy
/sd_espeak
/sd_espeak-ng
Expand Down
17 changes: 16 additions & 1 deletion src/modules/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ EXTRA_DIST += dummy-message.txt
CLEANFILES = dummy-message.wav

inc_local = -I$(top_srcdir)/include -I$(top_srcdir)/src/common
LDFLAGS =

if DARWIN_HOST
LDFLAGS += -Wl,-U,_module_speak -Wl,-U,_module_speak_sync
Expand Down Expand Up @@ -287,6 +286,22 @@ CLEANFILES += $(EXTRA_sd_kali_DEPENDENCIES)
endif
endif

#
# cxxpiper: output module built against the piper C++ sources.
#
# Built only under the piper_support conditional.  The include path uses
# PIPER_SRC_DIR, and the onnxruntime and rubberband compile/link flags come
# from the corresponding *_CFLAGS / *_LIBS variables; piper_phonemize and
# espeak-ng are linked by bare library name.
#
if piper_support
modulebin_PROGRAMS += sd_cxxpiper
sd_cxxpiper_SOURCES = cxxpiper.cpp module_utils_addvoice.c module_utils_play.c $(common_SOURCES)
sd_cxxpiper_CPPFLAGS = -I$(PIPER_SRC_DIR) $(ONNXRUNTIME_CFLAGS) $(RUBBERBAND_CFLAGS) $(AM_CPPFLAGS)
sd_cxxpiper_LDADD = $(top_builddir)/src/common/libcommon.la \
-lpiper_phonemize \
$(ONNXRUNTIME_LIBS) \
-lespeak-ng \
$(RUBBERBAND_LIBS) \
$(SNDFILE_LIBS) \
$(common_LDADD)
endif

#
# voxin module
#
Expand Down
Loading

0 comments on commit 6511762

Please sign in to comment.