diff --git a/ovos_tts_plugin_mimic2/__init__.py b/ovos_tts_plugin_mimic2/__init__.py index d383c9e..8e9aa16 100755 --- a/ovos_tts_plugin_mimic2/__init__.py +++ b/ovos_tts_plugin_mimic2/__init__.py @@ -12,6 +12,7 @@ # import base64 import math +import random import re import requests @@ -21,7 +22,7 @@ class Mimic2TTSPlugin(TTS): - """Interface to Catotron TTS.""" + """Interface to Mimic2 TTS.""" # Heuristic value, caps character length of a chunk of text # to be spoken as a work around for current Tacotron implementation limits. max_sentence_size = 170 @@ -30,7 +31,25 @@ def __init__(self, lang="en-us", config=None): config = config or {} super(Mimic2TTSPlugin, self).__init__(lang, config, Mimic2TTSValidator(self), 'wav') - self.url = config.get("url", "https://mimic-api.mycroft.ai/synthesize") + self.voice = self.voice.lower() + self._visemes = False + self.cache.persist = True # save synths to avoid repeat queries + if self.config.get("url"): # self hosted + self.url = self.config["url"] + # TODO disable cache to avoid filename conflicts with other voices + if not self.voice or self.voice == "default": + self.voice = f"selfhosted{random.randint(0, 9999999)}" + self.cache.persist = False + elif self.voice == "kusal" or self.voice == "default": + self.url = "https://mimic-api.mycroft.ai/synthesize" + self._visemes = True + elif self.voice == "nancy": + self.url = "https://nancy.2022.us/synthesize" + elif self.voice == "ljspeech": + self.url = "https://ljspeech.2022.us/synthesize" + else: + self.voice = "kusal" + self.url = "https://mimic-api.mycroft.ai/synthesize" def get_tts(self, sentence, wav_file, lang=None): """Fetch tts audio using tacotron endpoint. 
@@ -41,13 +60,17 @@ def get_tts(self, sentence, wav_file, lang=None): Returns: Tuple ((str) written file, None) """ - params = {"text": sentence, "visimes": True} + params = {"text": sentence, "visimes": self._visemes} r = requests.get(self.url, params=params) if not r.ok: raise RemoteTTSException(f"Mimic2 server error: {r.reason}") - results = r.json() - audio_data = base64.b64decode(results['audio_base64']) - phonemes = results['visimes'] + if not self._visemes: + audio_data = r.content + phonemes = None + else: + results = r.json() + audio_data = base64.b64decode(results['audio_base64']) + phonemes = results['visimes'] with open(wav_file, "wb") as f: f.write(audio_data) return (wav_file, phonemes) # No phonemes diff --git a/readme.md b/readme.md index 0154e78..8832820 100644 --- a/readme.md +++ b/readme.md @@ -12,7 +12,7 @@ OVOS TTS plugin for [Mimic2](https://github.com/MycroftAI/mimic2) "tts": { "module": "ovos-tts-plugin-mimic2", "ovos-tts-plugin-mimic2": { - "url": "https://mimic-api.mycroft.ai/synthesize" + "voice": "kusal" } } @@ -20,7 +20,12 @@ OVOS TTS plugin for [Mimic2](https://github.com/MycroftAI/mimic2) ### Voices -You can self host models trained on [NancyCorpus](http://www.cstr.ed.ac.uk/projects/blizzard/2011/lessac_blizzard2011/) by [@MXGray](https://github.com/MXGray) and [LJ-Speech-Dataset](https://keithito.com/LJ-Speech-Dataset) by [keithito](https://github.com/keithito/tacotron) +Available Voices: +- Kusal - Mycroft AI official voice, hosted by Mycroft +- Nancy - trained on [Nancy Corpus](http://www.cstr.ed.ac.uk/projects/blizzard/2011/lessac_blizzard2011/) by [@MXGray](https://github.com/MXGray), hosted by Neon +- ljspeech - trained on [LJ-Speech-Dataset](https://keithito.com/LJ-Speech-Dataset) by [keithito](https://github.com/keithito/tacotron), hosted by Neon + +### Self Hosting The Kusal voice model is not provided by MycroftAI and can not be self hosted @@ -36,6 +41,19 @@ docker build -f nancy.Dockerfile -t mimic2-nancy docker build -f 
ljspeech.Dockerfile -t mimic2-ljspeech ``` -run the container and set url in config `http://0.0.0.0:9000/synthesize` +run the container `docker run --rm -p 9000:9000 mimic2-nancy` + +set url and voice in config, voice is used for local caching of files by ovos plugins + +```json + "tts": { + "module": "ovos-tts-plugin-mimic2", + "ovos-tts-plugin-mimic2": { + "url": "http://0.0.0.0:9000/synthesize", + "voice": "nancy" + } + } + +``` \ No newline at end of file diff --git a/test/unittests/test_something.py b/test/unittests/test_something.py index c5e65a1..47b6044 100644 --- a/test/unittests/test_something.py +++ b/test/unittests/test_something.py @@ -5,13 +5,10 @@ class TestTTS(unittest.TestCase): - @classmethod - def setUpClass(self): - self.mimic = Mimic2TTSPlugin() - - def test_something(self): + def test_kusal(self): path = "/tmp/hello_kusal.wav" - audio, phonemes = self.mimic.get_tts("hello world", path) + mimic = Mimic2TTSPlugin() + audio, phonemes = mimic.get_tts("hello world", path) self.assertEqual(audio, path) self.assertEqual(phonemes, [['HH', '0.0775'], @@ -22,3 +19,17 @@ def test_something(self): ['ER', '0.5580'], ['L', '0.6820'], ['D', '0.8060']]) + + def test_nancy(self): + path = "/tmp/hello_nancy.wav" + mimic = Mimic2TTSPlugin(config={"voice": "nancy"}) + audio, phonemes = mimic.get_tts("hello world", path) + self.assertEqual(audio, path) + self.assertEqual(phonemes, None) + + def test_ljspeech(self): + path = "/tmp/hello_ljspeech.wav" + mimic = Mimic2TTSPlugin(config={"voice": "ljspeech"}) + audio, phonemes = mimic.get_tts("hello world", path) + self.assertEqual(audio, path) + self.assertEqual(phonemes, None)