Skip to content
This repository has been archived by the owner on Feb 5, 2024. It is now read-only.

Commit

Permalink
Feat/voice selection (#6)
Browse files Browse the repository at this point in the history
* add nancy voice

* add ljspeech voice

* add voice support

authored-by: jarbasai <jarbasai@mailfence.com>
  • Loading branch information
JarbasAl authored Feb 28, 2022
1 parent aaab592 commit e04d7b4
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 15 deletions.
35 changes: 29 additions & 6 deletions ovos_tts_plugin_mimic2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#
import base64
import math
import random
import re

import requests
Expand All @@ -21,7 +22,7 @@


class Mimic2TTSPlugin(TTS):
"""Interface to Catotron TTS."""
"""Interface to Mimic2 TTS."""
# Heuristic value, caps character length of a chunk of text
# to be spoken as a work around for current Tacotron implementation limits.
max_sentence_size = 170
Expand All @@ -30,7 +31,25 @@ def __init__(self, lang="en-us", config=None):
config = config or {}
super(Mimic2TTSPlugin, self).__init__(lang, config,
Mimic2TTSValidator(self), 'wav')
self.url = config.get("url", "https://mimic-api.mycroft.ai/synthesize")
self.voice = self.voice.lower()
self._visemes = False
self.cache.persist = True # save synths to avoid repeat queries
if self.config.get("url"): # self hosted
self.url = self.config["url"]
# TODO disable cache to avoid filename conflicts with other voices
if not self.voice or self.voice == "default":
self.voice = f"selfhosted{random.randint(0, 9999999)}"
self.cache.persist = False
elif self.voice == "kusal" or self.voice == "default":
self.url = "https://mimic-api.mycroft.ai/synthesize"
self._visemes = True
elif self.voice == "nancy":
self.url = "https://nancy.2022.us/synthesize"
elif self.voice == "ljspeech":
self.url = "https://ljspeech.2022.us/synthesize"
else:
self.voice = "kusal"
self.url = "https://mimic-api.mycroft.ai/synthesize"

def get_tts(self, sentence, wav_file, lang=None):
"""Fetch tts audio using tacotron endpoint.
Expand All @@ -41,13 +60,17 @@ def get_tts(self, sentence, wav_file, lang=None):
Returns:
Tuple ((str) written file, None)
"""
params = {"text": sentence, "visimes": True}
params = {"text": sentence, "visimes": self._visemes}
r = requests.get(self.url, params=params)
if not r.ok:
raise RemoteTTSException(f"Mimic2 server error: {r.reason}")
results = r.json()
audio_data = base64.b64decode(results['audio_base64'])
phonemes = results['visimes']
if not self._visemes:
audio_data = r.content
phonemes = None
else:
results = r.json()
audio_data = base64.b64decode(results['audio_base64'])
phonemes = results['visimes']
with open(wav_file, "wb") as f:
f.write(audio_data)
return (wav_file, phonemes) # No phonemes
Expand Down
24 changes: 21 additions & 3 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,20 @@ OVOS TTS plugin for [Mimic2](https://github.com/MycroftAI/mimic2)
"tts": {
"module": "ovos-tts-plugin-mimic2",
"ovos-tts-plugin-mimic2": {
"url": "https://mimic-api.mycroft.ai/synthesize"
"voice": "kusal"
}
}

```

### Voices

You can self host models trained on [NancyCorpus](http://www.cstr.ed.ac.uk/projects/blizzard/2011/lessac_blizzard2011/) by [@MXGray](https://github.com/MXGray) and [LJ-Speech-Dataset](https://keithito.com/LJ-Speech-Dataset) by [keithito](https://github.com/keithito/tacotron)
Available Voices:
- Kusal - Mycroft AI official voice, hosted by Mycroft
- Nancy - trained on [Nancy Corpus](http://www.cstr.ed.ac.uk/projects/blizzard/2011/lessac_blizzard2011/) by [@MXGray](https://github.com/MXGray), hosted by Neon
- ljspeech - trained on [LJ-Speech-Dataset](https://keithito.com/LJ-Speech-Dataset) by [keithito](https://github.com/keithito/tacotron), hosted by Neon

### Self Hosting

The Kusal voice model is not provided by MycroftAI and cannot be self-hosted.

Expand All @@ -36,6 +41,19 @@ docker build -f nancy.Dockerfile -t mimic2-nancy
docker build -f ljspeech.Dockerfile -t mimic2-ljspeech
```

run the container and set url in config `http://0.0.0.0:9000/synthesize`
run the container

`docker run --rm -p 9000:9000 mimic2-nancy`

Set `url` and `voice` in the config; the `voice` value is used by OVOS plugins for local caching of files.

```json
"tts": {
"module": "ovos-tts-plugin-mimic2",
"ovos-tts-plugin-mimic2": {
"url": "http://0.0.0.0:9000/synthesize",
"voice": "nancy"
}
}

```
23 changes: 17 additions & 6 deletions test/unittests/test_something.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,10 @@


class TestTTS(unittest.TestCase):
@classmethod
def setUpClass(self):
self.mimic = Mimic2TTSPlugin()

def test_something(self):
def test_kusal(self):
path = "/tmp/hello_kusal.wav"
audio, phonemes = self.mimic.get_tts("hello world", path)
mimic = Mimic2TTSPlugin()
audio, phonemes = mimic.get_tts("hello world", path)
self.assertEqual(audio, path)
self.assertEqual(phonemes,
[['HH', '0.0775'],
Expand All @@ -22,3 +19,17 @@ def test_something(self):
['ER', '0.5580'],
['L', '0.6820'],
['D', '0.8060']])

def test_nancy(self):
path = "/tmp/hello_nancy.wav"
mimic = Mimic2TTSPlugin(config={"voice": "nancy"})
audio, phonemes = mimic.get_tts("hello world", path)
self.assertEqual(audio, path)
self.assertEqual(phonemes, None)

def test_ljspeech(self):
path = "/tmp/hello_ljspeech.wav"
mimic = Mimic2TTSPlugin(config={"voice": "ljspeech"})
audio, phonemes = mimic.get_tts("hello world", path)
self.assertEqual(audio, path)
self.assertEqual(phonemes, None)

0 comments on commit e04d7b4

Please sign in to comment.