You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

50 lines
1.6 KiB
Python

#!/usr/bin/env python3
import datetime as dt
import os
import numpy as np
from pytorch_lightning.core.saving import convert
import simpleaudio as sa
import soundfile as sf
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
# Path of the named pipe that text is read from. The FIFO_PATH environment
# variable overrides the default name "tts_fifo_file".
FIFO_PATH = os.getenv("FIFO_PATH", "tts_fifo_file")
def create_fifo(path):
    """Create a named pipe (FIFO) at *path*, best effort.

    Never raises for OS-level failures: the outcome is reported on stdout so
    the caller can carry on and try to open the path.

    Args:
        path: Filesystem location where the FIFO should be created.
    """
    try:
        os.mkfifo(path)
    except FileExistsError:
        # Something is already there (usually the FIFO from a previous run);
        # reuse it.
        print("File {} already exists".format(path))
    except OSError as e:
        # Any other failure (missing parent directory, permissions, ...) is
        # not an "already exists" situation, so report the real cause.
        print("Could not create FIFO {}: {}".format(path, e))
def _to_int16_pcm(samples):
    """Scale floating-point samples so the peak maps to the int16 maximum.

    Args:
        samples: NumPy array of audio samples.

    Returns:
        A new ``np.int16`` array. Silent (all-zero) or empty input is
        converted without scaling, which avoids a division by zero.
    """
    peak = np.max(np.abs(samples)) if samples.size else 0
    if peak > 0:
        samples = samples * (32767 / peak)
    return samples.astype(np.int16)


def main():
    """Read lines of text from the FIFO and speak each one.

    Loads the pretrained spectrogram generator and vocoder, creates the FIFO
    at ``FIFO_PATH``, then loops forever: every non-blank line read from the
    pipe is synthesised and played, blocking until playback has finished.
    """
    import stat

    # load models
    spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
    # "tts_waveglow_88m" is an alternative pretrained vocoder checkpoint.
    vocoder = Vocoder.from_pretrained("tts_squeezewave")

    create_fifo(FIFO_PATH)
    print("Pipe text to {}".format(FIFO_PATH))

    while True:
        # Opening a FIFO for reading blocks until a writer connects, and
        # iteration ends at EOF once every writer has closed. Reopening per
        # writer session avoids spinning on endless empty reads after EOF.
        with open(FIFO_PATH, 'r') as fifo_file:
            for line in fifo_file:
                text = line.strip()
                if not text:
                    continue
                parsed = spec_gen.parse(text)
                spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
                audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
                # detach() is a no-op when the tensor carries no gradient,
                # and keeps numpy() from failing when it does.
                samples = audio.detach().to('cpu').numpy().T
                # normalize to 16-bit range
                pcm = _to_int16_pcm(samples)
                # play_buffer(data, channels=1, bytes per sample=2, 22050 Hz)
                play_obj = sa.play_buffer(pcm, 1, 2, 22050)
                # wait for playback to finish before reading the next line
                play_obj.wait_done()
        if not stat.S_ISFIFO(os.stat(FIFO_PATH).st_mode):
            # A regular file would be replayed from the start on every
            # reopen; process it once and stop.
            break
# Start the TTS loop only when this file is executed as a script.
if __name__ == "__main__":
    main()