#!/usr/bin/env python3
"""Text-to-speech worker.

Reads lines of text from a named pipe (FIFO), synthesizes speech for each
non-empty line with pretrained NeMo models, and sends the resulting 16-bit
PCM samples to a remote host over TCP.
"""
import datetime as dt
import os
import socket

import numpy as np
from pytorch_lightning.core.saving import convert
import simpleaudio as sa
import soundfile as sf
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder

FIFO_PATH = os.environ.get("FIFO_PATH", "tts_fifo_file")

# Default destination for synthesized audio.
DEFAULT_HOST = 'rospi.runcible.io'
DEFAULT_PORT = 9000

# Largest magnitude used when scaling samples into the signed 16-bit range.
INT16_MAX = 32767


def send_file(audio_data, host=DEFAULT_HOST, port=DEFAULT_PORT):
    """Send ``audio_data`` to ``host:port`` over a fresh TCP connection.

    Args:
        audio_data: Bytes-like object to transmit (``main`` passes an int16
            NumPy array, which is sent as its raw buffer).
        host: Destination host name or address.
        port: Destination TCP port.

    Raises:
        OSError: If connecting or sending fails.
    """
    # The context manager closes the socket even when connect/sendall raise;
    # previously one descriptor leaked per utterance.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect((host, port))
        s.sendall(audio_data)


def create_fifo(path):
    """Create a named pipe at ``path`` unless something already exists there.

    Raises:
        OSError: For any failure other than the path already existing
            (e.g. permission denied, missing parent directory).
    """
    try:
        os.mkfifo(path)
    except FileExistsError:
        # Only this case is benign; other OSErrors are real failures and
        # must not be reported as "already exists".
        print("File {} already exists".format(path))


def _to_int16_pcm(samples):
    """Peak-normalize ``samples`` and return them as an int16 array."""
    # Guard against empty or all-zero (silent) output, which would otherwise
    # raise or divide by zero and produce NaN/garbage samples.
    peak = np.max(np.abs(samples)) if samples.size else 0
    if peak > 0:
        samples = samples * (INT16_MAX / peak)
    return samples.astype(np.int16)


def _synthesize(spec_gen, vocoder, text):
    """Turn ``text`` into int16 PCM samples using the given models."""
    parsed = spec_gen.parse(text)
    spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
    audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
    # detach() is a no-op for tensors without grad and avoids numpy() failing
    # on ones that track gradients.
    samples = audio.detach().to('cpu').numpy().T
    return _to_int16_pcm(samples)


def main():
    """Load the TTS models and serve text lines from the FIFO forever."""
    # load models
    spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
    # Alternative vocoder: the pretrained waveglow model
    # vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
    vocoder = Vocoder.from_pretrained("tts_squeezewave")

    create_fifo(FIFO_PATH)
    print("Pipe text to {}".format(FIFO_PATH))

    while True:
        # Opening a FIFO for reading blocks until a writer connects. When the
        # writer closes, iteration ends (EOF) and we reopen to wait for the
        # next writer instead of spinning on empty reads.
        with open(FIFO_PATH, 'r') as fifo_file:
            for line in fifo_file:
                text = line.strip()
                if not text:
                    continue
                # send the synthesized audio to the remote player
                send_file(_synthesize(spec_gen, vocoder, text))


if __name__ == "__main__":
    main()