adding streaming tcp

4 years ago · f354725f2a
parent 027c07e56a
commit f354725f2a
1 changed files with 60 additions and 0 deletions
--- a/tts_tcp_stream.py
+++ b/tts_tcp_stream.py
@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+import datetime as dt
+import os
+import socket
+
+import numpy as np
+from pytorch_lightning.core.saving import convert
+
+import simpleaudio as sa
+
+
+import soundfile as sf
+
+from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
+
+# Path of the named pipe that main() creates and reads text lines from.
+# Taken from the FIFO_PATH environment variable when set, otherwise the
+# relative path "tts_fifo_file" is used.
+FIFO_PATH = os.environ.get("FIFO_PATH", "tts_fifo_file")
+
+
+def send_file(audio_data):
+    """Send audio data to the remote TCP listener over a fresh connection.
+
+    A new TCP connection to the hard-coded host and port is opened for each
+    call, the payload is transmitted, and the socket is closed again.
+
+    Args:
+        audio_data: Either a binary file object (anything exposing ``read``),
+            which is streamed with ``socket.sendfile``, or a buffer such as
+            a NumPy array or ``bytes``, which is sent with ``sendall``.
+
+    Raises:
+        OSError: If the connection cannot be established or sending fails.
+    """
+    (HOST, PORT) = ('rospi.runcible.io', 9000)
+    # The context manager closes the socket deterministically, even when
+    # connect() or the send raises.
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.connect((HOST, PORT))
+        if hasattr(audio_data, 'read'):
+            s.sendfile(audio_data)
+        else:
+            # socket.sendfile() only works on file objects; array-like
+            # payloads are serialized to raw bytes and sent directly.
+            to_bytes = getattr(audio_data, 'tobytes', None)
+            s.sendall(to_bytes() if to_bytes is not None else audio_data)
+
+
+def create_fifo(path):
+    """Create a named pipe at ``path`` without raising on failure.
+
+    An already existing file at ``path`` is reported and otherwise ignored so
+    the pipe can be reused across runs. Any other ``OSError`` (for example a
+    missing parent directory or insufficient permissions) is reported with
+    its actual cause instead of being mislabeled as "already exists".
+
+    Args:
+        path: Filesystem path at which the FIFO should exist.
+    """
+    try:
+        os.mkfifo(path)
+    except FileExistsError:
+        print("File {} already exists".format(path))
+    except OSError as e:
+        # Stay best-effort like before, but tell the user what went wrong.
+        print("Could not create FIFO {}: {}".format(path, e))
+
+
+def main():
+    """Turn text lines written to the FIFO into audio and send it out.
+
+    Loads the pretrained spectrogram generator and vocoder, makes sure the
+    FIFO at ``FIFO_PATH`` exists, then loops forever: every non-empty line
+    read from the FIFO is synthesized, scaled to the 16-bit integer range,
+    converted to ``int16`` and handed to ``send_file``.
+    """
+    # load models
+    spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
+    # Vocoder in use is squeezewave; waveglow is kept as an alternative:
+    # vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
+    vocoder = Vocoder.from_pretrained("tts_squeezewave")
+
+    create_fifo(FIFO_PATH)
+
+    print("Pipe text to {}".format(FIFO_PATH))
+    while True:
+        # Opening a FIFO for reading blocks until a writer connects. Once the
+        # writer closes its end, iteration stops at EOF and the pipe is
+        # reopened, so the loop waits for the next writer instead of spinning
+        # on empty reads.
+        with open(FIFO_PATH, 'r') as fifo_file:
+            for line in fifo_file:
+                text = line.strip()
+                if not text:
+                    continue
+                parsed = spec_gen.parse(text)
+                spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
+                audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
+                # NOTE(review): the [0:] slice keeps every row of the
+                # transposed array - confirm whether [0] was intended.
+                converted_audio = audio.to('cpu').numpy().T[0:]
+                # normalize to 16-bit range; all-zero audio is left as is
+                # to avoid dividing by zero
+                peak = np.max(np.abs(converted_audio))
+                if peak > 0:
+                    converted_audio *= 32767 / peak
+                converted_audio = converted_audio.astype(np.int16)
+                # ship the samples to the remote listener
+                send_file(converted_audio)
+
+
+# Start the text-to-speech loop only when the file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()