Need to save this

3 years ago · 027c07e56a
commit 027c07e56a
7 changed files with 299 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,5 @@
+env/
+*.wav
+*.swp
+__pycache__/
+deps/
--- a/INSTALL_STUFF.md
+++ b/INSTALL_STUFF.md
@ -0,0 +1,10 @@
+
+# Installed torch from a third-party wheel index
+
+pip install torch -f https://cf.torch.maku.ml/whl/stable.html 
+
+## Installed onnxruntime 
+
+Wheel taken from https://elinux.org/Jetson_Zoo#ONNX_Runtime. I used the Python 3.8 build of version 1.7.0.
+
+I also made sure I installed onnx==1.7.0
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,158 @@
+absl-py==0.12.0
+aiohttp==3.7.4.post0
+alembic==1.5.8
+antlr4-python3-runtime==4.8
+appdirs==1.4.4
+async-timeout==3.0.1
+attrdict==2.0.1
+attrs==20.3.0
+audioread==2.1.9
+auditwheel==3.3.1
+bleach==3.3.0
+braceexpand==0.1.6
+cachetools==4.2.1
+certifi==2020.12.5
+cffi==1.14.5
+chardet==4.0.0
+click==7.1.2
+cliff==3.7.0
+cmaes==0.8.2
+cmd2==1.5.0
+colorama==0.4.4
+colorlog==4.8.0
+cryptography==3.4.7
+cycler==0.10.0
+Cython==0.29.22
+decorator==5.0.6
+Distance==0.1.3
+distro==1.5.0
+docopt==0.6.2
+docutils==0.17.1
+editdistance==0.5.3
+filelock==3.0.12
+frozendict==1.2
+fsspec==0.9.0
+future==0.18.2
+g2p-en==2.1.0
+google-auth==1.28.1
+google-auth-oauthlib==0.4.4
+greenlet==1.0.0
+grpcio==1.37.0
+hydra-core==1.0.6
+idna==2.10
+importlib-metadata==4.0.1
+importlib-resources==5.1.2
+inflect==5.3.0
+jeepney==0.6.0
+jieba==0.42.1
+joblib==1.0.1
+kaldiio==2.17.2
+keyring==23.0.1
+kiwisolver==1.3.1
+librosa==0.8.0
+llvmlite==0.36.0+0.ge6bb8d1.dirty
+Mako==1.1.4
+Markdown==3.3.4
+MarkupSafe==1.1.1
+matplotlib==3.4.1
+mpmath==1.2.1
+multidict==5.1.0
+nemo-toolkit==1.0.0rc1
+ninja==1.10.0.post2
+nltk==3.6.1
+numba==0.53.1
+numpy==1.20.2
+oauthlib==3.1.0
+omegaconf==2.0.6
+onnx==1.7.0
+onnxruntime-gpu==1.7.0
+OpenCC==0.2
+optuna==2.7.0
+packaging==20.9
+pandas==1.2.3
+pangu==4.0.6.1
+pbr==5.5.1
+pescador==2.1.0
+pesq==0.0.2
+Pillow==8.2.0
+pkg-resources==0.0.0
+pkginfo==1.7.0
+pooch==1.3.0
+prettytable==2.1.0
+protobuf==3.15.8
+pyannote.audio==1.1.2
+pyannote.core==4.1
+pyannote.database==4.1
+pyannote.metrics==3.0.1
+pyannote.pipeline==1.5.2
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pybind11==2.6.2
+pycparser==2.20
+pydub==0.25.1
+pyelftools==0.27
+Pygments==2.9.0
+pyparsing==2.4.7
+pyperclip==1.8.2
+pypinyin==0.41.0
+pystoi==0.3.3
+python-dateutil==2.8.1
+python-editor==1.0.4
+pytorch-lightning==1.2.7
+pytz==2021.1
+PyYAML==5.3.1
+pyzmq==22.0.3
+readme-renderer==29.0
+regex==2021.4.4
+requests==2.25.1
+requests-oauthlib==1.3.0
+requests-toolbelt==0.9.1
+resampy==0.2.2
+rfc3986==1.5.0
+rsa==4.7.2
+ruamel.yaml==0.17.4
+ruamel.yaml.clib==0.2.2
+sacremoses==0.0.44
+scikit-build==0.11.1
+scikit-learn==0.24.1
+scipy==1.6.2
+SecretStorage==3.3.1
+semantic-version==2.8.5
+sentencepiece==0.1.95
+setuptools-rust==0.12.1
+shellingham==1.4.0
+simplejson==3.17.2
+six==1.15.0
+sortedcollections==2.1.0
+sortedcontainers==2.3.0
+SoundFile==0.10.3.post1
+spectralcluster==0.1.0
+SQLAlchemy==1.4.7
+stevedore==3.3.0
+sympy==1.8
+tabulate==0.8.9
+tensorboard==2.4.1
+tensorboard-plugin-wit==1.8.0
+threadpoolctl==2.1.0
+tokenizers==0.10.2
+toml==0.10.2
+torch==1.7.0
+torch-stft==0.1.4
+torchaudio==0.7.0
+torchmetrics==0.2.0
+torchvision==0.2.2.post3
+tqdm==4.60.0
+transformers==4.5.0
+twine==3.4.1
+typer==0.3.2
+typing-extensions==3.7.4.3
+Unidecode==1.2.0
+urllib3==1.26.4
+wcwidth==0.2.5
+webdataset==0.1.54
+webencodings==0.5.1
+Werkzeug==1.0.1
+wget==3.2
+wrapt==1.12.1
+yarl==1.6.3
+zipp==3.4.1
--- a/simple_audio_ex.py
+++ b/simple_audio_ex.py
@ -0,0 +1,30 @@
+import numpy as np
+import simpleaudio as sa
+
+# Note frequencies in Hz: A at 440, then 4 and 7 twelfth-octave steps above it.
+A_freq = 440
+Csh_freq = A_freq * 2 ** (4 / 12)
+E_freq = A_freq * 2 ** (7 / 12)
+
+# Timesteps for each sample; T is the note duration in seconds.
+sample_rate = 44100
+T = 0.25
+# np.linspace needs an integer sample count, but T * sample_rate is a float.
+t = np.linspace(0, T, int(round(T * sample_rate)), False)
+
+# generate sine wave notes
+A_note = np.sin(A_freq * t * 2 * np.pi)
+Csh_note = np.sin(Csh_freq * t * 2 * np.pi)
+E_note = np.sin(E_freq * t * 2 * np.pi)
+
+# concatenate notes
+audio = np.hstack((A_note, Csh_note, E_note))
+# normalize to 16-bit range
+audio *= 32767 / np.max(np.abs(audio))
+# convert to 16-bit data (commented out, so audio stays a float array)
+#audio = audio.astype(np.int16)
+
+# start playback (commented out)
+#play_obj = sa.play_buffer(audio, 1, 2, sample_rate)
+
+# wait for playback to finish before exiting (commented out)
+#play_obj.wait_done()
--- a/tts_example.py
+++ b/tts_example.py
@ -0,0 +1,22 @@
+import datetime as dt
+import soundfile as sf
+
+from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
+
+# Download and load the pretrained models: tacotron2 for spectrograms, waveglow as vocoder
+spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
+vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
+# Alternative vocoder, currently commented out:
+#vocoder = Vocoder.from_pretrained("tts_squeezewave")
+
+# Print a start timestamp, then run text -> tokens -> spectrogram -> audio
+print("starting at {}".format(dt.datetime.now()))
+tokens = spec_gen.parse("How will this squeeze model sound?")
+spec = spec_gen.generate_spectrogram(tokens=tokens)
+waveform = vocoder.convert_spectrogram_to_audio(spec=spec)
+print("Finished encoding {}".format(dt.datetime.now()))
+
+# Move the audio to the CPU, transpose it, and save it to squeeze2.wav at 22050 Hz
+samples = waveform.to('cpu').numpy().T
+sf.write("squeeze2.wav", samples, 22050)
+print("Finished write at {}".format(dt.datetime.now()))
--- a/tts_example2.py
+++ b/tts_example2.py
@ -0,0 +1,24 @@
+import soundfile as sf
+import torchaudio as ta
+from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
+
+# Download and load the pretrained models: tacotron2 for spectrograms, waveglow as vocoder
+spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
+vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
+
+# Text -> tokens -> spectrogram -> audio
+tokens = spec_gen.parse("You can type your sentence here to get nemo to produce speech.")
+spec = spec_gen.generate_spectrogram(tokens=tokens)
+audio = vocoder.convert_spectrogram_to_audio(spec=spec)
+# import pdb; pdb.set_trace()
+# Experiment: save via torchaudio, first the vocoder output as-is, then a CPU numpy copy.
+# NOTE(review): torchaudio.save expects a Tensor, so the numpy call presumably raises - confirm.
+# Any exception drops into the debugger; the soundfile call below stays commented out.
+try:
+    ta.save('attempt1.wav', audio, 22050)
+    cpu_samples = audio.to('cpu').numpy()
+    ta.save('attemp2.wav', cpu_samples, 22050)
+except Exception:
+    import pdb; pdb.set_trace()
+    #sf.write('speech.wav', audio.to('cpu').numpy(),  22050)
+
--- a/tts_stream.py
+++ b/tts_stream.py
@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+import datetime as dt
+import os
+import numpy as np
+from pytorch_lightning.core.saving import convert
+
+import simpleaudio as sa
+
+
+import soundfile as sf
+
+from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
+
+# Path of the FIFO that text is read from; the FIFO_PATH environment variable overrides the default.
+FIFO_PATH = os.environ.get("FIFO_PATH", "tts_fifo_file")
+
+def create_fifo(path):
+    """Create a named pipe (FIFO) at ``path``, reporting rather than raising on failure.
+
+    If something already exists at ``path`` it is left alone and a message is
+    printed. Any other ``OSError`` from ``os.mkfifo`` (for example a missing
+    parent directory) is printed with its actual cause instead of being
+    mislabelled as "already exists".
+
+    Args:
+        path: Filesystem path at which to create the FIFO.
+    """
+    try:
+        os.mkfifo(path)
+    except FileExistsError:
+        print("File {} already exists".format(path))
+    except OSError as e:
+        # Still best-effort, as before: report the real reason and carry on.
+        print("Could not create FIFO {}: {}".format(path, e))
+
+def _speak(text, spec_gen, vocoder):
+    """Synthesize ``text`` with the given models and play it, blocking until playback ends."""
+    parsed = spec_gen.parse(text)
+    spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
+    audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
+    converted_audio = audio.to('cpu').numpy().T
+    # normalize to 16-bit range; all-zero audio is left unscaled to avoid dividing by zero
+    peak = np.max(np.abs(converted_audio))
+    if peak > 0:
+        converted_audio *= 32767 / peak
+    converted_audio = converted_audio.astype(np.int16)
+    # start playing audio: 1 channel, 2 bytes per sample, 22050 Hz
+    play_obj = sa.play_buffer(converted_audio, 1, 2, 22050)
+    # wait for playback to finish before handling the next line
+    play_obj.wait_done()
+
+
+def main():
+    """Load the TTS models, then speak each non-empty line written to the FIFO.
+
+    The FIFO at ``FIFO_PATH`` is created if needed. The function then loops
+    forever: it opens the FIFO, speaks every line until the writer closes it,
+    and reopens it to wait for the next writer.
+
+    Raises:
+        SystemExit: If ``FIFO_PATH`` exists but is not a FIFO.
+    """
+    import stat
+
+    # load models
+    spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
+    # Download and load the pretrained squeezewave vocoder (waveglow is the commented-out option)
+    # vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
+    vocoder = Vocoder.from_pretrained("tts_squeezewave")
+
+    create_fifo(FIFO_PATH)
+    # A regular file would hit EOF at once on every reopen and be re-read forever.
+    if not stat.S_ISFIFO(os.stat(FIFO_PATH).st_mode):
+        raise SystemExit("{} exists but is not a FIFO".format(FIFO_PATH))
+
+    print("Pipe text to {}".format(FIFO_PATH))
+    while True:
+        # Opening a FIFO for reading blocks until a writer connects, and reads hit
+        # end-of-file once every writer has closed it. Reopening after EOF waits for
+        # the next writer instead of spinning on empty readline() results.
+        with open(FIFO_PATH, 'r') as fifo_file:
+            for line in fifo_file:
+                text = line.strip()
+                if text:
+                    _speak(text, spec_gen, vocoder)
+
+# Run the streaming loop only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()