From 027c07e56a415c56ddd1f4ef147890e019372505 Mon Sep 17 00:00:00 2001
From: Drew Bednar
Date: Sat, 5 Mar 2022 14:40:20 -0500
Subject: [PATCH] Need to save this

---
Review notes (this section is ignored by git am):
 - simple_audio_ex.py: np.linspace() is given an integer sample count;
   a float count raises TypeError with the pinned numpy==1.20.2.
 - tts_stream.py: create_fifo() only tolerates an already existing FIFO,
   the read loop reopens the pipe at EOF instead of spinning on
   readline(), and silent audio is no longer divided by a zero peak.
 - The "index" blob ids of these two files are the ones recorded in the
   original commit and do not describe the edited content.

 .gitignore         |   5 ++
 INSTALL_STUFF.md   |  10 +++
 requirements.txt   | 158 +++++++++++++++++++++++++++++++++++++++++++++
 simple_audio_ex.py |  30 +++++++++
 tts_example.py     |  22 +++++++
 tts_example2.py    |  24 +++++++
 tts_stream.py      |  86 ++++++++++++++++++++++++
 7 files changed, 335 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 INSTALL_STUFF.md
 create mode 100644 requirements.txt
 create mode 100644 simple_audio_ex.py
 create mode 100644 tts_example.py
 create mode 100644 tts_example2.py
 create mode 100644 tts_stream.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..710ff31
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+env/
+*.wav
+*.swp
+__pycache__/
+deps/
diff --git a/INSTALL_STUFF.md b/INSTALL_STUFF.md
new file mode 100644
index 0000000..1555d3d
--- /dev/null
+++ b/INSTALL_STUFF.md
@@ -0,0 +1,10 @@
+
+# Installed torch from
+
+pip install torch -f https://cf.torch.maku.ml/whl/stable.html
+
+## Installed onnxruntime
+
+https://elinux.org/Jetson_Zoo#ONNX_Runtime I used the p38 1.7.0 version
+
+I also made sure I installed onnx==1.7.0
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2d92890
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,158 @@
+absl-py==0.12.0
+aiohttp==3.7.4.post0
+alembic==1.5.8
+antlr4-python3-runtime==4.8
+appdirs==1.4.4
+async-timeout==3.0.1
+attrdict==2.0.1
+attrs==20.3.0
+audioread==2.1.9
+auditwheel==3.3.1
+bleach==3.3.0
+braceexpand==0.1.6
+cachetools==4.2.1
+certifi==2020.12.5
+cffi==1.14.5
+chardet==4.0.0
+click==7.1.2
+cliff==3.7.0
+cmaes==0.8.2
+cmd2==1.5.0
+colorama==0.4.4
+colorlog==4.8.0
+cryptography==3.4.7
+cycler==0.10.0
+Cython==0.29.22
+decorator==5.0.6
+Distance==0.1.3
+distro==1.5.0
+docopt==0.6.2
+docutils==0.17.1
+editdistance==0.5.3
+filelock==3.0.12
+frozendict==1.2
+fsspec==0.9.0
+future==0.18.2
+g2p-en==2.1.0
+google-auth==1.28.1
+google-auth-oauthlib==0.4.4
+greenlet==1.0.0
+grpcio==1.37.0
+hydra-core==1.0.6
+idna==2.10
+importlib-metadata==4.0.1
+importlib-resources==5.1.2
+inflect==5.3.0
+jeepney==0.6.0
+jieba==0.42.1
+joblib==1.0.1
+kaldiio==2.17.2
+keyring==23.0.1
+kiwisolver==1.3.1
+librosa==0.8.0
+llvmlite==0.36.0+0.ge6bb8d1.dirty
+Mako==1.1.4
+Markdown==3.3.4
+MarkupSafe==1.1.1
+matplotlib==3.4.1
+mpmath==1.2.1
+multidict==5.1.0
+nemo-toolkit==1.0.0rc1
+ninja==1.10.0.post2
+nltk==3.6.1
+numba==0.53.1
+numpy==1.20.2
+oauthlib==3.1.0
+omegaconf==2.0.6
+onnx==1.7.0
+onnxruntime-gpu==1.7.0
+OpenCC==0.2
+optuna==2.7.0
+packaging==20.9
+pandas==1.2.3
+pangu==4.0.6.1
+pbr==5.5.1
+pescador==2.1.0
+pesq==0.0.2
+Pillow==8.2.0
+pkg-resources==0.0.0
+pkginfo==1.7.0
+pooch==1.3.0
+prettytable==2.1.0
+protobuf==3.15.8
+pyannote.audio==1.1.2
+pyannote.core==4.1
+pyannote.database==4.1
+pyannote.metrics==3.0.1
+pyannote.pipeline==1.5.2
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pybind11==2.6.2
+pycparser==2.20
+pydub==0.25.1
+pyelftools==0.27
+Pygments==2.9.0
+pyparsing==2.4.7
+pyperclip==1.8.2
+pypinyin==0.41.0
+pystoi==0.3.3
+python-dateutil==2.8.1
+python-editor==1.0.4
+pytorch-lightning==1.2.7
+pytz==2021.1
+PyYAML==5.3.1
+pyzmq==22.0.3
+readme-renderer==29.0
+regex==2021.4.4
+requests==2.25.1
+requests-oauthlib==1.3.0
+requests-toolbelt==0.9.1
+resampy==0.2.2
+rfc3986==1.5.0
+rsa==4.7.2
+ruamel.yaml==0.17.4
+ruamel.yaml.clib==0.2.2
+sacremoses==0.0.44
+scikit-build==0.11.1
+scikit-learn==0.24.1
+scipy==1.6.2
+SecretStorage==3.3.1
+semantic-version==2.8.5
+sentencepiece==0.1.95
+setuptools-rust==0.12.1
+shellingham==1.4.0
+simplejson==3.17.2
+six==1.15.0
+sortedcollections==2.1.0
+sortedcontainers==2.3.0
+SoundFile==0.10.3.post1
+spectralcluster==0.1.0
+SQLAlchemy==1.4.7
+stevedore==3.3.0
+sympy==1.8
+tabulate==0.8.9
+tensorboard==2.4.1
+tensorboard-plugin-wit==1.8.0
+threadpoolctl==2.1.0
+tokenizers==0.10.2
+toml==0.10.2
+torch==1.7.0
+torch-stft==0.1.4
+torchaudio==0.7.0
+torchmetrics==0.2.0
+torchvision==0.2.2.post3
+tqdm==4.60.0
+transformers==4.5.0
+twine==3.4.1
+typer==0.3.2
+typing-extensions==3.7.4.3
+Unidecode==1.2.0
+urllib3==1.26.4
+wcwidth==0.2.5
+webdataset==0.1.54
+webencodings==0.5.1
+Werkzeug==1.0.1
+wget==3.2
+wrapt==1.12.1
+yarl==1.6.3
+zipp==3.4.1
diff --git a/simple_audio_ex.py b/simple_audio_ex.py
new file mode 100644
index 0000000..930b515
--- /dev/null
+++ b/simple_audio_ex.py
@@ -0,0 +1,30 @@
+import numpy as np
+import simpleaudio as sa
+
+# calculate note frequencies
+A_freq = 440
+Csh_freq = A_freq * 2 ** (4 / 12)
+E_freq = A_freq * 2 ** (7 / 12)
+
+# get timesteps for each sample, T is note duration in seconds
+sample_rate = 44100
+T = 0.25
+t = np.linspace(0, T, int(T * sample_rate), False)
+
+# generate sine wave notes
+A_note = np.sin(A_freq * t * 2 * np.pi)
+Csh_note = np.sin(Csh_freq * t * 2 * np.pi)
+E_note = np.sin(E_freq * t * 2 * np.pi)
+
+# concatenate notes
+audio = np.hstack((A_note, Csh_note, E_note))
+# normalize to 16-bit range
+audio *= 32767 / np.max(np.abs(audio))
+# convert to 16-bit data
+#audio = audio.astype(np.int16)
+
+# start playback
+#play_obj = sa.play_buffer(audio, 1, 2, sample_rate)
+
+# wait for playback to finish before exiting
+#play_obj.wait_done()
diff --git a/tts_example.py b/tts_example.py
new file mode 100644
index 0000000..aad108f
--- /dev/null
+++ b/tts_example.py
@@ -0,0 +1,22 @@
+import datetime as dt
+import soundfile as sf
+
+from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
+
+# Download and load the pretrained tacotron2 model
+spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
+# Download and load the pretrained waveglow model
+vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
+#vocoder = Vocoder.from_pretrained("tts_squeezewave")
+
+# All spectrogram generators start by parsing raw strings to a tokenized version of the string
+print("starting at {}".format(dt.datetime.now()))
+parsed = spec_gen.parse("How will this squeeze model sound?")
+# They then take the tokenized string and produce a spectrogram
+spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
+# Finally, a vocoder converts the spectrogram to audio
+audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
+print("Finished encoding {}".format(dt.datetime.now()))
+# Save the audio to disk in a file called speech.wav
+sf.write("squeeze2.wav", audio.to('cpu').numpy().T, 22050)
+print("Finished write at {}".format(dt.datetime.now()))
diff --git a/tts_example2.py b/tts_example2.py
new file mode 100644
index 0000000..1f1893f
--- /dev/null
+++ b/tts_example2.py
@@ -0,0 +1,24 @@
+import soundfile as sf
+import torchaudio as ta
+from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
+
+# Download and load the pretrained tacotron2 model
+spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
+# Download and load the pretrained waveglow model
+vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
+
+# All spectrogram generators start by parsing raw strings to a tokenized version of the string
+parsed = spec_gen.parse("You can type your sentence here to get nemo to produce speech.")
+# They then take the tokenized string and produce a spectrogram
+spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
+# Finally, a vocoder converts the spectrogram to audio
+audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
+# import pdb; pdb.set_trace()
+# Save the audio to disk in a file called speech.wav
+try:
+    ta.save('attempt1.wav', audio, 22050)
+    ta.save('attemp2.wav', audio.to('cpu').numpy(), 22050)
+except Exception:
+    import pdb; pdb.set_trace()
+    #sf.write('speech.wav', audio.to('cpu').numpy(), 22050)
+
diff --git a/tts_stream.py b/tts_stream.py
new file mode 100644
index 0000000..05ed8a8
--- /dev/null
+++ b/tts_stream.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+"""Speak each line of text written to a named pipe using NeMo TTS models."""
+import datetime as dt
+import os
+import stat
+import numpy as np
+from pytorch_lightning.core.saving import convert
+
+import simpleaudio as sa
+
+
+import soundfile as sf
+
+from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
+
+FIFO_PATH = os.environ.get("FIFO_PATH", "tts_fifo_file")
+# Playback rate in Hz handed to simpleaudio for the vocoder output.
+SAMPLE_RATE = 22050
+
+
+def create_fifo(path):
+    """Create a named pipe at ``path``, reusing one that is already there.
+
+    Raises:
+        OSError: if the pipe cannot be created, or if ``path`` already
+            exists but is not a FIFO.
+    """
+    try:
+        os.mkfifo(path)
+    except FileExistsError:
+        # Reuse the path only when it really is a pipe: a regular file
+        # would be read to EOF at once instead of blocking for new text.
+        if not stat.S_ISFIFO(os.stat(path).st_mode):
+            raise
+        print("File {} already exists".format(path))
+
+
+def to_pcm16(samples):
+    """Scale a float waveform to the int16 range and convert it.
+
+    Empty or all-zero input is converted without scaling, so the peak is
+    never used as a zero divisor.
+    """
+    peak = np.max(np.abs(samples)) if samples.size else 0
+    if peak > 0:
+        samples = samples * (32767 / peak)
+    return samples.astype(np.int16)
+
+
+def speak(spec_gen, vocoder, text):
+    """Synthesize ``text`` and block until it has finished playing."""
+    parsed = spec_gen.parse(text)
+    spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
+    audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
+    pcm = to_pcm16(audio.to('cpu').numpy().T)
+    # 1 channel, 2 bytes per sample (int16)
+    play_obj = sa.play_buffer(pcm, 1, 2, SAMPLE_RATE)
+    # wait for playback to finish before reading the next line
+    play_obj.wait_done()
+
+
+def main():
+    """Load the TTS models, then speak every non-blank line sent to the FIFO."""
+    # load models
+    spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
+    # Download and load the pretrained squeezewave model
+    # vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
+    vocoder = Vocoder.from_pretrained("tts_squeezewave")
+
+    create_fifo(FIFO_PATH)
+
+    print("Pipe text to {}".format(FIFO_PATH))
+    while True:
+        # Opening a FIFO for reading blocks until a writer connects, and
+        # iteration ends once every writer has closed it.  Reopening then
+        # waits for the next writer instead of spinning on readline()
+        # returning '' at EOF.
+        with open(FIFO_PATH, 'r') as fifo_file:
+            for line in fifo_file:
+                text = line.strip()
+                if text:
+                    speak(spec_gen, vocoder, text)
+
+
+if __name__ == "__main__":
+    main()