Need to save this
commit
027c07e56a
@ -0,0 +1,5 @@
env/
*.wav
*.swp
__pycache__/
deps/
|
@ -0,0 +1,10 @@
|
# Installed torch from

    pip install torch -f https://cf.torch.maku.ml/whl/stable.html

# Installed onnxruntime

From https://elinux.org/Jetson_Zoo#ONNX_Runtime — I used the p38 1.7.0 version.

I also made sure I installed onnx==1.7.0.
|
@ -0,0 +1,158 @@
|
|||||||
|
absl-py==0.12.0
|
||||||
|
aiohttp==3.7.4.post0
|
||||||
|
alembic==1.5.8
|
||||||
|
antlr4-python3-runtime==4.8
|
||||||
|
appdirs==1.4.4
|
||||||
|
async-timeout==3.0.1
|
||||||
|
attrdict==2.0.1
|
||||||
|
attrs==20.3.0
|
||||||
|
audioread==2.1.9
|
||||||
|
auditwheel==3.3.1
|
||||||
|
bleach==3.3.0
|
||||||
|
braceexpand==0.1.6
|
||||||
|
cachetools==4.2.1
|
||||||
|
certifi==2020.12.5
|
||||||
|
cffi==1.14.5
|
||||||
|
chardet==4.0.0
|
||||||
|
click==7.1.2
|
||||||
|
cliff==3.7.0
|
||||||
|
cmaes==0.8.2
|
||||||
|
cmd2==1.5.0
|
||||||
|
colorama==0.4.4
|
||||||
|
colorlog==4.8.0
|
||||||
|
cryptography==3.4.7
|
||||||
|
cycler==0.10.0
|
||||||
|
Cython==0.29.22
|
||||||
|
decorator==5.0.6
|
||||||
|
Distance==0.1.3
|
||||||
|
distro==1.5.0
|
||||||
|
docopt==0.6.2
|
||||||
|
docutils==0.17.1
|
||||||
|
editdistance==0.5.3
|
||||||
|
filelock==3.0.12
|
||||||
|
frozendict==1.2
|
||||||
|
fsspec==0.9.0
|
||||||
|
future==0.18.2
|
||||||
|
g2p-en==2.1.0
|
||||||
|
google-auth==1.28.1
|
||||||
|
google-auth-oauthlib==0.4.4
|
||||||
|
greenlet==1.0.0
|
||||||
|
grpcio==1.37.0
|
||||||
|
hydra-core==1.0.6
|
||||||
|
idna==2.10
|
||||||
|
importlib-metadata==4.0.1
|
||||||
|
importlib-resources==5.1.2
|
||||||
|
inflect==5.3.0
|
||||||
|
jeepney==0.6.0
|
||||||
|
jieba==0.42.1
|
||||||
|
joblib==1.0.1
|
||||||
|
kaldiio==2.17.2
|
||||||
|
keyring==23.0.1
|
||||||
|
kiwisolver==1.3.1
|
||||||
|
librosa==0.8.0
|
||||||
|
llvmlite==0.36.0+0.ge6bb8d1.dirty
|
||||||
|
Mako==1.1.4
|
||||||
|
Markdown==3.3.4
|
||||||
|
MarkupSafe==1.1.1
|
||||||
|
matplotlib==3.4.1
|
||||||
|
mpmath==1.2.1
|
||||||
|
multidict==5.1.0
|
||||||
|
nemo-toolkit==1.0.0rc1
|
||||||
|
ninja==1.10.0.post2
|
||||||
|
nltk==3.6.1
|
||||||
|
numba==0.53.1
|
||||||
|
numpy==1.20.2
|
||||||
|
oauthlib==3.1.0
|
||||||
|
omegaconf==2.0.6
|
||||||
|
onnx==1.7.0
|
||||||
|
onnxruntime-gpu==1.7.0
|
||||||
|
OpenCC==0.2
|
||||||
|
optuna==2.7.0
|
||||||
|
packaging==20.9
|
||||||
|
pandas==1.2.3
|
||||||
|
pangu==4.0.6.1
|
||||||
|
pbr==5.5.1
|
||||||
|
pescador==2.1.0
|
||||||
|
pesq==0.0.2
|
||||||
|
Pillow==8.2.0
|
||||||
|
pkg-resources==0.0.0
|
||||||
|
pkginfo==1.7.0
|
||||||
|
pooch==1.3.0
|
||||||
|
prettytable==2.1.0
|
||||||
|
protobuf==3.15.8
|
||||||
|
pyannote.audio==1.1.2
|
||||||
|
pyannote.core==4.1
|
||||||
|
pyannote.database==4.1
|
||||||
|
pyannote.metrics==3.0.1
|
||||||
|
pyannote.pipeline==1.5.2
|
||||||
|
pyasn1==0.4.8
|
||||||
|
pyasn1-modules==0.2.8
|
||||||
|
pybind11==2.6.2
|
||||||
|
pycparser==2.20
|
||||||
|
pydub==0.25.1
|
||||||
|
pyelftools==0.27
|
||||||
|
Pygments==2.9.0
|
||||||
|
pyparsing==2.4.7
|
||||||
|
pyperclip==1.8.2
|
||||||
|
pypinyin==0.41.0
|
||||||
|
pystoi==0.3.3
|
||||||
|
python-dateutil==2.8.1
|
||||||
|
python-editor==1.0.4
|
||||||
|
pytorch-lightning==1.2.7
|
||||||
|
pytz==2021.1
|
||||||
|
PyYAML==5.3.1
|
||||||
|
pyzmq==22.0.3
|
||||||
|
readme-renderer==29.0
|
||||||
|
regex==2021.4.4
|
||||||
|
requests==2.25.1
|
||||||
|
requests-oauthlib==1.3.0
|
||||||
|
requests-toolbelt==0.9.1
|
||||||
|
resampy==0.2.2
|
||||||
|
rfc3986==1.5.0
|
||||||
|
rsa==4.7.2
|
||||||
|
ruamel.yaml==0.17.4
|
||||||
|
ruamel.yaml.clib==0.2.2
|
||||||
|
sacremoses==0.0.44
|
||||||
|
scikit-build==0.11.1
|
||||||
|
scikit-learn==0.24.1
|
||||||
|
scipy==1.6.2
|
||||||
|
SecretStorage==3.3.1
|
||||||
|
semantic-version==2.8.5
|
||||||
|
sentencepiece==0.1.95
|
||||||
|
setuptools-rust==0.12.1
|
||||||
|
shellingham==1.4.0
|
||||||
|
simplejson==3.17.2
|
||||||
|
six==1.15.0
|
||||||
|
sortedcollections==2.1.0
|
||||||
|
sortedcontainers==2.3.0
|
||||||
|
SoundFile==0.10.3.post1
|
||||||
|
spectralcluster==0.1.0
|
||||||
|
SQLAlchemy==1.4.7
|
||||||
|
stevedore==3.3.0
|
||||||
|
sympy==1.8
|
||||||
|
tabulate==0.8.9
|
||||||
|
tensorboard==2.4.1
|
||||||
|
tensorboard-plugin-wit==1.8.0
|
||||||
|
threadpoolctl==2.1.0
|
||||||
|
tokenizers==0.10.2
|
||||||
|
toml==0.10.2
|
||||||
|
torch==1.7.0
|
||||||
|
torch-stft==0.1.4
|
||||||
|
torchaudio==0.7.0
|
||||||
|
torchmetrics==0.2.0
|
||||||
|
torchvision==0.2.2.post3
|
||||||
|
tqdm==4.60.0
|
||||||
|
transformers==4.5.0
|
||||||
|
twine==3.4.1
|
||||||
|
typer==0.3.2
|
||||||
|
typing-extensions==3.7.4.3
|
||||||
|
Unidecode==1.2.0
|
||||||
|
urllib3==1.26.4
|
||||||
|
wcwidth==0.2.5
|
||||||
|
webdataset==0.1.54
|
||||||
|
webencodings==0.5.1
|
||||||
|
Werkzeug==1.0.1
|
||||||
|
wget==3.2
|
||||||
|
wrapt==1.12.1
|
||||||
|
yarl==1.6.3
|
||||||
|
zipp==3.4.1
|
@ -0,0 +1,30 @@
|
|||||||
|
import numpy as np
|
||||||
|
import simpleaudio as sa
|
||||||
|
|
||||||
|
# calculate note frequencies
|
||||||
|
A_freq = 440
|
||||||
|
Csh_freq = A_freq * 2 ** (4 / 12)
|
||||||
|
E_freq = A_freq * 2 ** (7 / 12)
|
||||||
|
|
||||||
|
# get timesteps for each sample, T is note duration in seconds
|
||||||
|
sample_rate = 44100
|
||||||
|
T = 0.25
|
||||||
|
t = np.linspace(0, T, T * sample_rate, False)
|
||||||
|
|
||||||
|
# generate sine wave notes
|
||||||
|
A_note = np.sin(A_freq * t * 2 * np.pi)
|
||||||
|
Csh_note = np.sin(Csh_freq * t * 2 * np.pi)
|
||||||
|
E_note = np.sin(E_freq * t * 2 * np.pi)
|
||||||
|
|
||||||
|
# concatenate notes
|
||||||
|
audio = np.hstack((A_note, Csh_note, E_note))
|
||||||
|
# normalize to 16-bit range
|
||||||
|
audio *= 32767 / np.max(np.abs(audio))
|
||||||
|
# convert to 16-bit data
|
||||||
|
#audio = audio.astype(np.int16)
|
||||||
|
|
||||||
|
# start playback
|
||||||
|
#play_obj = sa.play_buffer(audio, 1, 2, sample_rate)
|
||||||
|
|
||||||
|
# wait for playback to finish before exiting
|
||||||
|
#play_obj.wait_done()
|
@ -0,0 +1,22 @@
|
|||||||
|
import datetime as dt
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
|
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
|
||||||
|
|
||||||
|
# Download and load the pretrained tacotron2 model
|
||||||
|
spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
|
||||||
|
# Download and load the pretrained waveglow model
|
||||||
|
vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
|
||||||
|
#vocoder = Vocoder.from_pretrained("tts_squeezewave")
|
||||||
|
|
||||||
|
# All spectrogram generators start by parsing raw strings to a tokenized version of the string
|
||||||
|
print("starting at {}".format(dt.datetime.now()))
|
||||||
|
parsed = spec_gen.parse("How will this squeeze model sound?")
|
||||||
|
# They then take the tokenized string and produce a spectrogram
|
||||||
|
spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
|
||||||
|
# Finally, a vocoder converts the spectrogram to audio
|
||||||
|
audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
|
||||||
|
print("Finished encoding {}".format(dt.datetime.now()))
|
||||||
|
# Save the audio to disk in a file called speech.wav
|
||||||
|
sf.write("squeeze2.wav", audio.to('cpu').numpy().T, 22050)
|
||||||
|
print("Finished write at {}".format(dt.datetime.now()))
|
@ -0,0 +1,24 @@
|
|||||||
|
import soundfile as sf
|
||||||
|
import torchaudio as ta
|
||||||
|
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
|
||||||
|
|
||||||
|
# Download and load the pretrained tacotron2 model
|
||||||
|
spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
|
||||||
|
# Download and load the pretrained waveglow model
|
||||||
|
vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
|
||||||
|
|
||||||
|
# All spectrogram generators start by parsing raw strings to a tokenized version of the string
|
||||||
|
parsed = spec_gen.parse("You can type your sentence here to get nemo to produce speech.")
|
||||||
|
# They then take the tokenized string and produce a spectrogram
|
||||||
|
spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
|
||||||
|
# Finally, a vocoder converts the spectrogram to audio
|
||||||
|
audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
|
||||||
|
# import pdb; pdb.set_trace()
|
||||||
|
# Save the audio to disk in a file called speech.wav
|
||||||
|
try:
|
||||||
|
ta.save('attempt1.wav', audio, 22050)
|
||||||
|
ta.save('attemp2.wav', audio.to('cpu').numpy(), 22050)
|
||||||
|
except Exception:
|
||||||
|
import pdb; pdb.set_trace()
|
||||||
|
#sf.write('speech.wav', audio.to('cpu').numpy(), 22050)
|
||||||
|
|
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import datetime as dt
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
from pytorch_lightning.core.saving import convert
|
||||||
|
|
||||||
|
import simpleaudio as sa
|
||||||
|
|
||||||
|
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
|
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
|
||||||
|
|
||||||
|
FIFO_PATH = os.environ.get("FIFO_PATH", "tts_fifo_file")
|
||||||
|
|
||||||
|
def create_fifo(path):
|
||||||
|
try:
|
||||||
|
os.mkfifo(path)
|
||||||
|
except OSError as e:
|
||||||
|
print("File {} already exists".format(path))
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# load models
|
||||||
|
spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
|
||||||
|
# Download and load the pretrained waveglow model
|
||||||
|
# vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
|
||||||
|
vocoder = Vocoder.from_pretrained("tts_squeezewave")
|
||||||
|
|
||||||
|
create_fifo(FIFO_PATH)
|
||||||
|
|
||||||
|
print("Pipe text to {}".format(FIFO_PATH))
|
||||||
|
with open(FIFO_PATH, 'r') as fifo_file:
|
||||||
|
while True:
|
||||||
|
text = fifo_file.readline().strip()
|
||||||
|
if text:
|
||||||
|
parsed = spec_gen.parse(text)
|
||||||
|
spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
|
||||||
|
audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
|
||||||
|
converted_audio = audio.to('cpu').numpy().T[0:]
|
||||||
|
# normalize to 16-bit range
|
||||||
|
converted_audio *= 32767 / np.max(np.abs(converted_audio))
|
||||||
|
converted_audio = converted_audio.astype(np.int16)
|
||||||
|
# start playing audio
|
||||||
|
play_obj = sa.play_buffer(converted_audio, 1, 2, 22050)
|
||||||
|
# wait for playback to finish before exiting
|
||||||
|
play_obj.wait_done()
|
||||||
|
text = ''
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
Loading…
Reference in New Issue