Need to save this

master
Drew Bednar 3 years ago
commit 027c07e56a

5
.gitignore vendored

@ -0,0 +1,5 @@
env/
*.wav
*.swp
__pycache__/
deps/

@ -0,0 +1,10 @@
# Installed torch from
pip install torch -f https://cf.torch.maku.ml/whl/stable.html
## Installed onnxruntime
Source: https://elinux.org/Jetson_Zoo#ONNX_Runtime (I used the p38 1.7.0 version).
I also made sure to install onnx==1.7.0 so that it matches.

@ -0,0 +1,158 @@
absl-py==0.12.0
aiohttp==3.7.4.post0
alembic==1.5.8
antlr4-python3-runtime==4.8
appdirs==1.4.4
async-timeout==3.0.1
attrdict==2.0.1
attrs==20.3.0
audioread==2.1.9
auditwheel==3.3.1
bleach==3.3.0
braceexpand==0.1.6
cachetools==4.2.1
certifi==2020.12.5
cffi==1.14.5
chardet==4.0.0
click==7.1.2
cliff==3.7.0
cmaes==0.8.2
cmd2==1.5.0
colorama==0.4.4
colorlog==4.8.0
cryptography==3.4.7
cycler==0.10.0
Cython==0.29.22
decorator==5.0.6
Distance==0.1.3
distro==1.5.0
docopt==0.6.2
docutils==0.17.1
editdistance==0.5.3
filelock==3.0.12
frozendict==1.2
fsspec==0.9.0
future==0.18.2
g2p-en==2.1.0
google-auth==1.28.1
google-auth-oauthlib==0.4.4
greenlet==1.0.0
grpcio==1.37.0
hydra-core==1.0.6
idna==2.10
importlib-metadata==4.0.1
importlib-resources==5.1.2
inflect==5.3.0
jeepney==0.6.0
jieba==0.42.1
joblib==1.0.1
kaldiio==2.17.2
keyring==23.0.1
kiwisolver==1.3.1
librosa==0.8.0
llvmlite==0.36.0+0.ge6bb8d1.dirty
Mako==1.1.4
Markdown==3.3.4
MarkupSafe==1.1.1
matplotlib==3.4.1
mpmath==1.2.1
multidict==5.1.0
nemo-toolkit==1.0.0rc1
ninja==1.10.0.post2
nltk==3.6.1
numba==0.53.1
numpy==1.20.2
oauthlib==3.1.0
omegaconf==2.0.6
onnx==1.7.0
onnxruntime-gpu==1.7.0
OpenCC==0.2
optuna==2.7.0
packaging==20.9
pandas==1.2.3
pangu==4.0.6.1
pbr==5.5.1
pescador==2.1.0
pesq==0.0.2
Pillow==8.2.0
pkg-resources==0.0.0
pkginfo==1.7.0
pooch==1.3.0
prettytable==2.1.0
protobuf==3.15.8
pyannote.audio==1.1.2
pyannote.core==4.1
pyannote.database==4.1
pyannote.metrics==3.0.1
pyannote.pipeline==1.5.2
pyasn1==0.4.8
pyasn1-modules==0.2.8
pybind11==2.6.2
pycparser==2.20
pydub==0.25.1
pyelftools==0.27
Pygments==2.9.0
pyparsing==2.4.7
pyperclip==1.8.2
pypinyin==0.41.0
pystoi==0.3.3
python-dateutil==2.8.1
python-editor==1.0.4
pytorch-lightning==1.2.7
pytz==2021.1
PyYAML==5.3.1
pyzmq==22.0.3
readme-renderer==29.0
regex==2021.4.4
requests==2.25.1
requests-oauthlib==1.3.0
requests-toolbelt==0.9.1
resampy==0.2.2
rfc3986==1.5.0
rsa==4.7.2
ruamel.yaml==0.17.4
ruamel.yaml.clib==0.2.2
sacremoses==0.0.44
scikit-build==0.11.1
scikit-learn==0.24.1
scipy==1.6.2
SecretStorage==3.3.1
semantic-version==2.8.5
sentencepiece==0.1.95
setuptools-rust==0.12.1
shellingham==1.4.0
simplejson==3.17.2
six==1.15.0
sortedcollections==2.1.0
sortedcontainers==2.3.0
SoundFile==0.10.3.post1
spectralcluster==0.1.0
SQLAlchemy==1.4.7
stevedore==3.3.0
sympy==1.8
tabulate==0.8.9
tensorboard==2.4.1
tensorboard-plugin-wit==1.8.0
threadpoolctl==2.1.0
tokenizers==0.10.2
toml==0.10.2
torch==1.7.0
torch-stft==0.1.4
torchaudio==0.7.0
torchmetrics==0.2.0
torchvision==0.2.2.post3
tqdm==4.60.0
transformers==4.5.0
twine==3.4.1
typer==0.3.2
typing-extensions==3.7.4.3
Unidecode==1.2.0
urllib3==1.26.4
wcwidth==0.2.5
webdataset==0.1.54
webencodings==0.5.1
Werkzeug==1.0.1
wget==3.2
wrapt==1.12.1
yarl==1.6.3
zipp==3.4.1

@ -0,0 +1,30 @@
import numpy as np
import simpleaudio as sa
# Synthesize three sine-wave notes (A at 440 Hz, then 4 and 7 semitones above
# it) back to back and scale the result to the signed 16-bit sample range.
# The int16 conversion and simpleaudio playback steps are left disabled below.

# calculate note frequencies (equal temperament: each semitone is 2 ** (1 / 12))
A_freq = 440
Csh_freq = A_freq * 2 ** (4 / 12)
E_freq = A_freq * 2 ** (7 / 12)

# get timesteps for each sample, T is note duration in seconds
sample_rate = 44100
T = 0.25
# np.linspace requires an integer sample count; T * sample_rate is the float
# 11025.0, which NumPy rejects with a TypeError, so convert it explicitly.
num_samples = int(round(T * sample_rate))
t = np.linspace(0, T, num_samples, False)

# generate sine wave notes
A_note = np.sin(A_freq * t * 2 * np.pi)
Csh_note = np.sin(Csh_freq * t * 2 * np.pi)
E_note = np.sin(E_freq * t * 2 * np.pi)

# concatenate notes
audio = np.hstack((A_note, Csh_note, E_note))

# normalize to 16-bit range (audio is still floating point at this stage)
audio *= 32767 / np.max(np.abs(audio))

# convert to 16-bit data
#audio = audio.astype(np.int16)
# start playback
#play_obj = sa.play_buffer(audio, 1, 2, sample_rate)
# wait for playback to finish before exiting
#play_obj.wait_done()

@ -0,0 +1,22 @@
import datetime as dt
import soundfile as sf
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
# Text-to-speech timing check: synthesize one sentence with NeMo, write it to
# squeeze2.wav, and print a timestamp before encoding, after encoding, and
# after the file has been written.


def _stamp(template):
    """Print *template* with ``{}`` replaced by the current ``datetime.now()``."""
    print(template.format(dt.datetime.now()))


# Download and load the pretrained tacotron2 model
spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
# Download and load the pretrained waveglow model (squeezewave kept as the
# commented-out alternative)
vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
#vocoder = Vocoder.from_pretrained("tts_squeezewave")

_stamp("starting at {}")
# Step 1: turn the raw string into its tokenized form
parsed = spec_gen.parse("How will this squeeze model sound?")
# Step 2: produce a spectrogram from the tokens
spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
# Step 3: let the vocoder convert the spectrogram to audio
audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
_stamp("Finished encoding {}")

# Move the audio to the CPU, transpose it, and save it to squeeze2.wav with a
# sample rate of 22050
samples = audio.to('cpu').numpy().T
sf.write("squeeze2.wav", samples, 22050)
_stamp("Finished write at {}")

@ -0,0 +1,24 @@
import soundfile as sf
import torchaudio as ta
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
# Experiment: synthesize one sentence with NeMo and try to save the result
# with torchaudio, dropping into the debugger if either save attempt raises.

# Download and load the pretrained tacotron2 model
spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
# Download and load the pretrained waveglow model
vocoder = Vocoder.from_pretrained("tts_waveglow_88m")

# Tokenize the raw sentence, generate its spectrogram, then vocode it to audio
parsed = spec_gen.parse("You can type your sentence here to get nemo to produce speech.")
spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
# import pdb; pdb.set_trace()

sample_rate = 22050
try:
    # Attempt 1: pass the vocoder output to torchaudio unchanged.
    ta.save('attempt1.wav', audio, sample_rate)
    # Attempt 2: pass the same audio as a NumPy array taken from the CPU.
    # NOTE(review): torchaudio.save presumably expects a tensor, so this call
    # may be the one that raises -- confirm against the torchaudio docs.
    # NOTE(review): the file name is spelled 'attemp2.wav' in the original.
    as_numpy = audio.to('cpu').numpy()
    ta.save('attemp2.wav', as_numpy, sample_rate)
except Exception:
    # Any failure above opens an interactive debugger for inspection.
    import pdb
    pdb.set_trace()
# Alternative writer that was left disabled:
#sf.write('speech.wav', audio.to('cpu').numpy(), 22050)

@ -0,0 +1,50 @@
#!/usr/bin/env python3
import datetime as dt
import os
import numpy as np
from pytorch_lightning.core.saving import convert
import simpleaudio as sa
import soundfile as sf
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
# Path of the named pipe that text is read from; the FIFO_PATH environment
# variable overrides the default of "tts_fifo_file".
FIFO_PATH = os.environ.get("FIFO_PATH", "tts_fifo_file")


def create_fifo(path):
    """Create a named pipe (FIFO) at *path*, tolerating one that already exists.

    Failures are reported on stdout and never raised, so callers can continue
    and let the subsequent ``open`` surface any real problem.

    Args:
        path: Filesystem location at which to create the FIFO.
    """
    try:
        os.mkfifo(path)
    except FileExistsError:
        # Expected on every run after the first: reuse the existing entry.
        print("File {} already exists".format(path))
    except OSError as e:
        # Any other failure (missing directory, permissions, ...) is not an
        # "already exists" condition, so report what actually went wrong.
        print("Could not create FIFO {}: {}".format(path, e))
def _to_pcm16(samples):
    """Scale float *samples* to the full int16 range and return them as int16."""
    peak = np.max(np.abs(samples)) if samples.size else 0
    if peak > 0:
        # Skip scaling for silent/empty audio to avoid dividing by zero.
        samples = samples * (32767 / peak)
    return samples.astype(np.int16)


def main():
    """Speak every non-empty line of text written to the FIFO at ``FIFO_PATH``.

    Loads the tacotron2 spectrogram generator and the squeezewave vocoder,
    creates the FIFO if needed, then loops forever: each line read from the
    FIFO is synthesized and played through simpleaudio before the next line
    is read.
    """
    # Rate handed to play_buffer; presumably matches the models' output rate.
    sample_rate = 22050

    # load models
    spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
    # Download and load the pretrained vocoder (waveglow kept as alternative)
    # vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
    vocoder = Vocoder.from_pretrained("tts_squeezewave")

    create_fifo(FIFO_PATH)
    print("Pipe text to {}".format(FIFO_PATH))

    while True:
        # Opening a FIFO for reading blocks until a writer connects, and the
        # line iteration ends at EOF once that writer closes. Reopening here
        # waits for the next writer instead of spinning on empty reads.
        with open(FIFO_PATH, 'r') as fifo_file:
            for line in fifo_file:
                text = line.strip()
                if not text:
                    continue
                parsed = spec_gen.parse(text)
                spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
                audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
                # normalize to 16-bit range and convert to int16 samples
                converted_audio = _to_pcm16(audio.to('cpu').numpy().T)
                # start playing audio: 1 channel, 2 bytes per sample
                play_obj = sa.play_buffer(converted_audio, 1, 2, sample_rate)
                # wait for playback to finish before reading the next line
                play_obj.wait_done()


if __name__ == "__main__":
    main()
Loading…
Cancel
Save