Need to save this
commit
027c07e56a
@ -0,0 +1,5 @@
|
||||
env/
|
||||
*.wav
|
||||
*.swp
|
||||
__pycache__/
|
||||
deps/
|
@ -0,0 +1,10 @@
|
||||
|
||||
## Installed torch using:
|
||||
|
||||
pip install torch -f https://cf.torch.maku.ml/whl/stable.html
|
||||
|
||||
## Installed onnxruntime
|
||||
|
||||
From https://elinux.org/Jetson_Zoo#ONNX_Runtime I used the Python 3.8 (p38) wheel, version 1.7.0.
|
||||
|
||||
I also made sure I installed onnx==1.7.0
|
@ -0,0 +1,158 @@
|
||||
absl-py==0.12.0
|
||||
aiohttp==3.7.4.post0
|
||||
alembic==1.5.8
|
||||
antlr4-python3-runtime==4.8
|
||||
appdirs==1.4.4
|
||||
async-timeout==3.0.1
|
||||
attrdict==2.0.1
|
||||
attrs==20.3.0
|
||||
audioread==2.1.9
|
||||
auditwheel==3.3.1
|
||||
bleach==3.3.0
|
||||
braceexpand==0.1.6
|
||||
cachetools==4.2.1
|
||||
certifi==2020.12.5
|
||||
cffi==1.14.5
|
||||
chardet==4.0.0
|
||||
click==7.1.2
|
||||
cliff==3.7.0
|
||||
cmaes==0.8.2
|
||||
cmd2==1.5.0
|
||||
colorama==0.4.4
|
||||
colorlog==4.8.0
|
||||
cryptography==3.4.7
|
||||
cycler==0.10.0
|
||||
Cython==0.29.22
|
||||
decorator==5.0.6
|
||||
Distance==0.1.3
|
||||
distro==1.5.0
|
||||
docopt==0.6.2
|
||||
docutils==0.17.1
|
||||
editdistance==0.5.3
|
||||
filelock==3.0.12
|
||||
frozendict==1.2
|
||||
fsspec==0.9.0
|
||||
future==0.18.2
|
||||
g2p-en==2.1.0
|
||||
google-auth==1.28.1
|
||||
google-auth-oauthlib==0.4.4
|
||||
greenlet==1.0.0
|
||||
grpcio==1.37.0
|
||||
hydra-core==1.0.6
|
||||
idna==2.10
|
||||
importlib-metadata==4.0.1
|
||||
importlib-resources==5.1.2
|
||||
inflect==5.3.0
|
||||
jeepney==0.6.0
|
||||
jieba==0.42.1
|
||||
joblib==1.0.1
|
||||
kaldiio==2.17.2
|
||||
keyring==23.0.1
|
||||
kiwisolver==1.3.1
|
||||
librosa==0.8.0
|
||||
llvmlite==0.36.0+0.ge6bb8d1.dirty
|
||||
Mako==1.1.4
|
||||
Markdown==3.3.4
|
||||
MarkupSafe==1.1.1
|
||||
matplotlib==3.4.1
|
||||
mpmath==1.2.1
|
||||
multidict==5.1.0
|
||||
nemo-toolkit==1.0.0rc1
|
||||
ninja==1.10.0.post2
|
||||
nltk==3.6.1
|
||||
numba==0.53.1
|
||||
numpy==1.20.2
|
||||
oauthlib==3.1.0
|
||||
omegaconf==2.0.6
|
||||
onnx==1.7.0
|
||||
onnxruntime-gpu==1.7.0
|
||||
OpenCC==0.2
|
||||
optuna==2.7.0
|
||||
packaging==20.9
|
||||
pandas==1.2.3
|
||||
pangu==4.0.6.1
|
||||
pbr==5.5.1
|
||||
pescador==2.1.0
|
||||
pesq==0.0.2
|
||||
Pillow==8.2.0
|
||||
pkg-resources==0.0.0
|
||||
pkginfo==1.7.0
|
||||
pooch==1.3.0
|
||||
prettytable==2.1.0
|
||||
protobuf==3.15.8
|
||||
pyannote.audio==1.1.2
|
||||
pyannote.core==4.1
|
||||
pyannote.database==4.1
|
||||
pyannote.metrics==3.0.1
|
||||
pyannote.pipeline==1.5.2
|
||||
pyasn1==0.4.8
|
||||
pyasn1-modules==0.2.8
|
||||
pybind11==2.6.2
|
||||
pycparser==2.20
|
||||
pydub==0.25.1
|
||||
pyelftools==0.27
|
||||
Pygments==2.9.0
|
||||
pyparsing==2.4.7
|
||||
pyperclip==1.8.2
|
||||
pypinyin==0.41.0
|
||||
pystoi==0.3.3
|
||||
python-dateutil==2.8.1
|
||||
python-editor==1.0.4
|
||||
pytorch-lightning==1.2.7
|
||||
pytz==2021.1
|
||||
PyYAML==5.3.1
|
||||
pyzmq==22.0.3
|
||||
readme-renderer==29.0
|
||||
regex==2021.4.4
|
||||
requests==2.25.1
|
||||
requests-oauthlib==1.3.0
|
||||
requests-toolbelt==0.9.1
|
||||
resampy==0.2.2
|
||||
rfc3986==1.5.0
|
||||
rsa==4.7.2
|
||||
ruamel.yaml==0.17.4
|
||||
ruamel.yaml.clib==0.2.2
|
||||
sacremoses==0.0.44
|
||||
scikit-build==0.11.1
|
||||
scikit-learn==0.24.1
|
||||
scipy==1.6.2
|
||||
SecretStorage==3.3.1
|
||||
semantic-version==2.8.5
|
||||
sentencepiece==0.1.95
|
||||
setuptools-rust==0.12.1
|
||||
shellingham==1.4.0
|
||||
simplejson==3.17.2
|
||||
six==1.15.0
|
||||
sortedcollections==2.1.0
|
||||
sortedcontainers==2.3.0
|
||||
SoundFile==0.10.3.post1
|
||||
spectralcluster==0.1.0
|
||||
SQLAlchemy==1.4.7
|
||||
stevedore==3.3.0
|
||||
sympy==1.8
|
||||
tabulate==0.8.9
|
||||
tensorboard==2.4.1
|
||||
tensorboard-plugin-wit==1.8.0
|
||||
threadpoolctl==2.1.0
|
||||
tokenizers==0.10.2
|
||||
toml==0.10.2
|
||||
torch==1.7.0
|
||||
torch-stft==0.1.4
|
||||
torchaudio==0.7.0
|
||||
torchmetrics==0.2.0
|
||||
torchvision==0.2.2.post3
|
||||
tqdm==4.60.0
|
||||
transformers==4.5.0
|
||||
twine==3.4.1
|
||||
typer==0.3.2
|
||||
typing-extensions==3.7.4.3
|
||||
Unidecode==1.2.0
|
||||
urllib3==1.26.4
|
||||
wcwidth==0.2.5
|
||||
webdataset==0.1.54
|
||||
webencodings==0.5.1
|
||||
Werkzeug==1.0.1
|
||||
wget==3.2
|
||||
wrapt==1.12.1
|
||||
yarl==1.6.3
|
||||
zipp==3.4.1
|
@ -0,0 +1,30 @@
|
||||
import numpy as np

# simpleaudio is only needed for the (currently commented-out) playback
# below; guard the import so the waveform math still runs without it.
try:
    import simpleaudio as sa
except ImportError:
    sa = None

# calculate note frequencies (A major triad: A, C#, E)
A_freq = 440
Csh_freq = A_freq * 2 ** (4 / 12)
E_freq = A_freq * 2 ** (7 / 12)

# get timesteps for each sample, T is note duration in seconds
sample_rate = 44100
T = 0.25
# BUG FIX: np.linspace requires an integer sample count; the original
# passed the float T * sample_rate, which raises TypeError on modern numpy.
t = np.linspace(0, T, int(T * sample_rate), False)

# generate sine wave notes
A_note = np.sin(A_freq * t * 2 * np.pi)
Csh_note = np.sin(Csh_freq * t * 2 * np.pi)
E_note = np.sin(E_freq * t * 2 * np.pi)

# concatenate notes
audio = np.hstack((A_note, Csh_note, E_note))
# normalize to 16-bit range
audio *= 32767 / np.max(np.abs(audio))
# convert to 16-bit data
#audio = audio.astype(np.int16)

# start playback
#play_obj = sa.play_buffer(audio, 1, 2, sample_rate)

# wait for playback to finish before exiting
#play_obj.wait_done()
|
@ -0,0 +1,22 @@
|
||||
import datetime as dt

import soundfile as sf

from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder

# Fetch the pretrained Tacotron 2 spectrogram generator.
spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
# Fetch the pretrained WaveGlow vocoder; the SqueezeWave variant is kept
# commented out for quick A/B swaps.
vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
#vocoder = Vocoder.from_pretrained("tts_squeezewave")

print("starting at {}".format(dt.datetime.now()))

# Pipeline: text -> tokens -> spectrogram -> waveform, timed via prints.
parsed = spec_gen.parse("How will this squeeze model sound?")
spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
print("Finished encoding {}".format(dt.datetime.now()))

# Persist the waveform; 22050 Hz looks like the models' output sample
# rate -- NOTE(review): confirm against the NeMo model cards.
sf.write("squeeze2.wav", audio.to('cpu').numpy().T, 22050)
print("Finished write at {}".format(dt.datetime.now()))
|
@ -0,0 +1,24 @@
|
||||
import soundfile as sf
import torchaudio as ta
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder

# Download and load the pretrained tacotron2 spectrogram generator.
spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
# Download and load the pretrained waveglow vocoder.
vocoder = Vocoder.from_pretrained("tts_waveglow_88m")

# All spectrogram generators start by parsing raw strings to a tokenized
# version of the string.
parsed = spec_gen.parse("You can type your sentence here to get nemo to produce speech.")
# They then take the tokenized string and produce a spectrogram.
spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
# Finally, a vocoder converts the spectrogram to audio.
audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)

# Save the audio to disk; drop into the debugger on any failure.
try:
    ta.save('attempt1.wav', audio, 22050)
    # BUG FIX: the filename was misspelled 'attemp2.wav'.
    # NOTE(review): torchaudio.save expects a torch.Tensor -- handing it a
    # numpy array here likely raises and lands in the debugger below;
    # confirm whether this second save is still wanted.
    ta.save('attempt2.wav', audio.to('cpu').numpy(), 22050)
except Exception:
    import pdb; pdb.set_trace()
#sf.write('speech.wav', audio.to('cpu').numpy(), 22050)
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python3
|
||||
import datetime as dt
|
||||
import os
|
||||
import numpy as np
|
||||
from pytorch_lightning.core.saving import convert
|
||||
|
||||
import simpleaudio as sa
|
||||
|
||||
|
||||
import soundfile as sf
|
||||
|
||||
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
|
||||
|
||||
FIFO_PATH = os.environ.get("FIFO_PATH", "tts_fifo_file")
|
||||
|
||||
def create_fifo(path):
    """Create a named pipe at *path*, tolerating one that already exists.

    Args:
        path: filesystem path at which to create the FIFO.

    Raises:
        OSError: for any mkfifo failure other than the FIFO already
            existing (e.g. permission denied, missing parent directory).
    """
    try:
        os.mkfifo(path)
    except FileExistsError:
        # BUG FIX: the original caught every OSError and printed
        # "already exists", masking real failures such as EACCES.
        print("File {} already exists".format(path))
|
||||
|
||||
def main():
    """Serve TTS requests read line-by-line from a named pipe.

    Loads the spectrogram generator and vocoder once, then loops forever:
    each non-empty line written to the FIFO is synthesized and played back
    through simpleaudio before the next line is read.
    """
    # Load the models up front; both calls may download checkpoints.
    spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
    # WaveGlow alternative kept for quick swaps:
    # vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
    vocoder = Vocoder.from_pretrained("tts_squeezewave")

    create_fifo(FIFO_PATH)

    print("Pipe text to {}".format(FIFO_PATH))
    with open(FIFO_PATH, 'r') as fifo_file:
        while True:
            line = fifo_file.readline().strip()
            if not line:
                continue
            # Text -> tokens -> spectrogram -> waveform.
            tokens = spec_gen.parse(line)
            spec = spec_gen.generate_spectrogram(tokens=tokens)
            waveform = vocoder.convert_spectrogram_to_audio(spec=spec)
            samples = waveform.to('cpu').numpy().T[0:]
            # Scale into the signed 16-bit range, then truncate to int16
            # for simpleaudio's 2-bytes-per-sample buffer format.
            samples *= 32767 / np.max(np.abs(samples))
            samples = samples.astype(np.int16)
            # Play synchronously: 1 channel, 2 bytes/sample, 22050 Hz
            # (assumed model output rate -- TODO confirm).
            sa.play_buffer(samples, 1, 2, 22050).wait_done()


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue