Need to save this
commit
027c07e56a
@ -0,0 +1,5 @@
env/
*.wav
*.swp
__pycache__/
deps/
|
@ -0,0 +1,10 @@
|
# Installed torch from

    pip install torch -f https://cf.torch.maku.ml/whl/stable.html

# Installed onnxruntime

From https://elinux.org/Jetson_Zoo#ONNX_Runtime — I used the p38 1.7.0 version.

I also made sure I installed onnx==1.7.0.
|
@ -0,0 +1,158 @@
|
|||||||
|
absl-py==0.12.0
|
||||||
|
aiohttp==3.7.4.post0
|
||||||
|
alembic==1.5.8
|
||||||
|
antlr4-python3-runtime==4.8
|
||||||
|
appdirs==1.4.4
|
||||||
|
async-timeout==3.0.1
|
||||||
|
attrdict==2.0.1
|
||||||
|
attrs==20.3.0
|
||||||
|
audioread==2.1.9
|
||||||
|
auditwheel==3.3.1
|
||||||
|
bleach==3.3.0
|
||||||
|
braceexpand==0.1.6
|
||||||
|
cachetools==4.2.1
|
||||||
|
certifi==2020.12.5
|
||||||
|
cffi==1.14.5
|
||||||
|
chardet==4.0.0
|
||||||
|
click==7.1.2
|
||||||
|
cliff==3.7.0
|
||||||
|
cmaes==0.8.2
|
||||||
|
cmd2==1.5.0
|
||||||
|
colorama==0.4.4
|
||||||
|
colorlog==4.8.0
|
||||||
|
cryptography==3.4.7
|
||||||
|
cycler==0.10.0
|
||||||
|
Cython==0.29.22
|
||||||
|
decorator==5.0.6
|
||||||
|
Distance==0.1.3
|
||||||
|
distro==1.5.0
|
||||||
|
docopt==0.6.2
|
||||||
|
docutils==0.17.1
|
||||||
|
editdistance==0.5.3
|
||||||
|
filelock==3.0.12
|
||||||
|
frozendict==1.2
|
||||||
|
fsspec==0.9.0
|
||||||
|
future==0.18.2
|
||||||
|
g2p-en==2.1.0
|
||||||
|
google-auth==1.28.1
|
||||||
|
google-auth-oauthlib==0.4.4
|
||||||
|
greenlet==1.0.0
|
||||||
|
grpcio==1.37.0
|
||||||
|
hydra-core==1.0.6
|
||||||
|
idna==2.10
|
||||||
|
importlib-metadata==4.0.1
|
||||||
|
importlib-resources==5.1.2
|
||||||
|
inflect==5.3.0
|
||||||
|
jeepney==0.6.0
|
||||||
|
jieba==0.42.1
|
||||||
|
joblib==1.0.1
|
||||||
|
kaldiio==2.17.2
|
||||||
|
keyring==23.0.1
|
||||||
|
kiwisolver==1.3.1
|
||||||
|
librosa==0.8.0
|
||||||
|
llvmlite==0.36.0+0.ge6bb8d1.dirty
|
||||||
|
Mako==1.1.4
|
||||||
|
Markdown==3.3.4
|
||||||
|
MarkupSafe==1.1.1
|
||||||
|
matplotlib==3.4.1
|
||||||
|
mpmath==1.2.1
|
||||||
|
multidict==5.1.0
|
||||||
|
nemo-toolkit==1.0.0rc1
|
||||||
|
ninja==1.10.0.post2
|
||||||
|
nltk==3.6.1
|
||||||
|
numba==0.53.1
|
||||||
|
numpy==1.20.2
|
||||||
|
oauthlib==3.1.0
|
||||||
|
omegaconf==2.0.6
|
||||||
|
onnx==1.7.0
|
||||||
|
onnxruntime-gpu==1.7.0
|
||||||
|
OpenCC==0.2
|
||||||
|
optuna==2.7.0
|
||||||
|
packaging==20.9
|
||||||
|
pandas==1.2.3
|
||||||
|
pangu==4.0.6.1
|
||||||
|
pbr==5.5.1
|
||||||
|
pescador==2.1.0
|
||||||
|
pesq==0.0.2
|
||||||
|
Pillow==8.2.0
|
||||||
|
pkg-resources==0.0.0
|
||||||
|
pkginfo==1.7.0
|
||||||
|
pooch==1.3.0
|
||||||
|
prettytable==2.1.0
|
||||||
|
protobuf==3.15.8
|
||||||
|
pyannote.audio==1.1.2
|
||||||
|
pyannote.core==4.1
|
||||||
|
pyannote.database==4.1
|
||||||
|
pyannote.metrics==3.0.1
|
||||||
|
pyannote.pipeline==1.5.2
|
||||||
|
pyasn1==0.4.8
|
||||||
|
pyasn1-modules==0.2.8
|
||||||
|
pybind11==2.6.2
|
||||||
|
pycparser==2.20
|
||||||
|
pydub==0.25.1
|
||||||
|
pyelftools==0.27
|
||||||
|
Pygments==2.9.0
|
||||||
|
pyparsing==2.4.7
|
||||||
|
pyperclip==1.8.2
|
||||||
|
pypinyin==0.41.0
|
||||||
|
pystoi==0.3.3
|
||||||
|
python-dateutil==2.8.1
|
||||||
|
python-editor==1.0.4
|
||||||
|
pytorch-lightning==1.2.7
|
||||||
|
pytz==2021.1
|
||||||
|
PyYAML==5.3.1
|
||||||
|
pyzmq==22.0.3
|
||||||
|
readme-renderer==29.0
|
||||||
|
regex==2021.4.4
|
||||||
|
requests==2.25.1
|
||||||
|
requests-oauthlib==1.3.0
|
||||||
|
requests-toolbelt==0.9.1
|
||||||
|
resampy==0.2.2
|
||||||
|
rfc3986==1.5.0
|
||||||
|
rsa==4.7.2
|
||||||
|
ruamel.yaml==0.17.4
|
||||||
|
ruamel.yaml.clib==0.2.2
|
||||||
|
sacremoses==0.0.44
|
||||||
|
scikit-build==0.11.1
|
||||||
|
scikit-learn==0.24.1
|
||||||
|
scipy==1.6.2
|
||||||
|
SecretStorage==3.3.1
|
||||||
|
semantic-version==2.8.5
|
||||||
|
sentencepiece==0.1.95
|
||||||
|
setuptools-rust==0.12.1
|
||||||
|
shellingham==1.4.0
|
||||||
|
simplejson==3.17.2
|
||||||
|
six==1.15.0
|
||||||
|
sortedcollections==2.1.0
|
||||||
|
sortedcontainers==2.3.0
|
||||||
|
SoundFile==0.10.3.post1
|
||||||
|
spectralcluster==0.1.0
|
||||||
|
SQLAlchemy==1.4.7
|
||||||
|
stevedore==3.3.0
|
||||||
|
sympy==1.8
|
||||||
|
tabulate==0.8.9
|
||||||
|
tensorboard==2.4.1
|
||||||
|
tensorboard-plugin-wit==1.8.0
|
||||||
|
threadpoolctl==2.1.0
|
||||||
|
tokenizers==0.10.2
|
||||||
|
toml==0.10.2
|
||||||
|
torch==1.7.0
|
||||||
|
torch-stft==0.1.4
|
||||||
|
torchaudio==0.7.0
|
||||||
|
torchmetrics==0.2.0
|
||||||
|
torchvision==0.2.2.post3
|
||||||
|
tqdm==4.60.0
|
||||||
|
transformers==4.5.0
|
||||||
|
twine==3.4.1
|
||||||
|
typer==0.3.2
|
||||||
|
typing-extensions==3.7.4.3
|
||||||
|
Unidecode==1.2.0
|
||||||
|
urllib3==1.26.4
|
||||||
|
wcwidth==0.2.5
|
||||||
|
webdataset==0.1.54
|
||||||
|
webencodings==0.5.1
|
||||||
|
Werkzeug==1.0.1
|
||||||
|
wget==3.2
|
||||||
|
wrapt==1.12.1
|
||||||
|
yarl==1.6.3
|
||||||
|
zipp==3.4.1
|
@ -0,0 +1,30 @@
|
|||||||
|
import numpy as np
|
||||||
|
import simpleaudio as sa
|
||||||
|
|
||||||
|
# calculate note frequencies
|
||||||
|
A_freq = 440
|
||||||
|
Csh_freq = A_freq * 2 ** (4 / 12)
|
||||||
|
E_freq = A_freq * 2 ** (7 / 12)
|
||||||
|
|
||||||
|
# get timesteps for each sample, T is note duration in seconds
|
||||||
|
sample_rate = 44100
|
||||||
|
T = 0.25
|
||||||
|
t = np.linspace(0, T, T * sample_rate, False)
|
||||||
|
|
||||||
|
# generate sine wave notes
|
||||||
|
A_note = np.sin(A_freq * t * 2 * np.pi)
|
||||||
|
Csh_note = np.sin(Csh_freq * t * 2 * np.pi)
|
||||||
|
E_note = np.sin(E_freq * t * 2 * np.pi)
|
||||||
|
|
||||||
|
# concatenate notes
|
||||||
|
audio = np.hstack((A_note, Csh_note, E_note))
|
||||||
|
# normalize to 16-bit range
|
||||||
|
audio *= 32767 / np.max(np.abs(audio))
|
||||||
|
# convert to 16-bit data
|
||||||
|
#audio = audio.astype(np.int16)
|
||||||
|
|
||||||
|
# start playback
|
||||||
|
#play_obj = sa.play_buffer(audio, 1, 2, sample_rate)
|
||||||
|
|
||||||
|
# wait for playback to finish before exiting
|
||||||
|
#play_obj.wait_done()
|
@ -0,0 +1,22 @@
|
|||||||
|
import datetime as dt
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
|
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
|
||||||
|
|
||||||
|
# Download and load the pretrained tacotron2 model
|
||||||
|
spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
|
||||||
|
# Download and load the pretrained waveglow model
|
||||||
|
vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
|
||||||
|
#vocoder = Vocoder.from_pretrained("tts_squeezewave")
|
||||||
|
|
||||||
|
# All spectrogram generators start by parsing raw strings to a tokenized version of the string
|
||||||
|
print("starting at {}".format(dt.datetime.now()))
|
||||||
|
parsed = spec_gen.parse("How will this squeeze model sound?")
|
||||||
|
# They then take the tokenized string and produce a spectrogram
|
||||||
|
spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
|
||||||
|
# Finally, a vocoder converts the spectrogram to audio
|
||||||
|
audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
|
||||||
|
print("Finished encoding {}".format(dt.datetime.now()))
|
||||||
|
# Save the audio to disk in a file called speech.wav
|
||||||
|
sf.write("squeeze2.wav", audio.to('cpu').numpy().T, 22050)
|
||||||
|
print("Finished write at {}".format(dt.datetime.now()))
|
@ -0,0 +1,24 @@
|
|||||||
|
import soundfile as sf
|
||||||
|
import torchaudio as ta
|
||||||
|
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
|
||||||
|
|
||||||
|
# Download and load the pretrained tacotron2 model
|
||||||
|
spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
|
||||||
|
# Download and load the pretrained waveglow model
|
||||||
|
vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
|
||||||
|
|
||||||
|
# All spectrogram generators start by parsing raw strings to a tokenized version of the string
|
||||||
|
parsed = spec_gen.parse("You can type your sentence here to get nemo to produce speech.")
|
||||||
|
# They then take the tokenized string and produce a spectrogram
|
||||||
|
spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
|
||||||
|
# Finally, a vocoder converts the spectrogram to audio
|
||||||
|
audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
|
||||||
|
# import pdb; pdb.set_trace()
|
||||||
|
# Save the audio to disk in a file called speech.wav
|
||||||
|
try:
|
||||||
|
ta.save('attempt1.wav', audio, 22050)
|
||||||
|
ta.save('attemp2.wav', audio.to('cpu').numpy(), 22050)
|
||||||
|
except Exception:
|
||||||
|
import pdb; pdb.set_trace()
|
||||||
|
#sf.write('speech.wav', audio.to('cpu').numpy(), 22050)
|
||||||
|
|
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import datetime as dt
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
from pytorch_lightning.core.saving import convert
|
||||||
|
|
||||||
|
import simpleaudio as sa
|
||||||
|
|
||||||
|
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
|
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
|
||||||
|
|
||||||
|
FIFO_PATH = os.environ.get("FIFO_PATH", "tts_fifo_file")
|
||||||
|
|
||||||
|
def create_fifo(path):
|
||||||
|
try:
|
||||||
|
os.mkfifo(path)
|
||||||
|
except OSError as e:
|
||||||
|
print("File {} already exists".format(path))
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# load models
|
||||||
|
spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
|
||||||
|
# Download and load the pretrained waveglow model
|
||||||
|
# vocoder = Vocoder.from_pretrained("tts_waveglow_88m")
|
||||||
|
vocoder = Vocoder.from_pretrained("tts_squeezewave")
|
||||||
|
|
||||||
|
create_fifo(FIFO_PATH)
|
||||||
|
|
||||||
|
print("Pipe text to {}".format(FIFO_PATH))
|
||||||
|
with open(FIFO_PATH, 'r') as fifo_file:
|
||||||
|
while True:
|
||||||
|
text = fifo_file.readline().strip()
|
||||||
|
if text:
|
||||||
|
parsed = spec_gen.parse(text)
|
||||||
|
spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
|
||||||
|
audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
|
||||||
|
converted_audio = audio.to('cpu').numpy().T[0:]
|
||||||
|
# normalize to 16-bit range
|
||||||
|
converted_audio *= 32767 / np.max(np.abs(converted_audio))
|
||||||
|
converted_audio = converted_audio.astype(np.int16)
|
||||||
|
# start playing audio
|
||||||
|
play_obj = sa.play_buffer(converted_audio, 1, 2, 22050)
|
||||||
|
# wait for playback to finish before exiting
|
||||||
|
play_obj.wait_done()
|
||||||
|
text = ''
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
Loading…
Reference in New Issue