# learn_nemo_jetson/tts_example.py

import datetime as dt
import soundfile as sf

from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder

# --- Model setup -------------------------------------------------------------
# Fetch the pretrained Tacotron2 checkpoint; it acts as the spectrogram generator.
spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
# Fetch the pretrained WaveGlow checkpoint; it acts as the vocoder.
# To try the alternative vocoder instead, load "tts_squeezewave" here.
vocoder = Vocoder.from_pretrained("tts_waveglow_88m")

# --- Synthesis (a timestamp is printed before and after) ---------------------
print(f"starting at {dt.datetime.now()}")
# Step 1: every spectrogram generator first parses the raw string into tokens.
parsed = spec_gen.parse("How will this squeeze model sound?")
# Step 2: the tokens are turned into a spectrogram.
spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
# Step 3: the vocoder turns the spectrogram into audio.
audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)
print(f"Finished encoding {dt.datetime.now()}")

# --- Output ------------------------------------------------------------------
# Move the audio to the CPU, convert it to a transposed NumPy array, and write
# it to squeeze2.wav with 22050 passed as the sample rate.
sf.write("squeeze2.wav", audio.to('cpu').numpy().T, samplerate=22050)
print(f"Finished write at {dt.datetime.now()}")
# Page comment (3 years ago): "Need to save this"
			`import datetime as dt`
			`import soundfile as sf`

			`from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder`

			`# Download and load the pretrained tacotron2 model`
			`spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")`
			`# Download and load the pretrained waveglow model`
			`vocoder = Vocoder.from_pretrained("tts_waveglow_88m")`
			`#vocoder = Vocoder.from_pretrained("tts_squeezewave")`

			`# All spectrogram generators start by parsing raw strings to a tokenized version of the string`
			`print("starting at {}".format(dt.datetime.now()))`
			`parsed = spec_gen.parse("How will this squeeze model sound?")`
			`# They then take the tokenized string and produce a spectrogram`
			`spectrogram = spec_gen.generate_spectrogram(tokens=parsed)`
			`# Finally, a vocoder converts the spectrogram to audio`
			`audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)`
			`print("Finished encoding {}".format(dt.datetime.now()))`
			`# Save the audio to disk in a file called speech.wav`
			`sf.write("squeeze2.wav", audio.to('cpu').numpy().T, 22050)`
			`print("Finished write at {}".format(dt.datetime.now()))`