# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
#
# 25 lines
# 1.0 KiB
# Python

import soundfile as sf
import torchaudio as ta
from nemo.collections.tts.models.base import SpectrogramGenerator, Vocoder
# Text-to-speech demo: text -> tokens -> spectrogram -> waveform -> WAV files.
# The same waveform is written twice, once through torchaudio and once
# through soundfile, so the two writers can be compared.

# Download and load the pretrained tacotron2 model
spec_gen = SpectrogramGenerator.from_pretrained("tts_en_tacotron2")
# Download and load the pretrained waveglow model
vocoder = Vocoder.from_pretrained("tts_waveglow_88m")

# All spectrogram generators start by parsing raw strings to a tokenized version of the string
parsed = spec_gen.parse("You can type your sentence here to get nemo to produce speech.")
# They then take the tokenized string and produce a spectrogram
spectrogram = spec_gen.generate_spectrogram(tokens=parsed)
# Finally, a vocoder converts the spectrogram to audio
audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)

# Both writers need host memory, and a tensor that is still attached to the
# autograd graph cannot be converted with .numpy(); detach and move once here.
audio_cpu = audio.detach().to('cpu')

# Save the audio to disk (22050 Hz sample rate).
try:
    # torchaudio.save takes a torch.Tensor, not a NumPy array.
    # NOTE(review): assumes audio is 2-D (batch=1, time), which torchaudio
    # would read as (channels=1, time) -- confirm against the vocoder output.
    ta.save('attempt1.wav', audio_cpu, 22050)
    # soundfile is the writer that accepts NumPy data. Index [0] selects the
    # first (presumably only) batch item so a 1-D mono signal is written.
    sf.write('attemp2.wav', audio_cpu.numpy()[0], 22050)
except Exception:
    # Deliberate debugging aid: drop into the debugger so the failing call
    # and the shape/dtype/device of the audio can be inspected interactively.
    import pdb
    pdb.set_trace()