You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
learn_audio/speech_recog/virtual_assistant.py

102 lines
2.8 KiB
Python

import os
import shlex
import sys
import typing
import webbrowser
from abc import ABC, abstractmethod

import speech_recognition as sr
class Action(ABC):
    """Abstract base for a voice command handled by the virtual assistant.

    Subclasses set ``action_key`` (the phrase that introduces the command)
    and implement ``action_callback``.
    """

    # Phrase that a piece of recognized text must begin with to select
    # this action.
    action_key: str

    @abstractmethod
    def action_callback(self, action_text) -> typing.Any:
        """Handle *action_text*; every concrete action must override this."""

    def register_va(self, virtual_assisant):
        """Store the owning assistant on ``self._va`` for later callbacks."""
        self._va = virtual_assisant

    def sans_action_key(self, action_text):
        """Return *action_text* with the action key and one separator removed.

        The slice skips ``len(action_key) + 1`` characters, i.e. the key
        plus the single character that follows it.
        """
        payload_start = len(self.action_key) + 1
        return action_text[payload_start:]
class BrowserAction(Action):
    """Action that opens a spoken domain in the web browser.

    The text following the ``"open browser"`` key is used verbatim as the
    domain of an ``https://`` URL.
    """

    action_key = "open browser"

    def action_callback(self, action_text):
        """Announce and open the domain named in *action_text*.

        Args:
            action_text: Full recognized text, beginning with the action key.
        """
        domain = self.sans_action_key(action_text)
        url = f"https://{domain}"
        print(f"Opening: {url}")
        # Speak the confirmation through the registered assistant, the same
        # way EchoAction does; the f-prefix is required so the domain is
        # actually interpolated into the spoken message.
        self._va.text_to_speech(f"Opening {domain}")
        webbrowser.open(url)
class EchoAction(Action):
    """Action that repeats back whatever follows the word ``"echo"``."""

    action_key = "echo"

    def action_callback(self, action_text) -> typing.Any:
        """Print the full command, then speak it without the action key."""
        print(action_text)
        spoken = self.sans_action_key(action_text)
        self._va.text_to_speech(spoken)
class StopAction(Action):
    """Action that shuts the program down on ``"stop listening"``."""

    action_key = "stop listening"

    def action_callback(self, action_text) -> typing.Any:
        """Print a shutdown notice and raise ``SystemExit`` with status 0."""
        print("Stopping Virtual Assistant...")
        # Equivalent to sys.exit(0), which raises this same exception.
        raise SystemExit(0)
class VirtualAssistant:
    """Listens on the microphone and dispatches recognized text to actions.

    Actions are kept in ``action_register`` in registration order; the first
    one whose ``action_key`` prefixes the recognized text handles it.
    """

    def __init__(
        self,
        timeout: int = 3,
        actions: typing.Optional[typing.List["Action"]] = None,
    ):
        """Create the recognizer and register any initial actions.

        Args:
            timeout: Passed as ``timeout`` to the recognizer's ``listen``
                call in ``speech_to_text``.
            actions: Actions to register immediately. ``None`` (the
                default) registers nothing.
        """
        self.listener = sr.Recognizer()
        self.timeout = timeout
        self.action_register = []
        # ``None`` replaces a shared mutable ``[]`` default; an empty tuple
        # stands in so the loop is a no-op when nothing was passed.
        for action in actions or ():
            self.register_action(action)

    def register_action(self, action: "Action"):
        """Adds new Action instances to the Virtual Assistant's action register."""
        self.action_register.append(action)
        # Give the action a handle back to this assistant (used for speech).
        action.register_va(self)

    def parse_action(self, text: str):
        """Checks the VA command register and executes callbacks when found.

        Matching is a case-insensitive prefix test against each action's
        ``action_key``; only the first matching action runs. The callback
        receives the original, un-lowercased text. When nothing matches a
        notice is printed.
        """
        lowered = text.lower()  # invariant across the loop, so compute once
        for action in self.action_register:
            if lowered.startswith(action.action_key):
                action.action_callback(text)
                return
        print(f"No actions taken on: {text}")

    def speech_to_text(self) -> str:
        """Record one utterance from the microphone and return its text.

        Opens ``sr.Microphone``, calibrates for ambient noise, listens with
        the configured timeout and transcribes the audio through the
        recognizer's ``recognize_google`` method.

        Returns:
            The recognized text, or ``""`` when the recognizer raises
            ``UnknownValueError``, ``RequestError`` or ``WaitTimeoutError``.
        """
        try:
            with sr.Microphone() as mic:
                print("Arni is listening...")
                self.listener.adjust_for_ambient_noise(mic)
                utterance = self.listener.listen(mic, timeout=self.timeout)
                return self.listener.recognize_google(utterance)
        except (sr.UnknownValueError, sr.RequestError, sr.WaitTimeoutError):
            # Best effort: a failed recognition is reported as empty text.
            return ""

    @staticmethod
    def _build_tts_command(text: str) -> str:
        """Return the shell pipeline that speaks *text*, with *text* quoted."""
        # The text originates from speech recognition, so it must not be
        # able to break out of its argument position in the shell command.
        # NOTE: shlex.quote targets POSIX shells, which the pipe into
        # mpg123 already presumes.
        return f"gtts-cli --nocheck {shlex.quote(text)} | mpg123 -q -"

    def text_to_speech(self, text: str):
        """Speak *text* by piping ``gtts-cli`` output into ``mpg123``.

        Prints the text and the exact command before running it through
        ``os.system``.
        """
        print(f"Saying: {text}")
        cmd = self._build_tts_command(text)
        print(cmd)
        os.system(cmd)
def main():
    """Build an assistant with the three stock actions and loop forever.

    Each pass listens for one utterance and hands the resulting text to the
    assistant's action dispatcher.
    """
    print("Creating virtual assistant...")
    stock_actions = [BrowserAction(), EchoAction(), StopAction()]
    assistant = VirtualAssistant(actions=stock_actions)
    while True:
        assistant.parse_action(assistant.speech_to_text())
# Start the assistant only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()