# learn_audio/speech_recog/virtual_assistant.py

import os
import shlex
import sys
import typing
import webbrowser
from abc import ABC, abstractmethod

import speech_recognition as sr

class Action(ABC):
    """Base class for a command the virtual assistant can carry out.

    Subclasses provide ``action_key`` and implement ``action_callback``.
    """

    # Text a command is matched against; also used by ``sans_action_key``
    # to work out how much of the command to drop.
    action_key: str

    @abstractmethod
    def action_callback(self, action_text) -> typing.Any:
        """Handle a command; ``action_text`` is the command text received."""

    def register_va(self, virtual_assisant):
        """Store the given assistant on this action as ``self._va``."""
        self._va = virtual_assisant

    def sans_action_key(self, action_text):
        """Return ``action_text`` minus the action key and one following character.

        The slice skips ``len(action_key) + 1`` characters, i.e. the key plus
        the single character after it (presumably the separating space).
        """
        payload_start = len(self.action_key) + 1
        return action_text[payload_start:]

class BrowserAction(Action):
    """Open a domain named in the command in the web browser.

    Everything after the ``"open browser"`` key is treated as the domain and
    opened as ``https://<domain>``.
    """

    action_key = "open browser"

    def action_callback(self, action_text) -> typing.Any:
        """Announce and open the URL built from ``action_text``.

        Args:
            action_text: Command text beginning with ``action_key``; the
                remainder is used verbatim as the domain.
        """
        domain = self.sans_action_key(action_text)
        url = f"https://{domain}"
        print(f"Opening: {url}")
        # Speak the confirmation through the registered assistant. The
        # original line had no method name and no f-prefix, so it neither
        # parsed nor interpolated the domain.
        self._va.text_to_speech(f"Opening {domain}")
        webbrowser.open(url)

class EchoAction(Action):
    """Speak back whatever follows the ``"echo"`` key."""

    action_key = "echo"

    def action_callback(self, action_text) -> typing.Any:
        """Print the full command, then speak it with the action key removed."""
        print(action_text)
        spoken = self.sans_action_key(action_text)
        self._va.text_to_speech(spoken)

class StopAction(Action):
    """End the program in response to the ``"stop listening"`` command."""

    action_key = "stop listening"

    def action_callback(self, action_text) -> typing.Any:
        """Print a shutdown notice and call ``sys.exit(0)``."""
        notice = "Stopping Virtual Assistant..."
        print(notice)
        sys.exit(0)


class VirtualAssistant:
    """Assistant that turns speech into text and dispatches it to actions.

    Actions are kept in ``action_register`` in registration order; the first
    one whose ``action_key`` prefixes the (lower-cased) command wins.
    """

    def __init__(
        self,
        timeout: int = 3,
        actions: typing.Optional[typing.List["Action"]] = None,
    ):
        """Create the recognizer and register any initial actions.

        Args:
            timeout: Passed as ``timeout`` to the recognizer's ``listen`` call.
            actions: Actions to register immediately; ``None`` registers none.
        """
        self.listener = sr.Recognizer()
        self.timeout = timeout
        self.action_register: typing.List["Action"] = []
        # ``None`` replaces the former shared ``[]`` default (mutable default).
        for action in actions or ():
            self.register_action(action)

    def register_action(self, action: "Action"):
        """Adds new Action instances to the Virtual Assistant's action register."""
        self.action_register.append(action)
        # Give the action a handle back to this assistant (e.g. for speech).
        action.register_va(self)

    def parse_action(self, text: str):
        """Run the first registered action whose key prefixes ``text``.

        Matching is done on the lower-cased text, but the callback receives
        the original ``text``. If nothing matches, a notice is printed.
        """
        lowered = text.lower()  # hoisted: invariant across the loop
        for action in self.action_register:
            if lowered.startswith(action.action_key):
                action.action_callback(text)
                return
        print(f"No actions taken on: {text}")

    def speech_to_text(self) -> str:
        """Listen on a microphone and return the recognized text.

        Returns:
            The text from ``recognize_google``, or ``""`` when the audio is
            not understood, the recognition request fails, or no speech
            starts within ``self.timeout``.
        """
        try:
            with sr.Microphone() as mic:
                print("Arni is listening...")
                self.listener.adjust_for_ambient_noise(mic)
                utterance = self.listener.listen(mic, timeout=self.timeout)
                content = self.listener.recognize_google(utterance)
                return content
        except (sr.UnknownValueError, sr.RequestError, sr.WaitTimeoutError):
            return ""

    @staticmethod
    def _tts_command(text: str) -> str:
        """Build the shell pipeline that speaks ``text``, with ``text`` shell-quoted."""
        return f"gtts-cli --nocheck {shlex.quote(text)} | mpg123 -q -"

    def text_to_speech(self, text: str):
        """Speak ``text`` by piping ``gtts-cli`` output into ``mpg123``.

        The text may come from speech recognition, so it is quoted with
        ``shlex.quote`` before being placed in the shell command; otherwise
        quotes, ``$(...)`` or backticks in it would be interpreted by the shell.
        """
        print(f"Saying: {text}")
        cmd = self._tts_command(text)
        print(cmd)
        os.system(cmd)


def main():
    """Create an assistant with the browser, echo and stop actions and run it.

    Loops indefinitely: each pass captures speech and dispatches the
    resulting text. The loop only ends when something raises.
    """
    print("Creating virtual assistant...")
    starting_actions = [BrowserAction(), EchoAction(), StopAction()]
    va = VirtualAssistant(actions=starting_actions)
    while True:
        va.parse_action(va.speech_to_text())

# Start the assistant only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()