Compare commits

...

4 Commits

Author SHA1 Message Date
Drew Bednar 0ce177b290 Adding usage example 2 years ago
Drew Bednar 3bf33843f0 Adding zshell autocomplete script 2 years ago
Drew Bednar 76940cea16 #3 Adding WhisperSettings (#7)
Simply adding some settings using pydantic.

Reviewed-on: #7
2 years ago
Drew Bednar 6401651471 #4 Adding macos compiled requirements (#6)
#4 Adding macos compiled requirements.

Co-authored-by: Drew Bednar <drew@androiddrew.com>
Reviewed-on: #6
2 years ago

3
.gitignore vendored

@ -158,4 +158,5 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear # and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder. # option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/ .idea/
.vscode/

@ -1,3 +1,22 @@
# LocalWhisper # LocalWhisper
Making OpenAPI's Open Whisper available via ReST API locally. Making OpenAPI's Open Whisper available via ReST API locally.
## Features
- [x] Transcription
- [ ] Translation (Not-planned, but open to PRs)
## Usage Example
Similar to the OpenAI API we can post an audio file to `/audio/transcriptions` as a `multipart/form-data` type.
```
curl --request POST \
--url http://localhost:9000/audio/transcriptions \
--header 'Content-Type: multipart/form-data' \
--header 'Accept: application/json' \
  --form 'file=@/path/to/file/example.wav;type=audio/wav' \
--form 'model=whisper-1'
```
At present the `model` form field is not required or supported. The `WHISPER_BASE_ASR_MODEL` server configuration will be used to determine the Whisper model to use.

@ -3,7 +3,9 @@ from typing import BinaryIO
import ffmpeg import ffmpeg
import numpy as np import numpy as np
DEFAULT_SAMPLE_RATE = 16000 from .settings import whisper_settings
DEFAULT_SAMPLE_RATE = whisper_settings.default_sample_rate
# TODO probably can offload this on a worker queue too # TODO probably can offload this on a worker queue too

@ -4,14 +4,17 @@ from threading import Lock
import torch import torch
import whisper import whisper
from .settings import whisper_settings
# TODO use pydantic config # TODO use pydantic config
model_name = os.getenv("ASR_MODEL", "base") model_name = whisper_settings.base_asr_model
if torch.cuda.is_available(): if torch.cuda.is_available():
model = whisper.load_model(model_name).cuda() model = whisper.load_model(model_name).cuda()
else: else:
model = whisper.load_model(model_name) model = whisper.load_model(model_name)
model_lock = Lock() model_lock = Lock()
# TODO move transcribe to a modeling worker # TODO move transcribe to a modeling worker
def transcribe(audio): def transcribe(audio):
# options_dict = {"task" : task} # options_dict = {"task" : task}

@ -0,0 +1,22 @@
from pydantic import BaseSettings, Field


class WhisperSettings(BaseSettings):
    """Whisper Application Settings.

    All environment variables supplied should be prefixed with "WHISPER_"
    (e.g. WHISPER_BASE_ASR_MODEL, WHISPER_DEFAULT_SAMPLE_RATE).
    """

    # Name of the whisper model checkpoint loaded at startup.
    base_asr_model: str = Field(
        default="medium.en", description="The base whisper model to host."
    )
    # Target rate (Hz) used when resampling incoming audio.
    default_sample_rate: int = Field(
        default=16000,
        description="The default sample rate used to resample the audio if necessary",
    )

    class Config:
        # pydantic v1 BaseSettings: environment variables are read with this prefix.
        env_prefix = "WHISPER_"


# Module-level singleton shared by the audio and inference modules.
whisper_settings = WhisperSettings()

@ -0,0 +1,71 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile --output-file=macos-py3.11-dev_requirements.txt dev_requirements.in
#
black==23.3.0
# via -r dev_requirements.in
build==0.10.0
# via
# -r dev_requirements.in
# pip-tools
cfgv==3.3.1
# via pre-commit
click==8.1.3
# via
# black
# pip-tools
distlib==0.3.6
# via virtualenv
exceptiongroup==1.1.1
# via -r dev_requirements.in
filelock==3.12.0
# via virtualenv
identify==2.5.24
# via pre-commit
iniconfig==2.0.0
# via pytest
invoke==2.1.2
# via -r dev_requirements.in
isort==5.12.0
# via -r dev_requirements.in
mypy-extensions==1.0.0
# via black
nodeenv==1.8.0
# via pre-commit
packaging==23.1
# via
# black
# build
# pytest
pathspec==0.11.1
# via black
pip-tools==6.13.0
# via -r dev_requirements.in
platformdirs==3.5.1
# via
# black
# virtualenv
pluggy==1.0.0
# via pytest
pre-commit==3.3.2
# via -r dev_requirements.in
pyproject-hooks==1.0.0
# via build
pytest==7.3.1
# via -r dev_requirements.in
pyyaml==6.0
# via pre-commit
ruff==0.0.269
# via -r dev_requirements.in
shellcheck-py==0.9.0.2
# via -r dev_requirements.in
virtualenv==20.23.0
# via pre-commit
wheel==0.40.0
# via pip-tools
# The following packages are considered to be unsafe in a requirements file:
# pip
# setuptools

@ -0,0 +1,76 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile --output-file=macos-py3.11-requirements.txt requirements.in
#
anyio==3.6.2
# via starlette
certifi==2023.5.7
# via requests
charset-normalizer==3.1.0
# via requests
click==8.1.3
# via uvicorn
fastapi==0.95.2
# via -r requirements.in
ffmpeg-python==0.2.0
# via openai-whisper
filelock==3.12.0
# via torch
future==0.18.3
# via ffmpeg-python
h11==0.14.0
# via uvicorn
idna==3.4
# via
# anyio
# requests
jinja2==3.1.2
# via torch
llvmlite==0.40.0
# via numba
markupsafe==2.1.2
# via jinja2
more-itertools==9.1.0
# via openai-whisper
mpmath==1.3.0
# via sympy
networkx==3.1
# via torch
numba==0.57.0
# via openai-whisper
numpy==1.24.3
# via
# numba
# openai-whisper
openai-whisper==20230314
# via -r requirements.in
pydantic==1.10.8
# via fastapi
python-multipart==0.0.6
# via -r requirements.in
regex==2023.5.5
# via tiktoken
requests==2.31.0
# via tiktoken
sniffio==1.3.0
# via anyio
starlette==0.27.0
# via fastapi
sympy==1.12
# via torch
tiktoken==0.3.1
# via openai-whisper
torch==2.0.1
# via openai-whisper
tqdm==4.65.0
# via openai-whisper
typing-extensions==4.6.0
# via
# pydantic
# torch
urllib3==2.0.2
# via requests
uvicorn==0.22.0
# via -r requirements.in

@ -0,0 +1,34 @@
# Invoke tab-completion script to be sourced with the Z shell.
# Known to work on zsh 5.0.x, probably works on later 4.x releases as well (as
# it uses the older compctl completion system).
_complete_invoke() {
    # `words` contains the entire command string up til now (including
    # program name).
    #
    # We hand it to Invoke so it can figure out the current context: spit back
    # core options, task names, the current task's options, or some combo.
    #
    # Before doing so, we attempt to tease out any collection flag+arg so we
    # can ensure it is applied correctly.
    collection_arg=''
    # NOTE(review): relies on zsh setting $MATCH after a successful `=~`
    # match (default behavior unless the BASH_REMATCH option is set).
    if [[ "${words}" =~ "(-c|--collection) [^ ]+" ]]; then
        collection_arg=$MATCH
    fi
    # `reply` is the array of valid completions handed back to `compctl`.
    # Use ${=...} to force whitespace splitting in expansion of
    # $collection_arg
    reply=( $(invoke ${=collection_arg} --complete -- ${words}) )
}
# Tell shell builtin to use the above for completing our given binary name(s).
# * -K: use given function name to generate completions.
# * +: specifies 'alternative' completion, where options after the '+' are only
#   used if the completion from the options before the '+' result in no matches.
# * -f: when function generates no results, use filenames.
# * positional args: program names to complete for.
compctl -K _complete_invoke + -f invoke inv
# vim: set ft=sh :

@ -26,6 +26,6 @@ def build(c):
@task @task
def serve_dev(c): def serve_dev(c, port=9000):
"""Runs the FastAPI webservice""" """Runs the FastAPI webservice"""
c.run("uvicorn local_whisper.webservice:app --reload") c.run(f"uvicorn local_whisper.webservice:app --reload --port {port}")

@ -1,9 +1,13 @@
from local_whisper.inference import transcribe
from local_whisper.audio import load_audio from local_whisper.audio import load_audio
from local_whisper.inference import transcribe
def test_transcribe(sample_audio): def test_transcribe(sample_audio):
with open(sample_audio, mode="rb") as af: with open(sample_audio, mode="rb") as af:
audio = load_audio(af) audio = load_audio(af)
result = transcribe(audio) result = transcribe(audio)
assert result["text"].strip() == "Let's see, right now I'm playing Horizon Zero Dawn. I also had just recently finished BioShock Infinite." assert (
result["text"].strip().lower()
== "Let's see, right now I'm playing Horizon Zero Dawn."
" I also had just recently finished BioShock Infinite.".lower()
)

@ -0,0 +1,29 @@
import os
from unittest.mock import patch
from pydantic.types import SecretStr
from local_whisper.settings import WhisperSettings
SETTING_DEFAULTS = {"BASE_ASR_MODEL": "medium.en", "DEFAULT_SAMPLE_RATE": 16000}
def test_setting_defaults():
    """Regression test for settings schema.

    Clears the environment so only field defaults apply, then verifies the
    schema exposes exactly the expected fields with the expected defaults.
    """
    with patch.dict(os.environ, {}, clear=True):
        whisper_settings = WhisperSettings()
        assert len(whisper_settings.dict()) == len(SETTING_DEFAULTS)
        for key, expected in SETTING_DEFAULTS.items():
            setting_value = getattr(whisper_settings, key.lower())
            # SecretStr masks its value; unwrap before comparing to the raw default.
            unmasked_setting = (
                setting_value.get_secret_value()
                if isinstance(setting_value, SecretStr)
                else setting_value
            )
            # Bug fix: this comparison previously lacked `assert`, so the loop
            # could never fail regardless of the settings' actual values.
            assert unmasked_setting == expected
def test_with_envvar_prefix():
    """A WHISPER_-prefixed environment variable overrides the field default."""
    env = {"WHISPER_DEFAULT_SAMPLE_RATE": "22500"}
    with patch.dict(os.environ, env, clear=True):
        settings = WhisperSettings()
        assert settings.default_sample_rate == 22500
Loading…
Cancel
Save