Compare commits

..

4 Commits

Author SHA1 Message Date
Drew Bednar 0ce177b290 Adding usage example 2 years ago
Drew Bednar 3bf33843f0 Adding zshell autocomplete script 2 years ago
Drew Bednar 76940cea16 #3 Adding WhisperSettings (#7)
Simply adding some settings using pydantic.

Reviewed-on: #7
2 years ago
Drew Bednar 6401651471 #4 Adding macos compiled requirements (#6)
#4 Adding macos compiled requirements.

Co-authored-by: Drew Bednar <drew@androiddrew.com>
Reviewed-on: #6
2 years ago

3
.gitignore vendored

@ -158,4 +158,5 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/
.vscode/

@ -1,3 +1,22 @@
# LocalWhisper
Making OpenAI's Whisper available locally via a REST API.
## Features
- [x] Transcription
- [ ] Translation (Not-planned, but open to PRs)
## Usage Example
Similar to the OpenAI API, we can post an audio file to `/audio/transcriptions` as a `multipart/form-data` type.
```
curl --request POST \
--url http://localhost:9000/audio/transcriptions \
--header 'Content-Type: multipart/form-data' \
--header 'Accept: application/json' \
--form 'file=@/path/to/file/example.wav;type=audio/wav' \
--form 'model=whisper-1'
```
At present the `model` form field is not required or supported. The `WHISPER_BASE_ASR_MODEL` server configuration will be used to determine the Whisper model to use.

@ -3,7 +3,9 @@ from typing import BinaryIO
import ffmpeg
import numpy as np
DEFAULT_SAMPLE_RATE = 16000
from .settings import whisper_settings
DEFAULT_SAMPLE_RATE = whisper_settings.default_sample_rate
# TODO probably can offload this on a worker queue too

@ -4,14 +4,17 @@ from threading import Lock
import torch
import whisper
from .settings import whisper_settings
# TODO use pydantic config
model_name = os.getenv("ASR_MODEL", "base")
model_name = whisper_settings.base_asr_model
if torch.cuda.is_available():
model = whisper.load_model(model_name).cuda()
else:
model = whisper.load_model(model_name)
model_lock = Lock()
# TODO move transcribe to a modeling worker
def transcribe(audio):
# options_dict = {"task" : task}

@ -0,0 +1,22 @@
from pydantic import BaseSettings, Field
class WhisperSettings(BaseSettings):
    """Whisper application settings.

    All environment variables supplied should be prefixed with "WHISPER_".
    """

    # Name of the Whisper model to host.
    base_asr_model: str = Field(
        "medium.en",
        description="The base whisper model to host.",
    )
    # Sample rate audio is resampled to when necessary.
    default_sample_rate: int = Field(
        16000,
        description="The default sample rate used to resample the audio if necessary",
    )

    class Config:
        # Environment variables are looked up with this prefix.
        env_prefix = "WHISPER_"


# Module-level settings instance, built once at import time.
whisper_settings = WhisperSettings()

@ -0,0 +1,34 @@
# Invoke tab-completion script to be sourced with the Z shell.
# Known to work on zsh 5.0.x, probably works on later 4.x releases as well (as
# it uses the older compctl completion system).
_complete_invoke() {
    # `words` contains the entire command string up until now (including
    # program name).
    #
    # We hand it to Invoke so it can figure out the current context: spit back
    # core options, task names, the current task's options, or some combo.
    #
    # Before doing so, we attempt to tease out any collection flag+arg so we
    # can ensure it is applied correctly.
    #
    # Declared `local` so the variable does not leak into (or clobber a
    # same-named variable in) the interactive shell on every completion.
    local collection_arg=''
    if [[ "${words}" =~ "(-c|--collection) [^ ]+" ]]; then
        # $MATCH holds the text matched by the `=~` test above.
        collection_arg=$MATCH
    fi
    # `reply` is the array of valid completions handed back to `compctl`.
    # Use ${=...} to force whitespace splitting in expansion of
    # $collection_arg
    reply=( $(invoke ${=collection_arg} --complete -- ${words}) )
}
# Tell shell builtin to use the above for completing our given binary name(s).
# * -K: use given function name to generate completions.
# * +: specifies 'alternative' completion, where options after the '+' are only
# used if the completion from the options before the '+' result in no matches.
# * -f: when function generates no results, use filenames.
# * positional args: program names to complete for.
compctl -K _complete_invoke + -f invoke inv
# vim: set ft=sh :

@ -26,6 +26,6 @@ def build(c):
@task
def serve_dev(c):
def serve_dev(c, port=9000):
"""Runs the FastAPI webservice"""
c.run("uvicorn local_whisper.webservice:app --reload")
c.run(f"uvicorn local_whisper.webservice:app --reload --port {port}")

@ -1,9 +1,13 @@
from local_whisper.inference import transcribe
from local_whisper.audio import load_audio
from local_whisper.inference import transcribe
def test_transcribe(sample_audio):
with open(sample_audio, mode="rb") as af:
audio = load_audio(af)
result = transcribe(audio)
assert result["text"].strip() == "Let's see, right now I'm playing Horizon Zero Dawn. I also had just recently finished BioShock Infinite."
assert (
result["text"].strip().lower()
== "Let's see, right now I'm playing Horizon Zero Dawn."
" I also had just recently finished BioShock Infinite.".lower()
)

@ -0,0 +1,29 @@
import os
from unittest.mock import patch
from pydantic.types import SecretStr
from local_whisper.settings import WhisperSettings
SETTING_DEFAULTS = {"BASE_ASR_MODEL": "medium.en", "DEFAULT_SAMPLE_RATE": 16000}
def test_setting_defaults():
    """Regression test for settings schema.

    Builds ``WhisperSettings`` with an emptied environment and checks that the
    number of fields and every default value match ``SETTING_DEFAULTS``.
    """
    # Clear the environment so only the declared defaults are in effect.
    with patch.dict(os.environ, {}, clear=True):
        savant_settings = WhisperSettings()

    assert len(savant_settings.dict()) == len(SETTING_DEFAULTS)

    for k, v in SETTING_DEFAULTS.items():
        _setting_value = getattr(savant_settings, k.lower())
        # Secret fields must be unwrapped before comparing to the plain default.
        unmasked_setting = (
            _setting_value.get_secret_value()
            if isinstance(_setting_value, SecretStr)
            else _setting_value
        )
        # Must be asserted: a bare comparison is a no-op and would let a
        # changed default pass silently.
        assert unmasked_setting == v
def test_with_envvar_prefix():
    """Check that ``WHISPER_DEFAULT_SAMPLE_RATE`` overrides ``default_sample_rate``."""
    env_overrides = {"WHISPER_DEFAULT_SAMPLE_RATE": "22500"}
    with patch.dict(os.environ, env_overrides, clear=True):
        settings = WhisperSettings()
    assert settings.default_sample_rate == 22500
Loading…
Cancel
Save