Compare commits
4 Commits
drew/LW-4-
...
master
Author | SHA1 | Date |
---|---|---|
Drew Bednar | 0ce177b290 | 2 years ago |
Drew Bednar | 3bf33843f0 | 2 years ago |
Drew Bednar | 76940cea16 | 2 years ago |
Drew Bednar | 6401651471 | 2 years ago |
@ -1,3 +1,22 @@
|
||||
# LocalWhisper
|
||||
|
||||
Making OpenAI's Whisper available locally via a REST API.
|
||||
|
||||
## Features
|
||||
- [x] Transcription
|
||||
- [ ] Translation (Not-planned, but open to PRs)
|
||||
|
||||
## Usage Example
|
||||
|
||||
Similar to the OpenAI API we can post an audio file to `/audio/transcriptions` as a `multipart/form-data` type.
|
||||
|
||||
```
|
||||
curl --request POST \
|
||||
--url http://localhost:9000/audio/transcriptions \
|
||||
--header 'Content-Type: multipart/form-data' \
|
||||
--header 'Accept: application/json' \
|
||||
--form 'file=@/path/to/file/example.wav;type=audio/wav' \
|
||||
--form 'model=whisper-1'
|
||||
```
|
||||
|
||||
At present the `model` form field is not required or supported. The `WHISPER_BASE_ASR_MODEL` server configuration will be used to determine the Whisper model to use.
|
||||
|
@ -0,0 +1,22 @@
|
||||
from pydantic import BaseSettings, Field
|
||||
|
||||
|
||||
class WhisperSettings(BaseSettings):
    """Configuration for the Whisper application.

    Each field can be overridden through an environment variable named after
    the field and prefixed with "WHISPER_" (see ``Config.env_prefix``).
    """

    # Name of the Whisper model the service hosts.
    base_asr_model: str = Field(
        "medium.en",
        description="The base whisper model to host.",
    )
    # Sample rate that audio is resampled to when needed.
    default_sample_rate: int = Field(
        16000,
        description="The default sample rate used to resample the audio if necessary",
    )

    class Config:
        env_prefix = "WHISPER_"
|
||||
|
||||
|
||||
# Module-level settings instance, constructed once when this module is imported.
whisper_settings = WhisperSettings()
|
@ -0,0 +1,34 @@
|
||||
# Invoke tab-completion script to be sourced with the Z shell.
|
||||
# Known to work on zsh 5.0.x, probably works on later 4.x releases as well (as
|
||||
# it uses the older compctl completion system).
|
||||
|
||||
# Completion function for `compctl -K`: asks Invoke for the valid completions
# of the current command line and returns them through the `reply` array.
_complete_invoke() {
    # `words` contains the entire command string up until now (including
    # program name).
    #
    # We hand it to Invoke so it can figure out the current context: spit back
    # core options, task names, the current task's options, or some combo.
    #
    # Before doing so, we attempt to tease out any collection flag+arg so we
    # can ensure it is applied correctly.
    #
    # NOTE(review): collection_arg is not declared `local`, so it is assigned
    # in the calling shell's scope.
    collection_arg=''
    if [[ "${words}" =~ "(-c|--collection) [^ ]+" ]]; then
        # $MATCH holds the text matched by the regex above, i.e. the flag
        # together with its argument.
        collection_arg=$MATCH
    fi
    # `reply` is the array of valid completions handed back to `compctl`.
    # Use ${=...} to force whitespace splitting in expansion of
    # $collection_arg
    reply=( $(invoke ${=collection_arg} --complete -- ${words}) )
}
|
||||
|
||||
|
||||
# Tell shell builtin to use the above for completing our given binary name(s).
|
||||
# * -K: use given function name to generate completions.
|
||||
# * +: specifies 'alternative' completion, where options after the '+' are only
|
||||
# used if the completion from the options before the '+' result in no matches.
|
||||
# * -f: when function generates no results, use filenames.
|
||||
# * positional args: program names to complete for.
|
||||
compctl -K _complete_invoke + -f invoke inv
|
||||
|
||||
# vim: set ft=sh :
|
||||
|
@ -1,9 +1,13 @@
|
||||
from local_whisper.inference import transcribe
|
||||
from local_whisper.audio import load_audio
|
||||
from local_whisper.inference import transcribe
|
||||
|
||||
|
||||
def test_transcribe(sample_audio):
    """Transcribe the sample audio file and check the recognised text.

    The comparison is case-insensitive so that differences in the model's
    capitalisation do not fail the test. The earlier exact-case assertion is
    dropped: it was the pre-change line of this diff and made the
    case-insensitive check pointless.
    """
    with open(sample_audio, mode="rb") as af:
        audio = load_audio(af)
        result = transcribe(audio)

    expected = (
        "Let's see, right now I'm playing Horizon Zero Dawn."
        " I also had just recently finished BioShock Infinite."
    )
    assert result["text"].strip().lower() == expected.lower()
|
||||
|
@ -0,0 +1,29 @@
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
from pydantic.types import SecretStr
|
||||
|
||||
from local_whisper.settings import WhisperSettings
|
||||
|
||||
SETTING_DEFAULTS = {"BASE_ASR_MODEL": "medium.en", "DEFAULT_SAMPLE_RATE": 16000}
|
||||
|
||||
|
||||
def test_setting_defaults():
    """Regression test for the settings schema.

    With an empty environment, ``WhisperSettings`` must expose exactly as many
    fields as ``SETTING_DEFAULTS`` lists, and each field must hold the default
    recorded there.
    """
    with patch.dict(os.environ, {}, clear=True):
        settings = WhisperSettings()

    # Catches fields added or removed without updating SETTING_DEFAULTS.
    assert len(settings.dict()) == len(SETTING_DEFAULTS)

    for name, expected in SETTING_DEFAULTS.items():
        value = getattr(settings, name.lower())
        # SecretStr fields mask their content; unwrap before comparing.
        if isinstance(value, SecretStr):
            value = value.get_secret_value()
        # This comparison previously lacked `assert`, so a wrong default
        # could never fail the test.
        assert value == expected, f"unexpected default for {name}"
|
||||
|
||||
|
||||
def test_with_envvar_prefix():
    """A WHISPER_-prefixed environment variable overrides the matching field."""
    env = {"WHISPER_DEFAULT_SAMPLE_RATE": "22500"}
    with patch.dict(os.environ, env, clear=True):
        settings = WhisperSettings()
        observed_rate = settings.default_sample_rate
        assert observed_rate == 22500
|
Loading…
Reference in New Issue