Compare commits
4 Commits
drew/LW-4-
...
master
Author | SHA1 | Date |
---|---|---|
Drew Bednar | 0ce177b290 | 2 years ago |
Drew Bednar | 3bf33843f0 | 2 years ago |
Drew Bednar | 76940cea16 | 2 years ago |
Drew Bednar | 6401651471 | 2 years ago |
@ -1,3 +1,22 @@
|
|||||||
# LocalWhisper
|
# LocalWhisper
|
||||||
|
|
||||||
Making OpenAI's Whisper available via REST API locally.
|
Making OpenAI's Whisper available via REST API locally.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
- [x] Transcription
|
||||||
|
- [ ] Translation (Not-planned, but open to PRs)
|
||||||
|
|
||||||
|
## Usage Example
|
||||||
|
|
||||||
|
Similar to the OpenAI API we can post an audio file to `/audio/transcriptions` as a `multipart/form-data` type.
|
||||||
|
|
||||||
|
```
|
||||||
|
curl --request POST \
|
||||||
|
--url http://localhost:9000/audio/transcriptions \
|
||||||
|
--header 'Content-Type: multipart/form-data' \
|
||||||
|
--header 'Accept: application/json' \
|
||||||
|
--form 'file=@/path/to/file/example.wav;type=audio/wav' \
|
||||||
|
--form 'model=whisper-1'
|
||||||
|
```
|
||||||
|
|
||||||
|
At present the `model` form field is not required or supported. The `WHISPER_BASE_ASR_MODEL` server configuration will be used to determine the Whisper model to use.
|
||||||
|
@ -0,0 +1,22 @@
|
|||||||
|
from pydantic import BaseSettings, Field
|
||||||
|
|
||||||
|
|
||||||
|
class WhisperSettings(BaseSettings):
    """Configuration for the Whisper application.

    Values may be supplied through environment variables; every variable
    name must carry the ``WHISPER_`` prefix.
    """

    # Name of the Whisper model that the service hosts.
    base_asr_model: str = Field(
        description="The base whisper model to host.",
        default="medium.en",
    )
    # Rate that incoming audio is resampled to when needed.
    default_sample_rate: int = Field(
        description="The default sample rate used to resample the audio if necessary",
        default=16000,
    )

    class Config:
        # Prefix applied to every environment variable name.
        env_prefix = "WHISPER_"
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level settings instance, created once when this module is imported.
whisper_settings = WhisperSettings()
|
@ -0,0 +1,34 @@
|
|||||||
|
# Invoke tab-completion script to be sourced with the Z shell.
|
||||||
|
# Known to work on zsh 5.0.x, probably works on later 4.x releases as well (as
|
||||||
|
# it uses the older compctl completion system).
|
||||||
|
|
||||||
|
_complete_invoke() {
    # Completion generator called by `compctl -K`.
    #
    # `words` holds the whole command line typed so far, program name
    # included. It is passed to Invoke, which works out the current context
    # and prints the matching candidates: core options, task names, the
    # current task's options, or some combination.
    #
    # A collection flag and its argument, when present, are pulled out first
    # so they can be applied ahead of `--complete`.
    collection_arg=''
    [[ "${words}" =~ "(-c|--collection) [^ ]+" ]] && collection_arg=$MATCH
    # `reply` is the array of valid completions handed back to `compctl`.
    # ${=...} forces whitespace splitting, so the flag and its value stored
    # in $collection_arg are passed as separate arguments.
    reply=( $(invoke ${=collection_arg} --complete -- ${words}) )
}
|
||||||
|
|
||||||
|
|
||||||
|
# Tell shell builtin to use the above for completing our given binary name(s).
|
||||||
|
# * -K: use given function name to generate completions.
|
||||||
|
# * +: specifies 'alternative' completion, where options after the '+' are only
|
||||||
|
# used if the completion from the options before the '+' result in no matches.
|
||||||
|
# * -f: when function generates no results, use filenames.
|
||||||
|
# * positional args: program names to complete for.
|
||||||
|
# Registers the completion for both program names: `invoke` and `inv`.
compctl -K _complete_invoke + -f invoke inv
|
||||||
|
|
||||||
|
# vim: set ft=sh :
|
||||||
|
|
@ -1,9 +1,13 @@
|
|||||||
from local_whisper.inference import transcribe
|
|
||||||
from local_whisper.audio import load_audio
|
from local_whisper.audio import load_audio
|
||||||
|
from local_whisper.inference import transcribe
|
||||||
|
|
||||||
|
|
||||||
def test_transcribe(sample_audio):
|
def test_transcribe(sample_audio):
|
||||||
with open(sample_audio, mode="rb") as af:
|
with open(sample_audio, mode="rb") as af:
|
||||||
audio = load_audio(af)
|
audio = load_audio(af)
|
||||||
result = transcribe(audio)
|
result = transcribe(audio)
|
||||||
assert result["text"].strip() == "Let's see, right now I'm playing Horizon Zero Dawn. I also had just recently finished BioShock Infinite."
|
assert (
|
||||||
|
result["text"].strip().lower()
|
||||||
|
== "Let's see, right now I'm playing Horizon Zero Dawn."
|
||||||
|
" I also had just recently finished BioShock Infinite.".lower()
|
||||||
|
)
|
||||||
|
@ -0,0 +1,29 @@
|
|||||||
|
import os
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from pydantic.types import SecretStr
|
||||||
|
|
||||||
|
from local_whisper.settings import WhisperSettings
|
||||||
|
|
||||||
|
SETTING_DEFAULTS = {"BASE_ASR_MODEL": "medium.en", "DEFAULT_SAMPLE_RATE": 16000}
|
||||||
|
|
||||||
|
|
||||||
|
def test_setting_defaults():
    """Regression test for the settings schema and its default values.

    With the environment cleared, ``WhisperSettings`` must expose exactly as
    many fields as ``SETTING_DEFAULTS`` lists, and each listed field must
    hold its expected default.
    """
    with patch.dict(os.environ, {}, clear=True):
        settings = WhisperSettings()
        # Catches fields added to or removed from the schema without a
        # matching update to SETTING_DEFAULTS.
        assert len(settings.dict()) == len(SETTING_DEFAULTS)
        for name, expected in SETTING_DEFAULTS.items():
            value = getattr(settings, name.lower())
            # Secret fields hide their content; compare the underlying value.
            if isinstance(value, SecretStr):
                value = value.get_secret_value()
            # This comparison was previously a bare expression, so a wrong
            # default could never fail the test.
            assert value == expected, f"unexpected default for {name}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_with_envvar_prefix():
    """A ``WHISPER_``-prefixed environment variable overrides the default."""
    env_overrides = {"WHISPER_DEFAULT_SAMPLE_RATE": "22500"}
    with patch.dict(os.environ, env_overrides, clear=True):
        configured = WhisperSettings()
        # The string from the environment is expected back as an int.
        assert configured.default_sample_rate == 22500
|
Loading…
Reference in New Issue