Compare commits
4 Commits
drew/3-whi
...
master
Author | SHA1 | Date |
---|---|---|
Drew Bednar | 0ce177b290 | 2 years ago |
Drew Bednar | 3bf33843f0 | 2 years ago |
Drew Bednar | 76940cea16 | 2 years ago |
Drew Bednar | 6401651471 | 2 years ago |
@ -1,3 +1,22 @@
|
|||||||
# LocalWhisper
|
# LocalWhisper
|
||||||
|
|
||||||
Making OpenAI's open-source Whisper available locally via a REST API.
|
Making OpenAI's open-source Whisper available locally via a REST API.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
- [x] Transcription
|
||||||
|
- [ ] Translation (Not-planned, but open to PRs)
|
||||||
|
|
||||||
|
## Usage Example
|
||||||
|
|
||||||
|
Similar to the OpenAI API we can post an audio file to `/audio/transcriptions` as a `multipart/form-data` type.
|
||||||
|
|
||||||
|
```
|
||||||
|
curl --request POST \
|
||||||
|
--url http://localhost:9000/audio/transcriptions \
|
||||||
|
--header 'Content-Type: multipart/form-data' \
|
||||||
|
--header 'Accept: application/json' \
|
||||||
|
--form 'file=@/path/to/file/example.wav;type=audio/wav' \
|
||||||
|
--form 'model=whisper-1'
|
||||||
|
```
|
||||||
|
|
||||||
|
At present the `model` form field is not required or supported. The `WHISPER_BASE_ASR_MODEL` server configuration will be used to determine the Whisper model to use.
|
||||||
|
@ -0,0 +1,22 @@
|
|||||||
|
from pydantic import BaseSettings, Field
|
||||||
|
|
||||||
|
|
||||||
|
class WhisperSettings(BaseSettings):
    """Whisper application settings.

    Values are loaded from environment variables whose names carry the
    "WHISPER_" prefix (for example ``WHISPER_BASE_ASR_MODEL`` or
    ``WHISPER_DEFAULT_SAMPLE_RATE``). A field falls back to its default
    when the corresponding variable is not set.
    """

    # Name of the Whisper model the server hosts.
    base_asr_model: str = Field(
        default="medium.en", description="The base whisper model to host."
    )
    # Target rate for resampling; must be positive so a misconfigured
    # environment fails at settings load instead of during audio processing.
    default_sample_rate: int = Field(
        default=16000,
        gt=0,
        description="The default sample rate used to resample the audio if necessary",
    )

    class Config:
        # Every setting is read from "WHISPER_<FIELD_NAME>".
        env_prefix = "WHISPER_"
|
||||||
|
|
||||||
|
|
||||||
|
whisper_settings = WhisperSettings()
|
@ -0,0 +1,71 @@
|
|||||||
|
#
|
||||||
|
# This file is autogenerated by pip-compile with Python 3.11
|
||||||
|
# by the following command:
|
||||||
|
#
|
||||||
|
# pip-compile --output-file=macos-py3.11-dev_requirements.txt dev_requirements.in
|
||||||
|
#
|
||||||
|
black==23.3.0
|
||||||
|
# via -r dev_requirements.in
|
||||||
|
build==0.10.0
|
||||||
|
# via
|
||||||
|
# -r dev_requirements.in
|
||||||
|
# pip-tools
|
||||||
|
cfgv==3.3.1
|
||||||
|
# via pre-commit
|
||||||
|
click==8.1.3
|
||||||
|
# via
|
||||||
|
# black
|
||||||
|
# pip-tools
|
||||||
|
distlib==0.3.6
|
||||||
|
# via virtualenv
|
||||||
|
exceptiongroup==1.1.1
|
||||||
|
# via -r dev_requirements.in
|
||||||
|
filelock==3.12.0
|
||||||
|
# via virtualenv
|
||||||
|
identify==2.5.24
|
||||||
|
# via pre-commit
|
||||||
|
iniconfig==2.0.0
|
||||||
|
# via pytest
|
||||||
|
invoke==2.1.2
|
||||||
|
# via -r dev_requirements.in
|
||||||
|
isort==5.12.0
|
||||||
|
# via -r dev_requirements.in
|
||||||
|
mypy-extensions==1.0.0
|
||||||
|
# via black
|
||||||
|
nodeenv==1.8.0
|
||||||
|
# via pre-commit
|
||||||
|
packaging==23.1
|
||||||
|
# via
|
||||||
|
# black
|
||||||
|
# build
|
||||||
|
# pytest
|
||||||
|
pathspec==0.11.1
|
||||||
|
# via black
|
||||||
|
pip-tools==6.13.0
|
||||||
|
# via -r dev_requirements.in
|
||||||
|
platformdirs==3.5.1
|
||||||
|
# via
|
||||||
|
# black
|
||||||
|
# virtualenv
|
||||||
|
pluggy==1.0.0
|
||||||
|
# via pytest
|
||||||
|
pre-commit==3.3.2
|
||||||
|
# via -r dev_requirements.in
|
||||||
|
pyproject-hooks==1.0.0
|
||||||
|
# via build
|
||||||
|
pytest==7.3.1
|
||||||
|
# via -r dev_requirements.in
|
||||||
|
pyyaml==6.0
|
||||||
|
# via pre-commit
|
||||||
|
ruff==0.0.269
|
||||||
|
# via -r dev_requirements.in
|
||||||
|
shellcheck-py==0.9.0.2
|
||||||
|
# via -r dev_requirements.in
|
||||||
|
virtualenv==20.23.0
|
||||||
|
# via pre-commit
|
||||||
|
wheel==0.40.0
|
||||||
|
# via pip-tools
|
||||||
|
|
||||||
|
# The following packages are considered to be unsafe in a requirements file:
|
||||||
|
# pip
|
||||||
|
# setuptools
|
@ -0,0 +1,76 @@
|
|||||||
|
#
|
||||||
|
# This file is autogenerated by pip-compile with Python 3.11
|
||||||
|
# by the following command:
|
||||||
|
#
|
||||||
|
# pip-compile --output-file=macos-py3.11-requirements.txt requirements.in
|
||||||
|
#
|
||||||
|
anyio==3.6.2
|
||||||
|
# via starlette
|
||||||
|
certifi==2023.5.7
|
||||||
|
# via requests
|
||||||
|
charset-normalizer==3.1.0
|
||||||
|
# via requests
|
||||||
|
click==8.1.3
|
||||||
|
# via uvicorn
|
||||||
|
fastapi==0.95.2
|
||||||
|
# via -r requirements.in
|
||||||
|
ffmpeg-python==0.2.0
|
||||||
|
# via openai-whisper
|
||||||
|
filelock==3.12.0
|
||||||
|
# via torch
|
||||||
|
future==0.18.3
|
||||||
|
# via ffmpeg-python
|
||||||
|
h11==0.14.0
|
||||||
|
# via uvicorn
|
||||||
|
idna==3.4
|
||||||
|
# via
|
||||||
|
# anyio
|
||||||
|
# requests
|
||||||
|
jinja2==3.1.2
|
||||||
|
# via torch
|
||||||
|
llvmlite==0.40.0
|
||||||
|
# via numba
|
||||||
|
markupsafe==2.1.2
|
||||||
|
# via jinja2
|
||||||
|
more-itertools==9.1.0
|
||||||
|
# via openai-whisper
|
||||||
|
mpmath==1.3.0
|
||||||
|
# via sympy
|
||||||
|
networkx==3.1
|
||||||
|
# via torch
|
||||||
|
numba==0.57.0
|
||||||
|
# via openai-whisper
|
||||||
|
numpy==1.24.3
|
||||||
|
# via
|
||||||
|
# numba
|
||||||
|
# openai-whisper
|
||||||
|
openai-whisper==20230314
|
||||||
|
# via -r requirements.in
|
||||||
|
pydantic==1.10.8
|
||||||
|
# via fastapi
|
||||||
|
python-multipart==0.0.6
|
||||||
|
# via -r requirements.in
|
||||||
|
regex==2023.5.5
|
||||||
|
# via tiktoken
|
||||||
|
requests==2.31.0
|
||||||
|
# via tiktoken
|
||||||
|
sniffio==1.3.0
|
||||||
|
# via anyio
|
||||||
|
starlette==0.27.0
|
||||||
|
# via fastapi
|
||||||
|
sympy==1.12
|
||||||
|
# via torch
|
||||||
|
tiktoken==0.3.1
|
||||||
|
# via openai-whisper
|
||||||
|
torch==2.0.1
|
||||||
|
# via openai-whisper
|
||||||
|
tqdm==4.65.0
|
||||||
|
# via openai-whisper
|
||||||
|
typing-extensions==4.6.0
|
||||||
|
# via
|
||||||
|
# pydantic
|
||||||
|
# torch
|
||||||
|
urllib3==2.0.2
|
||||||
|
# via requests
|
||||||
|
uvicorn==0.22.0
|
||||||
|
# via -r requirements.in
|
@ -0,0 +1,34 @@
|
|||||||
|
# Invoke tab-completion script to be sourced with the Z shell.
|
||||||
|
# Known to work on zsh 5.0.x, probably works on later 4.x releases as well (as
|
||||||
|
# it uses the older compctl completion system).
|
||||||
|
|
||||||
|
_complete_invoke() {
    # `words` contains the entire command string up til now (including
    # program name).
    #
    # We hand it to Invoke so it can figure out the current context: spit back
    # core options, task names, the current task's options, or some combo.
    #
    # Before doing so, we attempt to tease out any collection flag+arg so we
    # can ensure it is applied correctly.
    #
    # Declared `local` so the helper variable does not leak into the
    # interactive shell that sourced this script.
    local collection_arg=''
    if [[ "${words}" =~ "(-c|--collection) [^ ]+" ]]; then
        collection_arg=$MATCH
    fi
    # `reply` is the array of valid completions handed back to `compctl`.
    # Use ${=...} to force whitespace splitting in expansion of
    # $collection_arg
    reply=( $(invoke ${=collection_arg} --complete -- ${words}) )
}
|
||||||
|
|
||||||
|
|
||||||
|
# Tell shell builtin to use the above for completing our given binary name(s).
|
||||||
|
# * -K: use given function name to generate completions.
|
||||||
|
# * +: specifies 'alternative' completion, where options after the '+' are only
|
||||||
|
# used if the completion from the options before the '+' result in no matches.
|
||||||
|
# * -f: when function generates no results, use filenames.
|
||||||
|
# * positional args: program names to complete for.
|
||||||
|
compctl -K _complete_invoke + -f invoke inv
|
||||||
|
|
||||||
|
# vim: set ft=sh :
|
||||||
|
|
@ -1,9 +1,13 @@
|
|||||||
from local_whisper.inference import transcribe
|
|
||||||
from local_whisper.audio import load_audio
|
from local_whisper.audio import load_audio
|
||||||
|
from local_whisper.inference import transcribe
|
||||||
|
|
||||||
|
|
||||||
def test_transcribe(sample_audio):
|
def test_transcribe(sample_audio):
|
||||||
with open(sample_audio, mode="rb") as af:
|
with open(sample_audio, mode="rb") as af:
|
||||||
audio = load_audio(af)
|
audio = load_audio(af)
|
||||||
result = transcribe(audio)
|
result = transcribe(audio)
|
||||||
assert result["text"].strip() == "Let's see, right now I'm playing Horizon Zero Dawn. I also had just recently finished BioShock Infinite."
|
assert (
|
||||||
|
result["text"].strip().lower()
|
||||||
|
== "Let's see, right now I'm playing Horizon Zero Dawn."
|
||||||
|
" I also had just recently finished BioShock Infinite.".lower()
|
||||||
|
)
|
||||||
|
@ -0,0 +1,29 @@
|
|||||||
|
import os
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from pydantic.types import SecretStr
|
||||||
|
|
||||||
|
from local_whisper.settings import WhisperSettings
|
||||||
|
|
||||||
|
SETTING_DEFAULTS = {"BASE_ASR_MODEL": "medium.en", "DEFAULT_SAMPLE_RATE": 16000}
|
||||||
|
|
||||||
|
|
||||||
|
def test_setting_defaults():
    """Regression test for the settings schema.

    With the environment cleared, ``WhisperSettings`` must expose exactly as
    many fields as ``SETTING_DEFAULTS`` lists, and each field must hold the
    default recorded there.
    """
    with patch.dict(os.environ, {}, clear=True):
        whisper_settings = WhisperSettings()

        # A count mismatch means a setting was added or removed without
        # updating SETTING_DEFAULTS.
        assert len(whisper_settings.dict()) == len(SETTING_DEFAULTS)

        for name, expected in SETTING_DEFAULTS.items():
            value = getattr(whisper_settings, name.lower())
            # Secret fields are compared by their underlying value.
            if isinstance(value, SecretStr):
                value = value.get_secret_value()
            # The original compared without `assert`, so a wrong default
            # could never fail the test.
            assert value == expected, f"unexpected default for {name}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_with_envvar_prefix():
    """A "WHISPER_"-prefixed environment variable overrides the field default."""
    overridden_env = {"WHISPER_DEFAULT_SAMPLE_RATE": "22500"}
    with patch.dict(os.environ, overridden_env, clear=True):
        loaded = WhisperSettings()
        # The string from the environment is expected back as an int.
        assert loaded.default_sample_rate == 22500
|
Loading…
Reference in New Issue