Compare commits

...

4 Commits

Author SHA1 Message Date
Drew Bednar 0ce177b290 Adding usage example 2 years ago
Drew Bednar 3bf33843f0 Adding zshell autocomplete script 2 years ago
Drew Bednar 76940cea16 #3 Adding WhisperSettings (#7)
Simply adding some settings using pydantic.

Reviewed-on: #7
2 years ago
Drew Bednar 6401651471 #4 Adding macos compiled requirements (#6)
#4 Adding macos compiled requirements.

Co-authored-by: Drew Bednar <drew@androiddrew.com>
Reviewed-on: #6
2 years ago

3
.gitignore vendored

@ -158,4 +158,5 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear # and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder. # option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/ .idea/
.vscode/

@ -1,3 +1,22 @@
# LocalWhisper # LocalWhisper
Making OpenAPI's Open Whisper available via ReST API locally. Making OpenAPI's Open Whisper available via ReST API locally.
## Features
- [x] Transcription
- [ ] Translation (Not-planned, but open to PRs)
## Usage Example
Similar to the OpenAI API we can post an audio file to `/audio/transcriptions` as a `multipart/form-data` type.
```
curl --request POST \
--url http://localhost:9000/audio/transcriptions \
--header 'Content-Type: multipart/form-data' \
--header 'Accept: application/json' \
  --form 'file=@/path/to/file/example.wav;type=audio/wav' \
--form 'model=whisper-1'
```
At present the `model` form field is not required or supported. The `WHISPER_BASE_ASR_MODEL` server configuration will be used to determine the Whisper model to use.

@ -3,7 +3,9 @@ from typing import BinaryIO
import ffmpeg import ffmpeg
import numpy as np import numpy as np
DEFAULT_SAMPLE_RATE = 16000 from .settings import whisper_settings
DEFAULT_SAMPLE_RATE = whisper_settings.default_sample_rate
# TODO probably can offload this on a worker queue too # TODO probably can offload this on a worker queue too

@ -4,14 +4,17 @@ from threading import Lock
import torch import torch
import whisper import whisper
from .settings import whisper_settings
# TODO use pydantic config # TODO use pydantic config
model_name = os.getenv("ASR_MODEL", "base") model_name = whisper_settings.base_asr_model
if torch.cuda.is_available(): if torch.cuda.is_available():
model = whisper.load_model(model_name).cuda() model = whisper.load_model(model_name).cuda()
else: else:
model = whisper.load_model(model_name) model = whisper.load_model(model_name)
model_lock = Lock() model_lock = Lock()
# TODO move transcribe to a modeling worker # TODO move transcribe to a modeling worker
def transcribe(audio): def transcribe(audio):
# options_dict = {"task" : task} # options_dict = {"task" : task}

@ -0,0 +1,22 @@
from pydantic import BaseSettings, Field


class WhisperSettings(BaseSettings):
    """Whisper Application Settings.

    All environment variables supplied should be prefixed with "WHISPER_"
    (e.g. WHISPER_BASE_ASR_MODEL, WHISPER_DEFAULT_SAMPLE_RATE).
    """

    # Name of the whisper model checkpoint loaded at startup.
    base_asr_model: str = Field(
        default="medium.en", description="The base whisper model to host."
    )
    # Target rate (Hz) used when resampling incoming audio.
    default_sample_rate: int = Field(
        default=16000,
        description="The default sample rate used to resample the audio if necessary",
    )

    class Config:
        # pydantic v1 BaseSettings: environment variables are read with this prefix.
        env_prefix = "WHISPER_"


# Module-level singleton shared by the audio and inference modules.
whisper_settings = WhisperSettings()

@ -0,0 +1,71 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile --output-file=macos-py3.11-dev_requirements.txt dev_requirements.in
#
black==23.3.0
# via -r dev_requirements.in
build==0.10.0
# via
# -r dev_requirements.in
# pip-tools
cfgv==3.3.1
# via pre-commit
click==8.1.3
# via
# black
# pip-tools
distlib==0.3.6
# via virtualenv
exceptiongroup==1.1.1
# via -r dev_requirements.in
filelock==3.12.0
# via virtualenv
identify==2.5.24
# via pre-commit
iniconfig==2.0.0
# via pytest
invoke==2.1.2
# via -r dev_requirements.in
isort==5.12.0
# via -r dev_requirements.in
mypy-extensions==1.0.0
# via black
nodeenv==1.8.0
# via pre-commit
packaging==23.1
# via
# black
# build
# pytest
pathspec==0.11.1
# via black
pip-tools==6.13.0
# via -r dev_requirements.in
platformdirs==3.5.1
# via
# black
# virtualenv
pluggy==1.0.0
# via pytest
pre-commit==3.3.2
# via -r dev_requirements.in
pyproject-hooks==1.0.0
# via build
pytest==7.3.1
# via -r dev_requirements.in
pyyaml==6.0
# via pre-commit
ruff==0.0.269
# via -r dev_requirements.in
shellcheck-py==0.9.0.2
# via -r dev_requirements.in
virtualenv==20.23.0
# via pre-commit
wheel==0.40.0
# via pip-tools
# The following packages are considered to be unsafe in a requirements file:
# pip
# setuptools

@ -0,0 +1,76 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile --output-file=macos-py3.11-requirements.txt requirements.in
#
anyio==3.6.2
# via starlette
certifi==2023.5.7
# via requests
charset-normalizer==3.1.0
# via requests
click==8.1.3
# via uvicorn
fastapi==0.95.2
# via -r requirements.in
ffmpeg-python==0.2.0
# via openai-whisper
filelock==3.12.0
# via torch
future==0.18.3
# via ffmpeg-python
h11==0.14.0
# via uvicorn
idna==3.4
# via
# anyio
# requests
jinja2==3.1.2
# via torch
llvmlite==0.40.0
# via numba
markupsafe==2.1.2
# via jinja2
more-itertools==9.1.0
# via openai-whisper
mpmath==1.3.0
# via sympy
networkx==3.1
# via torch
numba==0.57.0
# via openai-whisper
numpy==1.24.3
# via
# numba
# openai-whisper
openai-whisper==20230314
# via -r requirements.in
pydantic==1.10.8
# via fastapi
python-multipart==0.0.6
# via -r requirements.in
regex==2023.5.5
# via tiktoken
requests==2.31.0
# via tiktoken
sniffio==1.3.0
# via anyio
starlette==0.27.0
# via fastapi
sympy==1.12
# via torch
tiktoken==0.3.1
# via openai-whisper
torch==2.0.1
# via openai-whisper
tqdm==4.65.0
# via openai-whisper
typing-extensions==4.6.0
# via
# pydantic
# torch
urllib3==2.0.2
# via requests
uvicorn==0.22.0
# via -r requirements.in

@ -0,0 +1,34 @@
# Invoke tab-completion script to be sourced with the Z shell.
# Known to work on zsh 5.0.x, probably works on later 4.x releases as well (as
# it uses the older compctl completion system).
_complete_invoke() {
    # `words` contains the entire command string up til now (including
    # program name).
    #
    # We hand it to Invoke so it can figure out the current context: spit back
    # core options, task names, the current task's options, or some combo.
    #
    # Before doing so, we attempt to tease out any collection flag+arg so we
    # can ensure it is applied correctly.
    collection_arg=''
    # NOTE(review): relies on zsh setting $MATCH after a successful `=~`
    # match (default behavior unless the BASH_REMATCH option is set).
    if [[ "${words}" =~ "(-c|--collection) [^ ]+" ]]; then
        collection_arg=$MATCH
    fi
    # `reply` is the array of valid completions handed back to `compctl`.
    # Use ${=...} to force whitespace splitting in expansion of
    # $collection_arg
    reply=( $(invoke ${=collection_arg} --complete -- ${words}) )
}
# Tell shell builtin to use the above for completing our given binary name(s).
# * -K: use given function name to generate completions.
# * +: specifies 'alternative' completion, where options after the '+' are only
#   used if the completion from the options before the '+' result in no matches.
# * -f: when function generates no results, use filenames.
# * positional args: program names to complete for.
compctl -K _complete_invoke + -f invoke inv
# vim: set ft=sh :

@ -26,6 +26,6 @@ def build(c):
@task @task
def serve_dev(c): def serve_dev(c, port=9000):
"""Runs the FastAPI webservice""" """Runs the FastAPI webservice"""
c.run("uvicorn local_whisper.webservice:app --reload") c.run(f"uvicorn local_whisper.webservice:app --reload --port {port}")

@ -1,9 +1,13 @@
from local_whisper.inference import transcribe
from local_whisper.audio import load_audio from local_whisper.audio import load_audio
from local_whisper.inference import transcribe
def test_transcribe(sample_audio): def test_transcribe(sample_audio):
with open(sample_audio, mode="rb") as af: with open(sample_audio, mode="rb") as af:
audio = load_audio(af) audio = load_audio(af)
result = transcribe(audio) result = transcribe(audio)
assert result["text"].strip() == "Let's see, right now I'm playing Horizon Zero Dawn. I also had just recently finished BioShock Infinite." assert (
result["text"].strip().lower()
== "Let's see, right now I'm playing Horizon Zero Dawn."
" I also had just recently finished BioShock Infinite.".lower()
)

@ -0,0 +1,29 @@
import os
from unittest.mock import patch
from pydantic.types import SecretStr
from local_whisper.settings import WhisperSettings
SETTING_DEFAULTS = {"BASE_ASR_MODEL": "medium.en", "DEFAULT_SAMPLE_RATE": 16000}
def test_setting_defaults():
    """Regression test for settings schema.

    Clears the environment so only field defaults apply, then verifies the
    schema exposes exactly the expected fields with the expected defaults.
    """
    with patch.dict(os.environ, {}, clear=True):
        whisper_settings = WhisperSettings()
        assert len(whisper_settings.dict()) == len(SETTING_DEFAULTS)
        for key, expected in SETTING_DEFAULTS.items():
            setting_value = getattr(whisper_settings, key.lower())
            # SecretStr masks its value; unwrap before comparing to the raw default.
            unmasked_setting = (
                setting_value.get_secret_value()
                if isinstance(setting_value, SecretStr)
                else setting_value
            )
            # Bug fix: this comparison previously lacked `assert`, so the loop
            # could never fail regardless of the settings' actual values.
            assert unmasked_setting == expected
def test_with_envvar_prefix():
    """A WHISPER_-prefixed environment variable overrides the field default."""
    env = {"WHISPER_DEFAULT_SAMPLE_RATE": "22500"}
    with patch.dict(os.environ, env, clear=True):
        settings = WhisperSettings()
        assert settings.default_sample_rate == 22500
Loading…
Cancel
Save