Working initial implementation

pull/6/head
Drew Bednar 2 years ago
parent 4fddd87271
commit 3eac8f44cb

1
.gitignore vendored

@ -159,4 +159,3 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear # and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder. # option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/ #.idea/

@ -0,0 +1,29 @@
# pre-commit configuration: hooks listed here run on the git stages named in
# `default_stages`.
default_stages: [commit, push]
repos:
  # Shell script linting.
  - repo: https://github.com/shellcheck-py/shellcheck-py
    rev: v0.9.0.2
    hooks:
      - id: shellcheck
  # Generic whitespace / YAML / large-file hygiene checks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v3.2.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-added-large-files
  # Python code formatting.
  - repo: https://github.com/psf/black
    rev: 23.3.0
    hooks:
      - id: black
  # Python linting.
  - repo: https://github.com/charliermarsh/ruff-pre-commit
    # Ruff version.
    rev: "v0.0.263"
    hooks:
      - id: ruff
        # Enable auto fix
        # args: [--fix, --exit-non-zero-on-fix]
  # Python import sorting.
  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort
        name: isort (python)

@ -1,3 +1,3 @@
# LocalWhisper # LocalWhisper
Making OpenAI's Whisper available via REST API locally. Making OpenAI's Whisper available via REST API locally.

@ -0,0 +1,107 @@
import os
from threading import Lock
from typing import BinaryIO

import ffmpeg
import numpy as np
import torch
import whisper
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import RedirectResponse

from . import __version__
# TODO use pydantic config
# Name of the Whisper checkpoint to load; read from the ASR_MODEL environment
# variable, falling back to "base" when it is unset.
model_name = os.getenv("ASR_MODEL", "base")

# The model is loaded once, at import time, and moved to the GPU when CUDA is
# available.
model = whisper.load_model(model_name)
if torch.cuda.is_available():
    model = model.cuda()

# Held around every call into `model` so transcriptions run one at a time.
model_lock = Lock()

# TODO use pydantic config
# Default sample rate that uploaded audio is resampled to before transcription.
SAMPLE_RATE = 16000
# TODO move transcribe to a modeling worker
def transcribe(audio):
    """Run the module-level Whisper model on ``audio`` and return its result.

    The model call is made while holding ``model_lock``, so concurrent callers
    are served one at a time.

    NOTE: passing ``task``, ``language`` and ``initial_prompt`` through to the
    model is planned but not wired up yet; the model runs with its defaults.

    Parameters
    ----------
    audio
        The audio to transcribe, handed to ``model.transcribe`` unchanged.

    Returns
    -------
    Whatever ``model.transcribe`` returns for ``audio``.
    """
    with model_lock:
        return model.transcribe(audio)
# TODO probably can offload this on a worker queue too
def load_audio(file: BinaryIO, encode=True, sr: int = SAMPLE_RATE):
    """
    Read an audio file object into a mono float32 waveform.

    Adapted from https://github.com/openai/whisper/blob/main/whisper/audio.py
    so that it takes a file object rather than a path.

    Parameters
    ----------
    file: BinaryIO
        File-like object holding the audio; it is read to the end.
    encode: Boolean
        If true, pipe the bytes through the ffmpeg CLI to obtain mono
        signed 16-bit little-endian PCM at ``sr``. If false, the bytes are
        used as they are and interpreted as 16-bit integer samples.
    sr: int
        The sample rate ffmpeg resamples to when ``encode`` is true.

    Returns
    -------
    A NumPy array containing the audio waveform, in float32 dtype, scaled by
    1/32768.

    Raises
    ------
    RuntimeError
        If the ffmpeg subprocess reports an error.
    """
    pcm = file.read()

    if encode:
        # Decoding, down-mixing and resampling happen in an ffmpeg subprocess.
        # Requires the ffmpeg CLI and the `ffmpeg-python` package.
        stream = ffmpeg.input("pipe:", threads=0)
        stream = stream.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
        try:
            pcm, _ = stream.run(
                cmd="ffmpeg",
                capture_stdout=True,
                capture_stderr=True,
                input=pcm,
            )
        except ffmpeg.Error as e:
            raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

    samples = np.frombuffer(pcm, np.int16).flatten()
    return samples.astype(np.float32) / 32768.0
# The ASGI application; its metadata is shown on the generated docs page.
app = FastAPI(
    title="Local Whisper",
    description="Making OpenAI's Whisper available via REST API locally.",
    version=__version__,
    # A depth of -1 hides the models/schemas section in Swagger UI.
    swagger_ui_parameters={"defaultModelsExpandDepth": -1},
    license_info={
        "name": "MIT License",
    },
)
@app.get("/", response_class=RedirectResponse, include_in_schema=False)
async def index():
    """Send requests for the root path on to the API docs page at ``/docs``."""
    docs_path = "/docs"
    return docs_path
@app.post("/audio/transcriptions")
async def asr(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return the recognised text.

    Parameters
    ----------
    file: UploadFile
        The multipart upload; its content type must start with ``audio/``.

    Returns
    -------
    A dict of the form ``{"text": <transcription text>}``.

    Raises
    ------
    HTTPException
        With status 415 when the upload has no content type or one that is
        not ``audio/*``.
    """
    # The content type may be missing entirely; treat that like any other
    # non-audio upload instead of failing on `None.startswith`.
    content_type = file.content_type or ""
    if not content_type.startswith("audio/"):
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported media type {content_type!r}; expected an audio/* upload.",
        )

    # NOTE(review): decoding and transcription run synchronously inside this
    # coroutine; see the TODOs about moving them to a worker.
    transcription = transcribe(load_audio(file.file))
    return {"text": transcription["text"]}

@ -30,4 +30,3 @@ _complete_invoke() {
complete -F _complete_invoke -o default invoke inv complete -F _complete_invoke -o default invoke inv
# vim: set ft=sh : # vim: set ft=sh :

@ -23,6 +23,10 @@ savant-cli = "local_whisper.cli:main"
[project.optional-dependencies] [project.optional-dependencies]
whisper = ["openai-whisper"] whisper = ["openai-whisper"]
[tool.ruff]
# Allow lines to be as long as 120 characters.
line-length = 120
[tool.setuptools] [tool.setuptools]
packages = ["local_whisper"] packages = ["local_whisper"]

@ -1,2 +1,4 @@
fastapi fastapi
openai-whisper openai-whisper
uvicorn
python-multipart

@ -10,6 +10,8 @@ certifi==2023.5.7
# via requests # via requests
charset-normalizer==3.1.0 charset-normalizer==3.1.0
# via requests # via requests
click==8.1.3
# via uvicorn
cmake==3.26.3 cmake==3.26.3
# via triton # via triton
fastapi==0.95.2 fastapi==0.95.2
@ -22,6 +24,8 @@ filelock==3.12.0
# triton # triton
future==0.18.3 future==0.18.3
# via ffmpeg-python # via ffmpeg-python
h11==0.14.0
# via uvicorn
idna==3.4 idna==3.4
# via # via
# anyio # anyio
@ -75,6 +79,8 @@ openai-whisper==20230314
# via -r requirements.in # via -r requirements.in
pydantic==1.10.7 pydantic==1.10.7
# via fastapi # via fastapi
python-multipart==0.0.6
# via -r requirements.in
regex==2023.5.5 regex==2023.5.5
# via tiktoken # via tiktoken
requests==2.31.0 requests==2.31.0
@ -103,6 +109,8 @@ typing-extensions==4.5.0
# torch # torch
urllib3==2.0.2 urllib3==2.0.2
# via requests # via requests
uvicorn==0.22.0
# via -r requirements.in
wheel==0.40.0 wheel==0.40.0
# via # via
# nvidia-cublas-cu11 # nvidia-cublas-cu11

Loading…
Cancel
Save