From 3eac8f44cb954bd73660e37cc78350d88ea8de3a Mon Sep 17 00:00:00 2001
From: Drew Bednar
Date: Tue, 23 May 2023 10:26:42 -0400
Subject: [PATCH] Working initial implementation

---
 .gitignore                  |   1 -
 .pre-commit-config.yaml     |  29 ++++++++++
 README.md                   |   2 +-
 local_whisper/webservice.py | 107 ++++++++++++++++++++++++++++++++++++
 pyinvoke/autocomplete.sh    |   1 -
 pyproject.toml              |   4 ++
 requirements.in             |   4 +-
 requirements.txt            |   8 +++
 8 files changed, 152 insertions(+), 4 deletions(-)
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 local_whisper/webservice.py

diff --git a/.gitignore b/.gitignore
index 5d381cc..489bdd0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -159,4 +159,3 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
-
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..f63adbf
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,29 @@
+default_stages: [commit, push]
+repos:
+  - repo: https://github.com/shellcheck-py/shellcheck-py
+    rev: v0.9.0.2
+    hooks:
+      - id: shellcheck
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.2.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+  - repo: https://github.com/psf/black
+    rev: 23.3.0
+    hooks:
+      - id: black
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    # Ruff version.
+    rev: "v0.0.263"
+    hooks:
+      - id: ruff
+        # Enable auto fix
+        # args: [--fix, --exit-non-zero-on-fix]
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        name: isort (python)
diff --git a/README.md b/README.md
index 8cc42b2..0e63c16 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,3 @@
 # LocalWhisper
 
-Making OpenAPI's Open Whisper available via ReST API locally.
\ No newline at end of file
+Making OpenAI's Whisper available via a REST API locally.
diff --git a/local_whisper/webservice.py b/local_whisper/webservice.py
new file mode 100644
index 0000000..392a4ea
--- /dev/null
+++ b/local_whisper/webservice.py
@@ -0,0 +1,107 @@
+import os
+from threading import Lock
+from typing import BinaryIO
+
+import ffmpeg
+import numpy as np
+import torch
+import whisper
+from fastapi import FastAPI, File, UploadFile
+from fastapi.responses import RedirectResponse
+
+from . import __version__
+
+# TODO use pydantic config
+model_name = os.getenv("ASR_MODEL", "base")
+if torch.cuda.is_available():
+    model = whisper.load_model(model_name).cuda()
+else:
+    model = whisper.load_model(model_name)
+model_lock = Lock()
+
+
+# TODO use pydantic config
+SAMPLE_RATE = 16000
+
+
+# TODO move transcribe to a modeling worker
+def transcribe(
+    audio,
+    # task: Union[str, None],
+    # language: Union[str, None],
+    # initial_prompt: Union[str, None],
+):
+    # options_dict = {"task" : task}
+    # if language:
+    #     options_dict["language"] = language
+    # if initial_prompt:
+    #     options_dict["initial_prompt"] = initial_prompt
+    with model_lock:
+        # result = model.transcribe(audio, **options_dict)
+        result = model.transcribe(audio)
+
+    return result
+
+
+# TODO probably can offload this on a worker queue too
+def load_audio(file: BinaryIO, encode=True, sr: int = SAMPLE_RATE):
+    """
+    Open an audio file object and read as mono waveform, resampling as necessary.
+    Modified from https://github.com/openai/whisper/blob/main/whisper/audio.py
+    to accept a file object
+
+    Parameters
+    ----------
+    file: BinaryIO
+        The audio file-like object
+    encode: Boolean
+        If true, encode audio stream to WAV before sending to whisper
+    sr: int
+        The sample rate to resample the audio if necessary
+    Returns
+    -------
+    A NumPy array containing the audio waveform, in float32 dtype.
+    """
+    if encode:
+        try:
+            # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
+            # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
+            out, _ = (
+                ffmpeg.input("pipe:", threads=0)
+                .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
+                .run(
+                    cmd="ffmpeg",
+                    capture_stdout=True,
+                    capture_stderr=True,
+                    input=file.read(),
+                )
+            )
+        except ffmpeg.Error as e:
+            raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
+    else:
+        out = file.read()
+
+    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
+
+
+app = FastAPI(
+    title="Local Whisper",
+    description="Making OpenAI's Whisper available via a REST API locally.",
+    version=__version__,
+    swagger_ui_parameters={"defaultModelsExpandDepth": -1},
+    license_info={
+        "name": "MIT License",
+    },
+)
+
+
+@app.get("/", response_class=RedirectResponse, include_in_schema=False)
+async def index():
+    return "/docs"
+
+
+@app.post("/audio/transcriptions")
+async def asr(file: UploadFile = File(...)):
+    if file.content_type.startswith("audio/"):
+        transcription = transcribe(load_audio(file.file))
+        return {"text": transcription["text"]}
diff --git a/pyinvoke/autocomplete.sh b/pyinvoke/autocomplete.sh
index 6279afd..d37d964 100644
--- a/pyinvoke/autocomplete.sh
+++ b/pyinvoke/autocomplete.sh
@@ -30,4 +30,3 @@ _complete_invoke() {
 complete -F _complete_invoke -o default invoke inv
 
 # vim: set ft=sh :
-
diff --git a/pyproject.toml b/pyproject.toml
index 97ee943..cd7b81a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,10 @@ savant-cli = "local_whisper.cli:main"
 
 [project.optional-dependencies]
 whisper = ["openai-whisper"]
 
+[tool.ruff]
+# Allow lines to be as long as 120 characters.
+line-length = 120
+
 [tool.setuptools]
 packages = ["local_whisper"]
diff --git a/requirements.in b/requirements.in
index 5855832..22f5b75 100644
--- a/requirements.in
+++ b/requirements.in
@@ -1,2 +1,4 @@
 fastapi
-openai-whisper
\ No newline at end of file
+openai-whisper
+uvicorn
+python-multipart
diff --git a/requirements.txt b/requirements.txt
index 46d14c4..eb6c4b9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,6 +10,8 @@ certifi==2023.5.7
     # via requests
 charset-normalizer==3.1.0
     # via requests
+click==8.1.3
+    # via uvicorn
 cmake==3.26.3
     # via triton
 fastapi==0.95.2
@@ -22,6 +24,8 @@ filelock==3.12.0
     #   triton
 future==0.18.3
     # via ffmpeg-python
+h11==0.14.0
+    # via uvicorn
 idna==3.4
     # via
     #   anyio
@@ -75,6 +79,8 @@ openai-whisper==20230314
     # via -r requirements.in
 pydantic==1.10.7
     # via fastapi
+python-multipart==0.0.6
+    # via -r requirements.in
 regex==2023.5.5
     # via tiktoken
 requests==2.31.0
@@ -103,6 +109,8 @@ typing-extensions==4.5.0
     #   torch
 urllib3==2.0.2
     # via requests
+uvicorn==0.22.0
+    # via -r requirements.in
 wheel==0.40.0
     # via
     #   nvidia-cublas-cu11
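
Editorial notes (sketches, not part of the committed diff):

A note on load_audio: with encode=True the ffmpeg subprocess emits raw 16 kHz mono s16le PCM, and with encode=False the caller must already supply bytes in that format, because the final line only reinterprets and rescales the buffer. A minimal illustration of that scaling, using the same expression as the patch:

    import numpy as np

    # Three 16-bit samples: silence, half scale, most negative value.
    pcm_bytes = np.array([0, 16384, -32768], dtype=np.int16).tobytes()

    # Same expression as the end of load_audio: reinterpret as int16, rescale to float32 in [-1.0, 1.0).
    waveform = np.frombuffer(pcm_bytes, np.int16).flatten().astype(np.float32) / 32768.0
    print(waveform)  # [ 0.   0.5 -1. ]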
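
One behavior worth flagging in the new /audio/transcriptions handler: when the uploaded part does not declare an audio/* content type, the function falls through and FastAPI returns a 200 response with a null body. A possible follow-up, sketched here rather than included in the patch (the 415 status and error message are editorial choices), would be to reject such uploads explicitly:

    from fastapi import File, HTTPException, UploadFile

    @app.post("/audio/transcriptions")
    async def asr(file: UploadFile = File(...)):
        # Reject anything not declared as audio/* instead of silently returning null.
        if not file.content_type or not file.content_type.startswith("audio/"):
            raise HTTPException(status_code=415, detail="Expected an audio/* upload")
        transcription = transcribe(load_audio(file.file))
        return {"text": transcription["text"]}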
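
To exercise the service end to end: the patch pins uvicorn and python-multipart, so the app can be served with `uvicorn local_whisper.webservice:app` (module path and uvicorn's default port 8000 assumed here). A client-side sketch using the requests package already pinned in requirements.txt; sample.wav is a placeholder file name:

    import requests

    URL = "http://localhost:8000/audio/transcriptions"  # assumes uvicorn's default host/port

    with open("sample.wav", "rb") as audio_file:
        # The handler checks UploadFile.content_type, so the multipart part must declare audio/*.
        files = {"file": ("sample.wav", audio_file, "audio/wav")}
        response = requests.post(URL, files=files)

    response.raise_for_status()
    print(response.json()["text"])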