First test

3 years ago · 1c5ee1fc27
parent 3eac8f44cb
commit 1c5ee1fc27
9 changed files with 91 additions and 47 deletions
--- a/dev_requirements.in
+++ b/dev_requirements.in
@ -1,5 +1,6 @@
 black
 build
 exceptiongroup
 invoke
 isort
 pip-tools
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.11
+# This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
 #    pip-compile dev_requirements.in
@ -18,6 +18,10 @@ click==8.1.3
    #   pip-tools
 distlib==0.3.6
    # via virtualenv
 exceptiongroup==1.1.1
    # via
    #   -r dev_requirements.in
    #   pytest
 filelock==3.12.0
    # via virtualenv
 identify==2.5.23
@ -59,6 +63,12 @@ ruff==0.0.263
    # via -r dev_requirements.in
 shellcheck-py==0.9.0.2
    # via -r dev_requirements.in
 tomli==2.0.1
    # via
    #   black
    #   build
    #   pyproject-hooks
    #   pytest
 virtualenv==20.23.0
    # via pre-commit
 wheel==0.40.0
--- a/local_whisper/audio.py
+++ b/local_whisper/audio.py
@ -0,0 +1,47 @@
 from typing import BinaryIO
 import ffmpeg
 import numpy as np
 DEFAULT_SAMPLE_RATE = 16000
 # TODO probably can offload this on a worker queue too
 def load_audio(file: BinaryIO, encode=True, sr: int = DEFAULT_SAMPLE_RATE):
    """
    Read an audio file object into a mono float32 waveform.

    Adapted from https://github.com/openai/whisper/blob/main/whisper/audio.py
    so that it takes an open file object instead of a path.

    Parameters
    ----------
    file: BinaryIO
        Binary file-like object; it is consumed with a single ``read()``.
    encode: Boolean
        When true, the bytes are piped through the ffmpeg CLI, which emits
        mono 16-bit PCM at ``sr`` Hz. When false, the bytes are used as-is
        and interpreted as 16-bit integer samples.
    sr: int
        Sample rate handed to ffmpeg as the output rate (only used when
        ``encode`` is true).

    Returns
    -------
    A one-dimensional NumPy float32 array: the 16-bit samples divided by 32768.

    Raises
    ------
    RuntimeError
        If ffmpeg reports an error; the message carries ffmpeg's stderr.
    """
    if not encode:
        pcm_bytes = file.read()
    else:
        try:
            # Decoding, down-mixing and resampling all happen in an ffmpeg subprocess.
            # Needs the ffmpeg CLI and the `ffmpeg-python` package to be installed.
            pipeline = ffmpeg.input("pipe:", threads=0).output(
                "-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr
            )
            pcm_bytes, _stderr = pipeline.run(
                cmd="ffmpeg",
                capture_stdout=True,
                capture_stderr=True,
                input=file.read(),
            )
        except ffmpeg.Error as e:
            raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

    samples = np.frombuffer(pcm_bytes, np.int16).flatten()
    return samples.astype(np.float32) / 32768.0
--- a/local_whisper/webservice.py
+++ b/local_whisper/webservice.py
@ -1,15 +1,13 @@
 import os
 from threading import Lock
 from typing import BinaryIO
 import ffmpeg
 import numpy as np
 import torch
 import whisper
 from fastapi import FastAPI, File, UploadFile
 from fastapi.responses import RedirectResponse
 from . import __version__
 from .audio import load_audio
 # TODO use pydantic config
 model_name = os.getenv("ASR_MODEL", "base")
@ -43,47 +41,6 @@ def transcribe(
    return result
 # TODO probably can offload this on a worker queue too
 def load_audio(file: BinaryIO, encode=True, sr: int = SAMPLE_RATE):
    """
    Turn an audio file object into a mono float32 waveform.

    Derived from https://github.com/openai/whisper/blob/main/whisper/audio.py,
    changed to work on a file object.

    Parameters
    ----------
    file: BinaryIO
        Binary file-like object, read in full with one ``read()`` call.
    encode: Boolean
        If true, the bytes are sent through the ffmpeg CLI, which outputs
        mono 16-bit PCM at ``sr`` Hz. If false, the bytes are taken as they
        are and interpreted as 16-bit integer samples.
    sr: int
        Output sample rate given to ffmpeg (only used when ``encode`` is true).

    Returns
    -------
    A one-dimensional NumPy float32 array: the 16-bit samples divided by 32768.

    Raises
    ------
    RuntimeError
        If ffmpeg reports an error; the message includes ffmpeg's stderr.
    """
    if encode:
        try:
            # An ffmpeg subprocess does the decoding, down-mixing and resampling.
            # Requires the ffmpeg CLI and the `ffmpeg-python` package.
            source = ffmpeg.input("pipe:", threads=0)
            sink = source.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
            raw, _ = sink.run(
                cmd="ffmpeg",
                capture_stdout=True,
                capture_stderr=True,
                input=file.read(),
            )
        except ffmpeg.Error as e:
            raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
    else:
        raw = file.read()

    int_samples = np.frombuffer(raw, np.int16).flatten()
    waveform = int_samples.astype(np.float32)
    return waveform / 32768.0
 app = FastAPI(
    title="Local Whisper",
    description="Making OpenAPI's Open Whisper available via ReST API locally.",
--- a/pyinvoke/dev.py
+++ b/pyinvoke/dev.py
@ -23,3 +23,9 @@ def install_pre_commit(c):
 def build(c):
    """Builds wheel and source distributions of project."""
    pass
@task
def serve_dev(c):
    """Run the FastAPI webservice under uvicorn with ``--reload`` enabled."""
    # The command string targets the `app` object in local_whisper.webservice.
    command = "uvicorn local_whisper.webservice:app --reload"
    c.run(command)
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@ -0,0 +1,9 @@
 import os
 import pytest
@pytest.fixture
def sample_audio():
    """Return the path to ``sample_data/drew6.wav`` next to this conftest file."""
    here = os.path.dirname(__file__)
    return os.path.join(here, "sample_data/drew6.wav")
--- a/tests/unit/sample_data/drew6.wav
+++ b/tests/unit/sample_data/drew6.wav
--- a/tests/unit/test_audio.py
+++ b/tests/unit/test_audio.py
@ -0,0 +1,16 @@
 from local_whisper.audio import DEFAULT_SAMPLE_RATE, load_audio
 def test_audio(sample_audio):
    """Load the sample clip and sanity-check its rank, length and amplitude spread."""
    print(sample_audio)
    with open(sample_audio, mode="rb") as wav_file:
        waveform = load_audio(wav_file)

    # A mono waveform is a flat, one-dimensional array.
    assert waveform.ndim == 1

    # The clip must hold more than 8 and fewer than 12 seconds' worth of
    # samples at the default sample rate.
    n_samples = waveform.shape[0]
    lower_bound = DEFAULT_SAMPLE_RATE * 8
    upper_bound = DEFAULT_SAMPLE_RATE * 12
    assert lower_bound < n_samples and n_samples < upper_bound

    # The standard deviation of audio data can be used to check amplitude
    # variability, noise, or normalization. Here we just want to make certain
    # the clip does not contain a lot of noise: the spread must be strictly
    # between 0 and 1.
    spread = waveform.std()
    assert 0 < spread and spread < 1
--- a/tests/unit/test_example.py
+++ b/tests/unit/test_example.py
@ -1,2 +0,0 @@
 def test_example():
    # Placeholder test: it asserts a constant and therefore always passes.
    assert True