diff --git a/dev_requirements.in b/dev_requirements.in index ce250ab..7792971 100644 --- a/dev_requirements.in +++ b/dev_requirements.in @@ -1,5 +1,6 @@ black build +exceptiongroup invoke isort pip-tools diff --git a/dev_requirements.txt b/dev_requirements.txt index 601fbdf..db1f194 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile dev_requirements.in @@ -18,6 +18,10 @@ click==8.1.3 # pip-tools distlib==0.3.6 # via virtualenv +exceptiongroup==1.1.1 + # via + # -r dev_requirements.in + # pytest filelock==3.12.0 # via virtualenv identify==2.5.23 @@ -59,6 +63,12 @@ ruff==0.0.263 # via -r dev_requirements.in shellcheck-py==0.9.0.2 # via -r dev_requirements.in +tomli==2.0.1 + # via + # black + # build + # pyproject-hooks + # pytest virtualenv==20.23.0 # via pre-commit wheel==0.40.0 diff --git a/local_whisper/audio.py b/local_whisper/audio.py new file mode 100644 index 0000000..cc33676 --- /dev/null +++ b/local_whisper/audio.py @@ -0,0 +1,47 @@ +from typing import BinaryIO + +import ffmpeg +import numpy as np + +DEFAULT_SAMPLE_RATE = 16000 + + +# TODO probably can offload this on a worker queue too +def load_audio(file: BinaryIO, encode=True, sr: int = DEFAULT_SAMPLE_RATE): + """ + Open an audio file object and read as mono waveform, resampling as necessary. + Modified from https://github.com/openai/whisper/blob/main/whisper/audio.py + to accept a file object + + Parameters + ---------- + file: BinaryIO + The audio file like object + encode: Boolean + If true, encode audio stream to WAV before sending to whisper + sr: int + The sample rate to resample the audio if necessary + Returns + ------- + A NumPy array containing the audio waveform, in float32 dtype. + """ + if encode: + try: + # This launches a subprocess to decode audio while down-mixing and resampling as necessary. 
+ # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed. + out, _ = ( + ffmpeg.input("pipe:", threads=0) + .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr) + .run( + cmd="ffmpeg", + capture_stdout=True, + capture_stderr=True, + input=file.read(), + ) + ) + except ffmpeg.Error as e: + raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e + else: + out = file.read() + + return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0 diff --git a/local_whisper/webservice.py b/local_whisper/webservice.py index 392a4ea..c914ce1 100644 --- a/local_whisper/webservice.py +++ b/local_whisper/webservice.py @@ -1,15 +1,13 @@ import os from threading import Lock -from typing import BinaryIO -import ffmpeg -import numpy as np import torch import whisper from fastapi import FastAPI, File, UploadFile from fastapi.responses import RedirectResponse from . import __version__ +from .audio import load_audio # TODO use pydantic config model_name = os.getenv("ASR_MODEL", "base") @@ -43,47 +41,6 @@ def transcribe( return result -# TODO probably can offload this on a worker queue too -def load_audio(file: BinaryIO, encode=True, sr: int = SAMPLE_RATE): - """ - Open an audio file object and read as mono waveform, resampling as necessary. - Modified from https://github.com/openai/whisper/blob/main/whisper/audio.py - to accept a file object - - Parameters - ---------- - file: BinaryIO - The audio file like object - encode: Boolean - If true, encode audio stream to WAV before sending to whisper - sr: int - The sample rate to resample the audio if necessary - Returns - ------- - A NumPy array containing the audio waveform, in float32 dtype. - """ - if encode: - try: - # This launches a subprocess to decode audio while down-mixing and resampling as necessary. - # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed. 
- out, _ = ( - ffmpeg.input("pipe:", threads=0) - .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr) - .run( - cmd="ffmpeg", - capture_stdout=True, - capture_stderr=True, - input=file.read(), - ) - ) - except ffmpeg.Error as e: - raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e - else: - out = file.read() - - return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0 - - app = FastAPI( title="Local Whisper", description="Making OpenAPI's Open Whisper available via ReST API locally.", diff --git a/pyinvoke/dev.py b/pyinvoke/dev.py index 8487330..776f515 100644 --- a/pyinvoke/dev.py +++ b/pyinvoke/dev.py @@ -23,3 +23,9 @@ def install_pre_commit(c): def build(c): """Builds wheel and source distributions of project.""" pass + + +@task +def serve_dev(c): + """Runs the FastAPI webservice""" + c.run("uvicorn local_whisper.webservice:app --reload") diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 0000000..2965919 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,9 @@ +import os + +import pytest + + +@pytest.fixture +def sample_audio(): + audio_path = os.path.join(os.path.dirname(__file__), "sample_data/drew6.wav") + return audio_path diff --git a/tests/unit/sample_data/drew6.wav b/tests/unit/sample_data/drew6.wav new file mode 100644 index 0000000..ecf4539 Binary files /dev/null and b/tests/unit/sample_data/drew6.wav differ diff --git a/tests/unit/test_audio.py b/tests/unit/test_audio.py new file mode 100644 index 0000000..b1050bf --- /dev/null +++ b/tests/unit/test_audio.py @@ -0,0 +1,16 @@ +from local_whisper.audio import DEFAULT_SAMPLE_RATE, load_audio + + +def test_audio(sample_audio): + print(sample_audio) + with open(sample_audio, mode="rb") as f: + audio = load_audio(f) + # Assert Mono + assert audio.ndim == 1 + # Test the file length is appropriate size + assert DEFAULT_SAMPLE_RATE * 8 < audio.shape[0] < DEFAULT_SAMPLE_RATE * 12 + # Taking the standard deviation of audio 
data can be used to check + # Amplitude Variability, Noise Detection, or Normalization. Here we just want + # to make certain it does not contain a lot of noise. + assert 0 < audio.std() < 1 + \ No newline at end of file diff --git a/tests/unit/test_example.py b/tests/unit/test_example.py deleted file mode 100644 index 813df60..0000000 --- a/tests/unit/test_example.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_example(): - assert True