First test

pull/6/head
Drew Bednar 2 years ago
parent 3eac8f44cb
commit 1c5ee1fc27

@ -1,5 +1,6 @@
black
build
exceptiongroup
invoke
isort
pip-tools

@ -1,5 +1,5 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile dev_requirements.in
@ -18,6 +18,10 @@ click==8.1.3
# pip-tools
distlib==0.3.6
# via virtualenv
exceptiongroup==1.1.1
# via
# -r dev_requirements.in
# pytest
filelock==3.12.0
# via virtualenv
identify==2.5.23
@ -59,6 +63,12 @@ ruff==0.0.263
# via -r dev_requirements.in
shellcheck-py==0.9.0.2
# via -r dev_requirements.in
tomli==2.0.1
# via
# black
# build
# pyproject-hooks
# pytest
virtualenv==20.23.0
# via pre-commit
wheel==0.40.0

@ -0,0 +1,47 @@
from typing import BinaryIO
import ffmpeg
import numpy as np
DEFAULT_SAMPLE_RATE = 16000


# TODO probably can offload this on a worker queue too
def load_audio(
    file: BinaryIO, encode: bool = True, sr: int = DEFAULT_SAMPLE_RATE
) -> np.ndarray:
    """
    Read an audio file object as a mono waveform, resampling as necessary.

    Modified from https://github.com/openai/whisper/blob/main/whisper/audio.py
    to accept a file object.

    Parameters
    ----------
    file: BinaryIO
        The audio file-like object. It is read to the end in one call.

    encode: bool
        If true, pipe the stream through the ffmpeg CLI, which decodes it to
        mono 16-bit PCM at ``sr``. If false, the bytes read from ``file`` are
        interpreted directly as 16-bit little-endian PCM samples.

    sr: int
        The sample rate ffmpeg resamples to (only used when ``encode`` is true).

    Returns
    -------
    A 1-D NumPy array containing the audio waveform, in float32 dtype, with
    the int16 samples divided by 32768.

    Raises
    ------
    RuntimeError
        If ffmpeg fails; the message carries ffmpeg's stderr output.
    """
    payload = file.read()

    if encode:
        try:
            # This launches a subprocess to decode audio while down-mixing and
            # resampling as necessary. Requires the ffmpeg CLI and the
            # `ffmpeg-python` package to be installed.
            pcm, _ = (
                ffmpeg.input("pipe:", threads=0)
                .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
                .run(
                    cmd="ffmpeg",
                    capture_stdout=True,
                    capture_stderr=True,
                    input=payload,
                )
            )
        except ffmpeg.Error as e:
            # ffmpeg's stderr is not guaranteed to be valid UTF-8; a strict
            # decode could raise UnicodeDecodeError and hide the real failure.
            details = e.stderr.decode(errors="replace") if e.stderr else ""
            raise RuntimeError(f"Failed to load audio: {details}") from e
    else:
        pcm = payload

    # The stream is "s16le", so pin little-endian int16 rather than relying on
    # the host's native byte order. frombuffer already yields a 1-D array.
    samples = np.frombuffer(pcm, dtype="<i2")
    return samples.astype(np.float32) / 32768.0

@ -1,15 +1,13 @@
import os
from threading import Lock
from typing import BinaryIO
import ffmpeg
import numpy as np
import torch
import whisper
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import RedirectResponse
from . import __version__
from .audio import load_audio
# TODO use pydantic config
# The model name is read from the ASR_MODEL environment variable and falls
# back to "base" when that variable is not set.
model_name = os.environ.get("ASR_MODEL", "base")
@ -43,47 +41,6 @@ def transcribe(
return result
# TODO probably can offload this on a worker queue too
def load_audio(
    file: BinaryIO, encode: bool = True, sr: int = SAMPLE_RATE
) -> np.ndarray:
    """
    Read an audio file object as a mono waveform, resampling as necessary.

    Modified from https://github.com/openai/whisper/blob/main/whisper/audio.py
    to accept a file object.

    Parameters
    ----------
    file: BinaryIO
        The audio file-like object. It is read to the end in one call.

    encode: bool
        If true, pipe the stream through the ffmpeg CLI, which decodes it to
        mono 16-bit PCM at ``sr``. If false, the bytes read from ``file`` are
        interpreted directly as 16-bit little-endian PCM samples.

    sr: int
        The sample rate ffmpeg resamples to (only used when ``encode`` is true).

    Returns
    -------
    A 1-D NumPy array containing the audio waveform, in float32 dtype, with
    the int16 samples divided by 32768.

    Raises
    ------
    RuntimeError
        If ffmpeg fails; the message carries ffmpeg's stderr output.
    """
    payload = file.read()

    if encode:
        try:
            # This launches a subprocess to decode audio while down-mixing and
            # resampling as necessary. Requires the ffmpeg CLI and the
            # `ffmpeg-python` package to be installed.
            pcm, _ = (
                ffmpeg.input("pipe:", threads=0)
                .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
                .run(
                    cmd="ffmpeg",
                    capture_stdout=True,
                    capture_stderr=True,
                    input=payload,
                )
            )
        except ffmpeg.Error as e:
            # ffmpeg's stderr is not guaranteed to be valid UTF-8; a strict
            # decode could raise UnicodeDecodeError and hide the real failure.
            details = e.stderr.decode(errors="replace") if e.stderr else ""
            raise RuntimeError(f"Failed to load audio: {details}") from e
    else:
        pcm = payload

    # The stream is "s16le", so pin little-endian int16 rather than relying on
    # the host's native byte order. frombuffer already yields a 1-D array.
    samples = np.frombuffer(pcm, dtype="<i2")
    return samples.astype(np.float32) / 32768.0
app = FastAPI(
title="Local Whisper",
description="Making OpenAPI's Open Whisper available via ReST API locally.",

@ -23,3 +23,9 @@ def install_pre_commit(c):
def build(c):
    """Build wheel and source distributions of the project.

    Placeholder: the body is currently empty, so nothing is run and ``c``
    is not used.
    """
@task
def serve_dev(c):
    """Run the FastAPI web service through uvicorn with ``--reload``.

    The command is handed to ``c.run``; its result is not returned.
    """
    command = "uvicorn local_whisper.webservice:app --reload"
    c.run(command)

@ -0,0 +1,9 @@
import os
import pytest
@pytest.fixture
def sample_audio():
    """Return the path of the bundled ``sample_data/drew6.wav`` file.

    The path is built relative to the directory containing this module.
    """
    here = os.path.dirname(__file__)
    return os.path.join(here, "sample_data/drew6.wav")

Binary file not shown.

@ -0,0 +1,16 @@
from local_whisper.audio import DEFAULT_SAMPLE_RATE, load_audio
def test_audio(sample_audio):
    """Load the sample recording and sanity-check the decoded waveform."""
    with open(sample_audio, mode="rb") as f:
        audio = load_audio(f)

    # Assert mono: the waveform must be a flat, one-dimensional array.
    assert audio.ndim == 1

    # The decoded clip should hold between 8 and 12 seconds' worth of samples
    # at the default sample rate.
    assert DEFAULT_SAMPLE_RATE * 8 < audio.shape[0] < DEFAULT_SAMPLE_RATE * 12

    # The standard deviation measures amplitude variability. A value above
    # zero rules out a constant (e.g. silent) signal, and the upper bound
    # checks that the samples stay within a normalised amplitude range.
    assert 0 < audio.std() < 1

@ -1,2 +0,0 @@
def test_example():
    """Placeholder test that always passes."""
    assert True
Loading…
Cancel
Save