Hello split-it

main
Drew Bednar 12 months ago
commit 4e7cd1b366

@ -0,0 +1,6 @@
[run]
source = split_it
omit = test*
[report]
show_missing = True

@ -0,0 +1,44 @@
kind: pipeline
type: docker
name: CI Test/Lint Pipeline
steps:
- name: Unit Tests and Linters
# Bullseye because drone runner host OS is using older libseccomp2 causing issues
# with thread allocation. See: https://github.com/docker-library/python/issues/835
image: python:3.11-bullseye
commands:
- bash -xc './scripts/run_linters.sh'
- bash -xc './scripts/run_unit_tests.sh'
group: test-lint
trigger:
event:
- pull_request
- push
# Secrets used to pull private images
image_pull_secrets:
- dockerconfigjson
---
kind: pipeline
type: docker
name: Build Production Image
steps:
- name: Build split_it Container Image
image: plugins/docker
settings:
username: automate
password:
from_secret: automate_password
dockerfile: Dockerfile
registry: registry.runcible.io
repo: registry.runcible.io/split_it
tags:
- ${DRONE_COMMIT_SHA}
when:
branch:
- master
event:
- push

62
.gitignore vendored

@ -0,0 +1,62 @@
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Pycharm
.idea

@ -0,0 +1,40 @@
default_stages: [commit, push]
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
args: [--allow-multiple-documents]
- id: check-added-large-files
- id: debug-statements
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: v0.9.0.5
hooks:
- id: shellcheck
- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black
args: ["split_it", "./tests/"]
types: [ python ]
- repo: local
hooks:
- id: isort
name: isort
entry: isort
language: system
types: [python]
require_serial: true
- id: ruff
name: ruff
entry: ruff
language: system
types: [python]
args: [
# '-rn', # Only display messages
# '-sn', # Don't display the score
# '--disable=C,R,W0511', # Disable C and R type messages, and TODO fixme warning
]
require_serial: true

@ -0,0 +1,10 @@
MIT License
Copyright (c) 2024, Drew Bednar
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@ -0,0 +1,26 @@
# split_it
Tool for Splitting Audio
## First time setup
Create a virtual environment and activate it. Now from the root project directory run `./scripts/bootstrap`. This will install `pip-tools` and sync any dependencies for the first time.
## Dependency management
Dependencies are managed via [pip-tools].
### Adding a dependency
To add a dependency, edit `requirements.in` (or `dev-requirements.in`
for dev dependencies) and add your dependency then run `pip-compile
requirements.in`.
### Syncing dependencies
Run `pip-sync requirements.txt dev-requirements.txt`.
## Testing
Run the tests by invoking `py.test` in the project root. Make sure you
run any pending migrations beforehand.

@ -0,0 +1,12 @@
-c ./requirements.txt
black
build
invoke
isort<=5.12.0
pip-tools
pre-commit
pytest
pytest-cov
shellcheck-py==0.9.0.5
ruff

@ -0,0 +1,89 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile dev-requirements.in
#
black==24.1.1
# via -r dev-requirements.in
build==1.0.3
# via
# -r dev-requirements.in
# pip-tools
cfgv==3.4.0
# via pre-commit
click==8.1.7
# via
# black
# pip-tools
coverage[toml]==7.4.1
# via
# coverage
# pytest-cov
distlib==0.3.8
# via virtualenv
exceptiongroup==1.2.0
# via pytest
filelock==3.13.1
# via virtualenv
identify==2.5.33
# via pre-commit
iniconfig==2.0.0
# via pytest
invoke==2.2.0
# via -r dev-requirements.in
isort==5.12.0
# via -r dev-requirements.in
mypy-extensions==1.0.0
# via black
nodeenv==1.8.0
# via pre-commit
packaging==23.2
# via
# black
# build
# pytest
pathspec==0.12.1
# via black
pip-tools==7.3.0
# via -r dev-requirements.in
platformdirs==4.2.0
# via
# black
# virtualenv
pluggy==1.4.0
# via pytest
pre-commit==3.6.0
# via -r dev-requirements.in
pyproject-hooks==1.0.0
# via build
pytest==8.0.0
# via
# -r dev-requirements.in
# pytest-cov
pytest-cov==4.1.0
# via -r dev-requirements.in
pyyaml==6.0.1
# via pre-commit
ruff==0.2.0
# via -r dev-requirements.in
shellcheck-py==0.9.0.5
# via -r dev-requirements.in
tomli==2.0.1
# via
# black
# build
# coverage
# pip-tools
# pyproject-hooks
# pytest
typing-extensions==4.9.0
# via black
virtualenv==20.25.0
# via pre-commit
wheel==0.42.0
# via pip-tools
# The following packages are considered to be unsafe in a requirements file:
# pip
# setuptools

@ -0,0 +1,8 @@
#!/bin/bash
# Entry point shell script: hands control to whatever command was passed in.

# Best practice: Bash strict mode (long option names for readability).
set -o errexit -o nounset -o pipefail

# Best practice: replace this shell with the requested command via `exec`
# so that the image shuts down correctly.
exec "$@"

@ -0,0 +1,59 @@
[project]
name = "split_it"
version = "0.1.0"
authors = [
{name = "Drew Bednar", email = "drew@runcible.io"},
]
description = "Tool for Splitting Audio"
requires-python = ">=3.10"
license = {text = "MIT"}
classifiers = [
"Programming Language :: Python :: 3",
]
dynamic = ["readme", "dependencies"]
[tool.setuptools.dynamic]
readme = {file = ["README.md"], content-type = "text/markdown"}
dependencies = {file = ["requirements.txt"]}
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
[tool.isort]
profile = "black"
line_length = 120
force_single_line = true
filter_files = true
[tool.black]
line-length = 120
skip-string-normalization = true
exclude = "(^/\\.git|^/env/|^/venv/|^/node_modules/)"
[tool.ruff]
line-length = 120
# Exclude a variety of commonly ignored directories.
exclude = [
".bzr",
".direnv",
".eggs",
".git",
".git-rewrite",
".hg",
".mypy_cache",
".nox",
".pants.d",
".pytype",
".ruff_cache",
".svn",
".tox",
".venv",
"__pypackages__",
"_build",
"buck-out",
"build",
"dist",
"node_modules",
"venv",
]

@ -0,0 +1,2 @@
pydub
tqdm

@ -0,0 +1,10 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile requirements.in
#
pydub==0.25.1
# via -r requirements.in
tqdm==4.66.1
# via -r requirements.in

@ -0,0 +1,10 @@
#!/usr/bin/env bash
function _setup_env() {
    # Create and activate a virtualenv in ./env, but only when the caller is
    # not already inside one.
    #
    # ${VIRTUAL_ENV:-} expands to an empty string when the variable is unset,
    # so this check also works in scripts that enable `set -u` (nounset).
    if [ -z "${VIRTUAL_ENV:-}" ]; then
        # build a virtual env
        python3 -m venv env
        # shellcheck source=/dev/null
        source env/bin/activate
    fi
}

@ -0,0 +1,47 @@
#!/usr/bin/env bash
# setting -e to exit immediately on a command failure.
# set -o pipefail sets the exit code of a pipeline to
# that of the rightmost command to exit with a non-zero status,
# or to zero if all commands of the pipeline exit successfully.
# set -u affects variables. When set, a reference to any variable
# you haven't previously defined - with the exceptions of $* and $@ -
# is an error, and causes the program to immediately exit.
# NOTE: -u is NOT enabled below; this script reads $VIRTUAL_ENV, which may be unset.
set -eo pipefail
IFS=$'\n\t'
# shellcheck source=/dev/null
source "$(dirname "$0")/_common.sh"
function project_bootstrap() {
    # Prepare the virtualenv, install the pinned pip-tools, make sure both
    # lock files exist, then sync the environment to them.
    _setup_env
    pip install -U pip pip-tools=='7.3.0'

    # Compile any missing lock file from its matching .in file.
    # requirements must come first: dev-requirements.in constrains against
    # ./requirements.txt.
    local req_name
    for req_name in requirements dev-requirements; do
        if [ ! -f "${req_name}.txt" ]; then
            pip-compile "${req_name}.in"
        fi
    done

    pip-sync requirements.txt dev-requirements.txt
}
function install_precommit_hooks() {
    # Install the pre-commit git hooks for this checkout.

    # Initialise a git repository when the directory is not one yet.
    [ -d ".git" ] || git init

    # Stop with a failure status when no virtualenv is active.
    if [ -z "$VIRTUAL_ENV" ]; then
        echo "warning: you are not in a virtualenv"
        exit 1
    fi

    pre-commit install
}
project_bootstrap
install_precommit_hooks

@ -0,0 +1,28 @@
#! /usr/bin/env bash
# Runs isort, black and ruff in check-only mode against the project sources
# and exits non-zero when any of them reports a problem.
#
# `set -e` is deliberately NOT enabled: every linter must run so that all
# findings are reported in one pass, and the exit codes are combined below.

# shellcheck source=/dev/null
source "$(dirname "$0")/_common.sh"

# On the Drone CI runner there is no prepared environment, so build one.
if [ "${DRONE:-}" == "true" ]; then
    _setup_env
    pip install -r requirements.txt -r dev-requirements.txt
fi

# Paths to lint. These match the paths the `delint` task in tasks.py formats.
LINT_TARGETS=(./split_it ./tests ./tasks.py)

# Run linting commands and capture their return codes
"${VIRTUAL_ENV}/bin/python3" -m isort --check "${LINT_TARGETS[@]}"
ISORT_EXIT_CODE=$?

"${VIRTUAL_ENV}/bin/python3" -m black --check "${LINT_TARGETS[@]}"
BLACK_EXIT_CODE=$?

# Use the explicit `check` subcommand rather than the bare `ruff <path>` form.
"${VIRTUAL_ENV}/bin/python3" -m ruff check "${LINT_TARGETS[@]}"
RUFF_EXIT_CODE=$?

# Check if any linting command failed
if [ $ISORT_EXIT_CODE -ne 0 ] || [ $BLACK_EXIT_CODE -ne 0 ] || [ $RUFF_EXIT_CODE -ne 0 ]; then
    echo "Some linting checks failed"
    # Exit with a non-zero status, you can choose which error code to return
    exit 1
fi

echo "All linting checks passed"

@ -0,0 +1,12 @@
#! /usr/bin/env bash
# Runs the unit tests with coverage via pytest. Any extra arguments given to
# this script are forwarded to pytest.
# -e: stop on the first failing command; -x: echo each command as it runs.
set -ex
# shellcheck source=/dev/null
source "$(dirname "$0")/_common.sh"
# When DRONE is "true", create/activate a virtualenv and install all
# requirements before running the tests.
if [ "${DRONE}" == "true" ]; then
_setup_env
pip install -r requirements.txt -r dev-requirements.txt
fi
# Replace this shell with pytest, using the virtualenv's interpreter.
exec "${VIRTUAL_ENV}/bin/python3" -m pytest -vv --cov "$@"

@ -0,0 +1,134 @@
import argparse
import os
from concurrent.futures import ThreadPoolExecutor
from pydub import AudioSegment
from pydub.silence import split_on_silence
from tqdm import tqdm
def save_chunk(chunk, start_time, output_dir, output_format):
    """Export ``chunk`` to ``output_dir`` as ``chunk_<start_time>.<output_format>``.

    Args:
        chunk: Object exposing ``export(path, format=...)``.
        start_time: Label embedded in the file name.
        output_dir: Directory the file is written to.
        output_format: Used both as the file extension and the export format.
    """
    file_name = f'chunk_{start_time}.{output_format}'
    destination = os.path.join(output_dir, file_name)
    chunk.export(destination, format=output_format)
def merge_short_chunks(chunks, min_chunk_length_ms):
    """Greedily merge adjacent chunks until they reach a minimum length.

    Walks ``chunks`` in order, concatenating each chunk onto the running one
    while the combined length stays below ``min_chunk_length_ms``. Once adding
    the next chunk would reach or exceed that length, the running chunk is
    emitted and the next chunk starts a new run.

    Args:
        chunks: Sequence of objects supporting ``len()`` and ``+``.
        min_chunk_length_ms: Length threshold, in the same unit as ``len(chunk)``.

    Returns:
        A new list of merged chunks. Empty when ``chunks`` is empty.
    """
    # Nothing to merge; without this guard chunks[0] would raise IndexError.
    if not chunks:
        return []

    merged_chunks = []
    current_chunk = chunks[0]
    for chunk in chunks[1:]:
        if len(current_chunk) + len(chunk) < min_chunk_length_ms:
            # Build a new object instead of using +=, so element types with an
            # in-place add (e.g. lists) do not mutate the caller's input.
            current_chunk = current_chunk + chunk
        else:
            merged_chunks.append(current_chunk)
            current_chunk = chunk
    # The final run is always emitted, even if it is still below the threshold.
    merged_chunks.append(current_chunk)
    return merged_chunks
def split_audio(
    input_file, output_dir, chunk_length_ms, output_format, silence_based, silence_threshold, silence_min_len
):
    """Split an audio file into chunks and export every chunk to ``output_dir``.

    Two modes are supported:

    * ``silence_based`` true: the audio is cut wherever silence is detected,
      adjacent pieces are merged while their combined length stays below
      ``chunk_length_ms``, and files are named ``chunk_<index>.<format>``.
    * ``silence_based`` false: the audio is cut into consecutive windows of
      ``chunk_length_ms`` (the last one may be shorter), and files are named
      ``chunk_<start offset in ms>.<format>``.

    Chunks are exported in parallel on a thread pool while a progress bar is
    updated.

    Args:
        input_file: Path to the audio file to load.
        output_dir: Directory for the exported chunks; created if missing.
        chunk_length_ms: Window length (fixed mode) or merge threshold
            (silence mode), in milliseconds.
        output_format: Export format, also used as the file extension.
        silence_based: Select silence-based instead of fixed-size splitting.
        silence_threshold: Passed to ``split_on_silence`` as ``silence_thresh``.
        silence_min_len: Passed to ``split_on_silence`` as ``min_silence_len``.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive in fixed-size mode.
        Exception: Whatever the first failing chunk export raised.
    """
    # Fail before doing any I/O; a zero length would otherwise divide by zero.
    if not silence_based and chunk_length_ms <= 0:
        raise ValueError("chunk_length_ms must be a positive number of milliseconds")

    # Load the input audio file using Pydub
    audio = AudioSegment.from_file(input_file)

    # Create the output directory if it doesn't exist (no exists()/makedirs() race).
    os.makedirs(output_dir, exist_ok=True)

    # Build a list of (file name label, chunk) pairs for the selected mode.
    if silence_based:
        chunks = split_on_silence(audio, min_silence_len=silence_min_len, silence_thresh=silence_threshold)
        # Merge adjacent chunks shorter than the specified length. Skip the
        # merge when no chunk was produced at all.
        chunks = merge_short_chunks(chunks, chunk_length_ms) if chunks else []
        jobs = list(enumerate(chunks))
        description = "Processing chunks based on silence"
    else:
        audio_length_ms = len(audio)
        # range() yields every window start, including a final partial window.
        jobs = [
            (start_time, audio[start_time : min(start_time + chunk_length_ms, audio_length_ms)])
            for start_time in range(0, audio_length_ms, chunk_length_ms)
        ]
        description = "Processing fixed-size chunks"

    # The context manager closes the progress bar even when an export fails.
    with tqdm(total=len(jobs), desc=description) as pbar:
        futures = []
        # Save chunks in parallel; leaving the block waits for every export.
        with ThreadPoolExecutor() as executor:
            for label, chunk in jobs:
                future = executor.submit(save_chunk, chunk, label, output_dir, output_format)
                future.add_done_callback(lambda _done: pbar.update(1))
                futures.append(future)

        # Surface export failures instead of silently dropping them.
        for future in futures:
            future.result()
def main(argv=None):
    """Parse the command line and run ``split_audio``.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``.
    """
    # Set up argument parser for the CLI app
    parser = argparse.ArgumentParser(
        description="Split an audio file into fixed-size chunks or into chunks separated by silence."
    )
    parser.add_argument("input_file", help="Path to the input audio file.")
    parser.add_argument("output_dir", help="Path to the output directory where chunks will be saved.")
    parser.add_argument(
        "--chunk_length",
        type=int,
        default=12000,
        help=(
            "Length of each chunk in milliseconds (default: 12000 ms / 12 seconds). "
            "With --silence_based, adjacent chunks are merged while their combined length stays below this value."
        ),
    )
    parser.add_argument(
        "--output_format",
        type=str,
        default="wav",
        help="Output format for the audio chunks (default: wav). Supported formats include wav, mp3, and ogg.",
    )
    parser.add_argument(
        "--silence_based",
        action="store_true",
        help=(
            "Split the audio based on silence instead of fixed-size chunks. "
            "If set, --chunk_length is used as the merge threshold for short chunks."
        ),
    )
    parser.add_argument(
        "--silence_threshold", type=int, default=-40, help="Threshold in dB for silence based splitting."
    )
    parser.add_argument(
        "--silence_min_len", type=int, default=400, help="Minimum length of silence in milliseconds for splitting."
    )

    # Parse the arguments
    args = parser.parse_args(argv)

    # Call the split_audio function with the provided arguments
    split_audio(
        args.input_file,
        args.output_dir,
        args.chunk_length,
        args.output_format,
        args.silence_based,
        args.silence_threshold,
        args.silence_min_len,
    )


if __name__ == "__main__":
    main()

@ -0,0 +1,52 @@
import os
from invoke import task
# Image repository used to name built images; overridable through the
# environment variable of the same name.
# NOTE(review): "RESPOSITORY" is misspelled, but the spelling is also the
# environment variable name, so renaming it would change the external interface.
IMAGE_RESPOSITORY = os.environ.get("IMAGE_RESPOSITORY", "registry.runcible.io/split_it")
@task
def update_deps(c):
    """Updates dependencies by recompiling both requirement lock files."""
    # requirements.in is compiled first; dev-requirements.in comes second.
    for spec_file in ("requirements.in", "dev-requirements.in"):
        c.run(f"pip-compile {spec_file}", pty=True)
@task
def sync_deps(c):
    """Syncs the local environment to the compiled requirement files."""
    sync_command = "pip-sync requirements.txt dev-requirements.txt"
    c.run(sync_command)
@task
def lint(c):
    """Runs every linter against the project via the shared lint script."""
    lint_script = "./scripts/run_linters.sh"
    c.run(lint_script, pty=True)
@task
def delint(c):
    """Applies the auto-fixing tools (isort, then black) to the project."""
    targets = "./split_it ./tests ./tasks.py"
    for tool in ("isort", "black"):
        c.run(f"{tool} {targets}", pty=True)
@task
def build(c):
    """Builds the project as a Python package using the ``build`` module."""
    build_command = "python3 -m build"
    c.run(build_command)
@task
def build_image(c, dev=True, registry_user=None, registry_token=None, push=False, login=False):
    """Builds the split_it container image.

    The image is tagged ``<IMAGE_RESPOSITORY>:<short commit sha>`` with a
    ``-dev`` suffix when ``dev`` is true.

    Args:
        c: Invoke context used to run the shell commands.
        dev: Append ``-dev`` to the image tag.
        registry_user: Registry user name; required with ``login``.
        registry_token: Registry password/token; required with ``login``.
        push: Push the image after building it.
        login: Run ``docker login`` after the build (and before any push).

    Raises:
        ValueError: If ``login`` is set without both credentials.
    """
    import shlex

    # Validate up front so a missing credential does not cost a full build.
    if login and (registry_user is None or registry_token is None):
        raise ValueError("--registry-user and --registry-token must be provided if using --login parameter")

    context_dir = c.run("pwd", hide=True).stdout.strip()
    commit_sha = c.run("git rev-parse --short HEAD", hide=True).stdout.strip()
    image_name = f"{IMAGE_RESPOSITORY}:{commit_sha}{'-dev' if dev else ''}"

    # Quote every interpolated value: they come from the environment, the
    # command line, or the working directory, and may contain shell characters.
    build_arg = shlex.quote(f"COMMIT_SHA={commit_sha}")
    c.run(f"docker build --build-arg {build_arg} -t {shlex.quote(image_name)} {shlex.quote(context_dir)}")

    if login:
        # Log in to the registry hosting the image, not docker's default one.
        # A leading path component counts as a registry host only when it
        # looks like one (has a dot or port, or is "localhost").
        host, _, remainder = IMAGE_RESPOSITORY.partition("/")
        is_registry_host = bool(remainder) and ("." in host or ":" in host or host == "localhost")
        registry = f" {shlex.quote(host)}" if is_registry_host else ""
        # Pass the token through the child environment and stdin, so it does
        # not appear in the command line.
        c.run(
            f"printf '%s' \"$REGISTRY_TOKEN\" | docker login -u {shlex.quote(registry_user)} --password-stdin{registry}",
            env={"REGISTRY_TOKEN": registry_token},
        )

    if push:
        c.run(f"docker push {shlex.quote(image_name)}")

@ -0,0 +1,6 @@
from split_it.example import Example
def test_example():
    """An ``Example`` exposes the name it was constructed with."""
    expected_name = "dirp"
    assert Example(name=expected_name).name == expected_name
Loading…
Cancel
Save