"""Split an audio file into fixed-size or silence-delimited chunks using pydub."""

import argparse
import os
from concurrent.futures import ThreadPoolExecutor
from pydub import AudioSegment
from pydub.silence import split_on_silence
from tqdm import tqdm
def save_chunk(chunk, start_time, output_dir, output_format):
    """Export one audio chunk to ``output_dir`` as ``chunk_<start_time>.<output_format>``.

    Args:
        chunk: Object exposing ``export(path, format=...)`` (a pydub
            ``AudioSegment`` in this script).
        start_time: Label embedded in the file name (callers pass either a
            start offset in milliseconds or a running index).
        output_dir: Directory the file is written to; it must already exist.
        output_format: Used both as the file extension and as the export format.
    """
    target_path = os.path.join(output_dir, f'chunk_{start_time}.{output_format}')
    exported = chunk.export(target_path, format=output_format)
    # pydub's export() hands back the still-open output file object; close it
    # so that splitting into many chunks does not exhaust file descriptors.
    if exported is not None:
        exported.close()
def merge_short_chunks(chunks, min_chunk_length_ms):
    """Greedily concatenate adjacent chunks until they reach a minimum length.

    Walking left to right, a chunk is appended to the running chunk while
    their combined length stays strictly below ``min_chunk_length_ms``;
    otherwise the running chunk is emitted and a new one is started.

    Args:
        chunks: Sequence of objects supporting ``len()`` and ``+``
            (pydub ``AudioSegment`` objects in this script).
        min_chunk_length_ms: Combined-length threshold, in the same unit
            that ``len(chunk)`` reports.

    Returns:
        A new list of merged chunks; an empty list when ``chunks`` is empty.
    """
    # An empty input (e.g. nothing but silence was found) has nothing to merge.
    if not chunks:
        return []

    merged_chunks = []
    current_chunk = chunks[0]
    for chunk in chunks[1:]:
        if len(current_chunk) + len(chunk) < min_chunk_length_ms:
            # Build a new object instead of using ``+=`` so that a mutable
            # first element of the caller's sequence is never modified.
            current_chunk = current_chunk + chunk
        else:
            merged_chunks.append(current_chunk)
            current_chunk = chunk
    merged_chunks.append(current_chunk)
    return merged_chunks
def split_audio(
    input_file, output_dir, chunk_length_ms, output_format, silence_based, silence_threshold, silence_min_len
):
    """Split an audio file into chunks and export them to ``output_dir`` in parallel.

    Args:
        input_file: Path of the audio file, loaded with ``AudioSegment.from_file``.
        output_dir: Directory for the exported chunks; created if missing.
        chunk_length_ms: In fixed-size mode, the length of every chunk (the
            last one may be shorter). In silence mode, the threshold passed
            to ``merge_short_chunks`` for merging adjacent chunks.
        output_format: Export format, also used as the file extension.
        silence_based: When true, split on silence instead of at fixed offsets.
        silence_threshold: ``silence_thresh`` handed to ``split_on_silence``.
        silence_min_len: ``min_silence_len`` handed to ``split_on_silence``.

    Raises:
        ValueError: In fixed-size mode, if ``chunk_length_ms`` is not positive.
        Exception: Any error raised while exporting a chunk is re-raised
            after all export tasks have finished.

    Note:
        Silence-based chunks are named by their running index, fixed-size
        chunks by their start offset in milliseconds.
    """
    # Load the input audio file using Pydub
    audio = AudioSegment.from_file(input_file)

    # Create the output directory if it doesn't exist (no exists()/makedirs() race)
    os.makedirs(output_dir, exist_ok=True)

    if silence_based:
        # Split the audio file based on silence
        chunks = split_on_silence(
            audio, min_silence_len=silence_min_len, silence_thresh=silence_threshold, keep_silence=200
        )
        # Merge adjacent chunks shorter than the specified length; skip the
        # call entirely when no non-silent audio was detected.
        if chunks:
            chunks = merge_short_chunks(chunks, chunk_length_ms)
        total = len(chunks)
        jobs = enumerate(chunks)
        description = "Processing chunks based on silence"
    else:
        if chunk_length_ms <= 0:
            raise ValueError("chunk_length_ms must be a positive number of milliseconds")
        audio_length_ms = len(audio)
        # One start offset per chunk; the final chunk simply ends at the end
        # of the audio, which covers the remainder case without special-casing.
        starts = range(0, audio_length_ms, chunk_length_ms)
        total = len(starts)
        jobs = (
            (start, audio[start:min(start + chunk_length_ms, audio_length_ms)])
            for start in starts
        )
        description = "Processing fixed-size chunks"

    # The context manager closes the progress bar even if an export fails.
    with tqdm(total=total, desc=description) as pbar:
        futures = []
        # Save chunks in parallel using ThreadPoolExecutor
        with ThreadPoolExecutor() as executor:
            for label, chunk in jobs:
                future = executor.submit(save_chunk, chunk, label, output_dir, output_format)
                future.add_done_callback(lambda _done: pbar.update(1))
                futures.append(future)
        # Surface worker failures instead of dropping them silently.
        for future in futures:
            future.result()
def main(argv=None):
    """Parse command-line arguments and run ``split_audio`` with them.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads the arguments from ``sys.argv[1:]``.
    """
    # Set up argument parser for the CLI app
    parser = argparse.ArgumentParser(description="Split an audio file into equally sized chunks.")
    parser.add_argument("input_file", help="Path to the input audio file.")
    parser.add_argument("output_dir", help="Path to the output directory where chunks will be saved.")
    parser.add_argument(
        "--chunk_length",
        type=int,
        default=12000,
        # %(default)s keeps the help text in sync with the actual default.
        help="Length of each chunk in milliseconds (default: %(default)s ms).",
    )
    parser.add_argument(
        "--output_format",
        type=str,
        default="wav",
        help="Output format for the audio chunks (default: wav). Supported formats include wav, mp3, and ogg.",
    )
    parser.add_argument(
        "--silence_based",
        action="store_true",
        help=(
            "Split the audio based on silence instead of fixed-size chunks. "
            "If set, --chunk_length is used as the threshold below which adjacent chunks are merged."
        ),
    )
    parser.add_argument(
        "--silence_threshold", type=int, default=-40, help="Threshold in dB for silence based splitting."
    )
    parser.add_argument(
        "--silence_min_len", type=int, default=1000, help="Minimum length of silence in milliseconds for splitting."
    )

    # Parse the arguments
    args = parser.parse_args(argv)

    # Call the split_audio function with the provided arguments
    split_audio(
        args.input_file,
        args.output_dir,
        args.chunk_length,
        args.output_format,
        args.silence_based,
        args.silence_threshold,
        args.silence_min_len,
    )
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()