"""Split an audio file into chunks, either fixed-length or cut at silences."""
# Listing metadata from the repository page: 138 lines, 4.8 KiB, Python.

import argparse
import os
from concurrent.futures import ThreadPoolExecutor
from pydub import AudioSegment
from pydub.silence import split_on_silence
from tqdm import tqdm
def save_chunk(chunk, start_time, output_dir, output_format):
    """Write one chunk to ``output_dir`` as ``chunk_<start_time>.<output_format>``.

    Args:
        chunk: Object exposing ``export(path, format=...)``; ``split_audio``
            passes pydub ``AudioSegment`` pieces.
        start_time: Label embedded in the file name. ``split_audio`` passes a
            millisecond offset in fixed-size mode and a chunk index in
            silence-based mode.
        output_dir: Directory the file is written into.
        output_format: Used both as the file extension and as the ``format``
            argument handed to ``export``.
    """
    file_name = f'chunk_{start_time}.{output_format}'
    destination = os.path.join(output_dir, file_name)
    chunk.export(destination, format=output_format)
def merge_short_chunks(chunks, min_chunk_length_ms):
    """Greedily concatenate adjacent chunks while they stay below a length threshold.

    Chunks are visited in order. The next chunk is appended to the running
    chunk as long as their combined length is strictly less than
    ``min_chunk_length_ms``; otherwise the running chunk is emitted and the
    next chunk starts a new run. The last running chunk is always emitted, so
    emitted chunks are not guaranteed to reach the threshold.

    Args:
        chunks: Iterable of objects supporting ``len()`` and ``+``.
            ``split_audio`` passes pydub ``AudioSegment`` objects.
        min_chunk_length_ms: Combined length below which two adjacent chunks
            are merged.

    Returns:
        A new list of merged chunks; an empty list when ``chunks`` is empty.
    """
    iterator = iter(chunks)
    try:
        current_chunk = next(iterator)
    except StopIteration:
        # Nothing to merge (e.g. a silence split that found no audible part).
        return []

    merged_chunks = []
    for chunk in iterator:
        if len(current_chunk) + len(chunk) < min_chunk_length_ms:
            # Plain ``+`` (not ``+=``) so a mutable first element of the
            # caller's sequence is never modified in place.
            current_chunk = current_chunk + chunk
        else:
            merged_chunks.append(current_chunk)
            current_chunk = chunk
    merged_chunks.append(current_chunk)
    return merged_chunks
def _export_chunks(labelled_chunks, total, description, output_dir, output_format):
    """Save ``(label, chunk)`` pairs on a thread pool, advancing a tqdm bar per finished export."""
    pbar = tqdm(total=total, desc=description)
    try:
        futures = []
        with ThreadPoolExecutor() as executor:
            for label, chunk in labelled_chunks:
                future = executor.submit(save_chunk, chunk, label, output_dir, output_format)
                future.add_done_callback(lambda _done: pbar.update(1))
                futures.append(future)
        # Leaving the executor block waits for every export. Asking each future
        # for its result re-raises the first export error instead of dropping it.
        for future in futures:
            future.result()
    finally:
        pbar.close()


def split_audio(
    input_file, output_dir, chunk_length_ms, output_format, silence_based, silence_threshold, silence_min_len
):
    """Split ``input_file`` into chunks and write them to ``output_dir``.

    In fixed-size mode the audio is cut every ``chunk_length_ms`` milliseconds
    (the last chunk holds the remainder) and files are named after their start
    offset in milliseconds. In silence-based mode the audio is cut with
    ``split_on_silence``, adjacent pieces are merged by ``merge_short_chunks``
    using ``chunk_length_ms`` as the threshold, and files are named after
    their index.

    Args:
        input_file: Path of the audio file to load.
        output_dir: Directory for the exported chunks; created if missing.
        chunk_length_ms: Chunk length (fixed-size mode) or merge threshold
            (silence-based mode), in milliseconds.
        output_format: Export format and file extension, e.g. ``"wav"``.
        silence_based: When true, split at silences instead of fixed offsets.
        silence_threshold: Passed to ``split_on_silence`` as ``silence_thresh``.
        silence_min_len: Passed to ``split_on_silence`` as ``min_silence_len``.

    Raises:
        ValueError: In fixed-size mode when ``chunk_length_ms`` is not positive.
        Exception: Whatever ``save_chunk`` raised for the first failed export.
    """
    if not silence_based and chunk_length_ms <= 0:
        # A zero length would divide by zero and a negative one would yield
        # nonsensical slices, so reject both up front.
        raise ValueError("chunk_length_ms must be a positive number of milliseconds")

    # Load the input audio file using Pydub
    audio = AudioSegment.from_file(input_file)

    # Create the output directory if it doesn't exist (no check-then-create race)
    os.makedirs(output_dir, exist_ok=True)

    if silence_based:
        # Split the audio file based on silence
        chunks = split_on_silence(
            audio, min_silence_len=silence_min_len, silence_thresh=silence_threshold, keep_silence=200
        )
        # Merge adjacent chunks shorter than the specified length; skipped when
        # the split produced nothing so an empty result is simply a no-op.
        if chunks:
            chunks = merge_short_chunks(chunks, chunk_length_ms)
        _export_chunks(
            enumerate(chunks), len(chunks), "Processing chunks based on silence", output_dir, output_format
        )
    else:
        audio_length_ms = len(audio)
        # One start offset per chunk; the final slice is clamped to the audio
        # length, which covers the remainder chunk without a separate branch.
        starts = range(0, audio_length_ms, chunk_length_ms)
        labelled_chunks = (
            (start, audio[start:min(start + chunk_length_ms, audio_length_ms)]) for start in starts
        )
        _export_chunks(labelled_chunks, len(starts), "Processing fixed-size chunks", output_dir, output_format)
def main(argv=None):
    """Parse the command line and run ``split_audio``.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``, which is what a script run does.
    """
    # Set up argument parser for the CLI app
    parser = argparse.ArgumentParser(description="Split an audio file into equally sized chunks.")
    parser.add_argument("input_file", help="Path to the input audio file.")
    parser.add_argument("output_dir", help="Path to the output directory where chunks will be saved.")
    parser.add_argument(
        "--chunk_length",
        type=int,
        default=12000,
        # %(default)s keeps the help text in sync with the actual default.
        help="Length of each chunk in milliseconds (default: %(default)s ms).",
    )
    parser.add_argument(
        "--output_format",
        type=str,
        default="wav",
        help="Output format for the audio chunks (default: wav). Supported formats include wav, mp3, and ogg.",
    )
    parser.add_argument(
        "--silence_based",
        action="store_true",
        help=(
            "Split the audio based on silence instead of fixed-size chunks. If set, --chunk_length is the "
            "combined length below which adjacent chunks are merged."
        ),
    )
    parser.add_argument(
        "--silence_threshold", type=int, default=-40, help="Threshold in dB for silence based splitting."
    )
    parser.add_argument(
        "--silence_min_len", type=int, default=1000, help="Minimum length of silence in milliseconds for splitting."
    )

    # Parse the arguments
    args = parser.parse_args(argv)

    # Call the split_audio function with the provided arguments
    split_audio(
        args.input_file,
        args.output_dir,
        args.chunk_length,
        args.output_format,
        args.silence_based,
        args.silence_threshold,
        args.silence_min_len,
    )
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()