diff --git a/apps/subgen/Dockerfile b/apps/subgen/Dockerfile index 2070b83..2617011 100644 --- a/apps/subgen/Dockerfile +++ b/apps/subgen/Dockerfile @@ -10,7 +10,7 @@ ENV DEBIAN_FRONTEND=noninteractive \ PYTHONUNBUFFERED=1 RUN apt-get update && apt-get install -y --no-install-recommends \ - python3 python3-pip ffmpeg curl gosu tzdata && \ + python3 python3-pip ffmpeg git gosu tzdata && \ apt-get clean && rm -rf /var/lib/apt/lists/* # Install torch with cu128 for Blackwell, then SubGen deps @@ -20,13 +20,21 @@ RUN python3 -m pip install -U --no-cache-dir --break-system-packages \ numpy stable-ts-whisperless fastapi requests faster-whisper \ uvicorn python-multipart ffmpeg-python watchdog -# Pin SubGen to a specific commit +# Pin SubGen to a specific upstream commit. WORKDIR /subgen -RUN curl -fsSL "https://raw.githubusercontent.com/McCloudS/subgen/${VERSION}/subgen.py" -o subgen.py && \ - curl -fsSL "https://raw.githubusercontent.com/McCloudS/subgen/${VERSION}/launcher.py" -o launcher.py && \ - curl -fsSL "https://raw.githubusercontent.com/McCloudS/subgen/${VERSION}/language_code.py" -o language_code.py && \ - curl -fsSL "https://raw.githubusercontent.com/McCloudS/subgen/${VERSION}/entrypoint.sh" -o /entrypoint.sh && \ - chmod +x /entrypoint.sh +RUN git clone https://github.com/McCloudS/subgen.git . && \ + git checkout "${VERSION}" && \ + cp entrypoint.sh /entrypoint.sh && chmod +x /entrypoint.sh + +# Apply downstream patches against the pinned upstream tree. +# A for-loop only reports its last iteration's status, so each +# git apply is followed by `|| exit 1`: a hunk that no longer matches +# upstream fails the build instead of yielding a broken image. 
+COPY patches/ /tmp/patches/ +RUN for p in /tmp/patches/*.patch; do \ + echo "Applying $p" && git apply --verbose "$p" || exit 1; \ + done && \ + rm -rf /tmp/patches .git RUN mkdir -p /cache diff --git a/apps/subgen/patches/01-always-extract-audio-via-ffmpeg.patch b/apps/subgen/patches/01-always-extract-audio-via-ffmpeg.patch new file mode 100644 index 0000000..d598dd6 --- /dev/null +++ b/apps/subgen/patches/01-always-extract-audio-via-ffmpeg.patch @@ -0,0 +1,32 @@ +From: home-ops downstream +Subject: Always extract audio via external ffmpeg + +faster-whisper's internal PyAV-based decoder fails on certain DTS +streams with `Frame does not match AudioFifo parameters`, leaving +PyAV in a corrupted state that segfaults the entire process on the +next decode call. Documented upstream: + + https://github.com/Purfview/whisper-standalone-win/issues/236 + +subgen already has `extract_audio_track_to_memory()` which uses +ffmpeg-python (subprocess) and correctly handles the same files +that crash PyAV. The original code only invoked this path when a +file had multiple audio tracks; this patch makes external ffmpeg +extraction the default for any file with at least one audio track, +so faster-whisper always receives clean mono 16kHz WAV bytes. + +--- + subgen.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/subgen.py ++++ b/subgen.py +@@ -1517,7 +1517,7 @@ + audio_bytes = None + audio_tracks = get_audio_tracks(file_path) + +- if len(audio_tracks) > 1: ++ if audio_tracks: + logging.debug(f"Handling multiple audio tracks from {file_path} and planning to extract audio track of language {language}") + logging.debug( + "Audio tracks:\n"