From 01eb490fe34eaae0143d5adde9b3a4a9b3cabddb Mon Sep 17 00:00:00 2001
From: weoses
Date: Thu, 21 May 2026 00:13:02 +0200
Subject: [PATCH] - use different models for extractors of audio/video -
 separate-audio mode (in case that model does not work with audio-in-video)

---
Note: every entry in roles/deploy/templates/.env.j2, including the new
EXTRACTING_SEPARATE_AUDIO, is written in dotenv KEY=value form.

 docker-compose.deploy.yml      | 10 ++++++++--
 group_vars/all.yml             | 17 +++++++++++------
 roles/deploy/templates/.env.j2 |  9 +++++++--
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/docker-compose.deploy.yml b/docker-compose.deploy.yml
index 0ba8d5b..057f2fa 100644
--- a/docker-compose.deploy.yml
+++ b/docker-compose.deploy.yml
@@ -28,7 +28,9 @@ services:
 
       GEMINI_EXTRACTOR_APIKEY: ${GEMINI_EXTRACTOR_APIKEY}
       GEMINI_EXTRACTOR_APIENDPOINT: ${GEMINI_EXTRACTOR_APIENDPOINT}
-      GEMINI_EXTRACTOR_MODEL: ${GEMINI_EXTRACTOR_MODEL}
+      GEMINI_EXTRACTOR_MODEL_IMAGE: ${GEMINI_EXTRACTOR_MODEL_IMAGE}
+      GEMINI_EXTRACTOR_MODEL_VIDEO: ${GEMINI_EXTRACTOR_MODEL_VIDEO}
+      GEMINI_EXTRACTOR_MODEL_AUDIO: ${GEMINI_EXTRACTOR_MODEL_AUDIO}
 
       EXTRACTOR_PROVIDER: ${EXTRACTOR_PROVIDER}
       EMBEDDER_PROVIDER: ${EMBEDDER_PROVIDER}
@@ -37,7 +39,11 @@ services:
       OPENROUTER_EMBEDDING_MODEL: ${OPENROUTER_EMBEDDING_MODEL}
 
       OPENROUTER_EXTRACTOR_APIKEY: ${OPENROUTER_EXTRACTOR_APIKEY}
-      OPENROUTER_EXTRACTOR_MODEL: ${OPENROUTER_EXTRACTOR_MODEL}
+      OPENROUTER_EXTRACTOR_MODEL_IMAGE: ${OPENROUTER_EXTRACTOR_MODEL_IMAGE}
+      OPENROUTER_EXTRACTOR_MODEL_VIDEO: ${OPENROUTER_EXTRACTOR_MODEL_VIDEO}
+      OPENROUTER_EXTRACTOR_MODEL_AUDIO: ${OPENROUTER_EXTRACTOR_MODEL_AUDIO}
+
+      EXTRACTING_SEPARATE_AUDIO: ${EXTRACTING_SEPARATE_AUDIO}
 
       FFMPEG_BINARY: ${FFMPEG_BINARY}
       FFMPEG_CPULIMIT: ${FFMPEG_CPULIMIT}
diff --git a/group_vars/all.yml b/group_vars/all.yml
index ba07645..b2556e2 100644
--- a/group_vars/all.yml
+++ b/group_vars/all.yml
@@ -14,23 +14,28 @@ ffmpeg_binary: ffmpeg
 ffmpeg_cpulimit: 80
 ffmpeg_threadslimit:
 
+# LLM provider selection (gemini | openrouter)
+extractor_provider: "openrouter"
+embedder_provider: "gemini"
+extracting_separate_audio: false
+
 # Gemini embedding model
 gemini_embedding_model: "gemini-embedding-2-preview"
 gemini_embedding_api_endpoint: ""
 
 # Gemini extractor model
-gemini_extractor_model: "gemini-2.5-flash"
+gemini_extractor_model_image: "gemini-2.5-flash"
+gemini_extractor_model_video: "gemini-2.5-flash"
+gemini_extractor_model_audio: "gemini-2.5-flash"
 gemini_extractor_api_endpoint: ""
 
-# LLM provider selection (gemini | openrouter)
-extractor_provider: "openrouter"
-embedder_provider: "gemini"
-
 # OpenRouter embedding model
 openrouter_embedding_model: ""
 
 # OpenRouter extractor model
-openrouter_extractor_model: "google/gemini-3-flash-preview"
+openrouter_extractor_model_image: "google/gemini-3-flash-preview"
+openrouter_extractor_model_video: "google/gemini-3-flash-preview"
+openrouter_extractor_model_audio: "google/gemini-3-flash-preview"
 
 
 # filled by runner
diff --git a/roles/deploy/templates/.env.j2 b/roles/deploy/templates/.env.j2
index 55dcdba..08a44e8 100644
--- a/roles/deploy/templates/.env.j2
+++ b/roles/deploy/templates/.env.j2
@@ -15,6 +15,7 @@ S3_SECURE={{ s3_secure }}
 S3_MEDIA_BUCKET={{ s3_media_bucket }}
 S3_TEMP_BUCKET={{ s3_temp_bucket }}
 
+EXTRACTING_SEPARATE_AUDIO={{ extracting_separate_audio }}
 
 # Gemini embedding
 GEMINI_EMBEDDING_APIKEY={{ gemini_api_key }}
@@ -24,7 +25,9 @@ GEMINI_EMBEDDING_MODEL={{ gemini_embedding_model }}
 # Gemini extractor
 GEMINI_EXTRACTOR_APIKEY={{ gemini_api_key }}
 GEMINI_EXTRACTOR_APIENDPOINT={{ gemini_extractor_api_endpoint }}
-GEMINI_EXTRACTOR_MODEL={{ gemini_extractor_model }}
+GEMINI_EXTRACTOR_MODEL_IMAGE={{ gemini_extractor_model_image }}
+GEMINI_EXTRACTOR_MODEL_VIDEO={{ gemini_extractor_model_video }}
+GEMINI_EXTRACTOR_MODEL_AUDIO={{ gemini_extractor_model_audio }}
 
 # LLM provider selection
 EXTRACTOR_PROVIDER={{ extractor_provider }}
@@ -36,7 +39,9 @@ OPENROUTER_EMBEDDING_MODEL={{ openrouter_embedding_model }}
 
 # OpenRouter extractor
 OPENROUTER_EXTRACTOR_APIKEY={{ openrouter_api_key | default('') }}
-OPENROUTER_EXTRACTOR_MODEL={{ openrouter_extractor_model }}
+OPENROUTER_EXTRACTOR_MODEL_IMAGE={{ openrouter_extractor_model_image }}
+OPENROUTER_EXTRACTOR_MODEL_VIDEO={{ openrouter_extractor_model_video }}
+OPENROUTER_EXTRACTOR_MODEL_AUDIO={{ openrouter_extractor_model_audio }}
 
 # PostgreSQL
 POSTGRES_DB={{ postgres_db }}