Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions scripts/performance/argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@
"r100": 1,
}

# Model families that must always run under a specific perf domain, overriding
# whatever the user requested (or the default) on the command line.
REQUIRED_DOMAIN_BY_MODEL_FAMILY = {
    "qwen_vl": "qwen3vl",
}


def list_of_strings(arg):
"""Split a comma-separated string into a list of substrings."""
Expand Down Expand Up @@ -90,6 +94,15 @@ def bool_arg(arg):
raise ValueError(f"Invalid value for boolean argument: {arg}")


def resolve_domain(domain: str | None, model_family_name: str) -> str:
    """Resolve the effective domain for a model family.

    Args:
        domain: Domain requested on the CLI, or ``None``/empty for the default.
        model_family_name: Name of the model family being launched.

    Returns:
        The domain mandated for the model family when one exists, otherwise
        the requested domain (falling back to ``"llm"``).
    """
    # A mandated domain always wins over the requested/default one.
    forced = REQUIRED_DOMAIN_BY_MODEL_FAMILY.get(model_family_name)
    if forced is not None:
        return forced
    return domain if domain else "llm"


def is_cuda_graph_impl_valid(arg):
"""Validate and normalize the CUDA graph implementation argument."""
if arg in VALID_CUDA_GRAPH_IMPLS:
Expand Down
2 changes: 2 additions & 0 deletions scripts/performance/configs/qwen_vl/qwen3_vl_pretrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@
def set_qwen3_vl_common_configs(cfg: ConfigContainer) -> None:
"""Set common performance configurations for all Qwen3-VL configs."""
cfg.model.bias_activation_fusion = True
# Qwen3-VL uses a custom mRoPE path that does not support fused RoPE kernels.
cfg.model.apply_rope_fusion = False
cfg.model.recompute_granularity = None
cfg.model.recompute_method = None
cfg.model.recompute_num_layers = None
Expand Down
11 changes: 10 additions & 1 deletion scripts/performance/run_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import sys

import torch
from argument_parser import parse_cli_args
from argument_parser import parse_cli_args, resolve_domain
from utils.overrides import set_cli_overrides, set_post_overrides, set_user_overrides
from utils.utils import get_perf_optimized_recipe

Expand Down Expand Up @@ -67,6 +67,15 @@ def main():
# `argparse.parse_known_args()` returns the unknown args as a `list[str]`.
parser = parse_cli_args()
args, cli_overrides = parser.parse_known_args()
resolved_domain = resolve_domain(args.domain, args.model_family_name)
if resolved_domain != args.domain:
logger.info(
"Using domain '%s' for model family '%s' instead of requested/default '%s'.",
resolved_domain,
args.model_family_name,
args.domain,
)
args.domain = resolved_domain

if args.dump_env:
_dump_env_rank0()
Expand Down
7 changes: 7 additions & 0 deletions src/megatron/bridge/models/qwen_vl/qwen3_vl_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

from megatron.bridge.models.gpt_provider import GPTModelProvider
from megatron.bridge.models.qwen_vl.modelling_qwen3_vl.model import Qwen3VLModel
from megatron.bridge.utils import fusions


@dataclass
Expand Down Expand Up @@ -107,6 +108,9 @@ class Qwen3VLModelProvider(GPTModelProvider):

def provide(self, pre_process=None, post_process=None, vp_stage=None) -> Qwen3VLModel:
"""Provide a Qwen3 VL model instance with vision and language components."""
if not fusions.validate_rope_fusion_compatibility(self):
self.apply_rope_fusion = False

language_transformer_config = self
hf_vision_config = self.vision_config

Expand Down Expand Up @@ -257,6 +261,9 @@ def finalize(self) -> None:

def provide(self, pre_process=None, post_process=None, vp_stage=None) -> Qwen3VLModel:
"""Provide a Qwen3 VL MoE model instance with vision and language components."""
if not fusions.validate_rope_fusion_compatibility(self):
self.apply_rope_fusion = False

language_transformer_config = self
hf_vision_config = self.vision_config

Expand Down
18 changes: 14 additions & 4 deletions src/megatron/bridge/recipes/qwen_vl/qwen3_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ def _qwen3_vl_common(
return cfg


def _enable_235b_pipeline_split_accounting(model_cfg) -> None:
"""Account for embedding and loss stages in 235B pipeline splits."""
model_cfg.account_for_embedding_in_pipeline_split = True
model_cfg.account_for_loss_in_pipeline_split = True


# =============================================================================
# Qwen3-VL Pretrain Configurations (mock dataset)
# =============================================================================
Expand Down Expand Up @@ -251,7 +257,7 @@ def qwen3_vl_235b_a22b_pretrain_mock_config(**user_kwargs: Unpack[Qwen3VLCommonK
See `_qwen3_vl_common` for the full list of parameters.
"""
recommended_kwargs: Qwen3VLCommonKwargs = {
"hf_path": "Qwen/Qwen3-VL-235B-A22B",
"hf_path": "Qwen/Qwen3-VL-235B-A22B-Instruct",
"tensor_model_parallel_size": 4,
"pipeline_model_parallel_size": 16,
"expert_model_parallel_size": 8,
Expand All @@ -262,7 +268,9 @@ def qwen3_vl_235b_a22b_pretrain_mock_config(**user_kwargs: Unpack[Qwen3VLCommonK
"freeze_vision_projection": False,
}
combined_kwargs: Qwen3VLCommonKwargs = {**recommended_kwargs, **user_kwargs}
return _qwen3_vl_common(**combined_kwargs)
cfg = _qwen3_vl_common(**combined_kwargs)
_enable_235b_pipeline_split_accounting(cfg.model)
return cfg


def _make_energon_dataset(
Expand Down Expand Up @@ -573,7 +581,7 @@ def qwen3_vl_235b_a22b_sft_config() -> ConfigContainer:
cfg = _sft_common_vlm()

# Model configuration
hf_path = "Qwen/Qwen3-VL-235B-A22B"
hf_path = "Qwen/Qwen3-VL-235B-A22B-Instruct"
cfg.model = AutoBridge.from_hf_pretrained(hf_path).to_megatron_provider(load_weights=False)
cfg.model.seq_length = 4096

Expand All @@ -585,6 +593,7 @@ def qwen3_vl_235b_a22b_sft_config() -> ConfigContainer:
cfg.model.expert_model_parallel_size = 32
cfg.model.context_parallel_size = 1
cfg.model.sequence_parallel = False
_enable_235b_pipeline_split_accounting(cfg.model)

# VLM-specific settings
cfg.model.freeze_language_model = False
Expand Down Expand Up @@ -1007,7 +1016,7 @@ def qwen3_vl_235b_a22b_peft_config(peft_scheme: str | PEFT = "lora") -> ConfigCo
cfg.peft = peft_scheme

# Model configuration
hf_path = "Qwen/Qwen3-VL-235B-A22B"
hf_path = "Qwen/Qwen3-VL-235B-A22B-Instruct"
cfg.model = AutoBridge.from_hf_pretrained(hf_path).to_megatron_provider(load_weights=False)
cfg.model.seq_length = 4096

Expand All @@ -1019,6 +1028,7 @@ def qwen3_vl_235b_a22b_peft_config(peft_scheme: str | PEFT = "lora") -> ConfigCo
cfg.model.expert_model_parallel_size = 16
cfg.model.context_parallel_size = 1
cfg.model.sequence_parallel = False
_enable_235b_pipeline_split_accounting(cfg.model)

# VLM-specific settings
cfg.model.freeze_language_model = False
Expand Down
61 changes: 61 additions & 0 deletions tests/unit_tests/models/qwen_vl/test_qwen3_vl_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from types import SimpleNamespace

import pytest

import megatron.bridge.models.qwen_vl.qwen3_vl_provider as qwen3_vl_provider_module
from megatron.bridge.models.qwen_vl.qwen3_vl_provider import (
Qwen3VLModelProvider,
Qwen3VLMoEModelProvider,
)


class _DummyQwen3VLModel:
"""Minimal stand-in for Qwen3VLModel used to inspect provider inputs."""

def __init__(
self,
*,
language_transformer_config,
language_transformer_layer_spec,
vision_transformer_config,
pre_process,
post_process,
pg_collection,
):
self.language_transformer_config = language_transformer_config
self.language_transformer_layer_spec = language_transformer_layer_spec
self.vision_transformer_config = vision_transformer_config
self.pre_process = pre_process
self.post_process = post_process
self.pg_collection = pg_collection
self.freeze_calls = []

def freeze(self, **kwargs):
"""Record freeze calls without touching parameters."""
self.freeze_calls.append(kwargs)


@pytest.mark.parametrize("provider_cls", [Qwen3VLModelProvider, Qwen3VLMoEModelProvider])
def test_qwen3_vl_provide_disables_incompatible_rope_fusion(
    provider_cls,
    monkeypatch: pytest.MonkeyPatch,
):
    """Qwen3-VL providers should clear fused RoPE for mRoPE models before model build."""
    # Stub out the layer-spec factory and the model class so provide() runs
    # without constructing a real Megatron model.
    monkeypatch.setattr(
        qwen3_vl_provider_module,
        "get_gpt_layer_with_transformer_engine_spec",
        lambda **kwargs: SimpleNamespace(**kwargs),
    )
    monkeypatch.setattr(qwen3_vl_provider_module, "Qwen3VLModel", _DummyQwen3VLModel)

    provider = provider_cls(
        num_layers=4,
        hidden_size=256,
        num_attention_heads=8,
        apply_rope_fusion=True,
    )

    built = provider.provide()

    # Both the provider itself and the config handed to the model must agree.
    assert provider.apply_rope_fusion is False
    assert built.language_transformer_config.apply_rope_fusion is False
45 changes: 45 additions & 0 deletions tests/unit_tests/recipes/qwen_vl/test_qwen3_vl_recipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,18 @@ def to_megatron_provider(self, load_weights: bool = False):
return _FakeModelCfg()


class _TrackingAutoBridge(_FakeAutoBridge):
    """Fake AutoBridge that records the requested HF repo."""

    # Repo id from the most recent from_hf_pretrained() call, or None if
    # no call has happened yet.
    last_hf_path: str | None = None

    @staticmethod
    def from_hf_pretrained(hf_path: str):
        """Capture the HF repo id on the class, then behave like the fake bridge."""
        _TrackingAutoBridge.last_hf_path = hf_path
        bridge = _TrackingAutoBridge()
        return bridge


def _assert_basic_config(cfg):
"""Assert that a config has all required components."""
from megatron.bridge.training.config import ConfigContainer
Expand Down Expand Up @@ -294,6 +306,8 @@ def test_qwen3_vl_235b_sft_defaults(monkeypatch: pytest.MonkeyPatch):

# Check expert_model_parallel_size for MoE model
assert cfg.model.expert_model_parallel_size == 32
assert cfg.model.account_for_embedding_in_pipeline_split is True
assert cfg.model.account_for_loss_in_pipeline_split is True


def test_qwen3_vl_235b_peft_defaults(monkeypatch: pytest.MonkeyPatch):
Expand All @@ -312,6 +326,37 @@ def test_qwen3_vl_235b_peft_defaults(monkeypatch: pytest.MonkeyPatch):

# Check PEFT config
assert cfg.peft is not None
assert cfg.model.account_for_embedding_in_pipeline_split is True
assert cfg.model.account_for_loss_in_pipeline_split is True


@pytest.mark.parametrize(
    "recipe_func",
    [
        _qwen3_vl_module.qwen3_vl_235b_a22b_pretrain_mock_config,
        _qwen3_vl_module.qwen3_vl_235b_a22b_sft_config,
        _qwen3_vl_module.qwen3_vl_235b_a22b_peft_config,
    ],
)
def test_qwen3_vl_235b_uses_instruct_repo(recipe_func: Callable, monkeypatch: pytest.MonkeyPatch):
    """Test that 235B-A22B recipes point at the published Instruct HF repo."""
    expected_repo = "Qwen/Qwen3-VL-235B-A22B-Instruct"

    # Swap in the tracking bridge and clear any path recorded by earlier tests.
    monkeypatch.setattr(_qwen3_vl_module, "AutoBridge", _TrackingAutoBridge)
    _TrackingAutoBridge.last_hf_path = None

    cfg = recipe_func()

    assert _TrackingAutoBridge.last_hf_path == expected_repo
    assert cfg.dataset.hf_processor_path == expected_repo


def test_qwen3_vl_235b_a22b_pretrain_mock_uses_pipeline_split_accounting(monkeypatch: pytest.MonkeyPatch):
    """Test that 235B-A22B pretrain enables embedding/loss pipeline accounting."""
    monkeypatch.setattr(_qwen3_vl_module, "AutoBridge", _FakeAutoBridge)

    model_cfg = _qwen3_vl_module.qwen3_vl_235b_a22b_pretrain_mock_config().model

    assert model_cfg.account_for_embedding_in_pipeline_split is True
    assert model_cfg.account_for_loss_in_pipeline_split is True


def test_qwen3_vl_sft_has_hf_dataset_provider(monkeypatch: pytest.MonkeyPatch):
Expand Down
15 changes: 15 additions & 0 deletions tests/unit_tests/scripts/test_performance_offline_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,21 @@ def test_argparse_rejects_hf_token_with_offline(import_performance_module):
)


def test_resolve_domain_forces_qwen3vl_for_qwen_vl(import_performance_module):
    """Qwen3-VL perf launches should always use the dedicated qwen3vl domain."""
    argument_parser = import_performance_module("scripts.performance.argument_parser")

    # The override applies no matter which domain was requested.
    for requested in ("llm", "vlm"):
        assert argument_parser.resolve_domain(requested, "qwen_vl") == "qwen3vl"


def test_resolve_domain_keeps_llm_for_llama(import_performance_module):
    """Non-VLM model families should keep their requested/default domain."""
    parser_module = import_performance_module("scripts.performance.argument_parser")

    resolved = parser_module.resolve_domain("llm", "llama")
    assert resolved == "llm"


def test_slurm_executor_sets_offline_env_and_container_writable(import_performance_module):
"""Offline mode should set HF_HUB_OFFLINE and preserve the offline Transformers default."""
executors = import_performance_module("scripts.performance.utils.executors")
Expand Down
61 changes: 61 additions & 0 deletions tests/unit_tests/scripts/test_qwen3_vl_performance_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import importlib
import sys
from pathlib import Path


# Make scripts/performance importable: its perf-config modules are plain
# scripts, not an installed package, so they must be reachable via sys.path.
SCRIPTS_PERF_PATH = Path(__file__).parents[3] / "scripts" / "performance"
if str(SCRIPTS_PERF_PATH) not in sys.path:
    sys.path.insert(0, str(SCRIPTS_PERF_PATH))


class _FakeModelCfg:
"""Minimal fake model provider for Qwen3-VL perf config tests."""

def __init__(self):
self.tensor_model_parallel_size = 1
self.pipeline_model_parallel_size = 1
self.pipeline_dtype = None
self.virtual_pipeline_model_parallel_size = None
self.context_parallel_size = 1
self.expert_model_parallel_size = 1
self.expert_tensor_parallel_size = 1
self.sequence_parallel = False
self.seq_length = 64
self.freeze_language_model = False
self.freeze_vision_model = False
self.freeze_vision_projection = False
self.moe_token_dispatcher_type = None
self.moe_flex_dispatcher_backend = None
self.moe_hybridep_num_sms = None
self.moe_router_fusion = False
self.moe_permute_fusion = False
self.moe_grouped_gemm = False
self.moe_router_padding_for_fp8 = False
self.moe_shared_expert_overlap = False
self.moe_router_force_load_balancing = False
self.apply_rope_fusion = False

def finalize(self):
return None


class _FakeAutoBridge:
"""Fake AutoBridge used to avoid HF/network access in perf config tests."""

@staticmethod
def from_hf_pretrained(_hf_path: str):
return _FakeAutoBridge()

def to_megatron_provider(self, load_weights: bool = False):
return _FakeModelCfg()


def test_qwen3_vl_235b_perf_config_disables_rope_fusion(monkeypatch):
    """Qwen3-VL perf configs should not re-enable unsupported fused RoPE."""
    # Patch the recipe module's bridge so no HF/network access happens.
    recipe_module = importlib.import_module("megatron.bridge.recipes.qwen_vl.qwen3_vl")
    monkeypatch.setattr(recipe_module, "AutoBridge", _FakeAutoBridge)

    perf_module = importlib.import_module("configs.qwen_vl.qwen3_vl_pretrain")
    cfg = perf_module.qwen3_vl_235b_a22b_pretrain_config_h100(precision="bf16", mock=True)

    assert cfg.model.apply_rope_fusion is False
Loading