2 changes: 1 addition & 1 deletion requirements.txt
@@ -30,7 +30,7 @@ nvidia-modelopt[torch]~=0.37.0
 # torch 2.10.0+cu130 depends on nvidia-nccl-cu13==2.28.9
 nvidia-nccl-cu13>=2.28.9,<=2.29.2
 nvidia-cuda-nvrtc
-transformers==4.57.3
+transformers==5.3.0
 prometheus_client
 prometheus_fastapi_instrumentator
 pydantic>=2.9.1
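The pin bump to transformers 5.x is what every compatibility shim below guards against. For reference, a minimal sketch of the version gate such a patch module can key off, using only the `importlib.metadata` and `packaging` imports the patched file already carries (the constant names here are illustrative, not from this diff):

```python
import importlib.metadata

from packaging import version

# Illustrative names: resolve the installed transformers version once so
# compat patches can branch between the 4.x and 5.x code paths.
_HF_VERSION = version.parse(importlib.metadata.version("transformers"))
IS_TRANSFORMERS_V5 = _HF_VERSION >= version.parse("5.0.0")
```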
@@ -1,6 +1,7 @@
"""Patch for transformers SDPA mask to be export-compatible."""

import importlib.metadata
from functools import partial

from packaging import version

@@ -29,7 +30,14 @@ def _apply_patch(self):
         try:
             # imports only after version check
             from transformers import masking_utils
-            from transformers.integrations.executorch import sdpa_mask_without_vmap
 
+            # Up to ~4.53+, HF exposed this helper next to ExecuTorch export utilities.
+            # Transformers 5.x removed it; sdpa_mask now supports use_vmap=False (the
+            # default), which is export-compatible without vmap.
+            try:
+                from transformers.integrations.executorch import sdpa_mask_without_vmap
+            except ImportError:
+                sdpa_mask_without_vmap = partial(masking_utils.sdpa_mask, use_vmap=False)
+
             # recall original implementation
             self.original_values["masking_utils.sdpa_mask"] = masking_utils.sdpa_mask
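For context, the hunk records the original `sdpa_mask` before swapping in the export-compatible variant. A minimal sketch of that record-and-restore monkeypatch pattern (class and method names are illustrative, not the actual patch class in this diff):

```python
from transformers import masking_utils


class _SdpaMaskPatch:
    """Illustrative stand-in for the real patch class."""

    def __init__(self):
        self.original_values = {}

    def apply(self, export_compatible_mask):
        # keep the original callable so the patch can be undone later
        self.original_values["masking_utils.sdpa_mask"] = masking_utils.sdpa_mask
        masking_utils.sdpa_mask = export_compatible_mask

    def revert(self):
        masking_utils.sdpa_mask = self.original_values.pop(
            "masking_utils.sdpa_mask")
```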
38 changes: 38 additions & 0 deletions tensorrt_llm/_torch/models/hf_parameter_utils.py
@@ -0,0 +1,38 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Compatibility for Hugging Face ``get_parameter_device`` / ``get_parameter_dtype``.

Transformers v5 no longer exports these from ``transformers.modeling_utils``; they
match ``ModuleUtilsMixin`` behavior for plain ``nn.Module`` stacks.
"""

from __future__ import annotations

import torch.nn as nn

try:
from transformers.modeling_utils import (get_parameter_device,
get_parameter_dtype)
except ImportError:

def get_parameter_device(module: nn.Module) -> torch.device:
return next(module.parameters()).device

def get_parameter_dtype(module: nn.Module) -> torch.dtype:
return next(
param.dtype
for param in module.parameters()
if param.is_floating_point())
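A quick usage sketch for the helpers above; on the fallback path they need nothing beyond a plain `nn.Module`:

```python
import torch
import torch.nn as nn

from tensorrt_llm._torch.models.hf_parameter_utils import (
    get_parameter_device, get_parameter_dtype)

# Any parameterized module works; a small stack stands in for a vision tower.
tower = nn.Sequential(nn.Linear(8, 8), nn.GELU(), nn.Linear(8, 4))
assert get_parameter_device(tower) == torch.device("cpu")
assert get_parameter_dtype(tower) == torch.float32
```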
3 changes: 1 addition & 2 deletions tensorrt_llm/_torch/models/modeling_clip.py
@@ -4,13 +4,12 @@
 import torch.nn as nn
 from transformers.activations import ACT2FN
 from transformers.modeling_outputs import BaseModelOutput
-from transformers.modeling_utils import (get_parameter_device,
-                                         get_parameter_dtype)
 from transformers.models.clip.configuration_clip import CLIPVisionConfig
 from transformers.models.clip.modeling_clip import CLIPVisionEmbeddings
 
 from tensorrt_llm._utils import prefer_pinned
 
+from .hf_parameter_utils import get_parameter_device, get_parameter_dtype
 from ..attention_backend.interface import (AttentionMetadata,
                                            PredefinedAttentionMask)
 from ..attention_backend.utils import get_attention_backend
3 changes: 1 addition & 2 deletions tensorrt_llm/_torch/models/modeling_siglip.py
@@ -2,14 +2,13 @@

 import torch
 import torch.nn as nn
-from transformers.modeling_utils import (get_parameter_device,
-                                         get_parameter_dtype)
 from transformers.models.siglip.configuration_siglip import SiglipVisionConfig
 from transformers.models.siglip.modeling_siglip import (SiglipVisionConfig,
                                                         SiglipVisionEmbeddings)
 
 from tensorrt_llm._utils import prefer_pinned
 
+from .hf_parameter_utils import get_parameter_device, get_parameter_dtype
 from ..attention_backend.interface import AttentionMetadata
 from ..attention_backend.utils import get_attention_backend
 from ..model_config import ModelConfig
@@ -6,8 +6,7 @@
 import torch.nn.functional as F
 from diffusers.models.embeddings import PixArtAlphaTextProjection, TimestepEmbedding, Timesteps
 from tqdm import tqdm
-from transformers.modeling_utils import get_parameter_device
-
+from tensorrt_llm._torch.models.hf_parameter_utils import get_parameter_device
 from tensorrt_llm._torch.modules.layer_norm import LayerNorm
 from tensorrt_llm._torch.modules.linear import Linear
 from tensorrt_llm._torch.modules.mlp import MLP
9 changes: 7 additions & 2 deletions tensorrt_llm/models/gpt/convert.py
@@ -29,8 +29,13 @@
 import torch.nn as nn
 import yaml
 from tqdm import tqdm
-from transformers import (AutoModelForCausalLM, AutoModelForVision2Seq,
-                          AutoTokenizer)
+try:
+    from transformers import AutoModelForVision2Seq
+except ImportError:
+    # Transformers v5+: vision-to-seq auto models use AutoModelForImageTextToText
+    from transformers import AutoModelForImageTextToText as AutoModelForVision2Seq
+
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.models.gpt2.modeling_gpt2 import GPT2Block
 from transformers.pytorch_utils import Conv1D
 
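With the alias in place, call sites keep the 4.x name on either major version; a minimal usage sketch (the checkpoint id is a placeholder, not from this diff):

```python
# Placeholder checkpoint id; the alias resolves to AutoModelForImageTextToText
# on transformers 5.x and to the native AutoModelForVision2Seq on 4.x.
model = AutoModelForVision2Seq.from_pretrained("org/placeholder-vision2seq")
```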
11 changes: 8 additions & 3 deletions tensorrt_llm/tools/multimodal_builder.py
@@ -14,10 +14,15 @@
 from tensorrt_llm._utils import torch_dtype_to_str, to_json_file
 from tensorrt_llm.builder import Builder
 from tensorrt_llm.logger import logger
+try:
+    from transformers import AutoModelForVision2Seq
+except ImportError:
+    # Transformers v5+: vision-to-seq auto models use AutoModelForImageTextToText
+    from transformers import AutoModelForImageTextToText as AutoModelForVision2Seq
+
 from transformers import (AutoConfig, AutoModel, AutoModelForCausalLM,
-                          AutoModelForVision2Seq, AutoProcessor,
-                          Blip2ForConditionalGeneration, Blip2Processor,
-                          FuyuForCausalLM, FuyuProcessor,
+                          AutoProcessor, Blip2ForConditionalGeneration,
+                          Blip2Processor, FuyuForCausalLM, FuyuProcessor,
                           LlavaForConditionalGeneration, NougatProcessor,
                           Pix2StructForConditionalGeneration,
                           VisionEncoderDecoderModel, CLIPVisionModel)