InternLM · lvhan028 · Apr 8, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/lmdeploy/api.py b/lmdeploy/api.py
@@ -17,6 +17,7 @@ def pipeline(model_path: str,
              chat_template_config: ChatTemplateConfig | None = None,
              log_level: str = 'WARNING',
              max_log_len: int | None = None,
+             trust_remote_code: bool = False,
              speculative_config: SpeculativeConfig | None = None,
              **kwargs):
     """Create a pipeline for inference.
@@ -41,6 +42,7 @@ def pipeline(model_path: str,
             ``WARNING``, ``INFO``, ``DEBUG``]
         max_log_len: Max number of prompt characters or prompt tokens
             being printed in log.
+        trust_remote_code: whether to trust remote code from model repositories.
         speculative_config: speculative decoding configuration.
         **kwargs: additional keyword arguments passed to the pipeline.
 
@@ -73,6 +75,7 @@ def pipeline(model_path: str,
                     chat_template_config=chat_template_config,
                     log_level=log_level,
                     max_log_len=max_log_len,
+                    trust_remote_code=trust_remote_code,
                     speculative_config=speculative_config,
                     **kwargs)
 

diff --git a/lmdeploy/archs.py b/lmdeploy/archs.py
@@ -128,14 +128,14 @@ def check_vl_llm(backend: str, config: dict) -> bool:
     return False
 
 
-def get_task(backend: str, model_path: str):
+def get_task(backend: str, model_path: str, trust_remote_code: bool = False):
     """Get pipeline type and pipeline class from model config."""
     from lmdeploy.serve.core import AsyncEngine
 
     if os.path.exists(os.path.join(model_path, 'triton_models', 'weights')):
         # workspace model
         return 'llm', AsyncEngine
-    _, config = get_model_arch(model_path)
+    _, config = get_model_arch(model_path, trust_remote_code=trust_remote_code)
     if check_vl_llm(backend, config.to_dict()):
         from lmdeploy.serve.core import VLAsyncEngine
         return 'vlm', VLAsyncEngine
@@ -144,17 +144,17 @@ def get_task(backend: str, model_path: str):
     return 'llm', AsyncEngine
 
 
-def get_model_arch(model_path: str):
+def get_model_arch(model_path: str, trust_remote_code: bool = False):
     """Get a model's architecture and configuration.
 
     Args:
         model_path(str): the model path
     """
     try:
-        cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
+        cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=trust_remote_code)
     except Exception as e:  # noqa
         from transformers import PretrainedConfig
-        cfg = PretrainedConfig.from_pretrained(model_path, trust_remote_code=True)
+        cfg = PretrainedConfig.from_pretrained(model_path, trust_remote_code=trust_remote_code)
 
     _cfg = cfg.to_dict()
     if _cfg.get('architectures', None):

diff --git a/lmdeploy/cli/serve.py b/lmdeploy/cli/serve.py
@@ -63,6 +63,9 @@ def add_parser_api_server():
                             default=['*'],
                             help='A list of allowed http headers for cors')
         parser.add_argument('--proxy-url', type=str, default=None, help='The proxy url for api server.')
+        parser.add_argument('--trust-remote-code',
+                            action='store_true',
+                            help='Whether to trust remote code from model repositories.')
         parser.add_argument('--max-concurrent-requests',
                             type=int,
                             default=None,
@@ -303,6 +306,7 @@ def api_server(args):
                 max_log_len=args.max_log_len,
                 disable_fastapi_docs=args.disable_fastapi_docs,
                 max_concurrent_requests=args.max_concurrent_requests,
+                trust_remote_code=args.trust_remote_code,
                 reasoning_parser=args.reasoning_parser,
                 tool_call_parser=args.tool_call_parser,
                 speculative_config=speculative_config,
@@ -334,6 +338,7 @@ def api_server(args):
                 max_log_len=args.max_log_len,
                 disable_fastapi_docs=args.disable_fastapi_docs,
                 max_concurrent_requests=args.max_concurrent_requests,
+                trust_remote_code=args.trust_remote_code,
                 reasoning_parser=args.reasoning_parser,
                 tool_call_parser=args.tool_call_parser,
                 speculative_config=speculative_config,

diff --git a/lmdeploy/pipeline.py b/lmdeploy/pipeline.py
@@ -39,6 +39,7 @@ def __init__(self,
                  chat_template_config: ChatTemplateConfig | None = None,
                  log_level: str = 'WARNING',
                  max_log_len: int | None = None,
+                 trust_remote_code: bool = False,
                  speculative_config: SpeculativeConfig | None = None,
                  **kwargs):
         """Initialize Pipeline.
@@ -49,6 +50,7 @@ def __init__(self,
             chat_template_config: Chat template configuration.
             log_level: Log level.
             max_log_len: Max number of prompt characters or prompt tokens being printed in log.
+            trust_remote_code: Whether to trust remote code from model repositories.
             speculative_config: Speculative decoding configuration.
             **kwargs: Additional keyword arguments.
         """
@@ -69,12 +71,13 @@ def __init__(self,
 
         # Create inference engine
         backend, backend_config = autoget_backend_config(model_path, backend_config)
-        _, pipeline_class = get_task(backend, model_path)
+        _, pipeline_class = get_task(backend, model_path, trust_remote_code=trust_remote_code)
         self.async_engine = pipeline_class(model_path,
                                            backend=backend,
                                            backend_config=backend_config,
                                            chat_template_config=chat_template_config,
                                            max_log_len=max_log_len,
+                                           trust_remote_code=trust_remote_code,
                                            speculative_config=speculative_config,
                                            **kwargs)
         self.internal_thread = _EventLoopThread(daemon=True)

diff --git a/lmdeploy/pytorch/config.py b/lmdeploy/pytorch/config.py
@@ -365,7 +365,7 @@ def get_head_size(self):
     def from_pretrained(
         cls,
         pretrained_model_name_or_path: str,
-        trust_remote_code: bool = True,
+        trust_remote_code: bool = False,
         dtype: str = 'auto',
         dist_config: DistConfig = None,
         hf_overrides: dict[str, Any] = None,
@@ -563,10 +563,11 @@ def from_config(
         target_cache_cfg: CacheConfig,
         target_model: str = None,
         dtype: str = 'auto',
+        trust_remote_code: bool = False,
     ):
         model = model or target_model
         model_config = ModelConfig.from_pretrained(model,
-                                                   trust_remote_code=True,
+                                                   trust_remote_code=trust_remote_code,
                                                    dtype=dtype,
                                                    is_draft_model=True,
                                                    spec_method=method,

diff --git a/lmdeploy/pytorch/engine/config_builder.py b/lmdeploy/pytorch/engine/config_builder.py
@@ -98,7 +98,7 @@ def build_misc_config(engine_config: PytorchEngineConfig):
 
     @staticmethod
     def build_specdecode_config(target_model, speculative_config: SpeculativeConfig, engine_config: PytorchEngineConfig,
-                                cache_config: CacheConfig):
+                                cache_config: CacheConfig, trust_remote_code: bool = False):
         """Build spec decode config."""
         specdecode_config = None
         if speculative_config is not None:
@@ -113,5 +113,6 @@ def build_specdecode_config(target_model, speculative_config: SpeculativeConfig,
                 target_model=target_model,
                 target_cache_cfg=cache_config,
                 dtype=engine_config.dtype,
+                trust_remote_code=trust_remote_code,
             )
         return specdecode_config
diff --git a/lmdeploy/pytorch/engine/engine.py b/lmdeploy/pytorch/engine/engine.py
@@ -94,7 +94,7 @@ def __init__(
         self,
         model_path: str,
         engine_config: PytorchEngineConfig = None,
-        trust_remote_code: bool = True,
+        trust_remote_code: bool = False,
         speculative_config: SpeculativeConfig = None,
     ) -> None:
         # make sure engine config exist
@@ -133,7 +133,7 @@ def __init__(
         misc_config = ConfigBuilder.build_misc_config(engine_config)
         # spec decode
         self.specdecode_config = ConfigBuilder.build_specdecode_config(model_path, speculative_config, engine_config,
-                                                                       cache_config)
+                                                                       cache_config, trust_remote_code)
 
         # build model agent
         self.executor = build_executor(
@@ -147,6 +147,7 @@ def __init__(
             distributed_executor_backend=engine_config.distributed_executor_backend,
             dtype=engine_config.dtype,
             specdecode_config=self.specdecode_config,
+            trust_remote_code=trust_remote_code,
         )
         self.executor.init()
 
@@ -198,7 +199,7 @@ def __init__(
     def from_pretrained(cls,
                         pretrained_model_name_or_path: str,
                         engine_config: PytorchEngineConfig = None,
-                        trust_remote_code: bool = True,
+                        trust_remote_code: bool = False,
                         speculative_config: SpeculativeConfig = None,
                         **kwargs):
         """Lmdeploy python inference engine.

diff --git a/lmdeploy/pytorch/engine/executor/__init__.py b/lmdeploy/pytorch/engine/executor/__init__.py
@@ -63,6 +63,7 @@ def build_executor(
     distributed_executor_backend: str = None,
     dtype: str = 'auto',
     specdecode_config: SpecDecodeConfig = None,
+    trust_remote_code: bool = False,
 ) -> ExecutorBase:
     """Build model agent executor."""
     logger = get_logger('lmdeploy')
@@ -71,7 +72,7 @@ def build_executor(
 
     model_config = ModelConfig.from_pretrained(
         model_path,
-        trust_remote_code=True,
+        trust_remote_code=trust_remote_code,
         dtype=dtype,
         hf_overrides=misc_config.hf_overrides,
         dist_config=dist_config,

diff --git a/lmdeploy/serve/core/async_engine.py b/lmdeploy/serve/core/async_engine.py
@@ -110,6 +110,7 @@ def __init__(self,
                  backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None,
                  chat_template_config: ChatTemplateConfig | None = None,
                  max_log_len: int | None = None,
+                 trust_remote_code: bool = False,
                  speculative_config: SpeculativeConfig | None = None,
                  **kwargs) -> None:
         logger.info(f'input backend={backend}, backend_config={backend_config}')
@@ -118,21 +119,25 @@ def __init__(self,
                                             if backend == 'turbomind' else PytorchEngineConfig())
         self.model_name = model_name if model_name else model_path
         self.chat_template = get_chat_template(model_path, chat_template_config)
-        self.tokenizer = Tokenizer(model_path)
+        self.tokenizer = Tokenizer(model_path, trust_remote_code=trust_remote_code)
         self.prompt_processor = MultimodalProcessor(self.tokenizer, self.chat_template)
-        self.hf_gen_cfg = get_hf_gen_cfg(model_path)
-        self.arch, self.hf_cfg = get_model_arch(model_path)
+        self.hf_gen_cfg = get_hf_gen_cfg(model_path, trust_remote_code=trust_remote_code)
+        self.arch, self.hf_cfg = get_model_arch(model_path, trust_remote_code=trust_remote_code)
         self.session_len = (_get_and_verify_max_len(self.hf_cfg, None)
                             if backend_config.session_len is None else backend_config.session_len)
         backend_config.session_len = self.session_len
         if speculative_config is not None and backend == 'turbomind':
             logger.warning('speculative decoding is not supported by turbomind ')
         # build backend engine
         if backend == 'turbomind':
-            self.engine = self._build_turbomind(model_path=model_path, backend_config=backend_config, **kwargs)
+            self.engine = self._build_turbomind(model_path=model_path,
+                                                backend_config=backend_config,
+                                                trust_remote_code=trust_remote_code,
+                                                **kwargs)
         elif backend == 'pytorch':
             self.engine = self._build_pytorch(model_path=model_path,
                                               backend_config=backend_config,
+                                              trust_remote_code=trust_remote_code,
                                               speculative_config=speculative_config,
                                               **kwargs)
         else:
@@ -169,19 +174,30 @@ def __enter__(self):
     def __exit__(self, exc_type, exc_value, traceback):
         self.close()
 
-    def _build_turbomind(self, model_path: str, backend_config: TurbomindEngineConfig | None = None, **kwargs):
+    def _build_turbomind(self,
+                         model_path: str,
+                         backend_config: TurbomindEngineConfig | None = None,
+                         trust_remote_code: bool = False,
+                         **kwargs):
         """Inner build method for turbomind backend."""
         from lmdeploy import turbomind as tm
-        return tm.TurboMind.from_pretrained(model_path, engine_config=backend_config, **kwargs)
+        return tm.TurboMind.from_pretrained(model_path,
+                                            engine_config=backend_config,
+                                            trust_remote_code=trust_remote_code,
+                                            **kwargs)
 
     def _build_pytorch(self,
                        model_path: str,
                        backend_config: PytorchEngineConfig | None = None,
+                       trust_remote_code: bool = False,
                        speculative_config: SpeculativeConfig | None = None,
                        **kwargs):
         """Inner build method for pytorch backend."""
         from lmdeploy.pytorch.engine import Engine
-        return Engine.from_pretrained(model_path, engine_config=backend_config, speculative_config=speculative_config)
+        return Engine.from_pretrained(model_path,
+                                      engine_config=backend_config,
+                                      trust_remote_code=trust_remote_code,
+                                      speculative_config=speculative_config)
 
     def _build_stat_loggers(self):
         self.stat_loggers = []

diff --git a/lmdeploy/serve/core/vl_async_engine.py b/lmdeploy/serve/core/vl_async_engine.py
@@ -17,6 +17,7 @@ def __init__(self,
                  backend: Literal['turbomind', 'pytorch'] = 'turbomind',
                  backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None,
                  vision_config: VisionConfig | None = None,
+                 trust_remote_code: bool = False,
                  **kwargs) -> None:
         from lmdeploy.serve.processors import MultimodalProcessor
         from lmdeploy.utils import try_import_deeplink
@@ -27,8 +28,16 @@ def __init__(self,
         if backend_config and backend_config.enable_prefix_caching:
             backend_config.enable_prefix_caching = False
             logger.warning('Prefix caching is disabled since LMDeploy hasn\'t support in on VL models yet')
-        self.vl_encoder = ImageEncoder(model_path, backend, vision_config, backend_config=backend_config)
-        super().__init__(model_path, backend=backend, backend_config=backend_config, **kwargs)
+        self.vl_encoder = ImageEncoder(model_path,
+                                       backend,
+                                       vision_config,
+                                       backend_config=backend_config,
+                                       trust_remote_code=trust_remote_code)
+        super().__init__(model_path,
+                         backend=backend,
+                         backend_config=backend_config,
+                         trust_remote_code=trust_remote_code,
+                         **kwargs)
         # Update prompt_processor to support multimodal processing
         self.prompt_processor = MultimodalProcessor(self.tokenizer,
                                                     self.chat_template,

diff --git a/lmdeploy/serve/openai/api_server.py b/lmdeploy/serve/openai/api_server.py
@@ -1415,6 +1415,7 @@ def serve(model_path: str,
           max_log_len: int | None = None,
           disable_fastapi_docs: bool = False,
           max_concurrent_requests: int | None = None,
+          trust_remote_code: bool = False,
           reasoning_parser: str | None = None,
           tool_call_parser: str | None = None,
           allow_terminate_by_client: bool = False,
@@ -1487,7 +1488,7 @@ def serve(model_path: str,
         http_or_https = 'https'
 
     handle_torchrun()
-    _, pipeline_class = get_task(backend, model_path)
+    _, pipeline_class = get_task(backend, model_path, trust_remote_code=trust_remote_code)
     if isinstance(backend_config, PytorchEngineConfig):
         backend_config.enable_mp_engine = True
         # router replay
@@ -1499,6 +1500,7 @@ def serve(model_path: str,
                                                     backend_config=backend_config,
                                                     chat_template_config=chat_template_config,
                                                     max_log_len=max_log_len,
+                                                    trust_remote_code=trust_remote_code,
                                                     speculative_config=speculative_config,
                                                     **kwargs)
     # set reasoning parser and tool parser