diff --git a/slime/backends/megatron_utils/actor.py b/slime/backends/megatron_utils/actor.py index 586f88ded..0d9f65ec9 100644 --- a/slime/backends/megatron_utils/actor.py +++ b/slime/backends/megatron_utils/actor.py @@ -110,6 +110,11 @@ def init( # Load teacher model for Megatron-based on-policy distillation if with_opd_teacher: self.load_other_checkpoint("teacher", args.opd_teacher_load) + # Restore actor weights so update_weights() pushes the student (not teacher) to sglang. + # Without offload_train the generic recovery below is skipped, leaving self.model + # in teacher state and causing step-0 rollouts/evals to run with the teacher model. + if not self.args.offload_train: + self._switch_model("actor") if self.args.keep_old_actor: # Load old_actor checkpoint