diff --git a/.gitignore b/.gitignore
index 58be3ff..8e2662d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,4 +41,5 @@ out*/
 output/
 *.npz
 *.pkl
+recordings/
 
diff --git a/examples/handpose_ros2_publisher.py b/examples/handpose_ros2_publisher.py
index 15ae2d2..7e7f9fe 100644
--- a/examples/handpose_ros2_publisher.py
+++ b/examples/handpose_ros2_publisher.py
@@ -31,6 +31,7 @@
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
 from handpose.ik_retargeting import (  # noqa: E402
+    FINGER_TARGET_BODIES,
     ORCA_JOINT_NAMES,
     ORCAHandIKConfig,
     ORCAHandIKRetargeting,
@@ -38,10 +39,15 @@
 from handpose.tracker.hamer import HaMeRTracker  # noqa: E402
 
 
-def inject_target_bodies(mjcf_path: Path) -> str:
-    """Injects mocap bodies and tip sites into the MJCF XML string for IK targeting.
+def inject_target_bodies(mjcf_path: Path, target_joint_types: tuple[str, ...]) -> str:
+    """Inject mocap bodies for tracker sites and ensure IK target sites exist.
 
-    This matches the approach in live_demo_ik.py to ensure tip sites exist for all fingers.
+    Args:
+        mjcf_path: Path to MJCF model file
+        target_joint_types: Tuple of joint types to visualize (tip, ip, pip, mcp)
+
+    Returns:
+        Modified MJCF XML string
     """
     tree = ET.parse(mjcf_path)
     root = tree.getroot()
@@ -51,37 +57,99 @@ def inject_target_bodies(mjcf_path: Path) -> str:
         raise ValueError("Could not find worldbody in MJCF")
 
     # Convert relative paths to absolute paths
-    # MuJoCo can't resolve relative paths when loading from string
     model_dir = mjcf_path.parent
     for asset in root.findall(".//asset"):
         for mesh in asset.findall("mesh"):
             file_attr = mesh.get("file")
             if file_attr and not Path(file_attr).is_absolute():
-                # Convert relative path to absolute
                 abs_path = (model_dir / file_attr).resolve()
                 mesh.set("file", str(abs_path))
 
-    # Add tip sites for all fingers (required for tip tracking)
+    # All IK target sites use blue for consistency
+    ik_site_color = "0 0 1 0.9"  # Blue for all IK targets
+
+    # Tip site offsets: [x, y, z] where z is upward direction
     tip_site_specs = {
-        "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.018])),
-        "index": ("right_index_ip", np.array([0.0, 0.0, 0.020])),
-        "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.022])),
-        "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.021])),
-        "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.018])),
+        "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])),
+        "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])),
+        "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])),
+        "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])),
+        "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])),
     }
 
-    for finger, (parent_body, offset) in tip_site_specs.items():
-        body_elem = root.find(f".//body[@name='{parent_body}']")
-        if body_elem is None:
-            continue
-        site_name = f"right_{finger}_tip_site"
-        if body_elem.find(f"./site[@name='{site_name}']") is not None:
-            continue
-        site = ET.SubElement(body_elem, "site")
-        site.set("name", site_name)
-        site.set("pos", " ".join(f"{value:.5f}" for value in offset))
-        site.set("size", "0.0025")
-        site.set("rgba", "1 0.6 0.2 0.8")
+    if "tip" in target_joint_types:
+        tip_sites_created = []
+        for finger, (parent_body, offset) in tip_site_specs.items():
+            body_elem = root.find(f".//body[@name='{parent_body}']")
+            if body_elem is None:
+                print(f"Warning: Parent body '{parent_body}' not found for {finger} tip site")
+                continue
+            site_name = f"right_{finger}_tip_site"
+            existing_site = body_elem.find(f"./site[@name='{site_name}']")
+            if existing_site is not None:
+                # Update existing site - make it larger and blue
+                existing_site.set("type", "sphere")  # Ensure type is set
+                existing_site.set("size", "0.005")
+                existing_site.set("rgba", ik_site_color)
+                tip_sites_created.append(f"{finger} (updated)")
+            else:
+                # Create new tip site
+                site = ET.SubElement(body_elem, "site")
+                site.set("name", site_name)
+                site.set("type", "sphere")  # Explicit type for visibility
+                site.set("pos", " ".join(f"{value:.5f}" for value in offset))
+                site.set("size", "0.005")
+                site.set("rgba", ik_site_color)
+                tip_sites_created.append(f"{finger} (created)")
+        if tip_sites_created:
+            print(f"Tip sites: {', '.join(tip_sites_created)}")
+
+    # Add visual markers for IK target bodies (MCP, PIP, IP) that might not have visual geoms
+    allowed_types = set(target_joint_types)
+    markers_added = []
+    for finger_name, bodies in FINGER_TARGET_BODIES.items():
+        for joint_type, (frame_type, frame_name) in bodies.items():
+            if joint_type not in allowed_types:
+                continue
+
+            if frame_type == "body":
+                # Add a visual site to the body for visibility
+                body_elem = root.find(f".//body[@name='{frame_name}']")
+                if body_elem is not None:
+                    # Check if marker site already exists
+                    marker_site_name = f"ik_marker_{frame_name}"
+                    existing_marker = body_elem.find(f"./site[@name='{marker_site_name}']")
+                    if existing_marker is not None:
+                        # Update existing marker
+                        existing_marker.set("size", "0.004")  # Same size as tip sites
+                        existing_marker.set("rgba", ik_site_color)
+                        markers_added.append(f"{finger_name}_{joint_type} (updated)")
+                    else:
+                        # Create new marker site
+                        site = ET.SubElement(body_elem, "site")
+                        site.set("name", marker_site_name)
+                        site.set("pos", "0 0 0")  # At body origin
+                        site.set("size", "0.004")  # Same size as tip sites for consistency
+                        site.set("rgba", ik_site_color)  # Blue for all IK targets
+                        markers_added.append(f"{finger_name}_{joint_type} (created)")
+                else:
+                    print(f"Warning: Body '{frame_name}' not found in MJCF for {finger_name}_{joint_type}")
+            elif frame_type == "site":
+                # This is a tip site - find and update it (should already be handled above, but double-check)
+                found = False
+                for body in root.findall(".//body"):
+                    site_elem = body.find(f"./site[@name='{frame_name}']")
+                    if site_elem is not None:
+                        site_elem.set("size", "0.005")
+                        site_elem.set("rgba", ik_site_color)
+                        found = True
+                        markers_added.append(f"{finger_name}_{joint_type} (site updated)")
+                        break
+                if not found:
+                    print(f"Warning: Site '{frame_name}' not found in MJCF for {finger_name}_{joint_type}")
+
+    if markers_added:
+        print(f"Added/updated {len(markers_added)} IK target markers: {', '.join(markers_added)}")
 
     return ET.tostring(root, encoding="unicode")
 
@@ -101,11 +169,8 @@ def __init__(
         hold_last: bool = False,
         width: int = 1280,
         height: int = 720,
-        joint_smoothing: float = 1.0,
+        joint_smoothing: float = 0.7,
         target_joint_types: tuple[str, ...] = ("tip", "ip"),
-        auto_scale: bool = False,
-        auto_scale_once: bool = False,
-        auto_scale_update_rate: float = 0.1,
     ) -> None:
         """Initialize the HandPose ROS2 publisher.
 
@@ -120,11 +185,8 @@ def __init__(
             hold_last: If True, publish last valid joint values when hand is not detected
             width: Camera frame width
             height: Camera frame height
-            joint_smoothing: Smoothing factor for joint positions (0.0-1.0). Default: 1.0 (no smoothing)
+            joint_smoothing: Smoothing factor for joint positions (0.0-1.0). Default: 0.7
             target_joint_types: Tuple of joint types to target for IK (tip, ip, pip, mcp). Default: ("tip", "ip")
-            auto_scale: Enable automatic scale factor computation based on fingertip distances
-            auto_scale_once: Compute scale factor once on first hand detection (default: update every frame)
-            auto_scale_update_rate: Exponential smoothing rate for auto-scaling (0.0-1.0). Default: 0.1
         """
         super().__init__("handpose_hamer_publisher")
 
@@ -154,27 +216,34 @@ def __init__(
             raise FileNotFoundError(f"Model file not found: {model_file}")
 
         self.get_logger().info(f"Loading MuJoCo model from {model_file}")
-        # Inject tip sites into MJCF (required for tip tracking, matches live_demo_ik.py)
-        xml_string = inject_target_bodies(model_file)
+        # Inject tip sites into MJCF (required for tip tracking)
+        xml_string = inject_target_bodies(model_file, target_joint_types)
         self.model = mujoco.MjModel.from_xml_string(xml_string)
 
+        # Verify tip sites exist in the loaded model
+        if "tip" in target_joint_types:
+            self.get_logger().info("Verifying tip sites in loaded model:")
+            data_temp = mujoco.MjData(self.model)
+            for finger in ["thumb", "index", "middle", "ring", "pinky"]:
+                site_name = f"right_{finger}_tip_site"
+                site_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, site_name)
+                if site_id >= 0:
+                    site_pos = data_temp.site(site_id).xpos.copy()
+                    self.get_logger().info(
+                        f"  ✓ {site_name}: found at position [{site_pos[0]:.4f}, {site_pos[1]:.4f}, {site_pos[2]:.4f}]"
+                    )
+                else:
+                    self.get_logger().warn(f"  ✗ {site_name}: NOT FOUND in model!")
+
         # IK retargeting
-        # Use the same wrist_offset_palm as live_demo_ik.py to match USD model coordinate frame
-        # The default MJCF offset [0.002, -0.00144, -0.03872] may not match the USD model
         ik_cfg = ORCAHandIKConfig(
             scale_factor=scale,
             target_joint_types=target_joint_types,
-            wrist_offset_palm=np.array([0.000, 0.0, -0.05]),  # Match live_demo_ik.py
-            auto_scale=auto_scale,
-            auto_scale_update_rate=auto_scale_update_rate if auto_scale else 0.0,
-            auto_scale_use_neutral_pose=True,
+            lm_damping=0.5,
+            ik_iterations=15,
         )
         self.ik = ORCAHandIKRetargeting(self.model, config=ik_cfg)
 
-        # Track if we've computed the scale factor once (for auto-scale-once mode)
-        self.scale_computed_once = False
-        self.auto_scale_once = auto_scale_once
-
         # Initialize joint position smoothing state and MuJoCo data for forward kinematics
         self.data = mujoco.MjData(self.model)
         self.smoothed_qpos = self.data.qpos.copy()
@@ -224,16 +293,6 @@ def tick(self) -> None:
         joint_vals = None
         if hand is not None:
             try:
-                # Auto-scale (once mode)
-                if self.auto_scale_once and not self.scale_computed_once and self.ik.config.auto_scale:
-                    # Temporarily disable continuous auto-scaling
-                    self.ik.config.auto_scale = False
-                    # Compute scale factor once
-                    computed_scale = self.ik.compute_auto_scale_factor(hand, use_neutral_robot_pose=True)
-                    self.ik.config.scale_factor = computed_scale
-                    self.scale_computed_once = True
-                    self.get_logger().info(f"[Auto-Scale] Computed scale factor: {computed_scale:.3f}")
-
                 # Update IK solver configuration with current robot state (required for proper IK solving)
                 # This matches the approach in live_demo_ik.py
                 self.data.qpos[:] = self.smoothed_qpos
@@ -357,31 +416,15 @@ def main() -> None:
     parser.add_argument(
         "--joint-smoothing",
         type=float,
-        default=1.0,
+        default=0.7,
         help="Smoothing factor for joint positions (0.0-1.0). \
-        Lower values = more smoothing, higher = less smoothing. Default: 1.0 (no smoothing)",
+        Lower values = more smoothing, higher = less smoothing. Default: 0.7",
     )
     parser.add_argument(
         "--targets",
         type=str,
-        default="tip, ip",
-        help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip, ip",
-    )
-    parser.add_argument(
-        "--auto-scale",
-        action="store_true",
-        help="Enable automatic scale factor computation based on fingertip distances",
-    )
-    parser.add_argument(
-        "--auto-scale-once",
-        action="store_true",
-        help="Compute scale factor once on first hand detection (default: update every frame)",
-    )
-    parser.add_argument(
-        "--auto-scale-update-rate",
-        type=float,
-        default=0.1,
-        help="Exponential smoothing rate for auto-scaling (0.0-1.0). Lower = smoother. Default: 0.1",
+        default="tip",
+        help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip",
     )
 
     args = parser.parse_args()
@@ -400,14 +443,6 @@ def main() -> None:
     if not (0.0 < args.joint_smoothing <= 1.0):
         parser.error("--joint-smoothing must be between 0.0 and 1.0 (exclusive of 0.0)")
 
-    # Validate auto-scale update rate
-    if args.auto_scale and not (0.0 <= args.auto_scale_update_rate <= 1.0):
-        parser.error("--auto-scale-update-rate must be between 0.0 and 1.0")
-
-    # Warn if auto-scale-once is used without auto-scale
-    if args.auto_scale_once and not args.auto_scale:
-        parser.error("--auto-scale-once requires --auto-scale to be enabled")
-
     # Initialize ROS2
     rclpy.init()
 
@@ -425,9 +460,6 @@ def main() -> None:
             height=args.height,
             joint_smoothing=args.joint_smoothing,
             target_joint_types=target_joints,
-            auto_scale=args.auto_scale,
-            auto_scale_once=args.auto_scale_once,
-            auto_scale_update_rate=args.auto_scale_update_rate,
         )
         rclpy.spin(node)
         node.destroy_node()
diff --git a/examples/live_demo_ik.py b/examples/live_demo_ik.py
index 579b337..be9a7a9 100644
--- a/examples/live_demo_ik.py
+++ b/examples/live_demo_ik.py
@@ -53,8 +53,16 @@ def dual_window_process(frame_queue: mp.Queue, running_flag: Flag, window_name:
     cv2.destroyAllWindows()
 
 
-def inject_target_bodies(mjcf_path: Path) -> str:
-    """Injects mocap bodies into the MJCF XML string for visualization."""
+def inject_target_bodies(mjcf_path: Path, target_joint_types: tuple[str, ...]) -> str:
+    """Inject mocap bodies for tracker sites and ensure IK target sites exist.
+
+    Args:
+        mjcf_path: Path to MJCF model file
+        target_joint_types: Tuple of joint types to visualize (tip, ip, pip, mcp)
+
+    Returns:
+        Modified MJCF XML string
+    """
     tree = ET.parse(mjcf_path)
     root = tree.getroot()
     worldbody = root.find("worldbody")
@@ -63,60 +71,121 @@ def inject_target_bodies(mjcf_path: Path) -> str:
         raise ValueError("Could not find worldbody in MJCF")
 
     # Convert relative paths to absolute paths
-    # MuJoCo can't resolve relative paths when loading from string
     model_dir = mjcf_path.parent
     for asset in root.findall(".//asset"):
         for mesh in asset.findall("mesh"):
             file_attr = mesh.get("file")
             if file_attr and not Path(file_attr).is_absolute():
-                # Convert relative path to absolute
                 abs_path = (model_dir / file_attr).resolve()
                 mesh.set("file", str(abs_path))
 
-    # Add mocap bodies for all keypoints (MCP, PIP, TIP, and IP for thumb)
-    # Use different colors for different joint types
-    joint_colors = {
-        "mcp": "0 1 0 0.7",  # Green for MCP
-        "pip": "0 0 1 0.7",  # Blue for PIP
-        "ip": "1 1 0 0.7",  # Yellow for IP (thumb only)
-        "tip": "1 0 0 0.7",  # Red for TIP
+    # All IK target sites use blue for consistency
+    ik_site_color = "0 0 1 0.9"  # Blue for all IK targets
+
+    # Tip site offsets: [x, y, z] where z is upward direction
+    tip_site_specs = {
+        "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])),
+        "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])),
+        "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])),
+        "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])),
+        "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])),
     }
 
-    for finger, joints in FINGER_TARGET_BODIES.items():
-        for joint_type in joints.keys():
-            body = ET.SubElement(worldbody, "body")
-            body.set("name", f"target_{finger}_{joint_type}")
-            body.set("mocap", "true")
-            body.set("pos", "0 0 0")
+    if "tip" in target_joint_types:
+        tip_sites_created = []
+        for finger, (parent_body, offset) in tip_site_specs.items():
+            body_elem = root.find(f".//body[@name='{parent_body}']")
+            if body_elem is None:
+                print(f"Warning: Parent body '{parent_body}' not found for {finger} tip site")
+                continue
+            site_name = f"right_{finger}_tip_site"
+            existing_site = body_elem.find(f"./site[@name='{site_name}']")
+            if existing_site is not None:
+                # Update existing site - make it larger and blue
+                existing_site.set("type", "sphere")  # Ensure type is set
+                existing_site.set("size", "0.005")
+                existing_site.set("rgba", ik_site_color)
+                tip_sites_created.append(f"{finger} (updated)")
+            else:
+                # Create new tip site
+                site = ET.SubElement(body_elem, "site")
+                site.set("name", site_name)
+                site.set("type", "sphere")  # Explicit type for visibility
+                site.set("pos", " ".join(f"{value:.5f}" for value in offset))
+                site.set("size", "0.005")
+                site.set("rgba", ik_site_color)
+                tip_sites_created.append(f"{finger} (created)")
+        print(f"Tip sites: {', '.join(tip_sites_created)}")
+
+    # Add visual markers for IK target bodies (MCP, PIP, IP) that might not have visual geoms
+    allowed_types = set(target_joint_types)
+    markers_added = []
+    for finger_name, bodies in FINGER_TARGET_BODIES.items():
+        for joint_type, (frame_type, frame_name) in bodies.items():
+            if joint_type not in allowed_types:
+                continue
 
-            # Add a visual sphere (smaller than before)
-            geom = ET.SubElement(body, "geom")
-            geom.set("type", "sphere")
-            geom.set("size", "0.003")  # 3mm radius (smaller)
-            geom.set("rgba", joint_colors.get(joint_type, "0.5 0.5 0.5 0.7"))
-            geom.set("contype", "0")  # No collision
-            geom.set("conaffinity", "0")
+            if frame_type == "body":
+                # Add a visual site to the body for visibility
+                body_elem = root.find(f".//body[@name='{frame_name}']")
+                if body_elem is not None:
+                    # Check if marker site already exists
+                    marker_site_name = f"ik_marker_{frame_name}"
+                    existing_marker = body_elem.find(f"./site[@name='{marker_site_name}']")
+                    if existing_marker is not None:
+                        # Update existing marker
+                        existing_marker.set("size", "0.004")  # Same size as tip sites
+                        existing_marker.set("rgba", ik_site_color)
+                        markers_added.append(f"{finger_name}_{joint_type} (updated)")
+                    else:
+                        # Create new marker site
+                        site = ET.SubElement(body_elem, "site")
+                        site.set("name", marker_site_name)
+                        site.set("pos", "0 0 0")  # At body origin
+                        site.set("size", "0.004")  # Same size as tip sites for consistency
+                        site.set("rgba", ik_site_color)  # Blue for all IK targets
+                        markers_added.append(f"{finger_name}_{joint_type} (created)")
+                else:
+                    print(f"Warning: Body '{frame_name}' not found in MJCF for {finger_name}_{joint_type}")
+            elif frame_type == "site":
+                # This is a tip site - find and update it (should already be handled above, but double-check)
+                found = False
+                for body in root.findall(".//body"):
+                    site_elem = body.find(f"./site[@name='{frame_name}']")
+                    if site_elem is not None:
+                        site_elem.set("size", "0.005")
+                        site_elem.set("rgba", ik_site_color)
+                        found = True
+                        markers_added.append(f"{finger_name}_{joint_type} (site updated)")
+                        break
+                if not found:
+                    print(f"Warning: Site '{frame_name}' not found in MJCF for {finger_name}_{joint_type}")
+
+    print(f"Added/updated {len(markers_added)} IK target markers: {', '.join(markers_added)}")
+
+    # Add mocap bodies for tracker sites (MediaPipe landmarks)
+    # All tracker sites use red for consistency - these represent the actual tracked hand
+    tracker_color = "1 0 0 0.9"  # Red for all tracker sites (actual hand)
+
+    allowed_types = set(target_joint_types)
+    for finger_name, mp_mapping in MP_LANDMARK_INDICES.items():
+        for joint_type, mp_idx in mp_mapping.items():
+            if joint_type not in allowed_types:
+                continue
 
-    tip_site_specs = {
-        "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.018])),
-        "index": ("right_index_ip", np.array([0.0, 0.0, 0.020])),
-        "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.022])),
-        "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.021])),
-        "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.018])),
-    }
+            # Create mocap body for tracker site
+            mocap_body = ET.SubElement(worldbody, "body")
+            mocap_body.set("name", f"tracker_{finger_name}_{joint_type}")
+            mocap_body.set("mocap", "true")
+            mocap_body.set("pos", "0 0 0")
 
-    for finger, (parent_body, offset) in tip_site_specs.items():
-        body_elem = root.find(f".//body[@name='{parent_body}']")
-        if body_elem is None:
-            continue
-        site_name = f"right_{finger}_tip_site"
-        if body_elem.find(f"./site[@name='{site_name}']") is not None:
-            continue
-        site = ET.SubElement(body_elem, "site")
-        site.set("name", site_name)
-        site.set("pos", " ".join(f"{value:.5f}" for value in offset))
-        site.set("size", "0.0025")
-        site.set("rgba", "1 0.6 0.2 0.8")
+            # Add visual sphere for tracker site (larger, red)
+            geom = ET.SubElement(mocap_body, "geom")
+            geom.set("type", "sphere")
+            geom.set("size", "0.005")  # 5mm radius (larger than IK targets for visibility)
+            geom.set("rgba", tracker_color)  # Red for all tracker sites
+            geom.set("contype", "0")
+            geom.set("conaffinity", "0")
 
     return ET.tostring(root, encoding="unicode")
 
@@ -253,7 +322,6 @@ async def main_async(
     label_lookup: dict[int, list[str]],
     camera_matrix: np.ndarray | None = None,
     joint_smoothing: float = 1.0,
-    auto_scale_once: bool = False,
 ) -> None:
     """Async main loop with keyboard handling."""
     running = True
@@ -279,9 +347,6 @@ async def key_handler(key: str) -> None:
     # Initialize joint position smoothing state
     smoothed_qpos = data.qpos.copy()
 
-    # Track if we've computed scale once (for auto-scale-once mode)
-    scale_computed_once = False
-
     with mujoco.viewer.launch_passive(model, data) as viewer:
         while running and viewer.is_running() and cap.isOpened():
             ret, frame = cap.read()
@@ -298,16 +363,6 @@ async def key_handler(key: str) -> None:
             if hand_structures:
                 structure = hand_structures[0]
 
-                # --- AUTO-SCALE (once mode) ---
-                if auto_scale_once and not scale_computed_once and ik_solver.config.auto_scale:
-                    # Temporarily disable continuous auto-scaling
-                    ik_solver.config.auto_scale = False
-                    # Compute scale factor once
-                    computed_scale = ik_solver.compute_auto_scale_factor(structure, use_neutral_robot_pose=True)
-                    ik_solver.config.scale_factor = computed_scale
-                    scale_computed_once = True
-                    print(f"[Auto-Scale] Computed scale factor: {computed_scale:.3f}")
-
                 # --- IK SOLVE ---
                 # 1. Update the configuration object with current robot state
                 mujoco.mj_forward(model, data)
@@ -410,7 +465,7 @@ def main() -> None:
     parser.add_argument(
         "--targets",
         type=str,
-        default="tip, ip",
+        default="tip",
         help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip, ip",
     )
     parser.add_argument(
@@ -429,25 +484,9 @@ def main() -> None:
     parser.add_argument(
         "--joint-smoothing",
         type=float,
-        default=1.0,
+        default=0.7,
         help="Smoothing factor for joint positions (0.0-1.0). \
-        Lower values = more smoothing, higher = less smoothing. Default: 1.0 (no smoothing)",
-    )
-    parser.add_argument(
-        "--auto-scale",
-        action="store_true",
-        help="Enable automatic scale factor computation based on fingertip distances",
-    )
-    parser.add_argument(
-        "--auto-scale-once",
-        action="store_true",
-        help="Compute scale factor once on first hand detection (default: update every frame)",
-    )
-    parser.add_argument(
-        "--auto-scale-update-rate",
-        type=float,
-        default=0.1,
-        help="Exponential smoothing rate for auto-scaling (0.0-1.0). Lower = smoother. Default: 0.1",
+        Lower values = more smoothing, higher = less smoothing. Default: 0.7",
     )
     args = parser.parse_args()
 
@@ -461,16 +500,15 @@ def main() -> None:
         parser.error(f"Unsupported target joint types: {', '.join(invalid)}")
 
     # 1. Setup paths and model
-    script_dir = Path(__file__).parent
-    model_path = script_dir.parent / "models" / args.model if not Path(args.model).is_absolute() else Path(args.model)
-
-    if not model_path.exists():
-        print(f"Error: Model not found at {model_path}")
-        return
+    model_file = Path(args.model)
+    if not model_file.is_absolute():
+        model_file = Path(__file__).parent.parent / args.model
+    if not model_file.exists():
+        raise FileNotFoundError(f"Model file not found: {model_file}")
 
     # 2. Inject visualization bodies (Mocap)
-    print("Injecting visualization targets...")
-    xml_string = inject_target_bodies(model_path)
+    print(f"Loading model from {model_file}")
+    xml_string = inject_target_bodies(model_file, target_joints)
 
     # 3. Load MuJoCo Model
     model = mujoco.MjModel.from_xml_string(xml_string)
@@ -487,25 +525,38 @@ def main() -> None:
 
     ik_config = ORCAHandIKConfig(
         scale_factor=args.scale,
-        wrist_offset_palm=np.array([0.000, 0.0, -0.05]),
         target_joint_types=target_joints,
-        auto_scale=args.auto_scale,
-        auto_scale_update_rate=args.auto_scale_update_rate if args.auto_scale else 0.0,
-        auto_scale_use_neutral_pose=True,
+        lm_damping=0.5,
+        ik_iterations=15,
     )
 
     ik_solver = ORCAHandIKRetargeting(model, config=ik_config)
 
+    # Verify tip sites exist in the loaded model
+    if "tip" in target_joints:
+        print("\nVerifying tip sites in loaded model:")
+        for finger in ["thumb", "index", "middle", "ring", "pinky"]:
+            site_name = f"right_{finger}_tip_site"
+            site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, site_name)
+            if site_id >= 0:
+                site_pos = data.site(site_id).xpos.copy()
+                print(f"  ✓ {site_name}: found at position [{site_pos[0]:.4f}, {site_pos[1]:.4f}, {site_pos[2]:.4f}]")
+            else:
+                print(f"  ✗ {site_name}: NOT FOUND in model!")
+
     allowed_joint_types = set(target_joints)
 
     # 5. Helper to map finger names and joint types to mocap body IDs
+    # Note: These use "tracker_" prefix to match the visualization bodies created by inject_target_bodies
     target_body_ids: dict[str, dict[str, int]] = {}
     for finger, joints in FINGER_TARGET_BODIES.items():
         for joint_type in joints.keys():
             if joint_type not in allowed_joint_types:
                 continue
             finger_dict = target_body_ids.setdefault(finger, {})
-            bid = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, f"target_{finger}_{joint_type}")
+            bid = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, f"tracker_{finger}_{joint_type}")
+            if bid < 0:
+                print(f"Warning: Mocap body 'tracker_{finger}_{joint_type}' not found")
             finger_dict[joint_type] = bid
 
     # 6. Main Loop
@@ -538,14 +589,6 @@ def main() -> None:
     if not (0.0 < args.joint_smoothing <= 1.0):
         parser.error("--joint-smoothing must be between 0.0 and 1.0 (exclusive of 0.0)")
 
-    # Validate auto-scale update rate
-    if args.auto_scale and not (0.0 <= args.auto_scale_update_rate <= 1.0):
-        parser.error("--auto-scale-update-rate must be between 0.0 and 1.0")
-
-    # Warn if auto-scale-once is used without auto-scale
-    if args.auto_scale_once and not args.auto_scale:
-        parser.error("--auto-scale-once requires --auto-scale to be enabled")
-
     # Run async main loop
     asyncio.run(
         main_async(
@@ -560,7 +603,6 @@ def main() -> None:
             mp_label_lookup,
             camera_matrix,
             joint_smoothing=args.joint_smoothing,
-            auto_scale_once=args.auto_scale_once,
         )
     )
 
diff --git a/handpose/ik_retargeting.py b/handpose/ik_retargeting.py
index 4aed301..72453a2 100644
--- a/handpose/ik_retargeting.py
+++ b/handpose/ik_retargeting.py
@@ -5,6 +5,7 @@
 import mink
 import mujoco
 import numpy as np
+from mink.limits import CollisionAvoidanceLimit, ConfigurationLimit, Limit
 
 from handpose.tracker.base import HandStructure
 
@@ -44,22 +45,22 @@
     },
     "index": {
         "mcp": ("body", "right_index_mp"),
-        "pip": ("body", "right_index_pp"),
+        "pip": ("body", "right_index_ip"),  # _ip body contains the PIP joint
         "tip": ("site", "right_index_tip_site"),
     },
     "middle": {
         "mcp": ("body", "right_middle_mp"),
-        "pip": ("body", "right_middle_pp"),
+        "pip": ("body", "right_middle_ip"),  # _ip body contains the PIP joint
         "tip": ("site", "right_middle_tip_site"),
     },
     "ring": {
         "mcp": ("body", "right_ring_mp"),
-        "pip": ("body", "right_ring_pp"),
+        "pip": ("body", "right_ring_ip"),  # _ip body contains the PIP joint
         "tip": ("site", "right_ring_tip_site"),
     },
     "pinky": {
         "mcp": ("body", "right_pinky_mp"),
-        "pip": ("body", "right_pinky_pp"),
+        "pip": ("body", "right_pinky_ip"),  # _ip body contains the PIP joint
         "tip": ("site", "right_pinky_tip_site"),
     },
 }
@@ -106,16 +107,23 @@ class ORCAHandIKConfig:
     # Default from MJCF: right_wrist joint pos="0.002 -0.00144 -0.03872"
     wrist_offset_palm: np.ndarray | None = None
 
-    # IK solver parameters
+    # IK solver parameters (matching manus Mink configs)
     dt: float = 0.05  # Timestep for IK integration (seconds)
-    damping: float = 1e-2  # Levenberg-Marquardt damping
-    solver: str = "daqp"  # QP solver
+    damping: float = 1e-5  # Levenberg-Marquardt damping (matching manus: 1e-5)
+    solver: str = "quadprog"  # QP solver (matching manus: "quadprog")
     ik_iterations: int = 5  # Number of IK passes to perform before returning (for better convergence)
 
-    # Task costs
-    position_cost: float = 3.0  # Cost for position tracking
+    # Task costs (matching manus Mink configs)
+    position_cost: float = 1.0  # Cost for position tracking (matching manus: 1.0)
     orientation_cost: float = 0.0  # Cost for orientation tracking
-    posture_cost: float = 1e-4  # Cost for posture task (keeps hand near neutral)
+    lm_damping: float = 1.0  # Levenberg-Marquardt damping for FrameTask (matching manus: 1.0)
+
+    # Collision avoidance (optional)
+    use_collision_avoidance: bool = False  # Enable CollisionAvoidanceLimit
+    collision_geom_pairs: list[tuple[list[str], list[str]]] | None = None  # Geom pairs for collision avoidance
+    collision_gain: float = 0.85  # Collision avoidance gain (default from Mink)
+    collision_min_distance: float = 0.005  # Minimum distance between geoms (meters)
+    collision_detection_distance: float = 0.01  # Distance at which collision avoidance activates (meters)
 
     # Coordinate frame transformation
     # MediaPipe to Robot coordinate mapping: [MP_X, MP_Y, MP_Z] -> [Robot_X, Robot_Y, Robot_Z]
@@ -123,13 +131,6 @@ class ORCAHandIKConfig:
     coord_transform: np.ndarray | None = None
     target_joint_types: tuple[str, ...] = ("tip",)
 
-    # Auto-scaling options
-    auto_scale: bool = False  # If True, automatically computes scale_factor from fingertip distances
-    auto_scale_update_rate: float = (
-        0.1  # Exponential smoothing factor for auto-scaling (0.0 = no smoothing, 1.0 = instant)
-    )
-    auto_scale_use_neutral_pose: bool = True  # Use neutral robot pose for scale computation
-
     def __post_init__(self) -> None:
         """Initialize default values if None."""
         if self.wrist_offset_palm is None:
@@ -168,10 +169,6 @@ def __init__(
         self.model = model
         self.configuration = mink.Configuration(model)
 
-        # Initialize config
-        if config is None:
-            config = ORCAHandIKConfig.default_config()
-
         self.config = config
 
         # Identify the qpos indices for the finger joints we want to control
@@ -201,7 +198,7 @@ def __init__(
                     frame_type=frame_type,
                     position_cost=self.config.position_cost,
                     orientation_cost=self.config.orientation_cost,
-                    lm_damping=self.config.damping,
+                    lm_damping=self.config.lm_damping,
                 )
                 finger_tasks[joint_type] = task
             if finger_tasks:
@@ -213,127 +210,17 @@ def __init__(
                 "(tip tracking requires fingertip sites to be injected)."
             )
 
-        # We also need a posture task to encourage the hand to stay close to a "neutral" pose
-        # when not reaching for extremes. This prevents weird internal configurations.
-        self.posture_task = mink.PostureTask(
-            model,
-            cost=self.config.posture_cost,
-            lm_damping=self.config.damping,
-        )
-
-        self.target_pose = np.zeros(model.nq)
-        self.posture_task.set_target(self.target_pose)
-
-    def compute_auto_scale_factor(self, hand_structure: HandStructure, use_neutral_robot_pose: bool = True) -> float:
-        """Compute automatic scale factor based on fingertip distances.
-
-        Compares the distance from wrist to fingertips in the human hand
-        with the distance from wrist to fingertips in the ORCA hand model.
-
-        Args:
-            hand_structure: The tracked human hand structure
-            use_neutral_robot_pose: If True, uses neutral robot pose (q=0).
-                If False, uses current robot configuration.
-
-        Returns:
-            Scale factor (robot_fingertip_distance / human_fingertip_distance)
-        """
-        # Get human hand fingertip positions relative to wrist
-        wrist_pos = hand_structure.wrist_position
-        human_fingertips = {
-            "thumb": hand_structure.thumb.tip,
-            "index": hand_structure.index.tip,
-            "middle": hand_structure.middle.tip,
-            "ring": hand_structure.ring.tip,
-            "pinky": hand_structure.pinky.tip,
-        }
-
-        # Compute average distance from wrist to fingertips in human hand
-        human_distances = []
-        for finger, tip_pos in human_fingertips.items():
-            dist = np.linalg.norm(tip_pos - wrist_pos)
-            if dist > 0.01:  # Filter out invalid/too-small distances
-                human_distances.append(dist)
-
-        if len(human_distances) == 0:
-            return self.config.scale_factor  # Fallback to current scale
-
-        avg_human_distance = np.mean(human_distances)
-
-        # Get robot hand fingertip positions
-        if use_neutral_robot_pose:
-            # Save current configuration
-            saved_q = self.configuration.q.copy()
-            # Set to neutral pose
-            self.configuration.q[:] = 0.0
-            self.configuration.update()
-
-        # Get wrist position in robot model
-        t_palm = self.configuration.get_transform_frame_to_world("right_palm", "body")
-        p_palm = t_palm.translation()
-        r_palm = t_palm.rotation().as_matrix()
-        wrist_offset_world = r_palm @ self.config.wrist_offset_palm
-        p_wrist_robot = p_palm + wrist_offset_world
-
-        # Get fingertip positions from robot model using mink Configuration
-        robot_fingertips = {}
-        for finger_name in ["thumb", "index", "middle", "ring", "pinky"]:
-            if finger_name not in self.tasks:
-                continue
-            finger_tasks = self.tasks[finger_name]
-            if "tip" not in finger_tasks:
-                continue
-
-            # Get the frame name and type
-            frame_type, frame_name = FINGER_TARGET_BODIES[finger_name]["tip"]
-
-            # Get transform from mink Configuration
-            try:
-                if frame_type == "body":
-                    t_tip = self.configuration.get_transform_frame_to_world(frame_name, "body")
-                else:  # site
-                    t_tip = self.configuration.get_transform_frame_to_world(frame_name, "site")
-                tip_pos = t_tip.translation()
-                robot_fingertips[finger_name] = tip_pos
-            except Exception:
-                # Fallback: try to get from MuJoCo model directly
-                obj_type = mujoco.mjtObj.mjOBJ_BODY if frame_type == "body" else mujoco.mjtObj.mjOBJ_SITE
-                frame_id = mujoco.mj_name2id(self.model, obj_type, frame_name)
-                if frame_id >= 0:
-                    # Need to forward kinematics - use data
-                    data = mujoco.MjData(self.model)
-                    data.qpos[:] = self.configuration.q
-                    mujoco.mj_forward(self.model, data)
-                    if obj_type == mujoco.mjtObj.mjOBJ_SITE:
-                        tip_pos = data.site(frame_id).xpos.copy()
-                    else:
-                        tip_pos = data.body(frame_id).xpos.copy()
-                    robot_fingertips[finger_name] = tip_pos
-
-        # Restore configuration if we changed it
-        if use_neutral_robot_pose:
-            self.configuration.q[:] = saved_q
-            self.configuration.update()
-
-        # Compute average distance from wrist to fingertips in robot hand
-        robot_distances = []
-        for finger, tip_pos in robot_fingertips.items():
-            dist = np.linalg.norm(tip_pos - p_wrist_robot)
-            if dist > 0.01:  # Filter out invalid/too-small distances
-                robot_distances.append(dist)
-
-        if len(robot_distances) == 0:
-            return self.config.scale_factor  # Fallback to current scale
-
-        avg_robot_distance = np.mean(robot_distances)
-
-        # Compute scale factor
-        if avg_human_distance > 1e-6:
-            scale_factor = avg_robot_distance / avg_human_distance
-        else:
-            return self.config.scale_factor  # Fallback
-
-        return scale_factor
+        # Initialize collision avoidance limits if enabled
+        self.limits: list[Limit] = [ConfigurationLimit(model=model)]
+        if self.config.use_collision_avoidance and self.config.collision_geom_pairs:
+            collision_limit = CollisionAvoidanceLimit(
+                model=model,
+                geom_pairs=self.config.collision_geom_pairs,
+                gain=self.config.collision_gain,
+                minimum_distance_from_collisions=self.config.collision_min_distance,
+                collision_detection_distance=self.config.collision_detection_distance,
+            )
+            self.limits.append(collision_limit)
 
     def _hand_structure_to_landmarks(self, hand_structure: HandStructure) -> np.ndarray:
         """Convert HandStructure to 21x3 landmarks array (MediaPipe format).
@@ -455,20 +342,6 @@ def solve(self, hand_input: HandStructure | np.ndarray) -> np.ndarray:
         Returns:
             The full qpos array for the robot.
         """
-        # Auto-scale if enabled
-        if self.config.auto_scale and isinstance(hand_input, HandStructure):
-            new_scale = self.compute_auto_scale_factor(
-                hand_input, use_neutral_robot_pose=self.config.auto_scale_use_neutral_pose
-            )
-            # Apply exponential smoothing to avoid jitter
-            if self.config.auto_scale_update_rate > 0.0:
-                self.config.scale_factor = (
-                    self.config.auto_scale_update_rate * new_scale
-                    + (1.0 - self.config.auto_scale_update_rate) * self.config.scale_factor
-                )
-            else:
-                self.config.scale_factor = new_scale
-
         targets = self.compute_target_positions(hand_input)
 
         # Use configurable parameters
@@ -478,8 +351,8 @@ def solve(self, hand_input: HandStructure | np.ndarray) -> np.ndarray:
 
         # Perform multiple IK iterations for better convergence
         for iteration in range(self.config.ik_iterations):
-            # Build active tasks list (posture task + finger tasks)
-            active_tasks: list[mink.Task] = [self.posture_task]
+            # Build active tasks list (finger tasks only)
+            active_tasks: list[mink.Task] = []
 
             # Get palm transform and compute wrist position (consistent with compute_target_positions)
             # Recompute each iteration in case configuration changed
@@ -520,8 +393,8 @@ def solve(self, hand_input: HandStructure | np.ndarray) -> np.ndarray:
                     task.set_target(target_se3)
                     active_tasks.append(task)
 
-            # Solve IK
-            vel = mink.solve_ik(self.configuration, active_tasks, dt, solver, damping)
+            # Solve IK with limits (collision avoidance if enabled)
+            vel = mink.solve_ik(self.configuration, active_tasks, dt, solver, damping, limits=self.limits)
 
             # Integrate velocity to update configuration
             self.configuration.integrate_inplace(vel, dt)
diff --git a/models/orca_hand_fixed.mjcf b/models/orca_hand_fixed.mjcf
index 89209ae..2c48d66 100644
--- a/models/orca_hand_fixed.mjcf
+++ b/models/orca_hand_fixed.mjcf
@@ -3,6 +3,11 @@
   <!-- eulerseq "XYZ" corresponds to URDF "rpy" -->
   <compiler angle="radian" eulerseq="XYZ"/>
   
+  <!-- Visual settings for offscreen rendering -->
+  <visual>
+    <global offwidth="1920" offheight="1080"/>
+  </visual>
+  
   <default>
     <mesh scale="0.001 0.001 0.001"/>
     <!-- Visual classes -->
@@ -26,8 +31,8 @@
 
   <asset>
     <!-- Materials -->
-    <material name="white" rgba="1 1 1 1" shininess="0.5" specular="0.1"/>
-    <material name="black" rgba="0.25 0.25 0.25 1" shininess="0.2" specular="0.05"/>
+    <material name="white" rgba="1 1 1 0.3" shininess="0.5" specular="0.1"/>
+    <material name="black" rgba="0.25 0.25 0.25 0.3" shininess="0.2" specular="0.05"/>
     <material name="invisible" rgba="1 1 1 0" shininess="0" specular="0"/>
     
     <!-- Visual -->
diff --git a/pyproject.toml b/pyproject.toml
index afe77f0..59a84fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -94,6 +94,7 @@ module = [
     'sensor_msgs.*',
     'h5py',
     "colorlogging",
+    'mink.*',
 ]
 ignore_missing_imports = true
 
diff --git a/scripts/record_and_process_video.py b/scripts/record_and_process_video.py
new file mode 100644
index 0000000..fd54852
--- /dev/null
+++ b/scripts/record_and_process_video.py
@@ -0,0 +1,862 @@
+"""Record video, process with HAMER, visualize in MuJoCo, and concatenate videos.
+
+This script:
+1. Listens for key press ('r' or 's') to start recording
+2. Records video from camera
+3. Saves the recorded video
+4. Processes video with HAMER frame by frame
+5. Opens MuJoCo and visualizes the processed hand tracking
+6. Records the MuJoCo visualization
+7. Concatenates both videos together
+"""
+
+import argparse
+import sys
+import time
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from typing import Optional
+
+import cv2
+import mujoco
+import mujoco.viewer
+import numpy as np
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from handpose.ik_retargeting import (
+    FINGER_TARGET_BODIES,
+    MP_LANDMARK_INDICES,
+    ORCAHandIKConfig,
+    ORCAHandIKRetargeting,
+)
+from handpose.tracker.base import HandStructure
+from handpose.tracker.hamer import HaMeRTracker
+
+
+def inject_visualization_bodies(mjcf_path: Path, target_joint_types: tuple[str, ...]) -> str:
+    """Inject mocap bodies for tracker sites and ensure IK target sites exist."""
+    tree = ET.parse(mjcf_path)
+    root = tree.getroot()
+    worldbody = root.find("worldbody")
+
+    if worldbody is None:
+        raise ValueError("Could not find worldbody in MJCF")
+
+    # Convert relative paths to absolute paths
+    model_dir = mjcf_path.parent
+    for asset in root.findall(".//asset"):
+        for mesh in asset.findall("mesh"):
+            file_attr = mesh.get("file")
+            if file_attr and not Path(file_attr).is_absolute():
+                abs_path = (model_dir / file_attr).resolve()
+                mesh.set("file", str(abs_path))
+
+    # IK target sites use blue
+    ik_site_color = "0 0 1 0.9"
+
+    # Tip site offsets
+    tip_site_specs = {
+        "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])),
+        "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])),
+        "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])),
+        "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])),
+        "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])),
+    }
+
+    if "tip" in target_joint_types:
+        for finger, (parent_body, offset) in tip_site_specs.items():
+            body_elem = root.find(f".//body[@name='{parent_body}']")
+            if body_elem is None:
+                continue
+            site_name = f"right_{finger}_tip_site"
+            existing_site = body_elem.find(f"./site[@name='{site_name}']")
+            if existing_site is not None:
+                existing_site.set("type", "sphere")
+                existing_site.set("size", "0.005")
+                existing_site.set("rgba", ik_site_color)
+            else:
+                site = ET.SubElement(body_elem, "site")
+                site.set("name", site_name)
+                site.set("type", "sphere")
+                site.set("pos", " ".join(f"{value:.5f}" for value in offset))
+                site.set("size", "0.005")
+                site.set("rgba", ik_site_color)
+
+    # Add visual markers for IK target bodies
+    allowed_types = set(target_joint_types)
+    for finger_name, bodies in FINGER_TARGET_BODIES.items():
+        for joint_type, (frame_type, frame_name) in bodies.items():
+            if joint_type not in allowed_types:
+                continue
+
+            if frame_type == "body":
+                body_elem = root.find(f".//body[@name='{frame_name}']")
+                if body_elem is not None:
+                    marker_site_name = f"ik_marker_{frame_name}"
+                    existing_marker = body_elem.find(f"./site[@name='{marker_site_name}']")
+                    if existing_marker is not None:
+                        existing_marker.set("size", "0.004")
+                        existing_marker.set("rgba", ik_site_color)
+                    else:
+                        site = ET.SubElement(body_elem, "site")
+                        site.set("name", marker_site_name)
+                        site.set("pos", "0 0 0")
+                        site.set("size", "0.004")
+                        site.set("rgba", ik_site_color)
+
+    # Add mocap bodies for tracker sites (red)
+    tracker_color = "1 0 0 0.9"
+    # Add wrist tracker body
+    wrist_body = ET.SubElement(worldbody, "body")
+    wrist_body.set("name", "tracker_wrist")
+    wrist_body.set("mocap", "true")
+    wrist_body.set("pos", "0 0 0")
+    wrist_geom = ET.SubElement(wrist_body, "geom")
+    wrist_geom.set("type", "sphere")
+    wrist_geom.set("size", "0.005")
+    wrist_geom.set("rgba", tracker_color)
+    wrist_geom.set("contype", "0")
+    wrist_geom.set("conaffinity", "0")
+
+    for finger_name, mp_mapping in MP_LANDMARK_INDICES.items():
+        for joint_type, mp_idx in mp_mapping.items():
+            if joint_type not in allowed_types:
+                continue
+
+            mocap_body = ET.SubElement(worldbody, "body")
+            mocap_body.set("name", f"tracker_{finger_name}_{joint_type}")
+            mocap_body.set("mocap", "true")
+            mocap_body.set("pos", "0 0 0")
+
+            geom = ET.SubElement(mocap_body, "geom")
+            geom.set("type", "sphere")
+            geom.set("size", "0.005")
+            geom.set("rgba", tracker_color)
+            geom.set("contype", "0")
+            geom.set("conaffinity", "0")
+
+    # Add skeleton lines (cylinders) between joints for skeleton overlay
+    skeleton_color = "0 1 0 0.8"  # Green skeleton lines
+    skeleton_connections = [
+        # Thumb chain
+        ("tracker_thumb_mcp", "tracker_thumb_ip"),
+        ("tracker_thumb_ip", "tracker_thumb_tip"),
+        # Index chain
+        ("tracker_index_mcp", "tracker_index_pip"),
+        ("tracker_index_pip", "tracker_index_tip"),
+        # Middle chain
+        ("tracker_middle_mcp", "tracker_middle_pip"),
+        ("tracker_middle_pip", "tracker_middle_tip"),
+        # Ring chain
+        ("tracker_ring_mcp", "tracker_ring_pip"),
+        ("tracker_ring_pip", "tracker_ring_tip"),
+        # Pinky chain
+        ("tracker_pinky_mcp", "tracker_pinky_pip"),
+        ("tracker_pinky_pip", "tracker_pinky_tip"),
+    ]
+
+    # Add wrist connections (from wrist to each finger MCP)
+    wrist_connections = [
+        ("tracker_index_mcp", "tracker_wrist"),
+        ("tracker_middle_mcp", "tracker_wrist"),
+        ("tracker_ring_mcp", "tracker_wrist"),
+        ("tracker_pinky_mcp", "tracker_wrist"),
+        ("tracker_thumb_mcp", "tracker_wrist"),
+    ]
+
+    # Create skeleton line geoms (will be positioned dynamically)
+    for start_joint, end_joint in skeleton_connections + wrist_connections:
+        # Create a mocap body for the skeleton line
+        line_body = ET.SubElement(worldbody, "body")
+        line_body.set("name", f"skeleton_line_{start_joint}_{end_joint}")
+        line_body.set("mocap", "true")
+        line_body.set("pos", "0 0 0")
+
+        # Add cylinder geom for the line
+        line_geom = ET.SubElement(line_body, "geom")
+        line_geom.set("type", "cylinder")
+        line_geom.set("size", "0.001 0.02")  # Thin cylinder (radius, half-height) - will be updated dynamically
+        line_geom.set("rgba", skeleton_color)
+        line_geom.set("contype", "0")
+        line_geom.set("conaffinity", "0")
+
+    return ET.tostring(root, encoding="unicode")
+
+
+def hand_structure_to_landmarks(structure: HandStructure) -> np.ndarray:
+    """Convert HandStructure to 21x3 landmarks array (MediaPipe format)."""
+    landmarks = np.zeros((21, 3))
+
+    landmarks[0] = structure.wrist_position
+    landmarks[1] = structure.thumb.mcp
+    landmarks[2] = structure.thumb.mcp
+    landmarks[3] = structure.thumb.ip if structure.thumb.ip is not None else structure.thumb.mcp
+    landmarks[4] = structure.thumb.tip
+    landmarks[5] = structure.index.mcp
+    landmarks[6] = structure.index.pip if structure.index.pip is not None else structure.index.mcp
+    landmarks[7] = structure.index.dip if structure.index.dip is not None else structure.index.tip
+    landmarks[8] = structure.index.tip
+    landmarks[9] = structure.middle.mcp
+    landmarks[10] = structure.middle.pip if structure.middle.pip is not None else structure.middle.mcp
+    landmarks[11] = structure.middle.dip if structure.middle.dip is not None else structure.middle.tip
+    landmarks[12] = structure.middle.tip
+    landmarks[13] = structure.ring.mcp
+    landmarks[14] = structure.ring.pip if structure.ring.pip is not None else structure.ring.mcp
+    landmarks[15] = structure.ring.dip if structure.ring.dip is not None else structure.ring.tip
+    landmarks[16] = structure.ring.tip
+    landmarks[17] = structure.pinky.mcp
+    landmarks[18] = structure.pinky.pip if structure.pinky.pip is not None else structure.pinky.mcp
+    landmarks[19] = structure.pinky.dip if structure.pinky.dip is not None else structure.pinky.tip
+    landmarks[20] = structure.pinky.tip
+
+    return landmarks
+
+
+def get_joint_position_world(
+    model: mujoco.MjModel,
+    data: mujoco.MjData,
+    joint_name: str,
+    hand_structure: HandStructure,
+    ik_solver: ORCAHandIKRetargeting,
+    target_joint_types: tuple[str, ...],
+) -> np.ndarray | None:
+    """Get world position of a joint by name."""
+    # Check if it's a mocap body we can query directly
+    if joint_name.startswith("tracker_"):
+        body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, joint_name)
+        if body_id >= 0:
+            mocap_idx = model.body_mocapid[body_id]
+            if mocap_idx >= 0 and mocap_idx < model.nmocap:
+                return data.mocap_pos[mocap_idx].copy()
+
+    # Fallback: calculate from hand structure
+    if joint_name in ("wrist", "tracker_wrist"):
+        t_palm = ik_solver.configuration.get_transform_frame_to_world("right_palm", "body")
+        p_palm = t_palm.translation()
+        r_palm = t_palm.rotation().as_matrix()
+        wrist_offset_world = r_palm @ ik_solver.config.wrist_offset_palm
+        return p_palm + wrist_offset_world
+
+    return None
+
+
+def update_skeleton_lines(
+    model: mujoco.MjModel,
+    data: mujoco.MjData,
+    hand_structure: HandStructure,
+    target_joint_types: tuple[str, ...],
+    ik_solver: ORCAHandIKRetargeting,
+) -> None:
+    """Update skeleton line positions and orientations between joints."""
+    skeleton_connections = [
+        ("tracker_thumb_mcp", "tracker_thumb_ip"),
+        ("tracker_thumb_ip", "tracker_thumb_tip"),
+        ("tracker_index_mcp", "tracker_index_pip"),
+        ("tracker_index_pip", "tracker_index_tip"),
+        ("tracker_middle_mcp", "tracker_middle_pip"),
+        ("tracker_middle_pip", "tracker_middle_tip"),
+        ("tracker_ring_mcp", "tracker_ring_pip"),
+        ("tracker_ring_pip", "tracker_ring_tip"),
+        ("tracker_pinky_mcp", "tracker_pinky_pip"),
+        ("tracker_pinky_pip", "tracker_pinky_tip"),
+        ("tracker_index_mcp", "tracker_wrist"),
+        ("tracker_middle_mcp", "tracker_wrist"),
+        ("tracker_ring_mcp", "tracker_wrist"),
+        ("tracker_pinky_mcp", "tracker_wrist"),
+        ("tracker_thumb_mcp", "tracker_wrist"),
+    ]
+
+    for start_joint, end_joint in skeleton_connections:
+        p1 = get_joint_position_world(model, data, start_joint, hand_structure, ik_solver, target_joint_types)
+        p2 = get_joint_position_world(model, data, end_joint, hand_structure, ik_solver, target_joint_types)
+
+        if p1 is None or p2 is None:
+            continue
+
+        # Calculate midpoint and direction
+        midpoint = (p1 + p2) / 2.0
+        direction = p2 - p1
+        length = np.linalg.norm(direction)
+
+        if length < 1e-6:
+            continue
+
+        direction = direction / length
+
+        # Create rotation quaternion to align cylinder with direction
+        # Default cylinder axis is [0, 0, 1], we want it along direction
+        z_axis = np.array([0, 0, 1])
+        if np.abs(np.dot(direction, z_axis)) > 0.99:
+            # Parallel to z-axis, use identity
+            quat = np.array([1, 0, 0, 0])
+        else:
+            # Calculate rotation axis and angle
+            rot_axis = np.cross(z_axis, direction)
+            rot_axis = rot_axis / np.linalg.norm(rot_axis)
+            cos_angle = np.dot(z_axis, direction)
+            angle = np.arccos(np.clip(cos_angle, -1, 1))
+            quat = np.array([
+                np.cos(angle / 2),
+                rot_axis[0] * np.sin(angle / 2),
+                rot_axis[1] * np.sin(angle / 2),
+                rot_axis[2] * np.sin(angle / 2),
+            ])
+
+        # Update mocap body position and orientation
+        line_body_name = f"skeleton_line_{start_joint}_{end_joint}"
+        body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, line_body_name)
+        if body_id >= 0:
+            mocap_idx = model.body_mocapid[body_id]
+            if mocap_idx >= 0 and mocap_idx < model.nmocap:
+                data.mocap_pos[mocap_idx] = midpoint
+                data.mocap_quat[mocap_idx] = quat
+
+
+def update_tracker_sites(
+    model: mujoco.MjModel,
+    data: mujoco.MjData,
+    hand_structure: HandStructure,
+    target_joint_types: tuple[str, ...],
+    ik_solver: ORCAHandIKRetargeting,
+) -> None:
+    """Update mocap body positions for tracker sites based on hand structure."""
+    landmarks_hand_frame = hand_structure_to_landmarks(hand_structure)
+
+    t_palm = ik_solver.configuration.get_transform_frame_to_world("right_palm", "body")
+    p_palm = t_palm.translation()
+    r_palm = t_palm.rotation().as_matrix()
+
+    wrist_offset_world = r_palm @ ik_solver.config.wrist_offset_palm
+    p_wrist = p_palm + wrist_offset_world
+
+    scale = ik_solver.config.scale_factor
+    coord_transform = ik_solver.config.coord_transform
+
+    allowed_types = set(target_joint_types)
+    for finger_name, mp_mapping in MP_LANDMARK_INDICES.items():
+        for joint_type, mp_idx in mp_mapping.items():
+            if joint_type not in allowed_types:
+                continue
+
+            landmark_pos_hand_frame = landmarks_hand_frame[mp_idx]
+            scaled_vec = landmark_pos_hand_frame * scale
+            target_pos_world = p_wrist + scaled_vec
+
+            rel_vec = target_pos_world - p_wrist
+            transformed_vec = coord_transform @ rel_vec
+            final_pos = p_wrist + transformed_vec
+
+            mocap_body_name = f"tracker_{finger_name}_{joint_type}"
+            body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, mocap_body_name)
+            if body_id >= 0:
+                mocap_idx = model.body_mocapid[body_id]
+                if mocap_idx >= 0 and mocap_idx < model.nmocap:
+                    data.mocap_pos[mocap_idx] = final_pos
+
+    # Update wrist tracker position
+    wrist_body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, "tracker_wrist")
+    if wrist_body_id >= 0:
+        wrist_mocap_idx = model.body_mocapid[wrist_body_id]
+        if wrist_mocap_idx >= 0 and wrist_mocap_idx < model.nmocap:
+            data.mocap_pos[wrist_mocap_idx] = p_wrist
+
+    # Update skeleton lines after updating tracker sites
+    update_skeleton_lines(model, data, hand_structure, target_joint_types, ik_solver)
+
+
+def record_camera_video(cap: cv2.VideoCapture, output_path: Path, fps: float = 30.0) -> bool:
+    """Record video from camera until 'q' is pressed.
+
+    Returns True if recording was successful, False otherwise.
+    """
+    print(f"\n{'='*70}")
+    print("VIDEO RECORDING")
+    print(f"{'='*70}")
+    print("Press 'r' or 's' to START recording")
+    print("Press 'q' to STOP recording and save")
+    print(f"{'='*70}\n")
+
+    # Set camera FPS if possible
+    cap.set(cv2.CAP_PROP_FPS, fps)
+
+    # Wait for start key
+    recording = False
+    out = None
+    frame_count = 0
+    start_time = None
+    frame_time = 1.0 / fps  # Time per frame in seconds
+    target_time = time.time() + frame_time
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            print("Failed to read from camera")
+            if out is not None:
+                out.release()
+            cv2.destroyAllWindows()
+            return False
+
+        display_frame = frame.copy()
+        if not recording:
+            cv2.putText(
+                display_frame,
+                "Press 'r' or 's' to START recording",
+                (10, 30),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                1,
+                (0, 255, 0),
+                2,
+            )
+        else:
+            elapsed = time.time() - start_time
+            cv2.putText(
+                display_frame,
+                f"RECORDING... {elapsed:.1f}s ({frame_count} frames) - Press 'q' to STOP",
+                (10, 30),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.7,
+                (0, 0, 255),
+                2,
+            )
+
+        cv2.imshow("Camera Feed - Recording", display_frame)
+        key = cv2.waitKey(1) & 0xFF
+
+        if key == ord("q") and recording:
+            break
+        elif (key == ord("r") or key == ord("s")) and not recording:
+            recording = True
+            # Get frame dimensions
+            height, width = frame.shape[:2]
+            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+            out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
+            if not out.isOpened():
+                print(f"Error: Failed to open video writer for {output_path}")
+                cv2.destroyAllWindows()
+                return False
+            start_time = time.time()
+            print(f"Recording started at {fps} FPS. Saving to: {output_path}")
+
+        if recording and out is not None:
+            # Write frame and control timing
+            out.write(frame)
+            frame_count += 1
+
+            # Sleep to maintain target FPS
+            sleep_time = max(0, target_time - time.time())
+            if sleep_time > 0:
+                time.sleep(sleep_time)
+            target_time += frame_time
+
+
+    if out is not None:
+        out.release()
+        elapsed = time.time() - start_time if start_time else 0
+        print(f"Recording stopped. Saved {frame_count} frames ({elapsed:.2f}s) to {output_path}")
+        print(f"Actual FPS: {frame_count / elapsed:.2f} (target: {fps:.2f})")
+
+    cv2.destroyAllWindows()
+    return recording
+
+
+def process_video_with_hamer(
+    video_path: Path,
+    tracker: HaMeRTracker,
+    ik_solver: ORCAHandIKRetargeting,
+    model: mujoco.MjModel,
+    data: mujoco.MjData,
+    target_joint_types: tuple[str, ...],
+) -> list[tuple[np.ndarray, Optional[HandStructure]]]:
+    """Process video with HAMER and return frames with hand structures."""
+    print(f"\n{'='*70}")
+    print("PROCESSING VIDEO WITH HAMER")
+    print(f"{'='*70}")
+    print(f"Processing: {video_path}")
+
+    cap = cv2.VideoCapture(str(video_path))
+    if not cap.isOpened():
+        raise RuntimeError(f"Failed to open video: {video_path}")
+
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    print(f"Video: {frame_count} frames at {fps} FPS")
+
+    processed_frames = []
+    timestamp = 0.0
+    dt = 1.0 / fps if fps > 0 else 1.0 / 30.0
+
+    frame_idx = 0
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        # Use previous frame's solution as warm start for better IK convergence
+        # Only reset on first frame
+        if frame_idx == 0:
+            mujoco.mj_resetData(model, data)
+
+        timestamp = frame_idx * dt
+        hand_structures = tracker.detect_hands(frame, timestamp=timestamp)
+
+        hand_structure = hand_structures[0] if hand_structures else None
+
+        if hand_structure:
+            # Update IK solver with current state (warm start from previous frame)
+            mujoco.mj_forward(model, data)
+            ik_solver.configuration.update(data.qpos)
+
+            # Solve IK
+            qpos = ik_solver.solve(hand_structure)
+            if not np.any(np.isnan(qpos)) and not np.any(np.isinf(qpos)):
+                data.qpos[:] = qpos
+                data.qvel[:] = 0
+
+            # Update tracker site positions
+            update_tracker_sites(model, data, hand_structure, target_joint_types, ik_solver)
+
+        # Forward kinematics
+        mujoco.mj_forward(model, data)
+
+        processed_frames.append((frame.copy(), hand_structure))
+        frame_idx += 1
+
+        if frame_idx % 10 == 0:
+            print(f"Processed {frame_idx}/{frame_count} frames...")
+
+    cap.release()
+    print(f"Processing complete. Processed {len(processed_frames)} frames.")
+    return processed_frames
+
+
+def record_mujoco_visualization(
+    processed_frames: list[tuple[np.ndarray, Optional[HandStructure]]],
+    model: mujoco.MjModel,
+    data: mujoco.MjData,
+    output_path: Path,
+    fps: float = 30.0,
+    width: int = 1920,
+    height: int = 1080,
+    ik_solver: Optional[ORCAHandIKRetargeting] = None,
+    target_joint_types: tuple[str, ...] = ("tip",),
+) -> None:
+    """Record MuJoCo visualization from processed frames."""
+    print(f"\n{'='*70}")
+    print("RECORDING MUJOCO VISUALIZATION")
+    print(f"{'='*70}")
+    print(f"Recording {len(processed_frames)} frames to: {output_path}")
+
+    # Create renderer
+    # The model should have framebuffer settings in MJCF for larger resolutions
+    try:
+        renderer = mujoco.Renderer(model, height=height, width=width, max_geom=10000)
+    except ValueError as e:
+        # Fallback to smaller resolution if framebuffer is too small
+        print(f"Warning: {e}")
+        print("Falling back to 640x480 resolution")
+        width = 640
+        height = 480
+        renderer = mujoco.Renderer(model, height=height, width=width, max_geom=10000)
+
+    # Setup camera
+    cam = mujoco.MjvCamera()
+    cam.type = mujoco.mjtCamera.mjCAMERA_FREE
+    cam.distance = 0.6  # Further back to fit entire hand
+    cam.azimuth = -20
+    cam.elevation = 35
+    cam.lookat[:] = [0.04, 0.0, 0.08]  # Look at palm area
+
+    # Setup options - enable transparency and joint visualization for skeleton overlay
+    option = mujoco.MjvOption()
+    option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = False
+    option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = False
+    option.flags[mujoco.mjtVisFlag.mjVIS_TRANSPARENT] = True  # Make meshes transparent
+    option.flags[mujoco.mjtVisFlag.mjVIS_JOINT] = True  # Show joints for skeleton overlay
+
+    # Create video writer
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
+
+    # Process each frame
+    for frame_idx, (frame, hand_structure) in enumerate(processed_frames):
+        # Only reset on first frame
+        if frame_idx == 0:
+            mujoco.mj_resetData(model, data)
+
+        if hand_structure is not None and ik_solver is not None:
+            # Update IK solver configuration
+            mujoco.mj_forward(model, data)
+            ik_solver.configuration.update(data.qpos)
+
+            # Solve IK
+            qpos = ik_solver.solve(hand_structure)
+            if not np.any(np.isnan(qpos)) and not np.any(np.isinf(qpos)):
+                data.qpos[:] = qpos
+                data.qvel[:] = 0
+
+            # Update tracker site positions
+            update_tracker_sites(model, data, hand_structure, target_joint_types, ik_solver)
+
+        # Forward kinematics
+        mujoco.mj_forward(model, data)
+
+        # Update scene with current state
+        renderer.update_scene(data, camera=cam, scene_option=option)
+
+        # Render to numpy array (returns RGB, shape: (height, width, 3))
+        rgb_frame = renderer.render()
+
+        # Convert RGB to BGR for OpenCV
+        # Note: MuJoCo Renderer already returns correct orientation, no need to flip
+        bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
+
+        out.write(bgr_frame)
+
+        if (frame_idx + 1) % 10 == 0:
+            print(f"Rendered {frame_idx + 1}/{len(processed_frames)} frames...")
+
+    out.release()
+    renderer.close()
+    print(f"MuJoCo visualization recording complete. Saved to: {output_path}")
+
+
+def concatenate_videos(video1_path: Path, video2_path: Path, output_path: Path) -> None:
+    """Concatenate two videos side by side."""
+    print(f"\n{'='*70}")
+    print("CONCATENATING VIDEOS")
+    print(f"{'='*70}")
+    print(f"Concatenating {video1_path} and {video2_path}")
+    print(f"Output: {output_path}")
+
+    cap1 = cv2.VideoCapture(str(video1_path))
+    cap2 = cv2.VideoCapture(str(video2_path))
+
+    if not cap1.isOpened() or not cap2.isOpened():
+        raise RuntimeError("Failed to open one or both videos")
+
+    # Get properties
+    fps1 = cap1.get(cv2.CAP_PROP_FPS)
+    fps2 = cap2.get(cv2.CAP_PROP_FPS)
+    fps = min(fps1, fps2) if fps1 > 0 and fps2 > 0 else 30.0
+
+    width1 = int(cap1.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height1 = int(cap1.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    width2 = int(cap2.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height2 = int(cap2.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+    # Use the maximum height and sum of widths for side-by-side
+    max_height = max(height1, height2)
+    total_width = width1 + width2
+
+    # Resize videos to same height if needed
+    if height1 != height2:
+        scale1 = max_height / height1
+        scale2 = max_height / height2
+        width1 = int(width1 * scale1)
+        width2 = int(width2 * scale2)
+        total_width = width1 + width2
+
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    out = cv2.VideoWriter(str(output_path), fourcc, fps, (total_width, max_height))
+
+    frame_count = 0
+    while True:
+        ret1, frame1 = cap1.read()
+        ret2, frame2 = cap2.read()
+
+        if not ret1 or not ret2:
+            break
+
+        # Resize if needed
+        if height1 != max_height:
+            frame1 = cv2.resize(frame1, (width1, max_height))
+        if height2 != max_height:
+            frame2 = cv2.resize(frame2, (width2, max_height))
+
+        # Concatenate side by side
+        concatenated = np.hstack([frame1, frame2])
+        out.write(concatenated)
+
+        frame_count += 1
+        if frame_count % 10 == 0:
+            print(f"Concatenated {frame_count} frames...")
+
+    cap1.release()
+    cap2.release()
+    out.release()
+    print(f"Concatenation complete. Saved {frame_count} frames to {output_path}")
+
+
+def main() -> None:
+    """Main entry point."""
+    parser = argparse.ArgumentParser(
+        description="Record video, process with HAMER, visualize in MuJoCo, and concatenate",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument("--camera", type=int, default=0, help="Camera device index")
+    parser.add_argument(
+        "--model",
+        type=str,
+        default="models/orca_hand_fixed.mjcf",
+        help="Path to MuJoCo MJCF model file",
+    )
+    parser.add_argument(
+        "--targets",
+        type=str,
+        default="tip",
+        help="Comma-separated joint targets (tip,ip,pip,mcp)",
+    )
+    parser.add_argument(
+        "--scale",
+        type=float,
+        default=1.0,
+        help="Hand scale factor (robot/human)",
+    )
+    parser.add_argument(
+        "--fps",
+        type=float,
+        default=30.0,
+        help="Video FPS",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        default="recordings",
+        help="Output directory for videos",
+    )
+    parser.add_argument(
+        "--video-path",
+        type=str,
+        default=None,
+        help="Path to input video file (if provided, skips camera recording and uses this video)",
+    )
+    args = parser.parse_args()
+
+    # Parse target joint types
+    raw_targets = [part.strip().lower() for part in args.targets.split(",")]
+    target_joints = tuple(dict.fromkeys(jt for jt in raw_targets if jt))
+    if not target_joints:
+        target_joints = ("tip",)
+    supported_targets = {"tip", "ip", "pip", "mcp"}
+    invalid = [jt for jt in target_joints if jt not in supported_targets]
+    if invalid:
+        parser.error(f"Unsupported target joint types: {', '.join(invalid)}")
+
+    # Create output directory
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Generate output filenames
+    timestamp = int(time.time())
+    if args.video_path:
+        # Use input video name as base for output files
+        input_video_name = Path(args.video_path).stem
+        camera_video_path = Path(args.video_path)  # Use input video directly
+        mujoco_video_path = output_dir / f"mujoco_{input_video_name}_{timestamp}.mp4"
+        final_video_path = output_dir / f"concatenated_{input_video_name}_{timestamp}.mp4"
+    else:
+        camera_video_path = output_dir / f"camera_recording_{timestamp}.mp4"
+        mujoco_video_path = output_dir / f"mujoco_recording_{timestamp}.mp4"
+        final_video_path = output_dir / f"concatenated_{timestamp}.mp4"
+
+    # Load model
+    model_file = Path(args.model)
+    if not model_file.is_absolute():
+        model_file = Path(__file__).parent.parent / args.model
+    if not model_file.exists():
+        raise FileNotFoundError(f"Model file not found: {model_file}")
+
+    print(f"Loading model from {model_file}")
+    xml_string = inject_visualization_bodies(model_file, target_joints)
+    model = mujoco.MjModel.from_xml_string(xml_string)
+    data = mujoco.MjData(model)
+
+    # Initialize tracker and IK solver
+    tracker = HaMeRTracker()
+    ik_config = ORCAHandIKConfig(
+        scale_factor=args.scale,
+        target_joint_types=target_joints,
+        lm_damping=0.5,
+        ik_iterations=15,
+    )
+    ik_solver = ORCAHandIKRetargeting(model, config=ik_config)
+
+    # Verify tip sites exist in the loaded model
+    if "tip" in target_joints:
+        print("\nVerifying tip sites in loaded model:")
+        for finger in ["thumb", "index", "middle", "ring", "pinky"]:
+            site_name = f"right_{finger}_tip_site"
+            site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, site_name)
+            if site_id >= 0:
+                site_pos = data.site(site_id).xpos.copy()
+                print(f"  ✓ {site_name}: found at position [{site_pos[0]:.4f}, {site_pos[1]:.4f}, {site_pos[2]:.4f}]")
+            else:
+                print(f"  ✗ {site_name}: NOT FOUND in model!")
+
+    # Step 1: Record camera video or use provided video file
+    if args.video_path:
+        camera_video_path = Path(args.video_path)
+        if not camera_video_path.exists():
+            raise FileNotFoundError(f"Video file not found: {camera_video_path}")
+        print(f"Using existing video: {camera_video_path}")
+    else:
+        cap = cv2.VideoCapture(args.camera)
+        if not cap.isOpened():
+            raise RuntimeError(f"Failed to open camera {args.camera}")
+
+        if not record_camera_video(cap, camera_video_path, args.fps):
+            print("Recording cancelled or failed.")
+            cap.release()
+            return
+
+        cap.release()
+
+    # Step 2: Process video with HAMER
+    processed_frames = process_video_with_hamer(
+        camera_video_path,
+        tracker,
+        ik_solver,
+        model,
+        data,
+        target_joints,
+    )
+
+    # Get video FPS (use input video FPS if provided, otherwise use args.fps)
+    if args.video_path:
+        cap = cv2.VideoCapture(str(camera_video_path))
+        video_fps = cap.get(cv2.CAP_PROP_FPS)
+        cap.release()
+        if video_fps > 0:
+            fps = video_fps
+        else:
+            fps = args.fps
+    else:
+        fps = args.fps
+
+    # Step 3: Record MuJoCo visualization
+    record_mujoco_visualization(
+        processed_frames,
+        model,
+        data,
+        mujoco_video_path,
+        fps,
+        ik_solver=ik_solver,
+        target_joint_types=target_joints,
+    )
+
+    # Step 4: Concatenate videos
+    concatenate_videos(camera_video_path, mujoco_video_path, final_video_path)
+
+    print(f"\n{'='*70}")
+    print("ALL DONE!")
+    print(f"{'='*70}")
+    print(f"Camera video: {camera_video_path}")
+    print(f"MuJoCo video: {mujoco_video_path}")
+    print(f"Concatenated video: {final_video_path}")
+    print(f"{'='*70}\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/visualize_tracker_ik_sites.py b/scripts/visualize_tracker_ik_sites.py
new file mode 100644
index 0000000..ac99398
--- /dev/null
+++ b/scripts/visualize_tracker_ik_sites.py
@@ -0,0 +1,557 @@
+"""Visualize tracker sites and IK target sites on ORCA hand model.
+
+This script overlays:
+1. Tracker sites (from MediaPipe/HaMeR hand tracking) - shown as colored spheres
+2. IK target sites (on ORCA hand model) - shown as different colored spheres
+
+Both are color-coded and labeled for easy comparison.
+"""
+
+import argparse
+import multiprocessing as mp
+import sys
+import time
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from queue import Empty
+from typing import Protocol
+
+import cv2
+import mujoco
+import mujoco.viewer
+import numpy as np
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from handpose.ik_retargeting import (
+    FINGER_TARGET_BODIES,
+    MP_LANDMARK_INDICES,
+    ORCAHandIKConfig,
+    ORCAHandIKRetargeting,
+)
+from handpose.tracker.base import HandStructure
+from handpose.tracker.hamer import HaMeRTracker
+from handpose.tracker.mediapipe import MediaPipeTracker
+
+
+def inject_visualization_bodies(mjcf_path: Path, target_joint_types: tuple[str, ...]) -> str:
+    """Inject mocap bodies for tracker sites and ensure IK target sites exist.
+
+    Args:
+        mjcf_path: Path to MJCF model file
+        target_joint_types: Tuple of joint types to visualize (tip, ip, pip, mcp)
+
+    Returns:
+        Modified MJCF XML string
+    """
+    tree = ET.parse(mjcf_path)
+    root = tree.getroot()
+    worldbody = root.find("worldbody")
+
+    if worldbody is None:
+        raise ValueError("Could not find worldbody in MJCF")
+
+    # Convert relative paths to absolute paths
+    model_dir = mjcf_path.parent
+    for asset in root.findall(".//asset"):
+        for mesh in asset.findall("mesh"):
+            file_attr = mesh.get("file")
+            if file_attr and not Path(file_attr).is_absolute():
+                abs_path = (model_dir / file_attr).resolve()
+                mesh.set("file", str(abs_path))
+
+    # All IK target sites use blue for consistency
+    ik_site_color = "0 0 1 0.9"  # Blue for all IK targets
+
+    # Tip site offsets: [x, y, z] where z is upward direction
+    tip_site_specs = {
+        "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])),
+        "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])),
+        "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])),
+        "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])),
+        "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])),
+    }
+
+    if "tip" in target_joint_types:
+        tip_sites_created = []
+        for finger, (parent_body, offset) in tip_site_specs.items():
+            body_elem = root.find(f".//body[@name='{parent_body}']")
+            if body_elem is None:
+                print(f"Warning: Parent body '{parent_body}' not found for {finger} tip site")
+                continue
+            site_name = f"right_{finger}_tip_site"
+            existing_site = body_elem.find(f"./site[@name='{site_name}']")
+            if existing_site is not None:
+                # Update existing site - make it larger and blue
+                existing_site.set("type", "sphere")  # Ensure type is set
+                existing_site.set("size", "0.005")
+                existing_site.set("rgba", ik_site_color)
+                tip_sites_created.append(f"{finger} (updated)")
+            else:
+                # Create new tip site
+                site = ET.SubElement(body_elem, "site")
+                site.set("name", site_name)
+                site.set("type", "sphere")  # Explicit type for visibility
+                site.set("pos", " ".join(f"{value:.5f}" for value in offset))
+                site.set("size", "0.005")
+                site.set("rgba", ik_site_color)
+                tip_sites_created.append(f"{finger} (created)")
+        print(f"Tip sites: {', '.join(tip_sites_created)}")
+
+    # Add visual markers for IK target bodies (MCP, PIP, IP) that might not have visual geoms
+    allowed_types = set(target_joint_types)
+    markers_added = []
+    for finger_name, bodies in FINGER_TARGET_BODIES.items():
+        for joint_type, (frame_type, frame_name) in bodies.items():
+            if joint_type not in allowed_types:
+                continue
+
+            if frame_type == "body":
+                # Add a visual site to the body for visibility
+                body_elem = root.find(f".//body[@name='{frame_name}']")
+                if body_elem is not None:
+                    # Check if marker site already exists
+                    marker_site_name = f"ik_marker_{frame_name}"
+                    existing_marker = body_elem.find(f"./site[@name='{marker_site_name}']")
+                    if existing_marker is not None:
+                        # Update existing marker
+                        existing_marker.set("size", "0.004")  # Same size as tip sites
+                        existing_marker.set("rgba", ik_site_color)
+                        markers_added.append(f"{finger_name}_{joint_type} (updated)")
+                    else:
+                        # Create new marker site
+                        site = ET.SubElement(body_elem, "site")
+                        site.set("name", marker_site_name)
+                        site.set("pos", "0 0 0")  # At body origin
+                        site.set("size", "0.004")  # Same size as tip sites for consistency
+                        site.set("rgba", ik_site_color)  # Blue for all IK targets
+                        markers_added.append(f"{finger_name}_{joint_type} (created)")
+                else:
+                    print(f"Warning: Body '{frame_name}' not found in MJCF for {finger_name}_{joint_type}")
+            elif frame_type == "site":
+                # This is a tip site - find and update it (should already be handled above, but double-check)
+                found = False
+                for body in root.findall(".//body"):
+                    site_elem = body.find(f"./site[@name='{frame_name}']")
+                    if site_elem is not None:
+                        site_elem.set("size", "0.005")
+                        site_elem.set("rgba", ik_site_color)
+                        found = True
+                        markers_added.append(f"{finger_name}_{joint_type} (site updated)")
+                        break
+                if not found:
+                    print(f"Warning: Site '{frame_name}' not found in MJCF for {finger_name}_{joint_type}")
+
+    print(f"Added/updated {len(markers_added)} IK target markers: {', '.join(markers_added)}")
+
+    # Add mocap bodies for tracker sites (MediaPipe landmarks)
+    # All tracker sites use red for consistency - these represent the actual tracked hand
+    tracker_color = "1 0 0 0.9"  # Red for all tracker sites (actual hand)
+
+    allowed_types = set(target_joint_types)
+    for finger_name, mp_mapping in MP_LANDMARK_INDICES.items():
+        for joint_type, mp_idx in mp_mapping.items():
+            if joint_type not in allowed_types:
+                continue
+
+            # Create mocap body for tracker site
+            mocap_body = ET.SubElement(worldbody, "body")
+            mocap_body.set("name", f"tracker_{finger_name}_{joint_type}")
+            mocap_body.set("mocap", "true")
+            mocap_body.set("pos", "0 0 0")
+
+            # Add visual sphere for tracker site (larger, red)
+            geom = ET.SubElement(mocap_body, "geom")
+            geom.set("type", "sphere")
+            geom.set("size", "0.005")  # 5mm radius (larger than IK targets for visibility)
+            geom.set("rgba", tracker_color)  # Red for all tracker sites
+            geom.set("contype", "0")
+            geom.set("conaffinity", "0")
+
+    return ET.tostring(root, encoding="unicode")
+
+
+def hand_structure_to_landmarks(structure: HandStructure) -> np.ndarray:
+    """Convert HandStructure to 21x3 landmarks array (MediaPipe format).
+
+    Returns landmarks in hand frame (wrist at origin).
+    """
+    landmarks = np.zeros((21, 3))
+
+    # Wrist (index 0)
+    landmarks[0] = structure.wrist_position
+
+    # Thumb: CMC(1), MCP(2), IP(3), tip(4)
+    landmarks[1] = structure.thumb.mcp  # CMC approximate
+    landmarks[2] = structure.thumb.mcp
+    landmarks[3] = structure.thumb.ip if structure.thumb.ip is not None else structure.thumb.mcp
+    landmarks[4] = structure.thumb.tip
+
+    # Index: MCP(5), PIP(6), DIP(7), tip(8)
+    landmarks[5] = structure.index.mcp
+    landmarks[6] = structure.index.pip if structure.index.pip is not None else structure.index.mcp
+    landmarks[7] = structure.index.dip if structure.index.dip is not None else structure.index.tip
+    landmarks[8] = structure.index.tip
+
+    # Middle: MCP(9), PIP(10), DIP(11), tip(12)
+    landmarks[9] = structure.middle.mcp
+    landmarks[10] = structure.middle.pip if structure.middle.pip is not None else structure.middle.mcp
+    landmarks[11] = structure.middle.dip if structure.middle.dip is not None else structure.middle.tip
+    landmarks[12] = structure.middle.tip
+
+    # Ring: MCP(13), PIP(14), DIP(15), tip(16)
+    landmarks[13] = structure.ring.mcp
+    landmarks[14] = structure.ring.pip if structure.ring.pip is not None else structure.ring.mcp
+    landmarks[15] = structure.ring.dip if structure.ring.dip is not None else structure.ring.tip
+    landmarks[16] = structure.ring.tip
+
+    # Pinky: MCP(17), PIP(18), DIP(19), tip(20)
+    landmarks[17] = structure.pinky.mcp
+    landmarks[18] = structure.pinky.pip if structure.pinky.pip is not None else structure.pinky.mcp
+    landmarks[19] = structure.pinky.dip if structure.pinky.dip is not None else structure.pinky.tip
+    landmarks[20] = structure.pinky.tip
+
+    return landmarks
+
+
+def update_tracker_sites(
+    model: mujoco.MjModel,
+    data: mujoco.MjData,
+    hand_structure: HandStructure,
+    target_joint_types: tuple[str, ...],
+    ik_solver: ORCAHandIKRetargeting,
+) -> None:
+    """Update mocap body positions for tracker sites based on hand structure.
+
+    Args:
+        model: MuJoCo model
+        data: MuJoCo data
+        hand_structure: Tracked hand structure
+        target_joint_types: Joint types to visualize
+        ik_solver: IK solver (for coordinate transformation)
+    """
+    landmarks_hand_frame = hand_structure_to_landmarks(hand_structure)
+
+    # Get palm transform to convert hand frame to world frame
+    t_palm = ik_solver.configuration.get_transform_frame_to_world("right_palm", "body")
+    p_palm = t_palm.translation()
+    r_palm = t_palm.rotation().as_matrix()
+
+    # Wrist offset in palm frame
+    wrist_offset_world = r_palm @ ik_solver.config.wrist_offset_palm
+    p_wrist = p_palm + wrist_offset_world
+
+    # Scale factor
+    scale = ik_solver.config.scale_factor
+
+    # Coordinate transformation
+    coord_transform = ik_solver.config.coord_transform
+
+    allowed_types = set(target_joint_types)
+    for finger_name, mp_mapping in MP_LANDMARK_INDICES.items():
+        for joint_type, mp_idx in mp_mapping.items():
+            if joint_type not in allowed_types:
+                continue
+
+            # Get landmark position in hand frame
+            landmark_pos_hand_frame = landmarks_hand_frame[mp_idx]
+
+            # Scale to robot size
+            scaled_vec = landmark_pos_hand_frame * scale
+
+            # Transform to world frame (anchor to wrist)
+            target_pos_world = p_wrist + scaled_vec
+
+            # Apply coordinate transformation
+            rel_vec = target_pos_world - p_wrist
+            transformed_vec = coord_transform @ rel_vec
+            final_pos = p_wrist + transformed_vec
+
+            # Update mocap body position
+            mocap_body_name = f"tracker_{finger_name}_{joint_type}"
+            body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, mocap_body_name)
+            if body_id >= 0:
+                # Get the mocap index for this body (mocap bodies have mocapid >= 0)
+                mocap_idx = model.body_mocapid[body_id]
+                if mocap_idx >= 0 and mocap_idx < model.nmocap:
+                    data.mocap_pos[mocap_idx] = final_pos
+
+
+def visualize_ik_target_sites(
+    model: mujoco.MjModel,
+    data: mujoco.MjData,
+    target_joint_types: tuple[str, ...],
+) -> dict[str, dict[str, np.ndarray]]:
+    """Get positions of IK target sites on ORCA hand.
+
+    Args:
+        model: MuJoCo model
+        data: MuJoCo data
+        target_joint_types: Joint types to visualize
+
+    Returns:
+        Dictionary mapping finger names to joint_type -> position
+    """
+    ik_positions = {}
+    allowed_types = set(target_joint_types)
+
+    for finger_name, bodies in FINGER_TARGET_BODIES.items():
+        finger_positions = {}
+        for joint_type, (frame_type, frame_name) in bodies.items():
+            if joint_type not in allowed_types:
+                continue
+
+            obj_type = mujoco.mjtObj.mjOBJ_BODY if frame_type == "body" else mujoco.mjtObj.mjOBJ_SITE
+            frame_id = mujoco.mj_name2id(model, obj_type, frame_name)
+
+            if frame_id >= 0:
+                if obj_type == mujoco.mjtObj.mjOBJ_SITE:
+                    pos = data.site(frame_id).xpos.copy()
+                else:
+                    pos = data.body(frame_id).xpos.copy()
+                finger_positions[joint_type] = pos
+
+        if finger_positions:
+            ik_positions[finger_name] = finger_positions
+
+    return ik_positions
+
+
+class Flag(Protocol):
+    value: int
+
+
+def display_feed_process(frame_queue: mp.Queue, running_flag: Flag, window_name: str) -> None:
+    """Display frames in a separate process to avoid GLFW/OpenCV conflicts."""
+    while True:
+        if not running_flag.value and frame_queue.empty():
+            break
+
+        try:
+            frame = frame_queue.get(timeout=0.05)
+        except Empty:
+            if not running_flag.value:
+                break
+            continue
+
+        cv2.imshow(window_name, frame)
+        key = cv2.waitKey(1) & 0xFF
+        if key == ord("q"):
+            running_flag.value = 0
+            break
+
+    cv2.destroyAllWindows()
+
+
+def main() -> None:
+    """Main entry point."""
+    parser = argparse.ArgumentParser(
+        description="Visualize tracker sites and IK target sites on ORCA hand",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument("--camera", type=int, default=0, help="Camera device index")
+    parser.add_argument(
+        "--model",
+        type=str,
+        default="models/orca_hand_fixed.mjcf",
+        help="Path to MuJoCo MJCF model file",
+    )
+    parser.add_argument(
+        "--tracker",
+        type=str,
+        default="hamer",
+        choices=["hamer", "mediapipe"],
+        help="Hand tracking backend",
+    )
+    parser.add_argument(
+        "--targets",
+        type=str,
+        default="tip",
+        help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip",
+    )
+    parser.add_argument(
+        "--scale",
+        type=float,
+        default=1.0,
+        help="Hand scale factor (robot/human)",
+    )
+    parser.add_argument(
+        "--show-feed",
+        action="store_true",
+        help="Show camera feed with hand tracking overlay",
+    )
+    parser.add_argument(
+        "--joint-smoothing",
+        type=float,
+        default=0.7,
+        help="Smoothing factor for joint positions (0.0-1.0). \
+        Lower values = more smoothing, higher = less smoothing. Default: 0.7",
+    )
+    args = parser.parse_args()
+
+    # Parse target joint types
+    raw_targets = [part.strip().lower() for part in args.targets.split(",")]
+    target_joints = tuple(dict.fromkeys(jt for jt in raw_targets if jt))
+    if not target_joints:
+        target_joints = ("tip",)
+    supported_targets = {"tip", "ip", "pip", "mcp"}
+    invalid = [jt for jt in target_joints if jt not in supported_targets]
+    if invalid:
+        parser.error(f"Unsupported target joint types: {', '.join(invalid)}")
+
+    # Load model
+    model_file = Path(args.model)
+    if not model_file.is_absolute():
+        model_file = Path(__file__).parent.parent / args.model
+    if not model_file.exists():
+        raise FileNotFoundError(f"Model file not found: {model_file}")
+
+    print(f"Loading model from {model_file}")
+    xml_string = inject_visualization_bodies(model_file, target_joints)
+    model = mujoco.MjModel.from_xml_string(xml_string)
+    data = mujoco.MjData(model)
+
+    # Verify tip sites exist in the loaded model
+    if "tip" in target_joints:
+        print("\nVerifying tip sites in loaded model:")
+        for finger in ["thumb", "index", "middle", "ring", "pinky"]:
+            site_name = f"right_{finger}_tip_site"
+            site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, site_name)
+            if site_id >= 0:
+                site_pos = data.site(site_id).xpos.copy()
+                print(f"  ✓ {site_name}: found at position [{site_pos[0]:.4f}, {site_pos[1]:.4f}, {site_pos[2]:.4f}]")
+            else:
+                print(f"  ✗ {site_name}: NOT FOUND in model!")
+
+    # Initialize tracker
+    if args.tracker == "hamer":
+        tracker = HaMeRTracker()
+    else:
+        tracker = MediaPipeTracker()
+
+    # Initialize IK solver
+    ik_config = ORCAHandIKConfig(
+        scale_factor=args.scale,
+        target_joint_types=target_joints,
+        lm_damping=0.5,
+        ik_iterations=15,
+    )
+    ik_solver = ORCAHandIKRetargeting(model, config=ik_config)
+
+    # Open camera
+    cap = cv2.VideoCapture(args.camera)
+    if not cap.isOpened():
+        raise RuntimeError(f"Failed to open camera {args.camera}")
+
+    print("\n" + "=" * 70)
+    print("VISUALIZATION GUIDE:")
+    print("=" * 70)
+    print("TRACKER SITES (larger RED spheres - actual tracked hand from camera):")
+    print("  Red spheres = All tracker sites (thumb, index, middle, ring, pinky)")
+    print("               These represent the actual human hand position")
+    print("\nIK TARGET SITES (smaller BLUE spheres - on ORCA hand model):")
+    print("  Blue spheres = All IK target sites (thumb, index, middle, ring, pinky)")
+    print("                These represent where the robot hand should move")
+    print("\nNOTE: RED spheres (tracker) should align with BLUE spheres (IK targets)")
+    print("      when IK is solving correctly. RED = actual hand, BLUE = robot targets.")
+    print("\nPress 'q' in MuJoCo viewer to quit")
+    print("=" * 70 + "\n")
+
+    # Validate joint smoothing value
+    if not (0.0 < args.joint_smoothing <= 1.0):
+        parser.error("--joint-smoothing must be between 0.0 and 1.0 (exclusive of 0.0)")
+
+    start_time = time.time()
+
+    # Initialize joint position smoothing state
+    smoothed_qpos = data.qpos.copy()
+
+    # Set up multiprocessing for display window if requested
+    display_process: mp.Process | None = None
+    frame_queue: mp.Queue | None = None
+    running_flag: Flag | None = None
+
+    if args.show_feed:
+        print("Starting camera feed display in separate process...")
+        frame_queue = mp.Queue(maxsize=2)  # Small buffer to avoid lag
+        running_flag = mp.Value("i", 1)
+        display_process = mp.Process(
+            target=display_feed_process,
+            args=(frame_queue, running_flag, "Hand Tracking Feed"),
+        )
+        display_process.start()
+
+    try:
+        with mujoco.viewer.launch_passive(model, data) as viewer:
+            while viewer.is_running() and cap.isOpened():
+                ret, frame = cap.read()
+                if not ret:
+                    break
+
+                timestamp = time.time() - start_time
+                hand_structures = tracker.detect_hands(frame, timestamp=timestamp)
+
+                # Send frame to display process if enabled
+                if args.show_feed and frame_queue is not None and running_flag is not None:
+                    if hand_structures:
+                        vis_frame = tracker.visualize(frame, hand_structures)
+                    else:
+                        vis_frame = frame.copy()
+                    try:
+                        frame_queue.put_nowait(vis_frame)
+                    except Exception:
+                        pass  # Queue full, skip this frame
+
+                if hand_structures:
+                    hand = hand_structures[0]
+
+                    # Update IK solver configuration with current smoothed state
+                    mujoco.mj_forward(model, data)
+                    data.qpos[:] = smoothed_qpos
+                    mujoco.mj_forward(model, data)
+                    ik_solver.configuration.update(data.qpos)
+
+                    # Solve IK
+                    qpos = ik_solver.solve(hand)
+
+                    # Apply joint position smoothing
+                    if not np.any(np.isnan(qpos)) and not np.any(np.isinf(qpos)):
+                        # Exponential moving average: smoothed = joint_smoothing * new + (1 - joint_smoothing) * old
+                        smoothed_qpos = args.joint_smoothing * qpos + (1.0 - args.joint_smoothing) * smoothed_qpos
+                        data.qpos[:] = smoothed_qpos
+                        data.qvel[:] = 0
+
+                    # Update tracker site positions
+                    update_tracker_sites(model, data, hand, target_joints, ik_solver)
+
+                # Forward kinematics to update IK target site positions
+                mujoco.mj_forward(model, data)
+
+                # Get IK target positions (sites are automatically rendered by MuJoCo)
+                ik_positions = visualize_ik_target_sites(model, data, target_joints)
+
+                # Print current positions for debugging (first frame only)
+                if hand_structures:
+                    print("\nCurrent IK Target Positions:")
+                    for finger, positions in ik_positions.items():
+                        for joint_type, pos in positions.items():
+                            print(f"  {finger}_{joint_type}: [{pos[0]:.4f}, {pos[1]:.4f}, {pos[2]:.4f}]")
+
+                # Sync viewer
+                viewer.sync()
+
+    finally:
+        # Clean up display process
+        if running_flag is not None:
+            running_flag.value = 0
+        if display_process is not None:
+            display_process.join(timeout=1.0)
+            if display_process.is_alive():
+                display_process.terminate()
+        cap.release()
+        print("\nVisualization closed.")
+
+
+if __name__ == "__main__":
+    main()