diff --git a/.gitignore b/.gitignore
index 58be3ff..8e2662d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,4 +41,5 @@ out*/
output/
*.npz
*.pkl
+recordings/
diff --git a/examples/handpose_ros2_publisher.py b/examples/handpose_ros2_publisher.py
index 15ae2d2..7e7f9fe 100644
--- a/examples/handpose_ros2_publisher.py
+++ b/examples/handpose_ros2_publisher.py
@@ -31,6 +31,7 @@
sys.path.insert(0, str(Path(__file__).parent.parent))
from handpose.ik_retargeting import ( # noqa: E402
+ FINGER_TARGET_BODIES,
ORCA_JOINT_NAMES,
ORCAHandIKConfig,
ORCAHandIKRetargeting,
@@ -38,10 +39,15 @@
from handpose.tracker.hamer import HaMeRTracker # noqa: E402
-def inject_target_bodies(mjcf_path: Path) -> str:
- """Injects mocap bodies and tip sites into the MJCF XML string for IK targeting.
+def inject_target_bodies(mjcf_path: Path, target_joint_types: tuple[str, ...]) -> str:
+ """Inject mocap bodies for tracker sites and ensure IK target sites exist.
- This matches the approach in live_demo_ik.py to ensure tip sites exist for all fingers.
+ Args:
+ mjcf_path: Path to MJCF model file
+ target_joint_types: Tuple of joint types to visualize (tip, ip, pip, mcp)
+
+ Returns:
+ Modified MJCF XML string
"""
tree = ET.parse(mjcf_path)
root = tree.getroot()
@@ -51,37 +57,99 @@ def inject_target_bodies(mjcf_path: Path) -> str:
raise ValueError("Could not find worldbody in MJCF")
# Convert relative paths to absolute paths
- # MuJoCo can't resolve relative paths when loading from string
model_dir = mjcf_path.parent
for asset in root.findall(".//asset"):
for mesh in asset.findall("mesh"):
file_attr = mesh.get("file")
if file_attr and not Path(file_attr).is_absolute():
- # Convert relative path to absolute
abs_path = (model_dir / file_attr).resolve()
mesh.set("file", str(abs_path))
- # Add tip sites for all fingers (required for tip tracking)
+ # All IK target sites use blue for consistency
+ ik_site_color = "0 0 1 0.9" # Blue for all IK targets
+
+ # Tip site offsets: [x, y, z] where z is upward direction
tip_site_specs = {
- "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.018])),
- "index": ("right_index_ip", np.array([0.0, 0.0, 0.020])),
- "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.022])),
- "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.021])),
- "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.018])),
+ "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])),
+ "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])),
+ "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])),
+ "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])),
+ "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])),
}
- for finger, (parent_body, offset) in tip_site_specs.items():
- body_elem = root.find(f".//body[@name='{parent_body}']")
- if body_elem is None:
- continue
- site_name = f"right_{finger}_tip_site"
- if body_elem.find(f"./site[@name='{site_name}']") is not None:
- continue
- site = ET.SubElement(body_elem, "site")
- site.set("name", site_name)
- site.set("pos", " ".join(f"{value:.5f}" for value in offset))
- site.set("size", "0.0025")
- site.set("rgba", "1 0.6 0.2 0.8")
+ if "tip" in target_joint_types:
+ tip_sites_created = []
+ for finger, (parent_body, offset) in tip_site_specs.items():
+ body_elem = root.find(f".//body[@name='{parent_body}']")
+ if body_elem is None:
+ print(f"Warning: Parent body '{parent_body}' not found for {finger} tip site")
+ continue
+ site_name = f"right_{finger}_tip_site"
+ existing_site = body_elem.find(f"./site[@name='{site_name}']")
+ if existing_site is not None:
+ # Update existing site - make it larger and blue
+ existing_site.set("type", "sphere") # Ensure type is set
+ existing_site.set("size", "0.005")
+ existing_site.set("rgba", ik_site_color)
+ tip_sites_created.append(f"{finger} (updated)")
+ else:
+ # Create new tip site
+ site = ET.SubElement(body_elem, "site")
+ site.set("name", site_name)
+ site.set("type", "sphere") # Explicit type for visibility
+ site.set("pos", " ".join(f"{value:.5f}" for value in offset))
+ site.set("size", "0.005")
+ site.set("rgba", ik_site_color)
+ tip_sites_created.append(f"{finger} (created)")
+ if tip_sites_created:
+ print(f"Tip sites: {', '.join(tip_sites_created)}")
+
+ # Add visual markers for IK target bodies (MCP, PIP, IP) that might not have visual geoms
+ allowed_types = set(target_joint_types)
+ markers_added = []
+ for finger_name, bodies in FINGER_TARGET_BODIES.items():
+ for joint_type, (frame_type, frame_name) in bodies.items():
+ if joint_type not in allowed_types:
+ continue
+
+ if frame_type == "body":
+ # Add a visual site to the body for visibility
+ body_elem = root.find(f".//body[@name='{frame_name}']")
+ if body_elem is not None:
+ # Check if marker site already exists
+ marker_site_name = f"ik_marker_{frame_name}"
+ existing_marker = body_elem.find(f"./site[@name='{marker_site_name}']")
+ if existing_marker is not None:
+ # Update existing marker
+ existing_marker.set("size", "0.004") # Same size as tip sites
+ existing_marker.set("rgba", ik_site_color)
+ markers_added.append(f"{finger_name}_{joint_type} (updated)")
+ else:
+ # Create new marker site
+ site = ET.SubElement(body_elem, "site")
+ site.set("name", marker_site_name)
+ site.set("pos", "0 0 0") # At body origin
+ site.set("size", "0.004") # Same size as tip sites for consistency
+ site.set("rgba", ik_site_color) # Blue for all IK targets
+ markers_added.append(f"{finger_name}_{joint_type} (created)")
+ else:
+ print(f"Warning: Body '{frame_name}' not found in MJCF for {finger_name}_{joint_type}")
+ elif frame_type == "site":
+ # This is a tip site - find and update it (should already be handled above, but double-check)
+ found = False
+ for body in root.findall(".//body"):
+ site_elem = body.find(f"./site[@name='{frame_name}']")
+ if site_elem is not None:
+ site_elem.set("size", "0.005")
+ site_elem.set("rgba", ik_site_color)
+ found = True
+ markers_added.append(f"{finger_name}_{joint_type} (site updated)")
+ break
+ if not found:
+ print(f"Warning: Site '{frame_name}' not found in MJCF for {finger_name}_{joint_type}")
+
+ if markers_added:
+ print(f"Added/updated {len(markers_added)} IK target markers: {', '.join(markers_added)}")
return ET.tostring(root, encoding="unicode")
@@ -101,11 +169,8 @@ def __init__(
hold_last: bool = False,
width: int = 1280,
height: int = 720,
- joint_smoothing: float = 1.0,
+ joint_smoothing: float = 0.7,
target_joint_types: tuple[str, ...] = ("tip", "ip"),
- auto_scale: bool = False,
- auto_scale_once: bool = False,
- auto_scale_update_rate: float = 0.1,
) -> None:
"""Initialize the HandPose ROS2 publisher.
@@ -120,11 +185,8 @@ def __init__(
hold_last: If True, publish last valid joint values when hand is not detected
width: Camera frame width
height: Camera frame height
- joint_smoothing: Smoothing factor for joint positions (0.0-1.0). Default: 1.0 (no smoothing)
+ joint_smoothing: Smoothing factor for joint positions (0.0-1.0). Default: 0.7
target_joint_types: Tuple of joint types to target for IK (tip, ip, pip, mcp). Default: ("tip", "ip")
- auto_scale: Enable automatic scale factor computation based on fingertip distances
- auto_scale_once: Compute scale factor once on first hand detection (default: update every frame)
- auto_scale_update_rate: Exponential smoothing rate for auto-scaling (0.0-1.0). Default: 0.1
"""
super().__init__("handpose_hamer_publisher")
@@ -154,27 +216,34 @@ def __init__(
raise FileNotFoundError(f"Model file not found: {model_file}")
self.get_logger().info(f"Loading MuJoCo model from {model_file}")
- # Inject tip sites into MJCF (required for tip tracking, matches live_demo_ik.py)
- xml_string = inject_target_bodies(model_file)
+ # Inject tip sites into MJCF (required for tip tracking)
+ xml_string = inject_target_bodies(model_file, target_joint_types)
self.model = mujoco.MjModel.from_xml_string(xml_string)
+ # Verify tip sites exist in the loaded model
+ if "tip" in target_joint_types:
+ self.get_logger().info("Verifying tip sites in loaded model:")
+ data_temp = mujoco.MjData(self.model)
+ for finger in ["thumb", "index", "middle", "ring", "pinky"]:
+ site_name = f"right_{finger}_tip_site"
+ site_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, site_name)
+ if site_id >= 0:
+ site_pos = data_temp.site(site_id).xpos.copy()
+ self.get_logger().info(
+ f" ✓ {site_name}: found at position [{site_pos[0]:.4f}, {site_pos[1]:.4f}, {site_pos[2]:.4f}]"
+ )
+ else:
+ self.get_logger().warn(f" ✗ {site_name}: NOT FOUND in model!")
+
# IK retargeting
- # Use the same wrist_offset_palm as live_demo_ik.py to match USD model coordinate frame
- # The default MJCF offset [0.002, -0.00144, -0.03872] may not match the USD model
ik_cfg = ORCAHandIKConfig(
scale_factor=scale,
target_joint_types=target_joint_types,
- wrist_offset_palm=np.array([0.000, 0.0, -0.05]), # Match live_demo_ik.py
- auto_scale=auto_scale,
- auto_scale_update_rate=auto_scale_update_rate if auto_scale else 0.0,
- auto_scale_use_neutral_pose=True,
+ lm_damping=0.5,
+ ik_iterations=15,
)
self.ik = ORCAHandIKRetargeting(self.model, config=ik_cfg)
- # Track if we've computed the scale factor once (for auto-scale-once mode)
- self.scale_computed_once = False
- self.auto_scale_once = auto_scale_once
-
# Initialize joint position smoothing state and MuJoCo data for forward kinematics
self.data = mujoco.MjData(self.model)
self.smoothed_qpos = self.data.qpos.copy()
@@ -224,16 +293,6 @@ def tick(self) -> None:
joint_vals = None
if hand is not None:
try:
- # Auto-scale (once mode)
- if self.auto_scale_once and not self.scale_computed_once and self.ik.config.auto_scale:
- # Temporarily disable continuous auto-scaling
- self.ik.config.auto_scale = False
- # Compute scale factor once
- computed_scale = self.ik.compute_auto_scale_factor(hand, use_neutral_robot_pose=True)
- self.ik.config.scale_factor = computed_scale
- self.scale_computed_once = True
- self.get_logger().info(f"[Auto-Scale] Computed scale factor: {computed_scale:.3f}")
-
# Update IK solver configuration with current robot state (required for proper IK solving)
# This matches the approach in live_demo_ik.py
self.data.qpos[:] = self.smoothed_qpos
@@ -357,31 +416,15 @@ def main() -> None:
parser.add_argument(
"--joint-smoothing",
type=float,
- default=1.0,
+ default=0.7,
help="Smoothing factor for joint positions (0.0-1.0). \
- Lower values = more smoothing, higher = less smoothing. Default: 1.0 (no smoothing)",
+ Lower values = more smoothing, higher = less smoothing. Default: 0.7",
)
parser.add_argument(
"--targets",
type=str,
- default="tip, ip",
- help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip, ip",
- )
- parser.add_argument(
- "--auto-scale",
- action="store_true",
- help="Enable automatic scale factor computation based on fingertip distances",
- )
- parser.add_argument(
- "--auto-scale-once",
- action="store_true",
- help="Compute scale factor once on first hand detection (default: update every frame)",
- )
- parser.add_argument(
- "--auto-scale-update-rate",
- type=float,
- default=0.1,
- help="Exponential smoothing rate for auto-scaling (0.0-1.0). Lower = smoother. Default: 0.1",
+ default="tip",
+ help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip",
)
args = parser.parse_args()
@@ -400,14 +443,6 @@ def main() -> None:
if not (0.0 < args.joint_smoothing <= 1.0):
parser.error("--joint-smoothing must be between 0.0 and 1.0 (exclusive of 0.0)")
- # Validate auto-scale update rate
- if args.auto_scale and not (0.0 <= args.auto_scale_update_rate <= 1.0):
- parser.error("--auto-scale-update-rate must be between 0.0 and 1.0")
-
- # Warn if auto-scale-once is used without auto-scale
- if args.auto_scale_once and not args.auto_scale:
- parser.error("--auto-scale-once requires --auto-scale to be enabled")
-
# Initialize ROS2
rclpy.init()
@@ -425,9 +460,6 @@ def main() -> None:
height=args.height,
joint_smoothing=args.joint_smoothing,
target_joint_types=target_joints,
- auto_scale=args.auto_scale,
- auto_scale_once=args.auto_scale_once,
- auto_scale_update_rate=args.auto_scale_update_rate,
)
rclpy.spin(node)
node.destroy_node()
diff --git a/examples/live_demo_ik.py b/examples/live_demo_ik.py
index 579b337..be9a7a9 100644
--- a/examples/live_demo_ik.py
+++ b/examples/live_demo_ik.py
@@ -53,8 +53,16 @@ def dual_window_process(frame_queue: mp.Queue, running_flag: Flag, window_name:
cv2.destroyAllWindows()
-def inject_target_bodies(mjcf_path: Path) -> str:
- """Injects mocap bodies into the MJCF XML string for visualization."""
+def inject_target_bodies(mjcf_path: Path, target_joint_types: tuple[str, ...]) -> str:
+ """Inject mocap bodies for tracker sites and ensure IK target sites exist.
+
+ Args:
+ mjcf_path: Path to MJCF model file
+ target_joint_types: Tuple of joint types to visualize (tip, ip, pip, mcp)
+
+ Returns:
+ Modified MJCF XML string
+ """
tree = ET.parse(mjcf_path)
root = tree.getroot()
worldbody = root.find("worldbody")
@@ -63,60 +71,121 @@ def inject_target_bodies(mjcf_path: Path) -> str:
raise ValueError("Could not find worldbody in MJCF")
# Convert relative paths to absolute paths
- # MuJoCo can't resolve relative paths when loading from string
model_dir = mjcf_path.parent
for asset in root.findall(".//asset"):
for mesh in asset.findall("mesh"):
file_attr = mesh.get("file")
if file_attr and not Path(file_attr).is_absolute():
- # Convert relative path to absolute
abs_path = (model_dir / file_attr).resolve()
mesh.set("file", str(abs_path))
- # Add mocap bodies for all keypoints (MCP, PIP, TIP, and IP for thumb)
- # Use different colors for different joint types
- joint_colors = {
- "mcp": "0 1 0 0.7", # Green for MCP
- "pip": "0 0 1 0.7", # Blue for PIP
- "ip": "1 1 0 0.7", # Yellow for IP (thumb only)
- "tip": "1 0 0 0.7", # Red for TIP
+ # All IK target sites use blue for consistency
+ ik_site_color = "0 0 1 0.9" # Blue for all IK targets
+
+ # Tip site offsets: [x, y, z] where z is upward direction
+ tip_site_specs = {
+ "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])),
+ "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])),
+ "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])),
+ "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])),
+ "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])),
}
- for finger, joints in FINGER_TARGET_BODIES.items():
- for joint_type in joints.keys():
- body = ET.SubElement(worldbody, "body")
- body.set("name", f"target_{finger}_{joint_type}")
- body.set("mocap", "true")
- body.set("pos", "0 0 0")
+ if "tip" in target_joint_types:
+ tip_sites_created = []
+ for finger, (parent_body, offset) in tip_site_specs.items():
+ body_elem = root.find(f".//body[@name='{parent_body}']")
+ if body_elem is None:
+ print(f"Warning: Parent body '{parent_body}' not found for {finger} tip site")
+ continue
+ site_name = f"right_{finger}_tip_site"
+ existing_site = body_elem.find(f"./site[@name='{site_name}']")
+ if existing_site is not None:
+ # Update existing site - make it larger and blue
+ existing_site.set("type", "sphere") # Ensure type is set
+ existing_site.set("size", "0.005")
+ existing_site.set("rgba", ik_site_color)
+ tip_sites_created.append(f"{finger} (updated)")
+ else:
+ # Create new tip site
+ site = ET.SubElement(body_elem, "site")
+ site.set("name", site_name)
+ site.set("type", "sphere") # Explicit type for visibility
+ site.set("pos", " ".join(f"{value:.5f}" for value in offset))
+ site.set("size", "0.005")
+ site.set("rgba", ik_site_color)
+ tip_sites_created.append(f"{finger} (created)")
+ print(f"Tip sites: {', '.join(tip_sites_created)}")
+
+ # Add visual markers for IK target bodies (MCP, PIP, IP) that might not have visual geoms
+ allowed_types = set(target_joint_types)
+ markers_added = []
+ for finger_name, bodies in FINGER_TARGET_BODIES.items():
+ for joint_type, (frame_type, frame_name) in bodies.items():
+ if joint_type not in allowed_types:
+ continue
- # Add a visual sphere (smaller than before)
- geom = ET.SubElement(body, "geom")
- geom.set("type", "sphere")
- geom.set("size", "0.003") # 3mm radius (smaller)
- geom.set("rgba", joint_colors.get(joint_type, "0.5 0.5 0.5 0.7"))
- geom.set("contype", "0") # No collision
- geom.set("conaffinity", "0")
+ if frame_type == "body":
+ # Add a visual site to the body for visibility
+ body_elem = root.find(f".//body[@name='{frame_name}']")
+ if body_elem is not None:
+ # Check if marker site already exists
+ marker_site_name = f"ik_marker_{frame_name}"
+ existing_marker = body_elem.find(f"./site[@name='{marker_site_name}']")
+ if existing_marker is not None:
+ # Update existing marker
+ existing_marker.set("size", "0.004") # Same size as tip sites
+ existing_marker.set("rgba", ik_site_color)
+ markers_added.append(f"{finger_name}_{joint_type} (updated)")
+ else:
+ # Create new marker site
+ site = ET.SubElement(body_elem, "site")
+ site.set("name", marker_site_name)
+ site.set("pos", "0 0 0") # At body origin
+ site.set("size", "0.004") # Same size as tip sites for consistency
+ site.set("rgba", ik_site_color) # Blue for all IK targets
+ markers_added.append(f"{finger_name}_{joint_type} (created)")
+ else:
+ print(f"Warning: Body '{frame_name}' not found in MJCF for {finger_name}_{joint_type}")
+ elif frame_type == "site":
+ # This is a tip site - find and update it (should already be handled above, but double-check)
+ found = False
+ for body in root.findall(".//body"):
+ site_elem = body.find(f"./site[@name='{frame_name}']")
+ if site_elem is not None:
+ site_elem.set("size", "0.005")
+ site_elem.set("rgba", ik_site_color)
+ found = True
+ markers_added.append(f"{finger_name}_{joint_type} (site updated)")
+ break
+ if not found:
+ print(f"Warning: Site '{frame_name}' not found in MJCF for {finger_name}_{joint_type}")
+
+ print(f"Added/updated {len(markers_added)} IK target markers: {', '.join(markers_added)}")
+
+ # Add mocap bodies for tracker sites (MediaPipe landmarks)
+ # All tracker sites use red for consistency - these represent the actual tracked hand
+ tracker_color = "1 0 0 0.9" # Red for all tracker sites (actual hand)
+
+ allowed_types = set(target_joint_types)
+ for finger_name, mp_mapping in MP_LANDMARK_INDICES.items():
+ for joint_type, mp_idx in mp_mapping.items():
+ if joint_type not in allowed_types:
+ continue
- tip_site_specs = {
- "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.018])),
- "index": ("right_index_ip", np.array([0.0, 0.0, 0.020])),
- "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.022])),
- "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.021])),
- "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.018])),
- }
+ # Create mocap body for tracker site
+ mocap_body = ET.SubElement(worldbody, "body")
+ mocap_body.set("name", f"tracker_{finger_name}_{joint_type}")
+ mocap_body.set("mocap", "true")
+ mocap_body.set("pos", "0 0 0")
- for finger, (parent_body, offset) in tip_site_specs.items():
- body_elem = root.find(f".//body[@name='{parent_body}']")
- if body_elem is None:
- continue
- site_name = f"right_{finger}_tip_site"
- if body_elem.find(f"./site[@name='{site_name}']") is not None:
- continue
- site = ET.SubElement(body_elem, "site")
- site.set("name", site_name)
- site.set("pos", " ".join(f"{value:.5f}" for value in offset))
- site.set("size", "0.0025")
- site.set("rgba", "1 0.6 0.2 0.8")
+ # Add visual sphere for tracker site (larger, red)
+ geom = ET.SubElement(mocap_body, "geom")
+ geom.set("type", "sphere")
+ geom.set("size", "0.005") # 5mm radius (larger than IK targets for visibility)
+ geom.set("rgba", tracker_color) # Red for all tracker sites
+ geom.set("contype", "0")
+ geom.set("conaffinity", "0")
return ET.tostring(root, encoding="unicode")
@@ -253,7 +322,6 @@ async def main_async(
label_lookup: dict[int, list[str]],
camera_matrix: np.ndarray | None = None,
joint_smoothing: float = 1.0,
- auto_scale_once: bool = False,
) -> None:
"""Async main loop with keyboard handling."""
running = True
@@ -279,9 +347,6 @@ async def key_handler(key: str) -> None:
# Initialize joint position smoothing state
smoothed_qpos = data.qpos.copy()
- # Track if we've computed scale once (for auto-scale-once mode)
- scale_computed_once = False
-
with mujoco.viewer.launch_passive(model, data) as viewer:
while running and viewer.is_running() and cap.isOpened():
ret, frame = cap.read()
@@ -298,16 +363,6 @@ async def key_handler(key: str) -> None:
if hand_structures:
structure = hand_structures[0]
- # --- AUTO-SCALE (once mode) ---
- if auto_scale_once and not scale_computed_once and ik_solver.config.auto_scale:
- # Temporarily disable continuous auto-scaling
- ik_solver.config.auto_scale = False
- # Compute scale factor once
- computed_scale = ik_solver.compute_auto_scale_factor(structure, use_neutral_robot_pose=True)
- ik_solver.config.scale_factor = computed_scale
- scale_computed_once = True
- print(f"[Auto-Scale] Computed scale factor: {computed_scale:.3f}")
-
# --- IK SOLVE ---
# 1. Update the configuration object with current robot state
mujoco.mj_forward(model, data)
@@ -410,7 +465,7 @@ def main() -> None:
parser.add_argument(
"--targets",
type=str,
- default="tip, ip",
+ default="tip",
help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip, ip",
)
parser.add_argument(
@@ -429,25 +484,9 @@ def main() -> None:
parser.add_argument(
"--joint-smoothing",
type=float,
- default=1.0,
+ default=0.7,
help="Smoothing factor for joint positions (0.0-1.0). \
- Lower values = more smoothing, higher = less smoothing. Default: 1.0 (no smoothing)",
- )
- parser.add_argument(
- "--auto-scale",
- action="store_true",
- help="Enable automatic scale factor computation based on fingertip distances",
- )
- parser.add_argument(
- "--auto-scale-once",
- action="store_true",
- help="Compute scale factor once on first hand detection (default: update every frame)",
- )
- parser.add_argument(
- "--auto-scale-update-rate",
- type=float,
- default=0.1,
- help="Exponential smoothing rate for auto-scaling (0.0-1.0). Lower = smoother. Default: 0.1",
+ Lower values = more smoothing, higher = less smoothing. Default: 0.7",
)
args = parser.parse_args()
@@ -461,16 +500,15 @@ def main() -> None:
parser.error(f"Unsupported target joint types: {', '.join(invalid)}")
# 1. Setup paths and model
- script_dir = Path(__file__).parent
- model_path = script_dir.parent / "models" / args.model if not Path(args.model).is_absolute() else Path(args.model)
-
- if not model_path.exists():
- print(f"Error: Model not found at {model_path}")
- return
+ model_file = Path(args.model)
+ if not model_file.is_absolute():
+ model_file = Path(__file__).parent.parent / args.model
+ if not model_file.exists():
+ raise FileNotFoundError(f"Model file not found: {model_file}")
# 2. Inject visualization bodies (Mocap)
- print("Injecting visualization targets...")
- xml_string = inject_target_bodies(model_path)
+ print(f"Loading model from {model_file}")
+ xml_string = inject_target_bodies(model_file, target_joints)
# 3. Load MuJoCo Model
model = mujoco.MjModel.from_xml_string(xml_string)
@@ -487,25 +525,38 @@ def main() -> None:
ik_config = ORCAHandIKConfig(
scale_factor=args.scale,
- wrist_offset_palm=np.array([0.000, 0.0, -0.05]),
target_joint_types=target_joints,
- auto_scale=args.auto_scale,
- auto_scale_update_rate=args.auto_scale_update_rate if args.auto_scale else 0.0,
- auto_scale_use_neutral_pose=True,
+ lm_damping=0.5,
+ ik_iterations=15,
)
ik_solver = ORCAHandIKRetargeting(model, config=ik_config)
+ # Verify tip sites exist in the loaded model
+ if "tip" in target_joints:
+ print("\nVerifying tip sites in loaded model:")
+ for finger in ["thumb", "index", "middle", "ring", "pinky"]:
+ site_name = f"right_{finger}_tip_site"
+ site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, site_name)
+ if site_id >= 0:
+ site_pos = data.site(site_id).xpos.copy()
+ print(f" ✓ {site_name}: found at position [{site_pos[0]:.4f}, {site_pos[1]:.4f}, {site_pos[2]:.4f}]")
+ else:
+ print(f" ✗ {site_name}: NOT FOUND in model!")
+
allowed_joint_types = set(target_joints)
# 5. Helper to map finger names and joint types to mocap body IDs
+ # Note: These use "tracker_" prefix to match the visualization bodies created by inject_target_bodies
target_body_ids: dict[str, dict[str, int]] = {}
for finger, joints in FINGER_TARGET_BODIES.items():
for joint_type in joints.keys():
if joint_type not in allowed_joint_types:
continue
finger_dict = target_body_ids.setdefault(finger, {})
- bid = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, f"target_{finger}_{joint_type}")
+ bid = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, f"tracker_{finger}_{joint_type}")
+ if bid < 0:
+ print(f"Warning: Mocap body 'tracker_{finger}_{joint_type}' not found")
finger_dict[joint_type] = bid
# 6. Main Loop
@@ -538,14 +589,6 @@ def main() -> None:
if not (0.0 < args.joint_smoothing <= 1.0):
parser.error("--joint-smoothing must be between 0.0 and 1.0 (exclusive of 0.0)")
- # Validate auto-scale update rate
- if args.auto_scale and not (0.0 <= args.auto_scale_update_rate <= 1.0):
- parser.error("--auto-scale-update-rate must be between 0.0 and 1.0")
-
- # Warn if auto-scale-once is used without auto-scale
- if args.auto_scale_once and not args.auto_scale:
- parser.error("--auto-scale-once requires --auto-scale to be enabled")
-
# Run async main loop
asyncio.run(
main_async(
@@ -560,7 +603,6 @@ def main() -> None:
mp_label_lookup,
camera_matrix,
joint_smoothing=args.joint_smoothing,
- auto_scale_once=args.auto_scale_once,
)
)
diff --git a/handpose/ik_retargeting.py b/handpose/ik_retargeting.py
index 4aed301..72453a2 100644
--- a/handpose/ik_retargeting.py
+++ b/handpose/ik_retargeting.py
@@ -5,6 +5,7 @@
import mink
import mujoco
import numpy as np
+from mink.limits import CollisionAvoidanceLimit, ConfigurationLimit, Limit
from handpose.tracker.base import HandStructure
@@ -44,22 +45,22 @@
},
"index": {
"mcp": ("body", "right_index_mp"),
- "pip": ("body", "right_index_pp"),
+ "pip": ("body", "right_index_ip"), # _ip body contains the PIP joint
"tip": ("site", "right_index_tip_site"),
},
"middle": {
"mcp": ("body", "right_middle_mp"),
- "pip": ("body", "right_middle_pp"),
+ "pip": ("body", "right_middle_ip"), # _ip body contains the PIP joint
"tip": ("site", "right_middle_tip_site"),
},
"ring": {
"mcp": ("body", "right_ring_mp"),
- "pip": ("body", "right_ring_pp"),
+ "pip": ("body", "right_ring_ip"), # _ip body contains the PIP joint
"tip": ("site", "right_ring_tip_site"),
},
"pinky": {
"mcp": ("body", "right_pinky_mp"),
- "pip": ("body", "right_pinky_pp"),
+ "pip": ("body", "right_pinky_ip"), # _ip body contains the PIP joint
"tip": ("site", "right_pinky_tip_site"),
},
}
@@ -106,16 +107,23 @@ class ORCAHandIKConfig:
# Default from MJCF: right_wrist joint pos="0.002 -0.00144 -0.03872"
wrist_offset_palm: np.ndarray | None = None
- # IK solver parameters
+ # IK solver parameters (matching manus Mink configs)
dt: float = 0.05 # Timestep for IK integration (seconds)
- damping: float = 1e-2 # Levenberg-Marquardt damping
- solver: str = "daqp" # QP solver
+ damping: float = 1e-5 # Levenberg-Marquardt damping (matching manus: 1e-5)
+ solver: str = "quadprog" # QP solver (matching manus: "quadprog")
ik_iterations: int = 5 # Number of IK passes to perform before returning (for better convergence)
- # Task costs
- position_cost: float = 3.0 # Cost for position tracking
+ # Task costs (matching manus Mink configs)
+ position_cost: float = 1.0 # Cost for position tracking (matching manus: 1.0)
orientation_cost: float = 0.0 # Cost for orientation tracking
- posture_cost: float = 1e-4 # Cost for posture task (keeps hand near neutral)
+ lm_damping: float = 1.0 # Levenberg-Marquardt damping for FrameTask (matching manus: 1.0)
+
+ # Collision avoidance (optional)
+ use_collision_avoidance: bool = False # Enable CollisionAvoidanceLimit
+ collision_geom_pairs: list[tuple[list[str], list[str]]] | None = None # Geom pairs for collision avoidance
+ collision_gain: float = 0.85 # Collision avoidance gain (default from Mink)
+ collision_min_distance: float = 0.005 # Minimum distance between geoms (meters)
+ collision_detection_distance: float = 0.01 # Distance at which collision avoidance activates (meters)
# Coordinate frame transformation
# MediaPipe to Robot coordinate mapping: [MP_X, MP_Y, MP_Z] -> [Robot_X, Robot_Y, Robot_Z]
@@ -123,13 +131,6 @@ class ORCAHandIKConfig:
coord_transform: np.ndarray | None = None
target_joint_types: tuple[str, ...] = ("tip",)
- # Auto-scaling options
- auto_scale: bool = False # If True, automatically computes scale_factor from fingertip distances
- auto_scale_update_rate: float = (
- 0.1 # Exponential smoothing factor for auto-scaling (0.0 = no smoothing, 1.0 = instant)
- )
- auto_scale_use_neutral_pose: bool = True # Use neutral robot pose for scale computation
-
def __post_init__(self) -> None:
"""Initialize default values if None."""
if self.wrist_offset_palm is None:
@@ -168,10 +169,6 @@ def __init__(
self.model = model
self.configuration = mink.Configuration(model)
- # Initialize config
- if config is None:
- config = ORCAHandIKConfig.default_config()
-
self.config = config
# Identify the qpos indices for the finger joints we want to control
@@ -201,7 +198,7 @@ def __init__(
frame_type=frame_type,
position_cost=self.config.position_cost,
orientation_cost=self.config.orientation_cost,
- lm_damping=self.config.damping,
+ lm_damping=self.config.lm_damping,
)
finger_tasks[joint_type] = task
if finger_tasks:
@@ -213,127 +210,17 @@ def __init__(
"(tip tracking requires fingertip sites to be injected)."
)
- # We also need a posture task to encourage the hand to stay close to a "neutral" pose
- # when not reaching for extremes. This prevents weird internal configurations.
- self.posture_task = mink.PostureTask(
- model,
- cost=self.config.posture_cost,
- lm_damping=self.config.damping,
- )
-
- self.target_pose = np.zeros(model.nq)
- self.posture_task.set_target(self.target_pose)
-
- def compute_auto_scale_factor(self, hand_structure: HandStructure, use_neutral_robot_pose: bool = True) -> float:
- """Compute automatic scale factor based on fingertip distances.
-
- Compares the distance from wrist to fingertips in the human hand
- with the distance from wrist to fingertips in the ORCA hand model.
-
- Args:
- hand_structure: The tracked human hand structure
- use_neutral_robot_pose: If True, uses neutral robot pose (q=0).
- If False, uses current robot configuration.
-
- Returns:
- Scale factor (robot_fingertip_distance / human_fingertip_distance)
- """
- # Get human hand fingertip positions relative to wrist
- wrist_pos = hand_structure.wrist_position
- human_fingertips = {
- "thumb": hand_structure.thumb.tip,
- "index": hand_structure.index.tip,
- "middle": hand_structure.middle.tip,
- "ring": hand_structure.ring.tip,
- "pinky": hand_structure.pinky.tip,
- }
-
- # Compute average distance from wrist to fingertips in human hand
- human_distances = []
- for finger, tip_pos in human_fingertips.items():
- dist = np.linalg.norm(tip_pos - wrist_pos)
- if dist > 0.01: # Filter out invalid/too-small distances
- human_distances.append(dist)
-
- if len(human_distances) == 0:
- return self.config.scale_factor # Fallback to current scale
-
- avg_human_distance = np.mean(human_distances)
-
- # Get robot hand fingertip positions
- if use_neutral_robot_pose:
- # Save current configuration
- saved_q = self.configuration.q.copy()
- # Set to neutral pose
- self.configuration.q[:] = 0.0
- self.configuration.update()
-
- # Get wrist position in robot model
- t_palm = self.configuration.get_transform_frame_to_world("right_palm", "body")
- p_palm = t_palm.translation()
- r_palm = t_palm.rotation().as_matrix()
- wrist_offset_world = r_palm @ self.config.wrist_offset_palm
- p_wrist_robot = p_palm + wrist_offset_world
-
- # Get fingertip positions from robot model using mink Configuration
- robot_fingertips = {}
- for finger_name in ["thumb", "index", "middle", "ring", "pinky"]:
- if finger_name not in self.tasks:
- continue
- finger_tasks = self.tasks[finger_name]
- if "tip" not in finger_tasks:
- continue
-
- # Get the frame name and type
- frame_type, frame_name = FINGER_TARGET_BODIES[finger_name]["tip"]
-
- # Get transform from mink Configuration
- try:
- if frame_type == "body":
- t_tip = self.configuration.get_transform_frame_to_world(frame_name, "body")
- else: # site
- t_tip = self.configuration.get_transform_frame_to_world(frame_name, "site")
- tip_pos = t_tip.translation()
- robot_fingertips[finger_name] = tip_pos
- except Exception:
- # Fallback: try to get from MuJoCo model directly
- obj_type = mujoco.mjtObj.mjOBJ_BODY if frame_type == "body" else mujoco.mjtObj.mjOBJ_SITE
- frame_id = mujoco.mj_name2id(self.model, obj_type, frame_name)
- if frame_id >= 0:
- # Need to forward kinematics - use data
- data = mujoco.MjData(self.model)
- data.qpos[:] = self.configuration.q
- mujoco.mj_forward(self.model, data)
- if obj_type == mujoco.mjtObj.mjOBJ_SITE:
- tip_pos = data.site(frame_id).xpos.copy()
- else:
- tip_pos = data.body(frame_id).xpos.copy()
- robot_fingertips[finger_name] = tip_pos
-
- # Restore configuration if we changed it
- if use_neutral_robot_pose:
- self.configuration.q[:] = saved_q
- self.configuration.update()
-
- # Compute average distance from wrist to fingertips in robot hand
- robot_distances = []
- for finger, tip_pos in robot_fingertips.items():
- dist = np.linalg.norm(tip_pos - p_wrist_robot)
- if dist > 0.01: # Filter out invalid/too-small distances
- robot_distances.append(dist)
-
- if len(robot_distances) == 0:
- return self.config.scale_factor # Fallback to current scale
-
- avg_robot_distance = np.mean(robot_distances)
-
- # Compute scale factor
- if avg_human_distance > 1e-6:
- scale_factor = avg_robot_distance / avg_human_distance
- else:
- return self.config.scale_factor # Fallback
-
- return scale_factor
+ # Initialize collision avoidance limits if enabled
+ self.limits: list[Limit] = [ConfigurationLimit(model=model)]
+ if self.config.use_collision_avoidance and self.config.collision_geom_pairs:
+ collision_limit = CollisionAvoidanceLimit(
+ model=model,
+ geom_pairs=self.config.collision_geom_pairs,
+ gain=self.config.collision_gain,
+ minimum_distance_from_collisions=self.config.collision_min_distance,
+ collision_detection_distance=self.config.collision_detection_distance,
+ )
+ self.limits.append(collision_limit)
def _hand_structure_to_landmarks(self, hand_structure: HandStructure) -> np.ndarray:
"""Convert HandStructure to 21x3 landmarks array (MediaPipe format).
@@ -455,20 +342,6 @@ def solve(self, hand_input: HandStructure | np.ndarray) -> np.ndarray:
Returns:
The full qpos array for the robot.
"""
- # Auto-scale if enabled
- if self.config.auto_scale and isinstance(hand_input, HandStructure):
- new_scale = self.compute_auto_scale_factor(
- hand_input, use_neutral_robot_pose=self.config.auto_scale_use_neutral_pose
- )
- # Apply exponential smoothing to avoid jitter
- if self.config.auto_scale_update_rate > 0.0:
- self.config.scale_factor = (
- self.config.auto_scale_update_rate * new_scale
- + (1.0 - self.config.auto_scale_update_rate) * self.config.scale_factor
- )
- else:
- self.config.scale_factor = new_scale
-
targets = self.compute_target_positions(hand_input)
# Use configurable parameters
@@ -478,8 +351,8 @@ def solve(self, hand_input: HandStructure | np.ndarray) -> np.ndarray:
# Perform multiple IK iterations for better convergence
for iteration in range(self.config.ik_iterations):
- # Build active tasks list (posture task + finger tasks)
- active_tasks: list[mink.Task] = [self.posture_task]
+ # Build active tasks list (finger tasks only)
+ active_tasks: list[mink.Task] = []
# Get palm transform and compute wrist position (consistent with compute_target_positions)
# Recompute each iteration in case configuration changed
@@ -520,8 +393,8 @@ def solve(self, hand_input: HandStructure | np.ndarray) -> np.ndarray:
task.set_target(target_se3)
active_tasks.append(task)
- # Solve IK
- vel = mink.solve_ik(self.configuration, active_tasks, dt, solver, damping)
+ # Solve IK with limits (collision avoidance if enabled)
+ vel = mink.solve_ik(self.configuration, active_tasks, dt, solver, damping, limits=self.limits)
# Integrate velocity to update configuration
self.configuration.integrate_inplace(vel, dt)
diff --git a/models/orca_hand_fixed.mjcf b/models/orca_hand_fixed.mjcf
index 89209ae..2c48d66 100644
--- a/models/orca_hand_fixed.mjcf
+++ b/models/orca_hand_fixed.mjcf
@@ -3,6 +3,11 @@
+
+
+
+
+
@@ -26,8 +31,8 @@
-
-
+
+
diff --git a/pyproject.toml b/pyproject.toml
index afe77f0..59a84fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -94,6 +94,7 @@ module = [
'sensor_msgs.*',
'h5py',
"colorlogging",
+ 'mink.*',
]
ignore_missing_imports = true
diff --git a/scripts/record_and_process_video.py b/scripts/record_and_process_video.py
new file mode 100644
index 0000000..fd54852
--- /dev/null
+++ b/scripts/record_and_process_video.py
@@ -0,0 +1,862 @@
+"""Record video, process with HAMER, visualize in MuJoCo, and concatenate videos.
+
+This script:
+1. Listens for key press ('r' or 's') to start recording
+2. Records video from camera
+3. Saves the recorded video
+4. Processes video with HAMER frame by frame
+5. Opens MuJoCo and visualizes the processed hand tracking
+6. Records the MuJoCo visualization
+7. Concatenates both videos together
+"""
+
+import argparse
+import sys
+import time
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from typing import Optional
+
+import cv2
+import mujoco
+import mujoco.viewer
+import numpy as np
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from handpose.ik_retargeting import (
+ FINGER_TARGET_BODIES,
+ MP_LANDMARK_INDICES,
+ ORCAHandIKConfig,
+ ORCAHandIKRetargeting,
+)
+from handpose.tracker.base import HandStructure
+from handpose.tracker.hamer import HaMeRTracker
+
+
+def inject_visualization_bodies(mjcf_path: Path, target_joint_types: tuple[str, ...]) -> str:
+ """Inject mocap bodies for tracker sites and ensure IK target sites exist."""
+ tree = ET.parse(mjcf_path)
+ root = tree.getroot()
+ worldbody = root.find("worldbody")
+
+ if worldbody is None:
+ raise ValueError("Could not find worldbody in MJCF")
+
+ # Convert relative paths to absolute paths
+ model_dir = mjcf_path.parent
+ for asset in root.findall(".//asset"):
+ for mesh in asset.findall("mesh"):
+ file_attr = mesh.get("file")
+ if file_attr and not Path(file_attr).is_absolute():
+ abs_path = (model_dir / file_attr).resolve()
+ mesh.set("file", str(abs_path))
+
+ # IK target sites use blue
+ ik_site_color = "0 0 1 0.9"
+
+ # Tip site offsets
+ tip_site_specs = {
+ "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])),
+ "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])),
+ "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])),
+ "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])),
+ "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])),
+ }
+
+ if "tip" in target_joint_types:
+ for finger, (parent_body, offset) in tip_site_specs.items():
+ body_elem = root.find(f".//body[@name='{parent_body}']")
+ if body_elem is None:
+ continue
+ site_name = f"right_{finger}_tip_site"
+ existing_site = body_elem.find(f"./site[@name='{site_name}']")
+ if existing_site is not None:
+ existing_site.set("type", "sphere")
+ existing_site.set("size", "0.005")
+ existing_site.set("rgba", ik_site_color)
+ else:
+ site = ET.SubElement(body_elem, "site")
+ site.set("name", site_name)
+ site.set("type", "sphere")
+ site.set("pos", " ".join(f"{value:.5f}" for value in offset))
+ site.set("size", "0.005")
+ site.set("rgba", ik_site_color)
+
+ # Add visual markers for IK target bodies
+ allowed_types = set(target_joint_types)
+ for finger_name, bodies in FINGER_TARGET_BODIES.items():
+ for joint_type, (frame_type, frame_name) in bodies.items():
+ if joint_type not in allowed_types:
+ continue
+
+ if frame_type == "body":
+ body_elem = root.find(f".//body[@name='{frame_name}']")
+ if body_elem is not None:
+ marker_site_name = f"ik_marker_{frame_name}"
+ existing_marker = body_elem.find(f"./site[@name='{marker_site_name}']")
+ if existing_marker is not None:
+ existing_marker.set("size", "0.004")
+ existing_marker.set("rgba", ik_site_color)
+ else:
+ site = ET.SubElement(body_elem, "site")
+ site.set("name", marker_site_name)
+ site.set("pos", "0 0 0")
+ site.set("size", "0.004")
+ site.set("rgba", ik_site_color)
+
+ # Add mocap bodies for tracker sites (red)
+ tracker_color = "1 0 0 0.9"
+ # Add wrist tracker body
+ wrist_body = ET.SubElement(worldbody, "body")
+ wrist_body.set("name", "tracker_wrist")
+ wrist_body.set("mocap", "true")
+ wrist_body.set("pos", "0 0 0")
+ wrist_geom = ET.SubElement(wrist_body, "geom")
+ wrist_geom.set("type", "sphere")
+ wrist_geom.set("size", "0.005")
+ wrist_geom.set("rgba", tracker_color)
+ wrist_geom.set("contype", "0")
+ wrist_geom.set("conaffinity", "0")
+
+ for finger_name, mp_mapping in MP_LANDMARK_INDICES.items():
+ for joint_type, mp_idx in mp_mapping.items():
+ if joint_type not in allowed_types:
+ continue
+
+ mocap_body = ET.SubElement(worldbody, "body")
+ mocap_body.set("name", f"tracker_{finger_name}_{joint_type}")
+ mocap_body.set("mocap", "true")
+ mocap_body.set("pos", "0 0 0")
+
+ geom = ET.SubElement(mocap_body, "geom")
+ geom.set("type", "sphere")
+ geom.set("size", "0.005")
+ geom.set("rgba", tracker_color)
+ geom.set("contype", "0")
+ geom.set("conaffinity", "0")
+
+ # Add skeleton lines (cylinders) between joints for skeleton overlay
+ skeleton_color = "0 1 0 0.8" # Green skeleton lines
+ skeleton_connections = [
+ # Thumb chain
+ ("tracker_thumb_mcp", "tracker_thumb_ip"),
+ ("tracker_thumb_ip", "tracker_thumb_tip"),
+ # Index chain
+ ("tracker_index_mcp", "tracker_index_pip"),
+ ("tracker_index_pip", "tracker_index_tip"),
+ # Middle chain
+ ("tracker_middle_mcp", "tracker_middle_pip"),
+ ("tracker_middle_pip", "tracker_middle_tip"),
+ # Ring chain
+ ("tracker_ring_mcp", "tracker_ring_pip"),
+ ("tracker_ring_pip", "tracker_ring_tip"),
+ # Pinky chain
+ ("tracker_pinky_mcp", "tracker_pinky_pip"),
+ ("tracker_pinky_pip", "tracker_pinky_tip"),
+ ]
+
+ # Add wrist connections (from wrist to each finger MCP)
+ wrist_connections = [
+ ("tracker_index_mcp", "tracker_wrist"),
+ ("tracker_middle_mcp", "tracker_wrist"),
+ ("tracker_ring_mcp", "tracker_wrist"),
+ ("tracker_pinky_mcp", "tracker_wrist"),
+ ("tracker_thumb_mcp", "tracker_wrist"),
+ ]
+
+ # Create skeleton line geoms (will be positioned dynamically)
+ for start_joint, end_joint in skeleton_connections + wrist_connections:
+ # Create a mocap body for the skeleton line
+ line_body = ET.SubElement(worldbody, "body")
+ line_body.set("name", f"skeleton_line_{start_joint}_{end_joint}")
+ line_body.set("mocap", "true")
+ line_body.set("pos", "0 0 0")
+
+ # Add cylinder geom for the line
+ line_geom = ET.SubElement(line_body, "geom")
+ line_geom.set("type", "cylinder")
+ line_geom.set("size", "0.001 0.02") # Thin cylinder (radius, half-height) - will be updated dynamically
+ line_geom.set("rgba", skeleton_color)
+ line_geom.set("contype", "0")
+ line_geom.set("conaffinity", "0")
+
+ return ET.tostring(root, encoding="unicode")
+
+
+def hand_structure_to_landmarks(structure: HandStructure) -> np.ndarray:
+ """Convert HandStructure to 21x3 landmarks array (MediaPipe format)."""
+ landmarks = np.zeros((21, 3))
+
+ landmarks[0] = structure.wrist_position
+ landmarks[1] = structure.thumb.mcp
+ landmarks[2] = structure.thumb.mcp
+ landmarks[3] = structure.thumb.ip if structure.thumb.ip is not None else structure.thumb.mcp
+ landmarks[4] = structure.thumb.tip
+ landmarks[5] = structure.index.mcp
+ landmarks[6] = structure.index.pip if structure.index.pip is not None else structure.index.mcp
+ landmarks[7] = structure.index.dip if structure.index.dip is not None else structure.index.tip
+ landmarks[8] = structure.index.tip
+ landmarks[9] = structure.middle.mcp
+ landmarks[10] = structure.middle.pip if structure.middle.pip is not None else structure.middle.mcp
+ landmarks[11] = structure.middle.dip if structure.middle.dip is not None else structure.middle.tip
+ landmarks[12] = structure.middle.tip
+ landmarks[13] = structure.ring.mcp
+ landmarks[14] = structure.ring.pip if structure.ring.pip is not None else structure.ring.mcp
+ landmarks[15] = structure.ring.dip if structure.ring.dip is not None else structure.ring.tip
+ landmarks[16] = structure.ring.tip
+ landmarks[17] = structure.pinky.mcp
+ landmarks[18] = structure.pinky.pip if structure.pinky.pip is not None else structure.pinky.mcp
+ landmarks[19] = structure.pinky.dip if structure.pinky.dip is not None else structure.pinky.tip
+ landmarks[20] = structure.pinky.tip
+
+ return landmarks
+
+
+def get_joint_position_world(
+ model: mujoco.MjModel,
+ data: mujoco.MjData,
+ joint_name: str,
+ hand_structure: HandStructure,
+ ik_solver: ORCAHandIKRetargeting,
+ target_joint_types: tuple[str, ...],
+) -> np.ndarray | None:
+ """Get world position of a joint by name."""
+ # Check if it's a mocap body we can query directly
+ if joint_name.startswith("tracker_"):
+ body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, joint_name)
+ if body_id >= 0:
+ mocap_idx = model.body_mocapid[body_id]
+ if mocap_idx >= 0 and mocap_idx < model.nmocap:
+ return data.mocap_pos[mocap_idx].copy()
+
+ # Fallback: calculate from hand structure
+ if joint_name in ("wrist", "tracker_wrist"):
+ t_palm = ik_solver.configuration.get_transform_frame_to_world("right_palm", "body")
+ p_palm = t_palm.translation()
+ r_palm = t_palm.rotation().as_matrix()
+ wrist_offset_world = r_palm @ ik_solver.config.wrist_offset_palm
+ return p_palm + wrist_offset_world
+
+ return None
+
+
+def update_skeleton_lines(
+ model: mujoco.MjModel,
+ data: mujoco.MjData,
+ hand_structure: HandStructure,
+ target_joint_types: tuple[str, ...],
+ ik_solver: ORCAHandIKRetargeting,
+) -> None:
+ """Update skeleton line positions and orientations between joints."""
+ skeleton_connections = [
+ ("tracker_thumb_mcp", "tracker_thumb_ip"),
+ ("tracker_thumb_ip", "tracker_thumb_tip"),
+ ("tracker_index_mcp", "tracker_index_pip"),
+ ("tracker_index_pip", "tracker_index_tip"),
+ ("tracker_middle_mcp", "tracker_middle_pip"),
+ ("tracker_middle_pip", "tracker_middle_tip"),
+ ("tracker_ring_mcp", "tracker_ring_pip"),
+ ("tracker_ring_pip", "tracker_ring_tip"),
+ ("tracker_pinky_mcp", "tracker_pinky_pip"),
+ ("tracker_pinky_pip", "tracker_pinky_tip"),
+ ("tracker_index_mcp", "tracker_wrist"),
+ ("tracker_middle_mcp", "tracker_wrist"),
+ ("tracker_ring_mcp", "tracker_wrist"),
+ ("tracker_pinky_mcp", "tracker_wrist"),
+ ("tracker_thumb_mcp", "tracker_wrist"),
+ ]
+
+ for start_joint, end_joint in skeleton_connections:
+ p1 = get_joint_position_world(model, data, start_joint, hand_structure, ik_solver, target_joint_types)
+ p2 = get_joint_position_world(model, data, end_joint, hand_structure, ik_solver, target_joint_types)
+
+ if p1 is None or p2 is None:
+ continue
+
+ # Calculate midpoint and direction
+ midpoint = (p1 + p2) / 2.0
+ direction = p2 - p1
+ length = np.linalg.norm(direction)
+
+ if length < 1e-6:
+ continue
+
+ direction = direction / length
+
+ # Create rotation quaternion to align cylinder with direction
+ # Default cylinder axis is [0, 0, 1], we want it along direction
+ z_axis = np.array([0, 0, 1])
+ if np.abs(np.dot(direction, z_axis)) > 0.99:
+ # Parallel to z-axis, use identity
+ quat = np.array([1, 0, 0, 0])
+ else:
+ # Calculate rotation axis and angle
+ rot_axis = np.cross(z_axis, direction)
+ rot_axis = rot_axis / np.linalg.norm(rot_axis)
+ cos_angle = np.dot(z_axis, direction)
+ angle = np.arccos(np.clip(cos_angle, -1, 1))
+ quat = np.array([
+ np.cos(angle / 2),
+ rot_axis[0] * np.sin(angle / 2),
+ rot_axis[1] * np.sin(angle / 2),
+ rot_axis[2] * np.sin(angle / 2),
+ ])
+
+ # Update mocap body position and orientation
+ line_body_name = f"skeleton_line_{start_joint}_{end_joint}"
+ body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, line_body_name)
+ if body_id >= 0:
+ mocap_idx = model.body_mocapid[body_id]
+ if mocap_idx >= 0 and mocap_idx < model.nmocap:
+ data.mocap_pos[mocap_idx] = midpoint
+ data.mocap_quat[mocap_idx] = quat
+
+
+def update_tracker_sites(
+ model: mujoco.MjModel,
+ data: mujoco.MjData,
+ hand_structure: HandStructure,
+ target_joint_types: tuple[str, ...],
+ ik_solver: ORCAHandIKRetargeting,
+) -> None:
+ """Update mocap body positions for tracker sites based on hand structure."""
+ landmarks_hand_frame = hand_structure_to_landmarks(hand_structure)
+
+ t_palm = ik_solver.configuration.get_transform_frame_to_world("right_palm", "body")
+ p_palm = t_palm.translation()
+ r_palm = t_palm.rotation().as_matrix()
+
+ wrist_offset_world = r_palm @ ik_solver.config.wrist_offset_palm
+ p_wrist = p_palm + wrist_offset_world
+
+ scale = ik_solver.config.scale_factor
+ coord_transform = ik_solver.config.coord_transform
+
+ allowed_types = set(target_joint_types)
+ for finger_name, mp_mapping in MP_LANDMARK_INDICES.items():
+ for joint_type, mp_idx in mp_mapping.items():
+ if joint_type not in allowed_types:
+ continue
+
+ landmark_pos_hand_frame = landmarks_hand_frame[mp_idx]
+ scaled_vec = landmark_pos_hand_frame * scale
+ target_pos_world = p_wrist + scaled_vec
+
+ rel_vec = target_pos_world - p_wrist
+ transformed_vec = coord_transform @ rel_vec
+ final_pos = p_wrist + transformed_vec
+
+ mocap_body_name = f"tracker_{finger_name}_{joint_type}"
+ body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, mocap_body_name)
+ if body_id >= 0:
+ mocap_idx = model.body_mocapid[body_id]
+ if mocap_idx >= 0 and mocap_idx < model.nmocap:
+ data.mocap_pos[mocap_idx] = final_pos
+
+ # Update wrist tracker position
+ wrist_body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, "tracker_wrist")
+ if wrist_body_id >= 0:
+ wrist_mocap_idx = model.body_mocapid[wrist_body_id]
+ if wrist_mocap_idx >= 0 and wrist_mocap_idx < model.nmocap:
+ data.mocap_pos[wrist_mocap_idx] = p_wrist
+
+ # Update skeleton lines after updating tracker sites
+ update_skeleton_lines(model, data, hand_structure, target_joint_types, ik_solver)
+
+
+def record_camera_video(cap: cv2.VideoCapture, output_path: Path, fps: float = 30.0) -> bool:
+ """Record video from camera until 'q' is pressed.
+
+ Returns True if recording was successful, False otherwise.
+ """
+ print(f"\n{'='*70}")
+ print("VIDEO RECORDING")
+ print(f"{'='*70}")
+ print("Press 'r' or 's' to START recording")
+ print("Press 'q' to STOP recording and save")
+ print(f"{'='*70}\n")
+
+ # Set camera FPS if possible
+ cap.set(cv2.CAP_PROP_FPS, fps)
+
+ # Wait for start key
+ recording = False
+ out = None
+ frame_count = 0
+ start_time = None
+ frame_time = 1.0 / fps # Time per frame in seconds
+ target_time = time.time() + frame_time
+
+ while True:
+ ret, frame = cap.read()
+ if not ret:
+ print("Failed to read from camera")
+ if out is not None:
+ out.release()
+ cv2.destroyAllWindows()
+ return False
+
+ display_frame = frame.copy()
+ if not recording:
+ cv2.putText(
+ display_frame,
+ "Press 'r' or 's' to START recording",
+ (10, 30),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 1,
+ (0, 255, 0),
+ 2,
+ )
+ else:
+ elapsed = time.time() - start_time
+ cv2.putText(
+ display_frame,
+ f"RECORDING... {elapsed:.1f}s ({frame_count} frames) - Press 'q' to STOP",
+ (10, 30),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 0.7,
+ (0, 0, 255),
+ 2,
+ )
+
+ cv2.imshow("Camera Feed - Recording", display_frame)
+ key = cv2.waitKey(1) & 0xFF
+
+ if key == ord("q") and recording:
+ break
+ elif (key == ord("r") or key == ord("s")) and not recording:
+ recording = True
+ # Get frame dimensions
+ height, width = frame.shape[:2]
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+ out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
+ if not out.isOpened():
+ print(f"Error: Failed to open video writer for {output_path}")
+ cv2.destroyAllWindows()
+ return False
+ start_time = time.time()
+ print(f"Recording started at {fps} FPS. Saving to: {output_path}")
+
+ if recording and out is not None:
+ # Write frame and control timing
+ out.write(frame)
+ frame_count += 1
+
+ # Sleep to maintain target FPS
+ sleep_time = max(0, target_time - time.time())
+ if sleep_time > 0:
+ time.sleep(sleep_time)
+ target_time += frame_time
+
+
+ if out is not None:
+ out.release()
+ elapsed = time.time() - start_time if start_time else 0
+ print(f"Recording stopped. Saved {frame_count} frames ({elapsed:.2f}s) to {output_path}")
+ print(f"Actual FPS: {frame_count / elapsed:.2f} (target: {fps:.2f})")
+
+ cv2.destroyAllWindows()
+ return recording
+
+
+def process_video_with_hamer(
+ video_path: Path,
+ tracker: HaMeRTracker,
+ ik_solver: ORCAHandIKRetargeting,
+ model: mujoco.MjModel,
+ data: mujoco.MjData,
+ target_joint_types: tuple[str, ...],
+) -> list[tuple[np.ndarray, Optional[HandStructure]]]:
+ """Process video with HAMER and return frames with hand structures."""
+ print(f"\n{'='*70}")
+ print("PROCESSING VIDEO WITH HAMER")
+ print(f"{'='*70}")
+ print(f"Processing: {video_path}")
+
+ cap = cv2.VideoCapture(str(video_path))
+ if not cap.isOpened():
+ raise RuntimeError(f"Failed to open video: {video_path}")
+
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+ print(f"Video: {frame_count} frames at {fps} FPS")
+
+ processed_frames = []
+ timestamp = 0.0
+ dt = 1.0 / fps if fps > 0 else 1.0 / 30.0
+
+ frame_idx = 0
+ while True:
+ ret, frame = cap.read()
+ if not ret:
+ break
+
+ # Use previous frame's solution as warm start for better IK convergence
+ # Only reset on first frame
+ if frame_idx == 0:
+ mujoco.mj_resetData(model, data)
+
+ timestamp = frame_idx * dt
+ hand_structures = tracker.detect_hands(frame, timestamp=timestamp)
+
+ hand_structure = hand_structures[0] if hand_structures else None
+
+ if hand_structure:
+ # Update IK solver with current state (warm start from previous frame)
+ mujoco.mj_forward(model, data)
+ ik_solver.configuration.update(data.qpos)
+
+ # Solve IK
+ qpos = ik_solver.solve(hand_structure)
+ if not np.any(np.isnan(qpos)) and not np.any(np.isinf(qpos)):
+ data.qpos[:] = qpos
+ data.qvel[:] = 0
+
+ # Update tracker site positions
+ update_tracker_sites(model, data, hand_structure, target_joint_types, ik_solver)
+
+ # Forward kinematics
+ mujoco.mj_forward(model, data)
+
+ processed_frames.append((frame.copy(), hand_structure))
+ frame_idx += 1
+
+ if frame_idx % 10 == 0:
+ print(f"Processed {frame_idx}/{frame_count} frames...")
+
+ cap.release()
+ print(f"Processing complete. Processed {len(processed_frames)} frames.")
+ return processed_frames
+
+
+def record_mujoco_visualization(
+ processed_frames: list[tuple[np.ndarray, Optional[HandStructure]]],
+ model: mujoco.MjModel,
+ data: mujoco.MjData,
+ output_path: Path,
+ fps: float = 30.0,
+ width: int = 1920,
+ height: int = 1080,
+ ik_solver: Optional[ORCAHandIKRetargeting] = None,
+ target_joint_types: tuple[str, ...] = ("tip",),
+) -> None:
+ """Record MuJoCo visualization from processed frames."""
+ print(f"\n{'='*70}")
+ print("RECORDING MUJOCO VISUALIZATION")
+ print(f"{'='*70}")
+ print(f"Recording {len(processed_frames)} frames to: {output_path}")
+
+ # Create renderer
+ # The model should have framebuffer settings in MJCF for larger resolutions
+ try:
+ renderer = mujoco.Renderer(model, height=height, width=width, max_geom=10000)
+ except ValueError as e:
+ # Fallback to smaller resolution if framebuffer is too small
+ print(f"Warning: {e}")
+ print("Falling back to 640x480 resolution")
+ width = 640
+ height = 480
+ renderer = mujoco.Renderer(model, height=height, width=width, max_geom=10000)
+
+ # Setup camera
+ cam = mujoco.MjvCamera()
+ cam.type = mujoco.mjtCamera.mjCAMERA_FREE
+ cam.distance = 0.6 # Further back to fit entire hand
+ cam.azimuth = -20
+ cam.elevation = 35
+ cam.lookat[:] = [0.04, 0.0, 0.08] # Look at palm area
+
+ # Setup options - enable transparency and joint visualization for skeleton overlay
+ option = mujoco.MjvOption()
+ option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = False
+ option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = False
+ option.flags[mujoco.mjtVisFlag.mjVIS_TRANSPARENT] = True # Make meshes transparent
+ option.flags[mujoco.mjtVisFlag.mjVIS_JOINT] = True # Show joints for skeleton overlay
+
+ # Create video writer
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+ out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
+
+ # Process each frame
+ for frame_idx, (frame, hand_structure) in enumerate(processed_frames):
+ # Only reset on first frame
+ if frame_idx == 0:
+ mujoco.mj_resetData(model, data)
+
+ if hand_structure is not None and ik_solver is not None:
+ # Update IK solver configuration
+ mujoco.mj_forward(model, data)
+ ik_solver.configuration.update(data.qpos)
+
+ # Solve IK
+ qpos = ik_solver.solve(hand_structure)
+ if not np.any(np.isnan(qpos)) and not np.any(np.isinf(qpos)):
+ data.qpos[:] = qpos
+ data.qvel[:] = 0
+
+ # Update tracker site positions
+ update_tracker_sites(model, data, hand_structure, target_joint_types, ik_solver)
+
+ # Forward kinematics
+ mujoco.mj_forward(model, data)
+
+ # Update scene with current state
+ renderer.update_scene(data, camera=cam, scene_option=option)
+
+ # Render to numpy array (returns RGB, shape: (height, width, 3))
+ rgb_frame = renderer.render()
+
+ # Convert RGB to BGR for OpenCV
+ # Note: MuJoCo Renderer already returns correct orientation, no need to flip
+ bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
+
+ out.write(bgr_frame)
+
+ if (frame_idx + 1) % 10 == 0:
+ print(f"Rendered {frame_idx + 1}/{len(processed_frames)} frames...")
+
+ out.release()
+ renderer.close()
+ print(f"MuJoCo visualization recording complete. Saved to: {output_path}")
+
+
+def concatenate_videos(video1_path: Path, video2_path: Path, output_path: Path) -> None:
+ """Concatenate two videos side by side."""
+ print(f"\n{'='*70}")
+ print("CONCATENATING VIDEOS")
+ print(f"{'='*70}")
+ print(f"Concatenating {video1_path} and {video2_path}")
+ print(f"Output: {output_path}")
+
+ cap1 = cv2.VideoCapture(str(video1_path))
+ cap2 = cv2.VideoCapture(str(video2_path))
+
+ if not cap1.isOpened() or not cap2.isOpened():
+ raise RuntimeError("Failed to open one or both videos")
+
+ # Get properties
+ fps1 = cap1.get(cv2.CAP_PROP_FPS)
+ fps2 = cap2.get(cv2.CAP_PROP_FPS)
+ fps = min(fps1, fps2) if fps1 > 0 and fps2 > 0 else 30.0
+
+ width1 = int(cap1.get(cv2.CAP_PROP_FRAME_WIDTH))
+ height1 = int(cap1.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ width2 = int(cap2.get(cv2.CAP_PROP_FRAME_WIDTH))
+ height2 = int(cap2.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+ # Use the maximum height and sum of widths for side-by-side
+ max_height = max(height1, height2)
+ total_width = width1 + width2
+
+ # Resize videos to same height if needed
+ if height1 != height2:
+ scale1 = max_height / height1
+ scale2 = max_height / height2
+ width1 = int(width1 * scale1)
+ width2 = int(width2 * scale2)
+ total_width = width1 + width2
+
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+ out = cv2.VideoWriter(str(output_path), fourcc, fps, (total_width, max_height))
+
+ frame_count = 0
+ while True:
+ ret1, frame1 = cap1.read()
+ ret2, frame2 = cap2.read()
+
+ if not ret1 or not ret2:
+ break
+
+ # Resize if needed
+ if height1 != max_height:
+ frame1 = cv2.resize(frame1, (width1, max_height))
+ if height2 != max_height:
+ frame2 = cv2.resize(frame2, (width2, max_height))
+
+ # Concatenate side by side
+ concatenated = np.hstack([frame1, frame2])
+ out.write(concatenated)
+
+ frame_count += 1
+ if frame_count % 10 == 0:
+ print(f"Concatenated {frame_count} frames...")
+
+ cap1.release()
+ cap2.release()
+ out.release()
+ print(f"Concatenation complete. Saved {frame_count} frames to {output_path}")
+
+
+def main() -> None:
+ """Main entry point."""
+ parser = argparse.ArgumentParser(
+ description="Record video, process with HAMER, visualize in MuJoCo, and concatenate",
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ )
+ parser.add_argument("--camera", type=int, default=0, help="Camera device index")
+ parser.add_argument(
+ "--model",
+ type=str,
+ default="models/orca_hand_fixed.mjcf",
+ help="Path to MuJoCo MJCF model file",
+ )
+ parser.add_argument(
+ "--targets",
+ type=str,
+ default="tip",
+ help="Comma-separated joint targets (tip,ip,pip,mcp)",
+ )
+ parser.add_argument(
+ "--scale",
+ type=float,
+ default=1.0,
+ help="Hand scale factor (robot/human)",
+ )
+ parser.add_argument(
+ "--fps",
+ type=float,
+ default=30.0,
+ help="Video FPS",
+ )
+ parser.add_argument(
+ "--output-dir",
+ type=str,
+ default="recordings",
+ help="Output directory for videos",
+ )
+ parser.add_argument(
+ "--video-path",
+ type=str,
+ default=None,
+ help="Path to input video file (if provided, skips camera recording and uses this video)",
+ )
+ args = parser.parse_args()
+
+ # Parse target joint types
+ raw_targets = [part.strip().lower() for part in args.targets.split(",")]
+ target_joints = tuple(dict.fromkeys(jt for jt in raw_targets if jt))
+ if not target_joints:
+ target_joints = ("tip",)
+ supported_targets = {"tip", "ip", "pip", "mcp"}
+ invalid = [jt for jt in target_joints if jt not in supported_targets]
+ if invalid:
+ parser.error(f"Unsupported target joint types: {', '.join(invalid)}")
+
+ # Create output directory
+ output_dir = Path(args.output_dir)
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ # Generate output filenames
+ timestamp = int(time.time())
+ if args.video_path:
+ # Use input video name as base for output files
+ input_video_name = Path(args.video_path).stem
+ camera_video_path = Path(args.video_path) # Use input video directly
+ mujoco_video_path = output_dir / f"mujoco_{input_video_name}_{timestamp}.mp4"
+ final_video_path = output_dir / f"concatenated_{input_video_name}_{timestamp}.mp4"
+ else:
+ camera_video_path = output_dir / f"camera_recording_{timestamp}.mp4"
+ mujoco_video_path = output_dir / f"mujoco_recording_{timestamp}.mp4"
+ final_video_path = output_dir / f"concatenated_{timestamp}.mp4"
+
+ # Load model
+ model_file = Path(args.model)
+ if not model_file.is_absolute():
+ model_file = Path(__file__).parent.parent / args.model
+ if not model_file.exists():
+ raise FileNotFoundError(f"Model file not found: {model_file}")
+
+ print(f"Loading model from {model_file}")
+ xml_string = inject_visualization_bodies(model_file, target_joints)
+ model = mujoco.MjModel.from_xml_string(xml_string)
+ data = mujoco.MjData(model)
+
+ # Initialize tracker and IK solver
+ tracker = HaMeRTracker()
+ ik_config = ORCAHandIKConfig(
+ scale_factor=args.scale,
+ target_joint_types=target_joints,
+ lm_damping=0.5,
+ ik_iterations=15,
+ )
+ ik_solver = ORCAHandIKRetargeting(model, config=ik_config)
+
+ # Verify tip sites exist in the loaded model
+ if "tip" in target_joints:
+ print("\nVerifying tip sites in loaded model:")
+ for finger in ["thumb", "index", "middle", "ring", "pinky"]:
+ site_name = f"right_{finger}_tip_site"
+ site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, site_name)
+ if site_id >= 0:
+ site_pos = data.site(site_id).xpos.copy()
+ print(f" ✓ {site_name}: found at position [{site_pos[0]:.4f}, {site_pos[1]:.4f}, {site_pos[2]:.4f}]")
+ else:
+ print(f" ✗ {site_name}: NOT FOUND in model!")
+
+ # Step 1: Record camera video or use provided video file
+ if args.video_path:
+ camera_video_path = Path(args.video_path)
+ if not camera_video_path.exists():
+ raise FileNotFoundError(f"Video file not found: {camera_video_path}")
+ print(f"Using existing video: {camera_video_path}")
+ else:
+ cap = cv2.VideoCapture(args.camera)
+ if not cap.isOpened():
+ raise RuntimeError(f"Failed to open camera {args.camera}")
+
+ if not record_camera_video(cap, camera_video_path, args.fps):
+ print("Recording cancelled or failed.")
+ cap.release()
+ return
+
+ cap.release()
+
+ # Step 2: Process video with HAMER
+ processed_frames = process_video_with_hamer(
+ camera_video_path,
+ tracker,
+ ik_solver,
+ model,
+ data,
+ target_joints,
+ )
+
+ # Get video FPS (use input video FPS if provided, otherwise use args.fps)
+ if args.video_path:
+ cap = cv2.VideoCapture(str(camera_video_path))
+ video_fps = cap.get(cv2.CAP_PROP_FPS)
+ cap.release()
+ if video_fps > 0:
+ fps = video_fps
+ else:
+ fps = args.fps
+ else:
+ fps = args.fps
+
+ # Step 3: Record MuJoCo visualization
+ record_mujoco_visualization(
+ processed_frames,
+ model,
+ data,
+ mujoco_video_path,
+ fps,
+ ik_solver=ik_solver,
+ target_joint_types=target_joints,
+ )
+
+ # Step 4: Concatenate videos
+ concatenate_videos(camera_video_path, mujoco_video_path, final_video_path)
+
+ print(f"\n{'='*70}")
+ print("ALL DONE!")
+ print(f"{'='*70}")
+ print(f"Camera video: {camera_video_path}")
+ print(f"MuJoCo video: {mujoco_video_path}")
+ print(f"Concatenated video: {final_video_path}")
+ print(f"{'='*70}\n")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/visualize_tracker_ik_sites.py b/scripts/visualize_tracker_ik_sites.py
new file mode 100644
index 0000000..ac99398
--- /dev/null
+++ b/scripts/visualize_tracker_ik_sites.py
@@ -0,0 +1,557 @@
+"""Visualize tracker sites and IK target sites on ORCA hand model.
+
+This script overlays:
+1. Tracker sites (from MediaPipe/HaMeR hand tracking) - shown as colored spheres
+2. IK target sites (on ORCA hand model) - shown as different colored spheres
+
+Both are color-coded and labeled for easy comparison.
+"""
+
+import argparse
+import multiprocessing as mp
+import sys
+import time
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from queue import Empty
+from typing import Protocol
+
+import cv2
+import mujoco
+import mujoco.viewer
+import numpy as np
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from handpose.ik_retargeting import (
+ FINGER_TARGET_BODIES,
+ MP_LANDMARK_INDICES,
+ ORCAHandIKConfig,
+ ORCAHandIKRetargeting,
+)
+from handpose.tracker.base import HandStructure
+from handpose.tracker.hamer import HaMeRTracker
+from handpose.tracker.mediapipe import MediaPipeTracker
+
+
+def inject_visualization_bodies(mjcf_path: Path, target_joint_types: tuple[str, ...]) -> str:
+ """Inject mocap bodies for tracker sites and ensure IK target sites exist.
+
+ Args:
+ mjcf_path: Path to MJCF model file
+ target_joint_types: Tuple of joint types to visualize (tip, ip, pip, mcp)
+
+ Returns:
+ Modified MJCF XML string
+ """
+ tree = ET.parse(mjcf_path)
+ root = tree.getroot()
+ worldbody = root.find("worldbody")
+
+ if worldbody is None:
+ raise ValueError("Could not find worldbody in MJCF")
+
+ # Convert relative paths to absolute paths
+ model_dir = mjcf_path.parent
+ for asset in root.findall(".//asset"):
+ for mesh in asset.findall("mesh"):
+ file_attr = mesh.get("file")
+ if file_attr and not Path(file_attr).is_absolute():
+ abs_path = (model_dir / file_attr).resolve()
+ mesh.set("file", str(abs_path))
+
+ # All IK target sites use blue for consistency
+ ik_site_color = "0 0 1 0.9" # Blue for all IK targets
+
+ # Tip site offsets: [x, y, z] where z is upward direction
+ tip_site_specs = {
+ "thumb": ("right_thumb_dp", np.array([0.0, 0.0, 0.025])),
+ "index": ("right_index_ip", np.array([0.0, 0.0, 0.035])),
+ "middle": ("right_middle_ip", np.array([0.0, 0.0, 0.037])),
+ "ring": ("right_ring_ip", np.array([0.0, 0.0, 0.036])),
+ "pinky": ("right_pinky_ip", np.array([0.0, 0.0, 0.032])),
+ }
+
+ if "tip" in target_joint_types:
+ tip_sites_created = []
+ for finger, (parent_body, offset) in tip_site_specs.items():
+ body_elem = root.find(f".//body[@name='{parent_body}']")
+ if body_elem is None:
+ print(f"Warning: Parent body '{parent_body}' not found for {finger} tip site")
+ continue
+ site_name = f"right_{finger}_tip_site"
+ existing_site = body_elem.find(f"./site[@name='{site_name}']")
+ if existing_site is not None:
+ # Update existing site - make it larger and blue
+ existing_site.set("type", "sphere") # Ensure type is set
+ existing_site.set("size", "0.005")
+ existing_site.set("rgba", ik_site_color)
+ tip_sites_created.append(f"{finger} (updated)")
+ else:
+ # Create new tip site
+ site = ET.SubElement(body_elem, "site")
+ site.set("name", site_name)
+ site.set("type", "sphere") # Explicit type for visibility
+ site.set("pos", " ".join(f"{value:.5f}" for value in offset))
+ site.set("size", "0.005")
+ site.set("rgba", ik_site_color)
+ tip_sites_created.append(f"{finger} (created)")
+ print(f"Tip sites: {', '.join(tip_sites_created)}")
+
+ # Add visual markers for IK target bodies (MCP, PIP, IP) that might not have visual geoms
+ allowed_types = set(target_joint_types)
+ markers_added = []
+ for finger_name, bodies in FINGER_TARGET_BODIES.items():
+ for joint_type, (frame_type, frame_name) in bodies.items():
+ if joint_type not in allowed_types:
+ continue
+
+ if frame_type == "body":
+ # Add a visual site to the body for visibility
+ body_elem = root.find(f".//body[@name='{frame_name}']")
+ if body_elem is not None:
+ # Check if marker site already exists
+ marker_site_name = f"ik_marker_{frame_name}"
+ existing_marker = body_elem.find(f"./site[@name='{marker_site_name}']")
+ if existing_marker is not None:
+ # Update existing marker
+ existing_marker.set("size", "0.004") # Same size as tip sites
+ existing_marker.set("rgba", ik_site_color)
+ markers_added.append(f"{finger_name}_{joint_type} (updated)")
+ else:
+ # Create new marker site
+ site = ET.SubElement(body_elem, "site")
+ site.set("name", marker_site_name)
+ site.set("pos", "0 0 0") # At body origin
+ site.set("size", "0.004") # Same size as tip sites for consistency
+ site.set("rgba", ik_site_color) # Blue for all IK targets
+ markers_added.append(f"{finger_name}_{joint_type} (created)")
+ else:
+ print(f"Warning: Body '{frame_name}' not found in MJCF for {finger_name}_{joint_type}")
+ elif frame_type == "site":
+ # This is a tip site - find and update it (should already be handled above, but double-check)
+ found = False
+ for body in root.findall(".//body"):
+ site_elem = body.find(f"./site[@name='{frame_name}']")
+ if site_elem is not None:
+ site_elem.set("size", "0.005")
+ site_elem.set("rgba", ik_site_color)
+ found = True
+ markers_added.append(f"{finger_name}_{joint_type} (site updated)")
+ break
+ if not found:
+ print(f"Warning: Site '{frame_name}' not found in MJCF for {finger_name}_{joint_type}")
+
+ print(f"Added/updated {len(markers_added)} IK target markers: {', '.join(markers_added)}")
+
+ # Add mocap bodies for tracker sites (MediaPipe landmarks)
+ # All tracker sites use red for consistency - these represent the actual tracked hand
+ tracker_color = "1 0 0 0.9" # Red for all tracker sites (actual hand)
+
+ allowed_types = set(target_joint_types)
+ for finger_name, mp_mapping in MP_LANDMARK_INDICES.items():
+ for joint_type, mp_idx in mp_mapping.items():
+ if joint_type not in allowed_types:
+ continue
+
+ # Create mocap body for tracker site
+ mocap_body = ET.SubElement(worldbody, "body")
+ mocap_body.set("name", f"tracker_{finger_name}_{joint_type}")
+ mocap_body.set("mocap", "true")
+ mocap_body.set("pos", "0 0 0")
+
+ # Add visual sphere for tracker site (larger, red)
+ geom = ET.SubElement(mocap_body, "geom")
+ geom.set("type", "sphere")
+ geom.set("size", "0.005") # 5mm radius (larger than IK targets for visibility)
+ geom.set("rgba", tracker_color) # Red for all tracker sites
+ geom.set("contype", "0")
+ geom.set("conaffinity", "0")
+
+ return ET.tostring(root, encoding="unicode")
+
+
+def hand_structure_to_landmarks(structure: HandStructure) -> np.ndarray:
+ """Convert HandStructure to 21x3 landmarks array (MediaPipe format).
+
+ Returns landmarks in hand frame (wrist at origin).
+ """
+ landmarks = np.zeros((21, 3))
+
+ # Wrist (index 0)
+ landmarks[0] = structure.wrist_position
+
+ # Thumb: CMC(1), MCP(2), IP(3), tip(4)
+ landmarks[1] = structure.thumb.mcp # CMC approximate
+ landmarks[2] = structure.thumb.mcp
+ landmarks[3] = structure.thumb.ip if structure.thumb.ip is not None else structure.thumb.mcp
+ landmarks[4] = structure.thumb.tip
+
+ # Index: MCP(5), PIP(6), DIP(7), tip(8)
+ landmarks[5] = structure.index.mcp
+ landmarks[6] = structure.index.pip if structure.index.pip is not None else structure.index.mcp
+ landmarks[7] = structure.index.dip if structure.index.dip is not None else structure.index.tip
+ landmarks[8] = structure.index.tip
+
+ # Middle: MCP(9), PIP(10), DIP(11), tip(12)
+ landmarks[9] = structure.middle.mcp
+ landmarks[10] = structure.middle.pip if structure.middle.pip is not None else structure.middle.mcp
+ landmarks[11] = structure.middle.dip if structure.middle.dip is not None else structure.middle.tip
+ landmarks[12] = structure.middle.tip
+
+ # Ring: MCP(13), PIP(14), DIP(15), tip(16)
+ landmarks[13] = structure.ring.mcp
+ landmarks[14] = structure.ring.pip if structure.ring.pip is not None else structure.ring.mcp
+ landmarks[15] = structure.ring.dip if structure.ring.dip is not None else structure.ring.tip
+ landmarks[16] = structure.ring.tip
+
+ # Pinky: MCP(17), PIP(18), DIP(19), tip(20)
+ landmarks[17] = structure.pinky.mcp
+ landmarks[18] = structure.pinky.pip if structure.pinky.pip is not None else structure.pinky.mcp
+ landmarks[19] = structure.pinky.dip if structure.pinky.dip is not None else structure.pinky.tip
+ landmarks[20] = structure.pinky.tip
+
+ return landmarks
+
+
+def update_tracker_sites(
+ model: mujoco.MjModel,
+ data: mujoco.MjData,
+ hand_structure: HandStructure,
+ target_joint_types: tuple[str, ...],
+ ik_solver: ORCAHandIKRetargeting,
+) -> None:
+ """Update mocap body positions for tracker sites based on hand structure.
+
+ Args:
+ model: MuJoCo model
+ data: MuJoCo data
+ hand_structure: Tracked hand structure
+ target_joint_types: Joint types to visualize
+ ik_solver: IK solver (for coordinate transformation)
+ """
+ landmarks_hand_frame = hand_structure_to_landmarks(hand_structure)
+
+ # Get palm transform to convert hand frame to world frame
+ t_palm = ik_solver.configuration.get_transform_frame_to_world("right_palm", "body")
+ p_palm = t_palm.translation()
+ r_palm = t_palm.rotation().as_matrix()
+
+ # Wrist offset in palm frame
+ wrist_offset_world = r_palm @ ik_solver.config.wrist_offset_palm
+ p_wrist = p_palm + wrist_offset_world
+
+ # Scale factor
+ scale = ik_solver.config.scale_factor
+
+ # Coordinate transformation
+ coord_transform = ik_solver.config.coord_transform
+
+ allowed_types = set(target_joint_types)
+ for finger_name, mp_mapping in MP_LANDMARK_INDICES.items():
+ for joint_type, mp_idx in mp_mapping.items():
+ if joint_type not in allowed_types:
+ continue
+
+ # Get landmark position in hand frame
+ landmark_pos_hand_frame = landmarks_hand_frame[mp_idx]
+
+ # Scale to robot size
+ scaled_vec = landmark_pos_hand_frame * scale
+
+ # Transform to world frame (anchor to wrist)
+ target_pos_world = p_wrist + scaled_vec
+
+ # Apply coordinate transformation
+ rel_vec = target_pos_world - p_wrist
+ transformed_vec = coord_transform @ rel_vec
+ final_pos = p_wrist + transformed_vec
+
+ # Update mocap body position
+ mocap_body_name = f"tracker_{finger_name}_{joint_type}"
+ body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, mocap_body_name)
+ if body_id >= 0:
+ # Get the mocap index for this body (mocap bodies have mocapid >= 0)
+ mocap_idx = model.body_mocapid[body_id]
+ if mocap_idx >= 0 and mocap_idx < model.nmocap:
+ data.mocap_pos[mocap_idx] = final_pos
+
+
+def visualize_ik_target_sites(
+ model: mujoco.MjModel,
+ data: mujoco.MjData,
+ target_joint_types: tuple[str, ...],
+) -> dict[str, dict[str, np.ndarray]]:
+ """Get positions of IK target sites on ORCA hand.
+
+ Args:
+ model: MuJoCo model
+ data: MuJoCo data
+ target_joint_types: Joint types to visualize
+
+ Returns:
+ Dictionary mapping finger names to joint_type -> position
+ """
+ ik_positions = {}
+ allowed_types = set(target_joint_types)
+
+ for finger_name, bodies in FINGER_TARGET_BODIES.items():
+ finger_positions = {}
+ for joint_type, (frame_type, frame_name) in bodies.items():
+ if joint_type not in allowed_types:
+ continue
+
+ obj_type = mujoco.mjtObj.mjOBJ_BODY if frame_type == "body" else mujoco.mjtObj.mjOBJ_SITE
+ frame_id = mujoco.mj_name2id(model, obj_type, frame_name)
+
+ if frame_id >= 0:
+ if obj_type == mujoco.mjtObj.mjOBJ_SITE:
+ pos = data.site(frame_id).xpos.copy()
+ else:
+ pos = data.body(frame_id).xpos.copy()
+ finger_positions[joint_type] = pos
+
+ if finger_positions:
+ ik_positions[finger_name] = finger_positions
+
+ return ik_positions
+
+
+class Flag(Protocol):
+ value: int
+
+
+def display_feed_process(frame_queue: mp.Queue, running_flag: Flag, window_name: str) -> None:
+ """Display frames in a separate process to avoid GLFW/OpenCV conflicts."""
+ while True:
+ if not running_flag.value and frame_queue.empty():
+ break
+
+ try:
+ frame = frame_queue.get(timeout=0.05)
+ except Empty:
+ if not running_flag.value:
+ break
+ continue
+
+ cv2.imshow(window_name, frame)
+ key = cv2.waitKey(1) & 0xFF
+ if key == ord("q"):
+ running_flag.value = 0
+ break
+
+ cv2.destroyAllWindows()
+
+
+def main() -> None:
+ """Main entry point."""
+ parser = argparse.ArgumentParser(
+ description="Visualize tracker sites and IK target sites on ORCA hand",
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ )
+ parser.add_argument("--camera", type=int, default=0, help="Camera device index")
+ parser.add_argument(
+ "--model",
+ type=str,
+ default="models/orca_hand_fixed.mjcf",
+ help="Path to MuJoCo MJCF model file",
+ )
+ parser.add_argument(
+ "--tracker",
+ type=str,
+ default="hamer",
+ choices=["hamer", "mediapipe"],
+ help="Hand tracking backend",
+ )
+ parser.add_argument(
+ "--targets",
+ type=str,
+ default="tip",
+ help="Comma-separated joint targets (tip,ip,pip,mcp). Default: tip",
+ )
+ parser.add_argument(
+ "--scale",
+ type=float,
+ default=1.0,
+ help="Hand scale factor (robot/human)",
+ )
+ parser.add_argument(
+ "--show-feed",
+ action="store_true",
+ help="Show camera feed with hand tracking overlay",
+ )
+ parser.add_argument(
+ "--joint-smoothing",
+ type=float,
+ default=0.7,
+ help="Smoothing factor for joint positions (0.0-1.0). \
+ Lower values = more smoothing, higher = less smoothing. Default: 0.7",
+ )
+ args = parser.parse_args()
+
+ # Parse target joint types
+ raw_targets = [part.strip().lower() for part in args.targets.split(",")]
+ target_joints = tuple(dict.fromkeys(jt for jt in raw_targets if jt))
+ if not target_joints:
+ target_joints = ("tip",)
+ supported_targets = {"tip", "ip", "pip", "mcp"}
+ invalid = [jt for jt in target_joints if jt not in supported_targets]
+ if invalid:
+ parser.error(f"Unsupported target joint types: {', '.join(invalid)}")
+
+ # Load model
+ model_file = Path(args.model)
+ if not model_file.is_absolute():
+ model_file = Path(__file__).parent.parent / args.model
+ if not model_file.exists():
+ raise FileNotFoundError(f"Model file not found: {model_file}")
+
+ print(f"Loading model from {model_file}")
+ xml_string = inject_visualization_bodies(model_file, target_joints)
+ model = mujoco.MjModel.from_xml_string(xml_string)
+ data = mujoco.MjData(model)
+
+ # Verify tip sites exist in the loaded model
+ if "tip" in target_joints:
+ print("\nVerifying tip sites in loaded model:")
+ for finger in ["thumb", "index", "middle", "ring", "pinky"]:
+ site_name = f"right_{finger}_tip_site"
+ site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, site_name)
+ if site_id >= 0:
+ site_pos = data.site(site_id).xpos.copy()
+ print(f" ✓ {site_name}: found at position [{site_pos[0]:.4f}, {site_pos[1]:.4f}, {site_pos[2]:.4f}]")
+ else:
+ print(f" ✗ {site_name}: NOT FOUND in model!")
+
+ # Initialize tracker
+ if args.tracker == "hamer":
+ tracker = HaMeRTracker()
+ else:
+ tracker = MediaPipeTracker()
+
+ # Initialize IK solver
+ ik_config = ORCAHandIKConfig(
+ scale_factor=args.scale,
+ target_joint_types=target_joints,
+ lm_damping=0.5,
+ ik_iterations=15,
+ )
+ ik_solver = ORCAHandIKRetargeting(model, config=ik_config)
+
+ # Open camera
+ cap = cv2.VideoCapture(args.camera)
+ if not cap.isOpened():
+ raise RuntimeError(f"Failed to open camera {args.camera}")
+
+ print("\n" + "=" * 70)
+ print("VISUALIZATION GUIDE:")
+ print("=" * 70)
+ print("TRACKER SITES (larger RED spheres - actual tracked hand from camera):")
+ print(" Red spheres = All tracker sites (thumb, index, middle, ring, pinky)")
+ print(" These represent the actual human hand position")
+ print("\nIK TARGET SITES (smaller BLUE spheres - on ORCA hand model):")
+ print(" Blue spheres = All IK target sites (thumb, index, middle, ring, pinky)")
+ print(" These represent where the robot hand should move")
+ print("\nNOTE: RED spheres (tracker) should align with BLUE spheres (IK targets)")
+ print(" when IK is solving correctly. RED = actual hand, BLUE = robot targets.")
+ print("\nPress 'q' in MuJoCo viewer to quit")
+ print("=" * 70 + "\n")
+
+ # Validate joint smoothing value
+ if not (0.0 < args.joint_smoothing <= 1.0):
+ parser.error("--joint-smoothing must be between 0.0 and 1.0 (exclusive of 0.0)")
+
+ start_time = time.time()
+
+ # Initialize joint position smoothing state
+ smoothed_qpos = data.qpos.copy()
+
+ # Set up multiprocessing for display window if requested
+ display_process: mp.Process | None = None
+ frame_queue: mp.Queue | None = None
+ running_flag: Flag | None = None
+
+ if args.show_feed:
+ print("Starting camera feed display in separate process...")
+ frame_queue = mp.Queue(maxsize=2) # Small buffer to avoid lag
+ running_flag = mp.Value("i", 1)
+ display_process = mp.Process(
+ target=display_feed_process,
+ args=(frame_queue, running_flag, "Hand Tracking Feed"),
+ )
+ display_process.start()
+
+ try:
+ with mujoco.viewer.launch_passive(model, data) as viewer:
+ while viewer.is_running() and cap.isOpened():
+ ret, frame = cap.read()
+ if not ret:
+ break
+
+ timestamp = time.time() - start_time
+ hand_structures = tracker.detect_hands(frame, timestamp=timestamp)
+
+ # Send frame to display process if enabled
+ if args.show_feed and frame_queue is not None and running_flag is not None:
+ if hand_structures:
+ vis_frame = tracker.visualize(frame, hand_structures)
+ else:
+ vis_frame = frame.copy()
+ try:
+ frame_queue.put_nowait(vis_frame)
+ except Exception:
+ pass # Queue full, skip this frame
+
+ if hand_structures:
+ hand = hand_structures[0]
+
+ # Update IK solver configuration with current smoothed state
+ mujoco.mj_forward(model, data)
+ data.qpos[:] = smoothed_qpos
+ mujoco.mj_forward(model, data)
+ ik_solver.configuration.update(data.qpos)
+
+ # Solve IK
+ qpos = ik_solver.solve(hand)
+
+ # Apply joint position smoothing
+ if not np.any(np.isnan(qpos)) and not np.any(np.isinf(qpos)):
+ # Exponential moving average: smoothed = joint_smoothing * new + (1 - joint_smoothing) * old
+ smoothed_qpos = args.joint_smoothing * qpos + (1.0 - args.joint_smoothing) * smoothed_qpos
+ data.qpos[:] = smoothed_qpos
+ data.qvel[:] = 0
+
+ # Update tracker site positions
+ update_tracker_sites(model, data, hand, target_joints, ik_solver)
+
+ # Forward kinematics to update IK target site positions
+ mujoco.mj_forward(model, data)
+
+ # Get IK target positions (sites are automatically rendered by MuJoCo)
+ ik_positions = visualize_ik_target_sites(model, data, target_joints)
+
+ # Print current positions for debugging (first frame only)
+ if hand_structures:
+ print("\nCurrent IK Target Positions:")
+ for finger, positions in ik_positions.items():
+ for joint_type, pos in positions.items():
+ print(f" {finger}_{joint_type}: [{pos[0]:.4f}, {pos[1]:.4f}, {pos[2]:.4f}]")
+
+ # Sync viewer
+ viewer.sync()
+
+ finally:
+ # Clean up display process
+ if running_flag is not None:
+ running_flag.value = 0
+ if display_process is not None:
+ display_process.join(timeout=1.0)
+ if display_process.is_alive():
+ display_process.terminate()
+ cap.release()
+ print("\nVisualization closed.")
+
+
+if __name__ == "__main__":
+ main()