HTML demos for face, hand, gesture, and posture tracking using MediaPipe. Includes Python CLI tools for processing video files. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
126 lines
4.5 KiB
Python
Executable File
#!/usr/bin/env python3
"""
Hand Landmarks on a static image using MediaPipe Tasks.

Usage:
    python hand_landmarker_cli.py --image hand.png --model hand_landmarker.task --max_hands 2 --out annotated.png

What it does:
  • Loads the MediaPipe Hand Landmarker model (.task file)
  • Runs landmark detection on a single image
  • Prints handedness and 21 landmark coords for each detected hand
  • Saves an annotated image with landmarks and connections
"""

import argparse
import sys
from pathlib import Path

import cv2
import numpy as np
import mediapipe as mp
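
# Note: this script uses the MediaPipe Tasks API (mp.tasks.*), which ships
# with recent mediapipe releases (0.10+ at the time of writing); the legacy
# mp.solutions API is not required here.
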
# MediaPipe Tasks API aliases
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Landmark connection topology (same as mp.solutions.hands.HAND_CONNECTIONS, copied to avoid extra dependency)
HAND_CONNECTIONS = [
    (0, 1), (1, 2), (2, 3), (3, 4),           # Thumb
    (0, 5), (5, 6), (6, 7), (7, 8),           # Index
    (5, 9), (9, 10), (10, 11), (11, 12),      # Middle
    (9, 13), (13, 14), (14, 15), (15, 16),    # Ring
    (13, 17), (17, 18), (18, 19), (19, 20),   # Pinky
    (0, 17),                                  # Palm base to pinky base
]
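
# For reference (not used by the pipeline below): the 21-point hand model
# indexes the wrist as 0 and each finger base -> tip, so the fingertips are:
FINGERTIPS = {"thumb": 4, "index": 8, "middle": 12, "ring": 16, "pinky": 20}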


def draw_landmarks(image_bgr: np.ndarray, landmarks_norm: list):
    """
    Draws landmarks and connections on a BGR image.
    `landmarks_norm` is a list of normalized (x, y, z) MediaPipe landmarks (0..1).
    """
    h, w = image_bgr.shape[:2]

    # Convert normalized coords to pixel coords
    pts = []
    for lm in landmarks_norm:
        x = int(lm.x * w)
        y = int(lm.y * h)
        pts.append((x, y))

    # Draw connections
    for a, b in HAND_CONNECTIONS:
        if 0 <= a < len(pts) and 0 <= b < len(pts):
            cv2.line(image_bgr, pts[a], pts[b], (0, 255, 0), 2, cv2.LINE_AA)

    # Draw keypoints: white outline with a red center
    for x, y in pts:
        cv2.circle(image_bgr, (x, y), 3, (255, 255, 255), -1, cv2.LINE_AA)
        cv2.circle(image_bgr, (x, y), 2, (0, 0, 255), -1, cv2.LINE_AA)
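

# Illustrative sketch, not part of the original CLI flow: MediaPipe also
# returns *world* landmarks in meters (see `world_lms` in main below), from
# which metric quantities such as a thumb-index pinch distance can be derived.
def pinch_distance_m(world_landmarks) -> float:
    """Approximate thumb-tip to index-tip distance in meters."""
    thumb = world_landmarks[FINGERTIPS["thumb"]]
    index = world_landmarks[FINGERTIPS["index"]]
    d = np.array([thumb.x - index.x, thumb.y - index.y, thumb.z - index.z])
    return float(np.linalg.norm(d))

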
def main():
    ap = argparse.ArgumentParser(description="MediaPipe Hand Landmarker (static image)")
    ap.add_argument("--image", required=True, help="Path to an input image (e.g., hand.jpg)")
    ap.add_argument("--model", default="hand_landmarker.task", help="Path to MediaPipe .task model")
    ap.add_argument("--max_hands", type=int, default=2, help="Maximum hands to detect")
    ap.add_argument("--out", default="annotated.png", help="Output path for annotated image")
    args = ap.parse_args()

    img_path = Path(args.image)
    if not img_path.exists():
        print(f"[ERROR] Image not found: {img_path}", file=sys.stderr)
        sys.exit(1)

    model_path = Path(args.model)
    if not model_path.exists():
        print(f"[ERROR] Model not found: {model_path}", file=sys.stderr)
        print("Download the model bundle (.task) and point --model to it.", file=sys.stderr)
        sys.exit(2)
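
    # Assumed pointer, verify against the current MediaPipe docs: Google hosts
    # the official bundle at
    # https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task
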
    # Load the image twice: once for MediaPipe, once for OpenCV drawing
    mp_image = mp.Image.create_from_file(str(img_path))
    image_bgr = cv2.imread(str(img_path))
    if image_bgr is None:
        print(f"[ERROR] Could not read image with OpenCV: {img_path}", file=sys.stderr)
        sys.exit(3)

    # Configure and run the landmarker in single-image mode
    options = HandLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=str(model_path)),
        running_mode=VisionRunningMode.IMAGE,
        num_hands=args.max_hands,
        min_hand_detection_confidence=0.5,
        min_hand_presence_confidence=0.5,
        min_tracking_confidence=0.5,  # only used by VIDEO/LIVE_STREAM modes
    )

    with HandLandmarker.create_from_options(options) as landmarker:
        result = landmarker.detect(mp_image)

    # Print results
    if not result.hand_landmarks:
        print("No hands detected.")
    else:
        # world_lms (metric, in meters) is unused here; see the
        # pinch_distance_m sketch above for one way to use it.
        for i, (handedness, lms, world_lms) in enumerate(
            zip(result.handedness, result.hand_landmarks, result.hand_world_landmarks)
        ):
            label = handedness[0].category_name if handedness else "Unknown"
            score = handedness[0].score if handedness else 0.0
            print(f"\nHand #{i + 1}: {label} (score {score:.3f})")
            for idx, lm in enumerate(lms):
                print(f"  L{idx:02d}: x={lm.x:.3f} y={lm.y:.3f} z={lm.z:.3f}")

            # Draw landmarks, then stack the handedness labels down the left edge
            draw_landmarks(image_bgr, lms)
            cv2.putText(image_bgr, label, (10, 30 + i * 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2, cv2.LINE_AA)

    # Save annotated image
    cv2.imwrite(str(args.out), image_bgr)
    print(f"\nSaved annotated image to: {args.out}")


if __name__ == "__main__":
    main()
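

# Example session (illustrative output; scores and coordinates depend on the
# input image):
#   $ python hand_landmarker_cli.py --image hand.png --model hand_landmarker.task
#
#   Hand #1: Right (score 0.982)
#     L00: x=0.412 y=0.731 z=0.000
#     ...
#   Saved annotated image to: annotated.png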