Initial commit: MediaPipe landmarks demo

HTML demos for face, hand, gesture, and posture tracking using MediaPipe. Includes Python CLI tools for processing video files.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 22:38:40 -05:00
commit 8bcc62b045
22 changed files with 2347 additions and 0 deletions
--- a/recognize_gesture.py
+++ b/recognize_gesture.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+import argparse
+import sys
+import mediapipe as mp
+
+BaseOptions = mp.tasks.BaseOptions  # module-level shorthands for the mp.tasks names used below
+VisionRunningMode = mp.tasks.vision.RunningMode
+GestureRecognizer = mp.tasks.vision.GestureRecognizer
+GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
+
+def _first_category(item):
+    """Return the leading Category held by *item*, or None when there is none.
+
+    Two input shapes are recognised:
+      - an object whose ``categories`` attribute is a list
+      - a plain list of Category objects
+    ``None``, empty lists, and any other value all produce None.
+    """
+    # Prefer the .categories attribute when it is a list; otherwise fall
+    # back to treating item itself as the candidate list.
+    candidates = getattr(item, "categories", None)
+    if not isinstance(candidates, list):
+        candidates = item
+    if not isinstance(candidates, list):
+        return None
+    if not candidates:
+        return None
+    return candidates[0]
+
+def _len_safe(x):  # Length of x when it is a list; anything else counts as 0.
+    return 0 if not isinstance(x, list) else len(x)
+
+def main():
+    """Recognize hand gestures in one still image and print a per-hand summary.
+
+    Prints each hand's handedness label and top gesture with its score.
+    Exits with status 1 (message on stderr) if the image cannot be loaded
+    or the recognizer cannot be created or run.
+    """
+    parser = argparse.ArgumentParser(description="Recognize hand gestures in a still image with MediaPipe.")
+    parser.add_argument("-i", "--image", default="hand.jpg", help="Path to input image (default: hand.jpg)")
+    parser.add_argument("-m", "--model", default="gesture_recognizer.task",
+                        help="Path to gesture_recognizer .task model (default: gesture_recognizer.task)")
+    parser.add_argument("--num_hands", type=int, default=2, help="Max hands to detect")
+    args = parser.parse_args()
+
+    options = GestureRecognizerOptions(
+        base_options=BaseOptions(model_asset_path=args.model),
+        running_mode=VisionRunningMode.IMAGE,
+        num_hands=args.num_hands,
+    )
+
+    try:
+        mp_image = mp.Image.create_from_file(args.image)
+    except Exception as e:
+        print(f"Failed to load image '{args.image}': {e}", file=sys.stderr)
+        sys.exit(1)
+
+    # Report a missing/invalid model like an image error, not a traceback.
+    try:
+        with GestureRecognizer.create_from_options(options) as recognizer:
+            result = recognizer.recognize(mp_image)
+    except Exception as e:
+        print(f"Failed to run recognizer with model '{args.model}': {e}", file=sys.stderr)
+        sys.exit(1)
+
+    if result is None:
+        print("No result returned.")
+        return
+
+    # Read each per-hand list once; anything that is not a list counts as empty.
+    gestures = getattr(result, "gestures", None)
+    handedness = getattr(result, "handedness", None)
+    n = max(_len_safe(gestures), _len_safe(handedness),
+            _len_safe(getattr(result, "hand_landmarks", None)))
+    if n == 0:
+        print("No hands/gestures detected.")
+        return
+
+    for i in range(n):
+        hand_cat = _first_category(handedness[i]) if _len_safe(handedness) > i else None
+        gest_cat = _first_category(gestures[i]) if _len_safe(gestures) > i else None
+        handed = hand_cat.category_name if hand_cat else None
+        print(f"Hand #{i+1}" + (f" ({handed})" if handed else "") + ":")
+        if gest_cat and gest_cat.category_name:
+            # Guard against a Category without a score; formatting None raises.
+            score = f"{gest_cat.score:.3f}" if gest_cat.score is not None else "n/a"
+            print(f"  Gesture: {gest_cat.category_name} (score={score})")
+        else:
+            print("  Gesture: none")
+
+        # If you want landmark coordinates later (MediaPipe reports them normalized):
+        # if _len_safe(getattr(result, "hand_landmarks", [])) > i:
+        #     for j, lm in enumerate(result.hand_landmarks[i]):
+        #         print(f"    lm{j}: x={lm.x:.3f} y={lm.y:.3f} z={lm.z:.3f}")
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+    main()