Initial commit: MediaPipe landmarks demo

HTML demos for face, hand, gesture, and posture tracking using MediaPipe. Includes Python CLI tools for processing video files. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 22:38:40 -05:00
commit 8bcc62b045
22 changed files with 2347 additions and 0 deletions
--- a/process_mp4_holistic.py
+++ b/process_mp4_holistic.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+"""
+process_mp4_holistic.py
+Process an MP4 with MediaPipe Holistic:
+ - Saves an annotated video
+ - Exports a CSV of face/pose/hand landmarks per frame
+
+Usage:
+  python process_mp4_holistic.py /path/to/input.mp4
+  python process_mp4_holistic.py /path/to/input.mp4 --out-video out.mp4 --out-csv out.csv --show
+"""
+
+import argparse
+import csv
+import os
+import sys
+from pathlib import Path
+
+import cv2
+import mediapipe as mp
+
+# Short aliases for the MediaPipe "solutions" submodules this script uses:
+# the Holistic model itself, the landmark drawing helper, and the default styles.
+mp_holistic = mp.solutions.holistic
+mp_drawing = mp.solutions.drawing_utils
+mp_styles = mp.solutions.drawing_styles
+
+
+def parse_args():
+    """Parse the command line of the Holistic MP4 processor.
+
+    Returns:
+        argparse.Namespace with ``input``, ``out_video``, ``out_csv``,
+        ``model_complexity``, ``no_smooth``, ``refine_face`` and ``show``.
+    """
+    parser = argparse.ArgumentParser(
+        description="Run MediaPipe Holistic on an MP4 and export annotated video + CSV landmarks."
+    )
+
+    # Positional input, then the two optional output locations.
+    parser.add_argument("input", help="Input .mp4 file")
+    parser.add_argument(
+        "--out-video",
+        help="Output annotated MP4 path (default: <input>_annotated.mp4)",
+    )
+    parser.add_argument(
+        "--out-csv",
+        help="Output CSV path for landmarks (default: <input>_landmarks.csv)",
+    )
+
+    # Model tuning.
+    parser.add_argument(
+        "--model-complexity",
+        type=int,
+        default=1,
+        choices=[0, 1, 2],
+        help="Holistic model complexity",
+    )
+
+    # Boolean switches; each is False unless given on the command line.
+    switches = (
+        ("--no-smooth", "Disable smoothing (smoothing is ON by default)"),
+        ("--refine-face", "Refine face landmarks (iris, lips)."),
+        ("--show", "Show preview window while processing"),
+    )
+    for flag, help_text in switches:
+        parser.add_argument(flag, action="store_true", help=help_text)
+
+    return parser.parse_args()
+
+
+def open_video_writer(cap, out_path):
+    """Create an ``mp4v`` VideoWriter that mirrors the input capture's geometry.
+
+    Args:
+        cap: Opened ``cv2.VideoCapture`` whose FPS and frame size are copied.
+        out_path: Destination path (string) for the output video.
+
+    Returns:
+        Tuple ``(writer, fps, (width, height))``.
+
+    Raises:
+        RuntimeError: If the writer cannot be opened at ``out_path``.
+    """
+    reported_fps = cap.get(cv2.CAP_PROP_FPS)
+    # Fall back to 30 fps when the container reports no usable frame rate.
+    use_fallback = reported_fps is None or reported_fps <= 0
+    fps = 30.0 if use_fallback else reported_fps
+
+    frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    frame_size = (frame_w, frame_h)
+
+    codec = cv2.VideoWriter_fourcc(*"mp4v")
+    video_out = cv2.VideoWriter(out_path, codec, float(fps), frame_size)
+    if video_out.isOpened():
+        return video_out, fps, frame_size
+
+    raise RuntimeError(f"Failed to open VideoWriter at {out_path}")
+
+
+def _optional_landmark_field(lm, name):
+    """Return ``lm.<name>`` only if the landmark really carries it, else ``""``.
+
+    Protobuf messages expose every declared field as an attribute and return
+    the type default (0.0) for fields that were never set, so ``getattr`` alone
+    cannot tell "not provided" from a genuine zero. When the object offers
+    ``HasField`` we ask it; plain objects fall back to ``getattr``.
+    """
+    has_field = getattr(lm, "HasField", None)
+    if callable(has_field):
+        try:
+            return getattr(lm, name) if has_field(name) else ""
+        except ValueError:
+            # HasField raises ValueError for names the message does not declare.
+            return ""
+    return getattr(lm, name, "")
+
+
+def write_landmarks_to_csv(writer, frame_idx, ts_ms, kind, landmarks, world_landmarks=None, handedness=None):
+    """Append one CSV row per landmark in ``landmarks``.
+
+    Row layout (matches the header written by ``main``):
+    frame, timestamp_ms, type, landmark_index, x, y, z, visibility, presence,
+    world_x, world_y, world_z, handedness.
+
+    Args:
+        writer: Object with a ``writerow(list)`` method (e.g. ``csv.writer``).
+        frame_idx: Zero-based frame number.
+        ts_ms: Frame timestamp in milliseconds; truncated to an int in the row.
+        kind: Label for the landmark set, e.g. face, pose, left_hand, right_hand.
+        landmarks: Object with a ``.landmark`` sequence of items exposing
+            ``x``, ``y``, ``z``; nothing is written when this is falsy.
+        world_landmarks: Optional object with a ``.landmark`` sequence of
+            world coordinates, matched to ``landmarks`` by position.
+        handedness: "Left" | "Right" | None; a plain label, not a confidence.
+    """
+    if not landmarks:
+        return
+
+    # Index by position; world coords may be absent or differ in length.
+    wl = world_landmarks.landmark if world_landmarks and getattr(world_landmarks, "landmark", None) else None
+
+    # Loop-invariant columns.
+    timestamp = int(ts_ms)
+    hand_label = handedness or ""
+
+    for i, lm in enumerate(landmarks.landmark):
+        world_x = world_y = world_z = ""
+        if wl and i < len(wl):
+            world_x, world_y, world_z = wl[i].x, wl[i].y, wl[i].z
+
+        # Leave visibility blank when the landmark does not provide it, rather
+        # than recording the unset default of 0.0.
+        vis = _optional_landmark_field(lm, "visibility")
+        writer.writerow([
+            frame_idx,
+            timestamp,
+            kind,
+            i,
+            lm.x, lm.y, lm.z,
+            vis,
+            "",            # presence column is always left blank by this script
+            world_x, world_y, world_z,
+            hand_label,
+        ])
+
+
+def _draw_and_export(frame, results, csv_writer, frame_idx, ts_ms):
+    """Draw each detected landmark set onto ``frame`` and append its CSV rows.
+
+    ``frame`` is annotated in place. Landmark sets that are missing from
+    ``results`` are skipped.
+    """
+    # Face: mesh tesselation only, no per-landmark dots.
+    if results.face_landmarks:
+        mp_drawing.draw_landmarks(
+            frame,
+            results.face_landmarks,
+            mp_holistic.FACEMESH_TESSELATION,
+            landmark_drawing_spec=None,
+            connection_drawing_spec=mp_styles.get_default_face_mesh_tesselation_style(),
+        )
+        write_landmarks_to_csv(csv_writer, frame_idx, ts_ms, "face", results.face_landmarks)
+
+    # Pose: the only set for which world coordinates are exported.
+    if results.pose_landmarks:
+        mp_drawing.draw_landmarks(
+            frame,
+            results.pose_landmarks,
+            mp_holistic.POSE_CONNECTIONS,
+            landmark_drawing_spec=mp_styles.get_default_pose_landmarks_style(),
+        )
+        write_landmarks_to_csv(
+            csv_writer, frame_idx, ts_ms, "pose",
+            results.pose_landmarks,
+            world_landmarks=getattr(results, "pose_world_landmarks", None),
+        )
+
+    # Hands are drawn identically; they differ only in their CSV labels.
+    hands = (
+        ("left_hand", "Left", results.left_hand_landmarks),
+        ("right_hand", "Right", results.right_hand_landmarks),
+    )
+    for kind, label, hand_landmarks in hands:
+        if not hand_landmarks:
+            continue
+        mp_drawing.draw_landmarks(
+            frame,
+            hand_landmarks,
+            mp_holistic.HAND_CONNECTIONS,
+            landmark_drawing_spec=mp_styles.get_default_hand_landmarks_style(),
+        )
+        write_landmarks_to_csv(csv_writer, frame_idx, ts_ms, kind, hand_landmarks, handedness=label)
+
+
+def _process_frames(cap, holistic, video_writer, csv_writer, fps, show):
+    """Read frames until the video ends (or ESC in preview); return the frame count."""
+    frame_idx = 0
+    while True:
+        ok, frame_bgr = cap.read()
+        if not ok:
+            break
+
+        # Timestamp (ms) derived from the frame index and fps.
+        ts_ms = (frame_idx / fps) * 1000.0
+
+        # MediaPipe expects RGB; marking the array read-only lets it avoid a copy.
+        image_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
+        image_rgb.flags.writeable = False
+        results = holistic.process(image_rgb)
+
+        # Annotate the BGR frame in place; it is not needed again afterwards.
+        _draw_and_export(frame_bgr, results, csv_writer, frame_idx, ts_ms)
+        video_writer.write(frame_bgr)
+
+        # Optional preview
+        if show:
+            cv2.imshow("Holistic (annotated)", frame_bgr)
+            if cv2.waitKey(1) & 0xFF == 27:  # ESC
+                break
+
+        # Lightweight progress
+        if frame_idx % 120 == 0:
+            print(f"  frame {frame_idx}", end="\r", flush=True)
+        frame_idx += 1
+    return frame_idx
+
+
+def main():
+    """CLI entry point: annotate the input video and export landmarks to CSV.
+
+    Exits with status 1 (message on stderr) if the input file is missing or
+    cannot be opened. Every resource acquired along the way (capture, video
+    writer, CSV file, Holistic model, preview window) is released even when a
+    later step fails.
+    """
+    args = parse_args()
+    in_path = Path(args.input)
+    if not in_path.exists():
+        print(f"Input not found: {in_path}", file=sys.stderr)
+        sys.exit(1)
+
+    out_video = Path(args.out_video) if args.out_video else in_path.with_name(in_path.stem + "_annotated.mp4")
+    out_csv = Path(args.out_csv) if args.out_csv else in_path.with_name(in_path.stem + "_landmarks.csv")
+
+    cap = cv2.VideoCapture(str(in_path))
+    try:
+        if not cap.isOpened():
+            print(f"Could not open video: {in_path}", file=sys.stderr)
+            sys.exit(1)
+
+        # Make sure both output directories exist before anything is opened.
+        out_video.parent.mkdir(parents=True, exist_ok=True)
+        out_csv.parent.mkdir(parents=True, exist_ok=True)
+
+        writer, fps, _frame_size = open_video_writer(cap, str(out_video))
+        try:
+            with open(out_csv, "w", newline="", encoding="utf-8") as csv_file:
+                csv_writer = csv.writer(csv_file)
+                csv_writer.writerow([
+                    "frame", "timestamp_ms", "type", "landmark_index",
+                    "x", "y", "z", "visibility", "presence",
+                    "world_x", "world_y", "world_z", "handedness"
+                ])
+
+                # Holistic configuration; the context manager closes the model.
+                with mp_holistic.Holistic(
+                    static_image_mode=False,
+                    model_complexity=args.model_complexity,
+                    smooth_landmarks=(not args.no_smooth),
+                    refine_face_landmarks=args.refine_face,
+                    enable_segmentation=False,
+                ) as holistic:
+                    print(f"Processing: {in_path.name} -> {out_video.name}, {out_csv.name}")
+                    _process_frames(cap, holistic, writer, csv_writer, fps, args.show)
+
+            print(f"\nDone.\n  Video: {out_video}\n  CSV:   {out_csv}")
+        finally:
+            writer.release()
+    finally:
+        cap.release()
+        if args.show:
+            cv2.destroyAllWindows()
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()