Initial commit: MediaPipe landmarks demo
HTML demos for face, hand, gesture, and posture tracking using MediaPipe. Includes Python CLI tools for processing video files. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
214
process_mp4_holistic.py
Executable file
214
process_mp4_holistic.py
Executable file
@@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
process_mp4_holistic.py

Process an MP4 with MediaPipe Holistic:
  - Saves annotated video
  - Exports CSV of face/pose/hand landmarks per frame

Usage:
  python process_mp4_holistic.py /path/to/input.mp4
  python process_mp4_holistic.py /path/to/input.mp4 --out-video out.mp4 --out-csv out.csv --show
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import mediapipe as mp
|
||||
|
||||
# Short module-level aliases for the MediaPipe "solutions" sub-modules used below:
# the Holistic model itself, the landmark drawing helpers, and the default drawing styles.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
mp_styles = mp.solutions.drawing_styles
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command line (``sys.argv``) and return the resulting namespace.

    Positional:
        input               path of the video to process.
    Options:
        --out-video         annotated video path (``None`` when omitted).
        --out-csv           landmark CSV path (``None`` when omitted).
        --model-complexity  0, 1 or 2; defaults to 1.
        --no-smooth, --refine-face, --show
                            boolean switches, all ``False`` unless given.
    """
    parser = argparse.ArgumentParser(
        description="Run MediaPipe Holistic on an MP4 and export annotated video + CSV landmarks."
    )
    parser.add_argument("input", help="Input .mp4 file")

    # Optional output locations; the caller derives defaults from the input path.
    output_options = (
        ("--out-video", "Output annotated MP4 path (default: <input>_annotated.mp4)"),
        ("--out-csv", "Output CSV path for landmarks (default: <input>_landmarks.csv)"),
    )
    for flag, help_text in output_options:
        parser.add_argument(flag, help=help_text)

    parser.add_argument(
        "--model-complexity",
        type=int,
        default=1,
        choices=[0, 1, 2],
        help="Holistic model complexity",
    )

    # Plain on/off switches.
    switches = (
        ("--no-smooth", "Disable smoothing (smoothing is ON by default)"),
        ("--refine-face", "Refine face landmarks (iris, lips)."),
        ("--show", "Show preview window while processing"),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action="store_true", help=help_text)

    return parser.parse_args()
|
||||
|
||||
|
||||
def open_video_writer(cap, out_path):
    """Create an ``mp4v`` VideoWriter whose frame rate and size mirror ``cap``.

    Args:
        cap: an opened ``cv2.VideoCapture`` to read FPS / frame size from.
        out_path: destination file path (string).

    Returns:
        ``(writer, fps, (width, height))`` where ``fps`` is the value actually
        used, i.e. 30.0 when the capture reports none.

    Raises:
        RuntimeError: if the writer could not be opened.
    """
    frame_size = (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )

    # Fall back to a sensible rate when the capture reports a missing / non-positive one.
    reported_fps = cap.get(cv2.CAP_PROP_FPS)
    fps = 30.0 if (reported_fps is None or reported_fps <= 0) else reported_fps

    codec = cv2.VideoWriter_fourcc(*"mp4v")
    video_writer = cv2.VideoWriter(out_path, codec, float(fps), frame_size)
    if video_writer.isOpened():
        return video_writer, fps, frame_size
    raise RuntimeError(f"Failed to open VideoWriter at {out_path}")
|
||||
|
||||
|
||||
def _visibility_or_blank(lm):
    """Return ``lm.visibility`` if it was actually set on the landmark, else ``""``."""
    has_field = getattr(lm, "HasField", None)
    if callable(has_field):
        # Protobuf landmarks always expose a ``visibility`` attribute (reading
        # 0.0 when unset), so presence has to be tested explicitly.
        try:
            return lm.visibility if has_field("visibility") else ""
        except ValueError:
            # The message type does not declare the field at all.
            return ""
    # Duck-typed landmark objects: fall back to a plain attribute lookup.
    return getattr(lm, "visibility", "")


def write_landmarks_to_csv(writer, frame_idx, ts_ms, kind, landmarks, world_landmarks=None, handedness=None):
    """Write one CSV row per landmark of a single landmark set.

    Args:
        writer: object with a ``writerow(list)`` method (e.g. ``csv.writer``).
        frame_idx: frame number written in the first column.
        ts_ms: frame timestamp in milliseconds; truncated to ``int``.
        kind: label for the landmark set, e.g. face, pose, left_hand, right_hand.
        landmarks: object whose ``.landmark`` sequence holds items with
            ``x``/``y``/``z`` (and possibly ``visibility``). Nothing is written
            when this is falsy.
        world_landmarks: optional object with a ``.landmark`` sequence of world
            coordinates, matched to ``landmarks`` by index. Missing entries
            leave the world columns blank.
        handedness: "Left" | "Right" | None; a label only, not a confidence score.

    Row layout: frame, timestamp_ms, type, landmark_index, x, y, z, visibility,
    presence (always blank), world_x, world_y, world_z, handedness.
    """
    if not landmarks:
        return

    # Index by position; world coords may be absent or differ in length.
    wl = world_landmarks.landmark if world_landmarks and getattr(world_landmarks, "landmark", None) else None

    timestamp = int(ts_ms)
    hand_label = handedness or ""

    for i, lm in enumerate(landmarks.landmark):
        world_x = world_y = world_z = ""
        if wl and i < len(wl):
            world_x, world_y, world_z = wl[i].x, wl[i].y, wl[i].z

        writer.writerow([
            frame_idx,
            timestamp,
            kind,
            i,
            lm.x, lm.y, lm.z,
            # Blank (not 0.0) when the model did not report a visibility score.
            _visibility_or_blank(lm),
            "",  # presence not provided in Holistic landmarks
            world_x, world_y, world_z,
            hand_label,
        ])
|
||||
|
||||
|
||||
def main():
    """Run MediaPipe Holistic over the input video and write the two outputs.

    Reads the command line via ``parse_args()``, then for every frame of the
    input video:
      * runs the Holistic model on an RGB conversion of the frame,
      * draws face / pose / hand landmarks onto the frame and appends it to
        the annotated output video,
      * appends one CSV row per detected landmark.

    Exits with status 1 (message on stderr) when the input file is missing or
    cannot be opened. Pressing ESC in the preview window (``--show``) stops
    processing early. Every resource acquired here (capture, video writer,
    CSV file, Holistic graph, preview window) is released even if set-up or
    processing fails part-way.
    """
    args = parse_args()
    in_path = Path(args.input)
    if not in_path.exists():
        print(f"Input not found: {in_path}", file=sys.stderr)
        sys.exit(1)

    out_video = Path(args.out_video) if args.out_video else in_path.with_name(in_path.stem + "_annotated.mp4")
    out_csv = Path(args.out_csv) if args.out_csv else in_path.with_name(in_path.stem + "_landmarks.csv")

    cap = cv2.VideoCapture(str(in_path))
    if not cap.isOpened():
        cap.release()
        print(f"Could not open video: {in_path}", file=sys.stderr)
        sys.exit(1)

    writer = None
    try:
        # Make sure both output directories exist before opening the outputs.
        out_video.parent.mkdir(parents=True, exist_ok=True)
        out_csv.parent.mkdir(parents=True, exist_ok=True)

        writer, fps, _frame_size = open_video_writer(cap, str(out_video))

        with open(out_csv, "w", newline="", encoding="utf-8") as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow([
                "frame", "timestamp_ms", "type", "landmark_index",
                "x", "y", "z", "visibility", "presence",
                "world_x", "world_y", "world_z", "handedness"
            ])

            # Holistic configuration; the context manager closes the graph on exit.
            with mp_holistic.Holistic(
                static_image_mode=False,
                model_complexity=args.model_complexity,
                smooth_landmarks=(not args.no_smooth),
                refine_face_landmarks=args.refine_face,
                enable_segmentation=False
            ) as holistic:
                frame_idx = 0
                print(f"Processing: {in_path.name} -> {out_video.name}, {out_csv.name}")
                while True:
                    ok, frame_bgr = cap.read()
                    if not ok:
                        break

                    # Timestamp (ms) based on frame index and fps
                    ts_ms = (frame_idx / fps) * 1000.0

                    # Convert to RGB for MediaPipe; marking the array read-only
                    # around process() follows the MediaPipe example code.
                    image_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
                    image_rgb.flags.writeable = False
                    results = holistic.process(image_rgb)
                    image_rgb.flags.writeable = True

                    # Annotations are drawn in place on the decoded BGR frame
                    # (no copy is made; the raw frame is not needed afterwards).
                    out_frame = frame_bgr

                    # Face
                    if results.face_landmarks:
                        mp_drawing.draw_landmarks(
                            out_frame,
                            results.face_landmarks,
                            mp_holistic.FACEMESH_TESSELATION,
                            landmark_drawing_spec=None,
                            connection_drawing_spec=mp_styles.get_default_face_mesh_tesselation_style(),
                        )
                        write_landmarks_to_csv(csv_writer, frame_idx, ts_ms, "face", results.face_landmarks)

                    # Pose
                    if results.pose_landmarks:
                        mp_drawing.draw_landmarks(
                            out_frame,
                            results.pose_landmarks,
                            mp_holistic.POSE_CONNECTIONS,
                            landmark_drawing_spec=mp_styles.get_default_pose_landmarks_style()
                        )
                        write_landmarks_to_csv(
                            csv_writer, frame_idx, ts_ms, "pose",
                            results.pose_landmarks,
                            world_landmarks=getattr(results, "pose_world_landmarks", None)
                        )

                    # Hands: identical handling for both, left first.
                    hand_sets = (
                        ("left_hand", "Left", results.left_hand_landmarks),
                        ("right_hand", "Right", results.right_hand_landmarks),
                    )
                    for hand_kind, side, hand_landmarks in hand_sets:
                        if not hand_landmarks:
                            continue
                        mp_drawing.draw_landmarks(
                            out_frame,
                            hand_landmarks,
                            mp_holistic.HAND_CONNECTIONS,
                            landmark_drawing_spec=mp_styles.get_default_hand_landmarks_style()
                        )
                        write_landmarks_to_csv(csv_writer, frame_idx, ts_ms, hand_kind, hand_landmarks, handedness=side)

                    # Write frame
                    writer.write(out_frame)

                    # Optional preview
                    if args.show:
                        cv2.imshow("Holistic (annotated)", out_frame)
                        if cv2.waitKey(1) & 0xFF == 27:  # ESC
                            break

                    # Lightweight progress
                    if frame_idx % 120 == 0:
                        print(f"  frame {frame_idx}", end="\r", flush=True)
                    frame_idx += 1
    finally:
        # Runs on success, on ESC and on any failure above.
        if writer is not None:
            writer.release()
        cap.release()
        if args.show:
            cv2.destroyAllWindows()

    # Only reached on success, after every output has been finalized.
    print(f"\nDone.\n  Video: {out_video}\n  CSV:   {out_csv}")
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user