HTML demos for face, hand, gesture, and posture tracking using MediaPipe. Includes Python CLI tools for processing video files. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
99 lines
3.2 KiB
Python
Executable File
99 lines
3.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import argparse
|
|
import sys
|
|
import mediapipe as mp
|
|
|
|
# Short module-level aliases for the MediaPipe Tasks classes used by main().
BaseOptions = mp.tasks.BaseOptions
VisionRunningMode = mp.tasks.vision.RunningMode
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
|
|
|
|
def _first_category(item):
|
|
"""
|
|
Accepts either:
|
|
- a Classifications object with .categories
|
|
- a list of Category
|
|
- None / empty
|
|
Returns the first Category or None.
|
|
"""
|
|
if item is None:
|
|
return None
|
|
# Shape 1: Classifications with .categories
|
|
cats = getattr(item, "categories", None)
|
|
if isinstance(cats, list):
|
|
return cats[0] if cats else None
|
|
# Shape 2: already a list[Category]
|
|
if isinstance(item, list):
|
|
return item[0] if item else None
|
|
return None
|
|
|
|
def _len_safe(x):
|
|
return len(x) if isinstance(x, list) else 0
|
|
|
|
def main():
    """Recognize hand gestures in one still image and print a per-hand summary.

    Command-line options:
        -i/--image    Path to the input image (default: hand.jpg).
        -m/--model    Path to the gesture recognizer .task model
                      (default: gesture_recognizer.task).
        --num_hands   Maximum number of hands to detect (default: 2, must be >= 1).

    For every detected hand, prints its index, the handedness label when
    available, and the top gesture with its score (or "none").

    Exits with status 1 (message on stderr) when the image cannot be loaded
    or the recognizer cannot be created from the model; exits with status 2
    via argparse when ``--num_hands`` is not positive.
    """
    parser = argparse.ArgumentParser(description="Recognize hand gestures in a still image with MediaPipe.")
    parser.add_argument("-i", "--image", default="hand.jpg", help="Path to input image (default: hand.jpg)")
    parser.add_argument("-m", "--model", default="gesture_recognizer.task",
                        help="Path to gesture_recognizer .task model (default: gesture_recognizer.task)")
    parser.add_argument("--num_hands", type=int, default=2, help="Max hands to detect")
    args = parser.parse_args()

    # MediaPipe documents num_hands as "any integer > 0"; reject bad values
    # here with a usage error instead of failing later inside the library.
    if args.num_hands < 1:
        parser.error("--num_hands must be at least 1")

    options = GestureRecognizerOptions(
        base_options=BaseOptions(model_asset_path=args.model),
        running_mode=VisionRunningMode.IMAGE,
        num_hands=args.num_hands,
    )

    # Load the image
    try:
        mp_image = mp.Image.create_from_file(args.image)
    except Exception as e:
        print(f"Failed to load image '{args.image}': {e}", file=sys.stderr)
        sys.exit(1)

    # Create the recognizer separately so a missing or invalid model file is
    # reported the same way as a bad image rather than as a raw traceback.
    # The exact exception type raised by MediaPipe is not pinned down here,
    # hence the broad catch at this top-level boundary.
    try:
        recognizer = GestureRecognizer.create_from_options(options)
    except Exception as e:
        print(f"Failed to load model '{args.model}': {e}", file=sys.stderr)
        sys.exit(1)

    with recognizer:
        result = recognizer.recognize(mp_image)

    if result is None:
        print("No result returned.")
        return

    # Read each result field once; any of them may be missing or empty.
    gestures = getattr(result, "gestures", None)
    handedness = getattr(result, "handedness", None)
    hand_landmarks = getattr(result, "hand_landmarks", None)

    gesture_count = _len_safe(gestures)
    handedness_count = _len_safe(handedness)

    # The fields are indexed per hand; use the longest one so that a hand is
    # still listed when only some of the fields carry an entry for it.
    n = max(gesture_count, handedness_count, _len_safe(hand_landmarks))
    if n == 0:
        print("No hands/gestures detected.")
        return

    for i in range(n):
        handed = None
        if i < handedness_count:
            cat = _first_category(handedness[i])
            if cat is not None:
                handed = cat.category_name

        top_gesture = None
        score = None
        if i < gesture_count:
            cat = _first_category(gestures[i])
            if cat is not None:
                top_gesture = cat.category_name
                score = cat.score

        header = f"Hand #{i+1}" + (f" ({handed})" if handed else "")
        print(header + ":")
        if top_gesture:
            if score is not None:
                print(f" Gesture: {top_gesture} (score={score:.3f})")
            else:
                # A category may come without a score; formatting None with
                # ":.3f" would raise, so print the name alone.
                print(f" Gesture: {top_gesture}")
        else:
            print(" Gesture: none")

        # If you want landmark coordinates later (MediaPipe reports x/y
        # normalized to the image size, not pixels):
        # if _len_safe(hand_landmarks) > i:
        #     for j, lm in enumerate(hand_landmarks[i]):
        #         print(f" lm{j}: x={lm.x:.3f} y={lm.y:.3f} z={lm.z:.3f}")
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|