#!/usr/bin/env python3
"""Recognize hand gestures in a still image with MediaPipe."""

import argparse
import sys

import mediapipe as mp

BaseOptions = mp.tasks.BaseOptions
VisionRunningMode = mp.tasks.vision.RunningMode
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions


def _first_category(item):
    """Return the first category found in *item*, or None.

    Accepted shapes:
      - an object exposing a ``.categories`` list/tuple (Classifications-like)
      - a list/tuple of categories
      - None or an empty container

    Anything else yields None.
    """
    if item is None:
        return None

    # Shape 1: a container object carrying its categories in ``.categories``.
    cats = getattr(item, "categories", None)
    if isinstance(cats, (list, tuple)):
        return cats[0] if cats else None

    # Shape 2: the item already is a sequence of categories.
    if isinstance(item, (list, tuple)):
        return item[0] if item else None

    return None


def _len_safe(x):
    """Return ``len(x)`` for a list/tuple and 0 for anything else (e.g. None)."""
    return len(x) if isinstance(x, (list, tuple)) else 0


def _category_at(entries, index):
    """Return the first category of ``entries[index]``, or None if out of range."""
    if _len_safe(entries) > index:
        return _first_category(entries[index])
    return None


def main():
    """Parse CLI arguments, run the recognizer on one image and print results.

    Prints one section per detected hand (handedness, top gesture and score)
    to stdout. Exits with status 1, after writing a message to stderr, when
    the image cannot be loaded or when creating/running the recognizer fails.
    """
    parser = argparse.ArgumentParser(
        description="Recognize hand gestures in a still image with MediaPipe."
    )
    parser.add_argument(
        "-i",
        "--image",
        default="hand.jpg",
        help="Path to input image (default: hand.jpg)",
    )
    parser.add_argument(
        "-m",
        "--model",
        default="gesture_recognizer.task",
        help="Path to gesture_recognizer .task model (default: gesture_recognizer.task)",
    )
    parser.add_argument("--num_hands", type=int, default=2, help="Max hands to detect")
    args = parser.parse_args()

    options = GestureRecognizerOptions(
        base_options=BaseOptions(model_asset_path=args.model),
        running_mode=VisionRunningMode.IMAGE,
        num_hands=args.num_hands,
    )

    # Load the image
    try:
        mp_image = mp.Image.create_from_file(args.image)
    except Exception as e:
        print(f"Failed to load image '{args.image}': {e}", file=sys.stderr)
        sys.exit(1)

    # Creating the recognizer is where a missing or invalid model surfaces, so
    # it gets the same top-level handling as the image load instead of a raw
    # traceback.
    try:
        with GestureRecognizer.create_from_options(options) as recognizer:
            result = recognizer.recognize(mp_image)
    except Exception as e:
        print(
            f"Gesture recognition failed with model '{args.model}': {e}",
            file=sys.stderr,
        )
        sys.exit(1)

    if result is None:
        print("No result returned.")
        return

    # Read each result attribute once; getattr tolerates a result object that
    # lacks one of them.
    gestures = getattr(result, "gestures", None)
    handedness = getattr(result, "handedness", None)
    hand_landmarks = getattr(result, "hand_landmarks", None)

    # The three lists are indexed per hand; report as many hands as the
    # longest one describes.
    n = max(_len_safe(gestures), _len_safe(handedness), _len_safe(hand_landmarks))
    if n == 0:
        print("No hands/gestures detected.")
        return

    for i in range(n):
        handed = None
        cat = _category_at(handedness, i)
        if cat:
            handed = cat.category_name

        top_gesture = None
        score = None
        cat = _category_at(gestures, i)
        if cat:
            top_gesture = cat.category_name
            score = cat.score

        header = f"Hand #{i+1}" + (f" ({handed})" if handed else "")
        print(header + ":")
        if top_gesture:
            # A category may carry a name without a score; formatting None
            # with ".3f" would raise TypeError.
            score_text = f"{score:.3f}" if score is not None else "n/a"
            print(f" Gesture: {top_gesture} (score={score_text})")
        else:
            print(" Gesture: none")

        # If you want pixel landmark coordinates later:
        # if _len_safe(hand_landmarks) > i:
        #     for j, lm in enumerate(hand_landmarks[i]):
        #         print(f" lm{j}: x={lm.x:.3f} y={lm.y:.3f} z={lm.z:.3f}")


if __name__ == "__main__":
    main()