# slr_handshapes_locations/first_attempt_landmark_hands/capture_sequence.py

#!/usr/bin/env python3
# capture_sequence.py
# Automatically record N short sequences for each label (default: 100 train / 20 val)
# Centered 3-second countdown before recording.
# Per-clip depleting progress bar (full → empty) across the top during capture.

import argparse, os, time, math, re
from pathlib import Path
import numpy as np, cv2, mediapipe as mp

def normalize_frame(pts, handed=None):
    """Return a translation-, rotation- and scale-normalized copy of *pts*.

    The input is expected to be a 2-D array with x, y, z in the first three
    columns and at least 10 rows (rows 0 and 9 are used as anchors). The
    input array itself is left untouched.

    Steps, in order:
      1. Shift x/y so that row 0 sits at the origin.
      2. Flip the x axis when *handed* starts with "left" (case-insensitive).
      3. Rotate x/y so the direction from row 0 to row 9 points along +y.
      4. Divide x, y and z by the largest pairwise x/y distance
         (left unscaled when that distance is below 1e-6).

    Args:
        pts: Landmark array; converted to float32.
        handed: Optional handedness label string, or None.

    Returns:
        A new float32 array with the same shape as *pts*.
    """
    out = pts.astype(np.float32)  # astype already yields an independent copy

    # 1. translate: anchor row becomes the x/y origin
    anchor = out[0, :2].copy()
    out[:, :2] -= anchor

    # 2. mirror left hands onto the right-hand layout
    is_left = bool(handed) and handed.lower().startswith("left")
    if is_left:
        out[:, 0] *= -1.0

    # 3. rotate so that row 9 ends up on the positive y axis
    ref = out[9, :2]
    theta = math.pi / 2 - math.atan2(ref[1], ref[0])
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    rot = np.array([[cos_t, -sin_t],
                    [sin_t, cos_t]], dtype=np.float32)
    out[:, :2] = out[:, :2] @ rot.T

    # 4. scale by the hand's x/y extent (largest point-to-point distance)
    plane = out[:, :2]
    pairwise = np.linalg.norm(plane[None, :, :] - plane[:, None, :], axis=-1)
    extent = pairwise.max()
    if extent < 1e-6:
        extent = 1.0
    out[:, :3] /= extent
    return out

def next_idx(folder: Path, prefix="clip_"):
    """Return the next free clip number inside *folder*.

    Entries named ``<prefix><digits>.npz`` are scanned and the result is one
    more than the highest number found. A missing folder, or one without any
    matching entry, yields 1.

    Args:
        folder: Directory to scan; it does not have to exist.
        prefix: Literal file-name prefix that precedes the number.

    Returns:
        The integer index to use for the next clip file.
    """
    if not folder.exists():
        return 1
    name_re = re.compile(rf"^{re.escape(prefix)}(\d+)\.npz$")
    hits = (name_re.match(entry.name) for entry in folder.iterdir())
    highest = max((int(hit.group(1)) for hit in hits if hit), default=0)
    return highest + 1

def countdown(cap, seconds=3):
    """Show a centered countdown on live camera frames before capture starts.

    For each whole second from *seconds* down to 1, frames are read from
    *cap* for one second and shown in the "sequence capture" window with the
    remaining count drawn large in the middle and a "Starting in..." caption
    above it. Frames that fail to read are skipped.

    Pressing ``q`` releases the camera, closes all windows and raises
    SystemExit.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    digit_scale, digit_thick = 5, 10
    caption, caption_scale, caption_thick = "Starting in...", 1.2, 3

    # Text extents do not depend on the frame, so measure them up front.
    (caption_w, _caption_h), _ = cv2.getTextSize(caption, font, caption_scale, caption_thick)

    for remaining in range(seconds, 0, -1):
        digit = str(remaining)
        (digit_w, digit_h), _ = cv2.getTextSize(digit, font, digit_scale, digit_thick)

        tick_began = time.time()
        while time.time() - tick_began < 1.0:
            grabbed, frame = cap.read()
            if not grabbed:
                continue
            height, width = frame.shape[:2]

            # Big red digit, centered on the frame
            digit_org = ((width - digit_w) // 2, (height + digit_h) // 2)
            cv2.putText(frame, digit, digit_org, font, digit_scale,
                        (0, 0, 255), digit_thick, cv2.LINE_AA)

            # Yellow caption placed above the digit
            caption_org = ((width - caption_w) // 2, (height // 2) - digit_h - 20)
            cv2.putText(frame, caption, caption_org, font, caption_scale,
                        (0, 255, 255), caption_thick, cv2.LINE_AA)

            cv2.imshow("sequence capture", frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                cap.release()
                cv2.destroyAllWindows()
                raise SystemExit("Aborted during countdown")

def draw_progress_bar(img, frac_remaining, bar_h=16, margin=12):
    """
    Paint a horizontal "time left" bar along the top edge of *img* (in place).

    img:            image array to draw on.
    frac_remaining: share of the bar to fill; clamped to [0.0, 1.0],
                    so 1.0 is a full bar and 0.0 an empty one.
    bar_h:          bar height in pixels.
    margin:         gap in pixels between the bar and the left/top/right edges.
    """
    _, width = img.shape[:2]
    left, right = margin, width - margin
    top, bottom = margin, margin + bar_h

    # Track: dark gray fill with a lighter outline
    cv2.rectangle(img, (left, top), (right, bottom), (40, 40, 40), -1)
    cv2.rectangle(img, (left, top), (right, bottom), (90, 90, 90), 2)

    # Green fill proportional to the remaining fraction
    clamped = max(0.0, min(1.0, frac_remaining))
    fill_w = int((right - left) * clamped)
    if fill_w <= 0:
        return
    cv2.rectangle(img, (left, top), (left + fill_w, bottom), (0, 200, 0), -1)

def main():
    """Record a batch of short hand-landmark clips for one letter label.

    Parses the command line, opens the camera and a MediaPipe Hands tracker,
    shows a 3-second countdown and then records ``--count`` clips of
    ``--seconds`` each. For every frame in which a hand is detected the
    landmarks are normalized with ``normalize_frame`` and appended to the
    current clip. Each clip with at least one detection is written to
    ``sequences/<split>/<LABEL>/clip_NNN.npz`` with two arrays:

      * ``X``   - one flattened normalized landmark vector per frame
      * ``tip`` - normalized x/y of landmark 8 per frame (wrist-relative
                  coordinates from ``normalize_frame``, not image coordinates)

    Pressing ``q`` in the preview window stops recording immediately; the
    clip in progress is discarded.

    Raises:
        SystemExit: on an invalid ``--label`` or ``--seconds`` value, or when
            the camera cannot be opened.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--label", required=True, help="Letter label (A..Z)")
    ap.add_argument("--split", required=True, choices=["train","val"])
    ap.add_argument("--seconds", type=float, default=0.8, help="Clip length (s)")
    ap.add_argument("--camera", type=int, default=0)
    ap.add_argument("--width", type=int, default=640)
    ap.add_argument("--height", type=int, default=480)
    ap.add_argument("--count", type=int, default=None,
                    help="How many clips (default=100 train, 20 val)")
    args = ap.parse_args()

    if args.count is None:
        args.count = 100 if args.split == "train" else 20

    L = args.label.upper().strip()
    if not (len(L) == 1 and "A" <= L <= "Z"):
        raise SystemExit("Use --label A..Z")

    # NaN/inf would make the per-clip loop never terminate, and a
    # non-positive length can never capture a frame.
    if not (math.isfinite(args.seconds) and args.seconds > 0):
        raise SystemExit("Use --seconds > 0")

    out_dir = Path("sequences") / args.split / L
    out_dir.mkdir(parents=True, exist_ok=True)
    idx = next_idx(out_dir)

    cap = cv2.VideoCapture(args.camera)
    if not cap.isOpened():
        raise SystemExit(f"Could not open camera {args.camera}")

    hands = None
    try:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
        hands = mp.solutions.hands.Hands(
            static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5
        )

        print(f"Recording {args.count} clips for {L}/{args.split}, {args.seconds}s each.")
        countdown(cap, 3)

        for n in range(args.count):
            seq_X, seq_tip = [], []
            start_t = time.time()
            end_t = start_t + args.seconds

            while True:
                now = time.time()
                if now >= end_t:
                    break

                ok, fr = cap.read()
                if not ok:
                    # Camera stopped delivering frames: end this clip early
                    # and keep whatever was captured so far.
                    break

                rgb = cv2.cvtColor(fr, cv2.COLOR_BGR2RGB)
                res = hands.process(rgb)
                if res.multi_hand_landmarks:
                    ih = res.multi_hand_landmarks[0]
                    handed = None
                    if res.multi_handedness:
                        handed = res.multi_handedness[0].classification[0].label
                    raw = np.array([[lm.x, lm.y, lm.z] for lm in ih.landmark], np.float32)
                    pts = normalize_frame(raw, handed)
                    seq_X.append(pts.reshape(-1))
                    seq_tip.append(pts[8, :2])

                    # Draw the fingertip marker (for feedback). Use the RAW
                    # landmark, which is expressed as a fraction of the image
                    # width/height; the normalized points are wrist-relative
                    # and would place the marker at the wrong pixel.
                    frame_h, frame_w = fr.shape[:2]
                    cv2.circle(fr,
                               (int(frame_w * raw[8, 0]), int(frame_h * raw[8, 1])),
                               6, (0, 255, 0), -1)

                # overlay progress + status
                frac_remaining = (end_t - now) / max(1e-6, args.seconds)  # 1 → 0
                draw_progress_bar(fr, frac_remaining, bar_h=16, margin=12)
                cv2.putText(fr, f"{L} {args.split}  Clip {n+1}/{args.count}",
                            (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0,255,0), 2, cv2.LINE_AA)

                cv2.imshow("sequence capture", fr)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    # User abort: drop the partial clip; cleanup runs in finally.
                    return

            if seq_X:
                X = np.stack(seq_X, 0)
                tip = np.stack(seq_tip, 0)
                path = out_dir / f"clip_{idx:03d}.npz"
                np.savez_compressed(path, X=X, tip=tip)
                print(f"💾 saved {path} frames={X.shape[0]}")
                idx += 1
            else:
                print("⚠️ No hand detected; skipped clip.")

        print("✅ Done recording.")
    finally:
        # Always free the tracker, the camera and the preview window, whether
        # we finished, the user quit, or an error was raised.
        if hands is not None:
            hands.close()
        cap.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    main()