Files
2026-01-19 22:27:20 -05:00

177 lines
6.5 KiB
Python
Executable File

#!/usr/bin/env python3
# capture_sequence.py
# Automatically record N short sequences for each label (default: 100 train / 20 val)
# Centered 3-second countdown before recording.
# Per-clip depleting progress bar (full → empty) across the top during capture.
import argparse, os, time, math, re
from pathlib import Path
import numpy as np, cv2, mediapipe as mp
def normalize_frame(pts, handed=None):
    """Return a canonicalised float32 copy of one frame of hand landmarks.

    The caller's array is never modified.  The copy is transformed in order:

    1. x/y are shifted so landmark 0 sits at the origin.
    2. If ``handed`` starts with "left" (case-insensitive) x is negated, so
       both hands end up in the same orientation.
    3. x/y are rotated so the vector from the origin to landmark 9 points
       along +y.
    4. x, y and z are divided by the largest pairwise x/y distance between
       landmarks; the division is skipped (scale 1.0) when that distance is
       below 1e-6.

    NOTE(review): indices 0 and 9 are presumably the wrist and middle-finger
    base of the hand model that produces ``pts`` -- confirm against the caller.
    """
    out = pts.astype(np.float32).copy()

    # Translation: make landmark 0 the origin of the x/y plane.
    out[:, :2] -= out[0, :2]

    # Mirroring: fold left hands onto the right-hand orientation.
    is_left = bool(handed) and handed.lower().startswith("left")
    if is_left:
        out[:, 0] *= -1.0

    # Rotation: turn by (pi/2 - heading) so the reference vector lies on +y.
    ref = out[9, :2]
    turn = math.pi / 2 - math.atan2(ref[1], ref[0])
    cos_t = math.cos(turn)
    sin_t = math.sin(turn)
    rot = np.array([[cos_t, -sin_t], [sin_t, cos_t]], np.float32)
    out[:, :2] = out[:, :2] @ rot.T

    # Scale: normalise by the largest distance between any two landmarks (x/y).
    planar = out[:, :2]
    gaps = planar[None, :, :] - planar[:, None, :]
    span = np.max(np.linalg.norm(gaps, axis=-1))
    if span < 1e-6:
        # All points coincide; avoid dividing by (almost) zero.
        span = 1.0
    out[:, :2] /= span
    out[:, 2] /= span
    return out
def next_idx(folder: Path, prefix="clip_"):
    """Return the next unused clip number inside ``folder``.

    Entries named ``<prefix><digits>.npz`` are scanned and the result is one
    more than the largest number found.  If the folder does not exist, or
    holds no matching entries, the result is 1.
    """
    name_re = re.compile(rf"^{re.escape(prefix)}(\d+)\.npz$")
    if not folder.exists():
        return 1
    hits = (name_re.match(entry) for entry in os.listdir(folder))
    taken = [int(hit.group(1)) for hit in hits if hit]
    return max(taken, default=0) + 1
def countdown(cap, seconds=3):
    """Show a per-second countdown over the live camera feed.

    For every value from ``seconds`` down to 1, frames are read from ``cap``
    for one second and shown in the "sequence capture" window with the number
    drawn large in the centre and a "Starting in..." caption above it.
    Frames that fail to read are skipped.  Pressing 'q' releases the camera,
    closes all windows and raises ``SystemExit``.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    digit_scale, digit_weight = 5, 10
    caption, caption_scale, caption_weight = "Starting in...", 1.2, 3
    # The caption never changes, so its pixel width is measured only once.
    (caption_w, _), _ = cv2.getTextSize(caption, font, caption_scale, caption_weight)

    for remaining in range(seconds, 0, -1):
        digit = str(remaining)
        (digit_w, digit_h), _ = cv2.getTextSize(digit, font, digit_scale, digit_weight)
        tick_start = time.time()
        while time.time() - tick_start < 1.0:
            grabbed, frame = cap.read()
            if not grabbed:
                continue
            frame_h, frame_w = frame.shape[:2]

            # Large digit, centred horizontally and vertically.
            digit_origin = ((frame_w - digit_w) // 2, (frame_h + digit_h) // 2)
            cv2.putText(frame, digit, digit_origin, font, digit_scale,
                        (0, 0, 255), digit_weight, cv2.LINE_AA)

            # Caption sits one digit-height plus 20 px above the frame centre.
            caption_origin = ((frame_w - caption_w) // 2, (frame_h // 2) - digit_h - 20)
            cv2.putText(frame, caption, caption_origin, font, caption_scale,
                        (0, 255, 255), caption_weight, cv2.LINE_AA)

            cv2.imshow("sequence capture", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                cap.release()
                cv2.destroyAllWindows()
                raise SystemExit("Aborted during countdown")
def draw_progress_bar(img, frac_remaining, bar_h=16, margin=12):
    """Paint a depleting progress bar along the top edge of ``img`` (in place).

    The bar spans the image width minus ``margin`` on each side, starts
    ``margin`` pixels from the top and is ``bar_h`` pixels tall.
    ``frac_remaining`` is clamped to [0, 1]: 1.0 draws a full green bar,
    0.0 leaves only the grey background and border.
    """
    _, img_w = img.shape[:2]
    left, right = margin, img_w - margin
    top, bottom = margin, margin + bar_h

    # Track: filled dark-grey box with a lighter 2 px outline.
    cv2.rectangle(img, (left, top), (right, bottom), (40, 40, 40), -1)
    cv2.rectangle(img, (left, top), (right, bottom), (90, 90, 90), 2)

    # Fill: green portion proportional to the time still left.
    clamped = max(0.0, min(1.0, frac_remaining))
    fill_px = int((right - left) * clamped)
    if fill_px <= 0:
        return
    cv2.rectangle(img, (left, top), (left + fill_px, bottom), (0, 200, 0), -1)
def main():
    """Record a batch of hand-landmark clips for one letter from the webcam.

    Command-line options:
        --label    single letter A..Z (case-insensitive, surrounding spaces ignored)
        --split    "train" or "val"
        --seconds  length of each clip in seconds (default 0.8)
        --camera   OpenCV camera index (default 0)
        --width / --height  requested capture resolution (default 640x480)
        --count    number of clips; defaults to 100 for train, 20 for val

    After one 3-second countdown the clips are recorded back to back.  Each
    clip that contains at least one detected hand is written to
    ``sequences/<split>/<LABEL>/clip_NNN.npz`` with two arrays: ``X`` (one
    flattened normalised landmark vector per frame) and ``tip`` (normalised
    x/y of landmark 8 per frame).  Numbering continues after the highest
    existing clip in the folder.  Clips with no detection are skipped.
    Pressing 'q' stops the session early.

    Raises:
        SystemExit: if the label is not a single letter A..Z, or the camera
            cannot be opened.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--label", required=True, help="Letter label (A..Z)")
    ap.add_argument("--split", required=True, choices=["train", "val"])
    ap.add_argument("--seconds", type=float, default=0.8, help="Clip length (s)")
    ap.add_argument("--camera", type=int, default=0)
    ap.add_argument("--width", type=int, default=640)
    ap.add_argument("--height", type=int, default=480)
    ap.add_argument("--count", type=int, default=None,
                    help="How many clips (default=100 train, 20 val)")
    args = ap.parse_args()
    if args.count is None:
        args.count = 100 if args.split == "train" else 20

    label = args.label.upper().strip()
    if not (len(label) == 1 and "A" <= label <= "Z"):
        raise SystemExit("Use --label A..Z")

    out_dir = Path("sequences") / args.split / label
    out_dir.mkdir(parents=True, exist_ok=True)
    idx = next_idx(out_dir)

    # The detector and the camera are both released on every exit path
    # (normal completion, 'q', camera-open failure, unexpected exception).
    with mp.solutions.hands.Hands(
        static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5
    ) as hands:
        cap = cv2.VideoCapture(args.camera)
        try:
            if not cap.isOpened():
                raise SystemExit(f"Could not open camera {args.camera}")
            cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
            print(f"Recording {args.count} clips for {label}/{args.split}, {args.seconds}s each.")
            countdown(cap, 3)

            for n in range(args.count):
                seq_X, seq_tip = [], []
                start_t = time.time()
                end_t = start_t + args.seconds
                while True:
                    now = time.time()
                    if now >= end_t:
                        break
                    ok, fr = cap.read()
                    if not ok:
                        # A failed read ends this clip; frames gathered so far are kept.
                        break
                    rgb = cv2.cvtColor(fr, cv2.COLOR_BGR2RGB)
                    res = hands.process(rgb)
                    if res.multi_hand_landmarks:
                        ih = res.multi_hand_landmarks[0]
                        handed = None
                        if res.multi_handedness:
                            handed = res.multi_handedness[0].classification[0].label
                        raw = np.array([[lm.x, lm.y, lm.z] for lm in ih.landmark], np.float32)
                        # normalize_frame works on a copy, so `raw` keeps image coordinates.
                        pts = normalize_frame(raw, handed)
                        seq_X.append(pts.reshape(-1))
                        seq_tip.append(pts[8, :2])
                        # Feedback marker: must use the raw image-normalised landmark,
                        # not the wrist-centred/rotated one, or it lands in the wrong place.
                        cv2.circle(fr,
                                   (int(fr.shape[1] * raw[8, 0]), int(fr.shape[0] * raw[8, 1])),
                                   6, (0, 255, 0), -1)
                    # Overlay progress + status.
                    frac_remaining = (end_t - now) / max(1e-6, args.seconds)  # 1 -> 0
                    draw_progress_bar(fr, frac_remaining, bar_h=16, margin=12)
                    cv2.putText(fr, f"{label} {args.split} Clip {n+1}/{args.count}",
                                (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2, cv2.LINE_AA)
                    cv2.imshow("sequence capture", fr)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        return

                if seq_X:
                    X = np.stack(seq_X, 0)
                    tip = np.stack(seq_tip, 0)
                    path = out_dir / f"clip_{idx:03d}.npz"
                    np.savez_compressed(path, X=X, tip=tip)
                    print(f"💾 saved {path} frames={X.shape[0]}")
                    idx += 1
                else:
                    print("⚠️ No hand detected; skipped clip.")
            print("✅ Done recording.")
        finally:
            cap.release()
            cv2.destroyAllWindows()
# Start a capture session only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()