Initial commit: ASL handshape recognition project
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
10
.gitignore
vendored
Normal file
10
.gitignore
vendored
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# Ignore everything
|
||||||
|
*
|
||||||
|
|
||||||
|
# But not these files...
|
||||||
|
!.gitignore
|
||||||
|
!*.py
|
||||||
|
!*.sh
|
||||||
|
|
||||||
|
# And not directories (so git can recurse into them)
|
||||||
|
!*/
|
||||||
12
1prep.sh
Executable file
12
1prep.sh
Executable file
@@ -0,0 +1,12 @@
|
|||||||
|
#!/usr/bin/env bash
# Step 1: extract hand-landmark features per letter.
# Wraps prep_landmarks_binary.py; run from the project root after
# populating data/asl/train and data/asl/val.
set -euo pipefail

# Single letters:
python prep_landmarks_binary.py --letter A
# etc for B-Z

# OR all letters at once:
# for L in {A..Z}; do
# python prep_landmarks_binary.py --letter "$L"
# done
|
||||||
|
|
||||||
14
2train.sh
Executable file
14
2train.sh
Executable file
@@ -0,0 +1,14 @@
|
|||||||
|
#!/usr/bin/env bash
# Step 2: train one binary MLP per letter (wraps train_mlp.py).
# Requires landmarks_<LETTER>/ produced by 1prep.sh.
set -euo pipefail

# Train single letters:
python train_mlp.py --letter A --epochs 40 --batch 64
# etc for B-Z

# Each run saves: asl_<LETTER>_mlp.pt

# OR all letters at once:
# for L in {A..Z}; do
# python train_mlp.py --letter "$L" --epochs 40 --batch 64
# done
|
||||||
|
|
||||||
5
3demo.sh
Executable file
5
3demo.sh
Executable file
@@ -0,0 +1,5 @@
|
|||||||
|
#!/usr/bin/env bash
# Step 3: launch the live webcam demo for letter A.
# Requires asl_A_mlp.pt produced by 2train.sh.
set -euo pipefail

python infer_webcam.py --letter A
|
||||||
|
|
||||||
62
eval_val.py
Normal file
62
eval_val.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Evaluate the trained per-letter model on the saved val split.
|
||||||
|
Prints confusion matrix and a classification report.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python eval_val.py --letter A
|
||||||
|
"""
|
||||||
|
import argparse, json
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.metrics import confusion_matrix, classification_report
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
class MLP(nn.Module):
    """Small feed-forward classifier: in_dim -> 128 -> 64 -> num_classes.

    Hidden layers use ReLU with light dropout; the final layer emits raw
    logits (no softmax).
    """

    def __init__(self, in_dim, num_classes):
        super().__init__()
        layers = [
            nn.Linear(in_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(64, num_classes),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits for a batch of feature vectors."""
        return self.net(x)
|
||||||
|
|
||||||
|
def main():
    """Evaluate a trained per-letter model on its saved validation split.

    Loads landmarks_<L>/val_X.npy, val_y.npy and class_names.json, restores
    the asl_<L>_mlp.pt checkpoint, and prints a confusion matrix plus a
    classification report.

    Raises:
        FileNotFoundError: if the landmark arrays or checkpoint are missing.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--letter", required=True, help="Target letter (A–Z)")
    args = ap.parse_args()
    L = args.letter.upper()

    # Load val split and classes
    X = np.load(f"landmarks_{L}/val_X.npy")
    y = np.load(f"landmarks_{L}/val_y.npy")
    # BUGFIX: use a context manager so the JSON file handle is closed
    # deterministically (the original leaked an open file object).
    with open(f"landmarks_{L}/class_names.json") as f:
        classes = json.load(f)

    # Load checkpoint (disable weights-only safety; handle tensor/ndarray)
    state = torch.load(f"asl_{L}_mlp.pt", map_location="cpu", weights_only=False)
    X_mean = state["X_mean"]
    X_std = state["X_std"]
    if isinstance(X_mean, torch.Tensor):
        X_mean = X_mean.cpu().numpy()
    if isinstance(X_std, torch.Tensor):
        X_std = X_std.cpu().numpy()
    X_mean = np.asarray(X_mean, dtype=np.float32)
    # +1e-6 guards against divide-by-zero on constant features.
    X_std = np.asarray(X_std, dtype=np.float32) + 1e-6

    model = MLP(X.shape[1], len(classes))
    model.load_state_dict(state["model"])
    model.eval()

    # Normalize with the training statistics, then predict.
    Xn = (X - X_mean) / X_std
    with torch.no_grad():
        probs = torch.softmax(model(torch.from_numpy(Xn).float()), dim=1).numpy()
    pred = probs.argmax(axis=1)

    print("Classes:", classes)  # e.g., ['Not_A','A']
    print("\nConfusion matrix (rows=true, cols=pred):")
    print(confusion_matrix(y, pred))
    print("\nReport:")
    print(classification_report(y, pred, target_names=classes, digits=3))
|
||||||
|
|
||||||
|
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||||
220
infer_webcam-multi.py
Normal file
220
infer_webcam-multi.py
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
infer_webcam_multi.py
|
||||||
|
Live multi-letter inference from webcam using multiple per-letter binary models.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
# Detect A, B, C using default filenames asl_A_mlp.pt, asl_B_mlp.pt, asl_C_mlp.pt
|
||||||
|
python infer_webcam_multi.py --letters A,B,C
|
||||||
|
|
||||||
|
# Same but with a confidence threshold for accepting any letter
|
||||||
|
python infer_webcam_multi.py --letters A,B,C --threshold 0.8
|
||||||
|
|
||||||
|
# Explicit model paths (overrides --letters)
|
||||||
|
python infer_webcam_multi.py --models asl_A_mlp.pt asl_B_mlp.pt --threshold 0.75
|
||||||
|
|
||||||
|
Press 'q' to quit.
|
||||||
|
"""
|
||||||
|
import os, math, argparse
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
import torch
|
||||||
|
import mediapipe as mp
|
||||||
|
|
||||||
|
# ---------- geometry helpers ----------
|
||||||
|
def _angle(v): return math.atan2(v[1], v[0])
|
||||||
|
def _rot2d(t):
|
||||||
|
c, s = math.cos(t), math.sin(t)
|
||||||
|
return np.array([[c, -s], [s, c]], dtype=np.float32)
|
||||||
|
|
||||||
|
def normalize_landmarks(pts, handedness_label=None):
    """Canonicalize 21 MediaPipe hand landmarks into a flat (63,) vector.

    Steps: translate the wrist (landmark 0) to the origin, mirror left hands
    into right-hand orientation, rotate the wrist->middle-MCP direction onto
    +Y, then scale x/y/z by the maximum pairwise XY distance.
    """
    out = pts.astype(np.float32).copy()
    # Translate so the wrist sits at the origin.
    out[:, :2] -= out[0, :2]
    # Mirror left hands so every sample shares one orientation.
    if handedness_label and handedness_label.lower().startswith("left"):
        out[:, 0] = -out[:, 0]
    # Rotate the wrist->middle-MCP vector (landmark 9) onto +Y.
    v = out[9, :2]
    theta = math.pi / 2 - math.atan2(v[1], v[0])
    c, s = math.cos(theta), math.sin(theta)
    rot = np.array([[c, -s], [s, c]], dtype=np.float32)
    out[:, :2] = out[:, :2] @ rot.T
    # Scale by the largest pairwise XY distance; guard degenerate hands.
    xy = out[:, :2]
    spread = np.linalg.norm(xy[None, :, :] - xy[:, None, :], axis=-1).max()
    scale = float(spread) if spread >= 1e-6 else 1.0
    out[:, :2] /= scale
    out[:, 2] /= scale
    return out.reshape(-1)
|
||||||
|
|
||||||
|
# ---------- MLP ----------
|
||||||
|
class MLP(torch.nn.Module):
    """Feed-forward classifier over landmark features (128 -> 64 -> classes)."""

    def __init__(self, in_dim, num_classes):
        super().__init__()
        nn = torch.nn  # local alias to keep the layer list compact
        self.net = nn.Sequential(
            nn.Linear(in_dim, 128), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(128, 64), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Return raw class logits for input features x."""
        return self.net(x)
|
||||||
|
|
||||||
|
# ---------- Utilities ----------
|
||||||
|
def load_model_bundle(model_path):
    """
    Load a single per-letter model checkpoint and return a dict bundle with:
    - 'model': torch.nn.Module in eval mode (caller moves it to a device)
    - 'classes': list of class names, e.g. ['Not_A', 'A']
    - 'pos_index': index of the positive (letter) class in 'classes'
    - 'X_mean', 'X_std': float32 numpy training statistics
    - 'letter': inferred letter string for display (e.g., 'A')
    """
    state = torch.load(model_path, map_location="cpu", weights_only=False)
    classes = state["classes"]

    # Positive class = first name NOT starting with "Not_"; fall back to the
    # last class when every name carries the prefix.
    pos_idx = next(
        (i for i, name in enumerate(classes) if not name.lower().startswith("not_")),
        len(classes) - 1,
    )

    # Display letter: strip a leading "Not_" prefix if present.
    letter_name = classes[pos_idx]
    if letter_name.lower().startswith("not_"):
        letter_name = letter_name[4:]

    # Normalization stats may have been saved as tensors or ndarrays.
    X_mean = state["X_mean"]
    X_std = state["X_std"]
    if isinstance(X_mean, torch.Tensor):
        X_mean = X_mean.cpu().numpy()
    if isinstance(X_std, torch.Tensor):
        X_std = X_std.cpu().numpy()
    X_mean = np.asarray(X_mean, dtype=np.float32)
    X_std = np.asarray(X_std, dtype=np.float32) + 1e-6  # avoid divide-by-zero

    model = MLP(63, len(classes))
    model.load_state_dict(state["model"])
    model.eval()

    return {
        "path": model_path,
        "model": model,
        "classes": classes,
        "pos_index": pos_idx,
        "X_mean": X_mean,
        "X_std": X_std,
        "letter": letter_name,
    }
|
||||||
|
|
||||||
|
def put_text(img, text, org, scale=1.1, color=(0,255,0), thick=2):
    """Draw anti-aliased text on img at pixel position org (HERSHEY font)."""
    cv2.putText(
        img, text, org,
        cv2.FONT_HERSHEY_SIMPLEX, scale, color, thick, cv2.LINE_AA,
    )
|
||||||
|
|
||||||
|
# ---------- Main ----------
|
||||||
|
def main():
    """Run live multi-letter webcam inference.

    Loads one binary checkpoint per requested letter, then for each frame
    scores the detected hand with every model and displays the best letter
    (or "Unknown" when below --threshold) plus a top-3 scoreboard.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--letters", help="Comma-separated letters, e.g. A,B,C (uses asl_<L>_mlp.pt)")
    ap.add_argument("--models", nargs="+", help="Explicit model paths (overrides --letters)")
    ap.add_argument("--threshold", type=float, default=0.5,
                    help="Reject threshold on positive-class probability (default: 0.5)")
    ap.add_argument("--camera", type=int, default=0, help="OpenCV camera index (default: 0)")
    ap.add_argument("--width", type=int, default=640, help="Requested capture width (default: 640)")
    ap.add_argument("--height", type=int, default=480, help="Requested capture height (default: 480)")
    args = ap.parse_args()

    # Resolve model paths: explicit --models wins over conventional names.
    model_paths = []
    if args.models:
        model_paths = args.models
    elif args.letters:
        for L in [s.strip().upper() for s in args.letters.split(",") if s.strip()]:
            model_paths.append(f"asl_{L}_mlp.pt")
    else:
        raise SystemExit("Please provide --letters A,B,C or --models path1.pt path2.pt ...")

    # Fail fast on missing checkpoints.
    for p in model_paths:
        if not os.path.exists(p):
            raise SystemExit(f"❌ Model file not found: {p}")

    # Device: Apple-Silicon GPU when available, otherwise CPU.
    device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")

    # Load bundles and move each model onto the device.
    bundles = [load_model_bundle(p) for p in model_paths]
    for b in bundles:
        b["model"].to(device)
    print("✅ Loaded models:", ", ".join(f"{b['letter']}({os.path.basename(b['path'])})" for b in bundles))

    # MediaPipe Hands (streaming, single hand)
    hands = mp.solutions.hands.Hands(
        static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5
    )

    # Camera
    cap = cv2.VideoCapture(args.camera)
    if not cap.isOpened():
        hands.close()  # BUGFIX: release MediaPipe resources on early exit
        raise SystemExit(f"❌ Could not open camera index {args.camera}")
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)

    print("Press 'q' to quit.")
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break

            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = hands.process(rgb)

            overlay = frame.copy()
            label_text = "No hand"
            scoreboard = []
            if res.multi_hand_landmarks:
                ih = res.multi_hand_landmarks[0]
                handed = None
                if res.multi_handedness:
                    handed = res.multi_handedness[0].classification[0].label

                pts = np.array([[lm.x, lm.y, lm.z] for lm in ih.landmark], dtype=np.float32)
                feat = normalize_landmarks(pts, handedness_label=handed)

                # Score the hand with every per-letter model; track the best.
                best_letter, best_prob = None, -1.0
                for b in bundles:
                    X_mean = b["X_mean"].flatten()
                    X_std = b["X_std"].flatten()
                    xn = (feat - X_mean) / X_std
                    xt = torch.from_numpy(xn).float().unsqueeze(0).to(device)
                    with torch.no_grad():
                        probs = torch.softmax(b["model"](xt), dim=1)[0].cpu().numpy()
                    p_pos = float(probs[b["pos_index"]])
                    scoreboard.append((b["letter"], p_pos))
                    if p_pos > best_prob:
                        best_prob = p_pos
                        best_letter = b["letter"]

                # Compose label based on threshold
                if best_prob >= args.threshold:
                    label_text = f"{best_letter} {best_prob*100:.1f}%"
                else:
                    label_text = f"Unknown ({best_letter} {best_prob*100:.1f}%)"

                # Sort scoreboard desc and show top 3
                scoreboard.sort(key=lambda x: x[1], reverse=True)
                y0 = 80
                put_text(overlay, "Scores:", (20, y0), scale=0.9, color=(0,255,255), thick=2)
                y = y0 + 30
                # BUGFIX: original used enumerate() but never read the index.
                for L, p in scoreboard[:3]:
                    put_text(overlay, f"{L}: {p*100:.1f}%", (20, y), scale=0.9, color=(0,255,0), thick=2)
                    y += 28

            put_text(overlay, label_text, (20, 40), scale=1.2, color=(0,255,0), thick=3)
            cv2.imshow("ASL multi-letter demo", overlay)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # BUGFIX: always release the camera, windows, and the MediaPipe graph,
        # even when the loop raises; the original leaked `hands` entirely.
        cap.release()
        cv2.destroyAllWindows()
        hands.close()
|
||||||
|
|
||||||
|
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||||
137
infer_webcam.py
Executable file
137
infer_webcam.py
Executable file
@@ -0,0 +1,137 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
infer_webcam.py
|
||||||
|
Live webcam demo: detect a hand with MediaPipe, normalize landmarks,
|
||||||
|
classify with a trained MLP model.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
python infer_webcam.py --letter A # loads asl_A_mlp.pt
|
||||||
|
python infer_webcam.py --letter B # loads asl_B_mlp.pt
|
||||||
|
python infer_webcam.py --model /path/to/asl_A_mlp.pt
|
||||||
|
|
||||||
|
Press 'q' to quit.
|
||||||
|
"""
|
||||||
|
import os, math, argparse
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
import torch
|
||||||
|
import mediapipe as mp
|
||||||
|
|
||||||
|
# ---------- geometry helpers ----------
|
||||||
|
def _angle(v): return math.atan2(v[1], v[0])
|
||||||
|
def _rot2d(t):
|
||||||
|
c, s = math.cos(t), math.sin(t)
|
||||||
|
return np.array([[c, -s], [s, c]], dtype=np.float32)
|
||||||
|
|
||||||
|
def normalize_landmarks(pts, handedness_label=None):
    """Return a canonical, scale-free (63,) feature vector for 21 landmarks.

    Pipeline: wrist -> origin, mirror left hands to right, rotate the
    wrist->middle-MCP direction onto +Y, then divide x/y/z by the maximum
    pairwise XY distance.
    """
    work = pts.astype(np.float32).copy()
    # translate wrist to origin
    work[:, :2] = work[:, :2] - work[0, :2]
    # mirror left→right
    if handedness_label and handedness_label.lower().startswith("left"):
        work[:, 0] *= -1.0
    # rotate wrist→middle_mcp to +Y
    mcp = work[9, :2]
    rot = _rot2d(math.pi / 2 - _angle(mcp))
    work[:, :2] = work[:, :2] @ rot.T
    # scale by max pairwise distance (degenerate hands keep scale 1.0)
    xy = work[:, :2]
    dists = np.linalg.norm(xy[None, :, :] - xy[:, None, :], axis=-1)
    d = float(dists.max())
    if d < 1e-6:
        d = 1.0
    work[:, :2] /= d
    work[:, 2] /= d
    return work.reshape(-1)
|
||||||
|
|
||||||
|
# ---------- model ----------
|
||||||
|
class MLP(torch.nn.Module):
    """Landmark classifier head: two ReLU hidden layers with dropout."""

    def __init__(self, in_dim, num_classes):
        super().__init__()
        # (in_features, out_features, dropout) per hidden stage; layer
        # ordering matches the original Sequential (indices net.0..net.6).
        stages = [(in_dim, 128, 0.2), (128, 64, 0.1)]
        layers = []
        for d_in, d_out, p_drop in stages:
            layers += [
                torch.nn.Linear(d_in, d_out),
                torch.nn.ReLU(),
                torch.nn.Dropout(p_drop),
            ]
        layers.append(torch.nn.Linear(64, num_classes))
        self.net = torch.nn.Sequential(*layers)

    def forward(self, x):
        """Map (batch, in_dim) features to (batch, num_classes) logits."""
        return self.net(x)
|
||||||
|
|
||||||
|
# ---------- main ----------
|
||||||
|
def main():
    """Live single-letter webcam demo: detect a hand, classify, overlay label."""
    ap = argparse.ArgumentParser()
    grp = ap.add_mutually_exclusive_group(required=True)
    grp.add_argument("--letter", help="Target letter (A–Z). Loads asl_<LETTER>_mlp.pt")
    grp.add_argument("--model", help="Path to trained .pt model (overrides --letter)")
    ap.add_argument("--camera", type=int, default=0, help="OpenCV camera index (default: 0)")
    args = ap.parse_args()

    # Resolve model path
    model_path = args.model
    if model_path is None:
        letter = args.letter.upper()
        model_path = f"asl_{letter}_mlp.pt"

    if not os.path.exists(model_path):
        raise SystemExit(f"❌ Model file not found: {model_path}")

    # Load state (allowing tensors or numpy inside; disable weights-only safety)
    state = torch.load(model_path, map_location="cpu", weights_only=False)
    classes = state["classes"]
    X_mean = state["X_mean"]
    X_std = state["X_std"]

    # Convert X_mean/X_std to numpy no matter how they were saved
    if isinstance(X_mean, torch.Tensor):
        X_mean = X_mean.cpu().numpy()
    if isinstance(X_std, torch.Tensor):
        X_std = X_std.cpu().numpy()
    X_mean = np.asarray(X_mean, dtype=np.float32)
    X_std = np.asarray(X_std, dtype=np.float32) + 1e-6  # guard divide-by-zero

    model = MLP(63, len(classes))
    model.load_state_dict(state["model"])
    model.eval()

    device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
    model.to(device)

    hands = mp.solutions.hands.Hands(
        static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5
    )

    cap = cv2.VideoCapture(args.camera)
    if not cap.isOpened():
        hands.close()  # BUGFIX: free MediaPipe resources on early exit
        raise SystemExit(f"❌ Could not open camera index {args.camera}")

    # Hoist the flattened stats out of the frame loop (they never change).
    mean_flat = X_mean.flatten()
    std_flat = X_std.flatten()

    print(f"✅ Loaded {model_path} with classes {classes}")
    print("Press 'q' to quit.")
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = hands.process(rgb)

            label_text = "No hand"
            if res.multi_hand_landmarks:
                ih = res.multi_hand_landmarks[0]
                handed = None
                if res.multi_handedness:
                    handed = res.multi_handedness[0].classification[0].label
                pts = np.array([[lm.x, lm.y, lm.z] for lm in ih.landmark], dtype=np.float32)
                feat = normalize_landmarks(pts, handedness_label=handed)
                # standardize with the training statistics
                xn = (feat - mean_flat) / std_flat
                xt = torch.from_numpy(xn).float().unsqueeze(0).to(device)
                with torch.no_grad():
                    probs = torch.softmax(model(xt), dim=1)[0].cpu().numpy()
                idx = int(probs.argmax())
                label_text = f"{classes[idx]} {probs[idx]*100:.1f}%"

            cv2.putText(frame, label_text, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.1, (0,255,0), 2)
            cv2.imshow("ASL handshape demo", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # BUGFIX: release camera/windows/MediaPipe even if the loop raises;
        # the original never closed the MediaPipe Hands graph at all.
        cap.release()
        cv2.destroyAllWindows()
        hands.close()
|
||||||
|
|
||||||
|
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||||
14
make_all_letter_directories.sh
Executable file
14
make_all_letter_directories.sh
Executable file
@@ -0,0 +1,14 @@
|
|||||||
|
#!/bin/bash
# Create train/val directories for per-letter binary datasets:
# data/asl/train/{A,Not_A,...,Z,Not_Z} and same under val/

set -euo pipefail

for split in train val; do
    for L in {A..Z}; do
        # One mkdir call creates both the positive and negative class dirs.
        mkdir -p "data/asl/$split/$L" "data/asl/$split/Not_$L"
    done
done

echo "✅ Created data/asl/train|val/{A,Not_A,...,Z,Not_Z}"
|
||||||
137
prep_landmarks_binary.py
Executable file
137
prep_landmarks_binary.py
Executable file
@@ -0,0 +1,137 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Prepare landmarks for a single binary task (Letter vs Not_Letter).
|
||||||
|
|
||||||
|
Data layout (per letter):
|
||||||
|
data/asl/
|
||||||
|
train/
|
||||||
|
A/
|
||||||
|
Not_A/
|
||||||
|
val/
|
||||||
|
A/
|
||||||
|
Not_A/
|
||||||
|
|
||||||
|
Usage (no outdir needed):
|
||||||
|
python prep_landmarks_binary.py --letter A
|
||||||
|
# -> saves into landmarks_A/
|
||||||
|
|
||||||
|
Optional:
|
||||||
|
python prep_landmarks_binary.py --letter B --data /path/to/dataset
|
||||||
|
"""
|
||||||
|
import os, argparse, json, math
|
||||||
|
from pathlib import Path
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
import mediapipe as mp
|
||||||
|
|
||||||
|
# ---------- geometry helpers ----------
|
||||||
|
def _angle(v): return math.atan2(v[1], v[0])
|
||||||
|
|
||||||
|
def _rot2d(t):
|
||||||
|
c, s = math.cos(t), math.sin(t)
|
||||||
|
return np.array([[c, -s], [s, c]], dtype=np.float32)
|
||||||
|
|
||||||
|
def normalize_landmarks(pts, handed=None):
    """
    pts: (21,3) in MediaPipe normalized image coords.
    Canonicalize and flatten:
      1) translate wrist to origin
      2) mirror left->right (canonicalize)
      3) rotate wrist->middle_mcp to +Y
      4) scale by max pairwise XY distance
    returns: (63,) float32
    """
    arr = pts.astype(np.float32).copy()
    # 1) translate
    arr[:, :2] -= arr[0, :2]
    # 2) canonicalize left/right
    if handed and handed.lower().startswith("left"):
        arr[:, 0] *= -1.0
    # 3) rotate so the wrist->middle-MCP direction points along +Y
    direction = arr[9, :2]  # middle MCP
    arr[:, :2] = arr[:, :2] @ _rot2d(math.pi / 2 - _angle(direction)).T
    # 4) scale by the hand's maximal XY extent (z shares the same scale;
    #    dividing the whole array scales columns 0, 1 and 2 alike)
    xy = arr[:, :2]
    extent = float(np.linalg.norm(xy[None, :, :] - xy[:, None, :], axis=-1).max())
    if extent < 1e-6:
        extent = 1.0
    arr /= extent
    return arr.reshape(-1)
|
||||||
|
|
||||||
|
# ---------- extraction ----------
|
||||||
|
def collect(split_dir: Path, pos_name: str, neg_name: str, min_det_conf: float):
    """Extract normalized landmark features for one dataset split.

    Walks split_dir/<pos_name> (label 1) and split_dir/<neg_name> (label 0),
    runs MediaPipe hand detection on every image, and keeps only the images
    in which a hand was detected.

    Returns:
        (X, y, paths): features (N, 63) float32, labels (N,) int64, and the
        source image paths in matching order.
    """
    X, y, paths = [], [], []
    total, used = 0, 0

    # Hoisted out of the loop: membership set is built once, not per file.
    img_exts = {".jpg", ".jpeg", ".png", ".bmp", ".webp"}

    hands = mp.solutions.hands.Hands(
        static_image_mode=True,
        max_num_hands=1,
        min_detection_confidence=min_det_conf
    )
    try:
        for label, cls in [(1, pos_name), (0, neg_name)]:
            cls_dir = split_dir / cls
            if not cls_dir.exists():
                continue
            for p in cls_dir.rglob("*"):
                if not p.is_file() or p.suffix.lower() not in img_exts:
                    continue
                total += 1
                bgr = cv2.imread(str(p))
                if bgr is None:  # unreadable/corrupt image: skip silently
                    continue
                rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
                res = hands.process(rgb)
                if not res.multi_hand_landmarks:
                    continue

                ih = res.multi_hand_landmarks[0]
                handed = None
                if res.multi_handedness:
                    handed = res.multi_handedness[0].classification[0].label  # "Left"/"Right"

                pts = np.array([[lm.x, lm.y, lm.z] for lm in ih.landmark], dtype=np.float32)
                feat = normalize_landmarks(pts, handed)
                X.append(feat); y.append(label); paths.append(str(p)); used += 1
    finally:
        # BUGFIX: release the MediaPipe graph; the original leaked it.
        hands.close()

    X = np.stack(X) if X else np.zeros((0,63), np.float32)
    y = np.array(y, dtype=np.int64)
    print(f"Split '{split_dir.name}': found {total}, used {used} (hands detected).")
    return X, y, paths
|
||||||
|
|
||||||
|
def main():
    """CLI: extract landmark features for one letter's binary dataset.

    Reads <data>/train/{L,Not_L} and <data>/val/{L,Not_L}, writes the
    train/val .npy arrays, class_names.json, and the source-path lists
    into landmarks_<L>/ (or --outdir).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--letter", required=True, help="Target letter (A–Z)")
    ap.add_argument("--data", default="data/asl", help="Root with train/ and val/ (default: data/asl)")
    ap.add_argument("--outdir", default=None, help="Output dir (default: landmarks_<LETTER>)")
    ap.add_argument("--min_det_conf", type=float, default=0.5, help="MediaPipe min detection confidence")
    args = ap.parse_args()

    L = args.letter.upper()
    pos_name = L
    neg_name = f"Not_{L}"
    outdir = args.outdir or f"landmarks_{L}"
    os.makedirs(outdir, exist_ok=True)

    train_dir = Path(args.data) / "train"
    val_dir = Path(args.data) / "val"

    Xtr, ytr, ptr = collect(train_dir, pos_name, neg_name, args.min_det_conf)
    Xva, yva, pva = collect(val_dir, pos_name, neg_name, args.min_det_conf)

    # Save arrays + metadata
    np.save(f"{outdir}/train_X.npy", Xtr)
    np.save(f"{outdir}/train_y.npy", ytr)
    np.save(f"{outdir}/val_X.npy", Xva)
    np.save(f"{outdir}/val_y.npy", yva)

    with open(f"{outdir}/class_names.json","w") as f:
        json.dump([neg_name, pos_name], f)  # index 0: Not_L, index 1: L

    # BUGFIX: use context managers so the path lists are flushed and closed
    # deterministically (the originals left the file handles open).
    with open(f"{outdir}/train_paths.txt", "w") as f:
        f.write("\n".join(ptr))
    with open(f"{outdir}/val_paths.txt", "w") as f:
        f.write("\n".join(pva))

    print(f"✅ Saved {L}: train {Xtr.shape}, val {Xva.shape}, classes={[neg_name, pos_name]} → {outdir}")
|
||||||
|
|
||||||
|
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||||
127
train_mlp.py
Executable file
127
train_mlp.py
Executable file
@@ -0,0 +1,127 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
train_mlp.py
|
||||||
|
Train a small MLP on landmarks for a single letter (binary: Letter vs Not_Letter).
|
||||||
|
|
||||||
|
Expected workflow:
|
||||||
|
python prep_landmarks_binary.py --letter A # saves landmarks_A/
|
||||||
|
python train_mlp.py --letter A --epochs 40 --batch 64
|
||||||
|
python infer_webcam.py --letter A
|
||||||
|
"""
|
||||||
|
import os, json, argparse
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
from torch.utils.data import TensorDataset, DataLoader
|
||||||
|
|
||||||
|
def get_device():
    """Pick the Apple-Silicon 'mps' backend when available, else the CPU."""
    use_mps = torch.backends.mps.is_available()
    return torch.device("mps" if use_mps else "cpu")
|
||||||
|
|
||||||
|
class MLP(nn.Module):
    """Two-hidden-layer classifier head for landmark features.

    Layer layout is identical to the other scripts' MLP (Sequential indices
    net.0 .. net.6) so checkpoints stay interchangeable.
    """

    def __init__(self, in_dim, num_classes):
        super().__init__()
        # (width, dropout) for each hidden stage
        hidden = ((128, 0.2), (64, 0.1))
        layers, prev = [], in_dim
        for width, p_drop in hidden:
            layers += [nn.Linear(prev, width), nn.ReLU(), nn.Dropout(p_drop)]
            prev = width
        layers.append(nn.Linear(prev, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return raw class logits."""
        return self.net(x)
|
||||||
|
|
||||||
|
def main():
    """Train the per-letter binary MLP and checkpoint the best epoch.

    Loads the arrays produced by prep_landmarks_binary.py from
    landmarks_<LETTER>/ (or --landmarks), standardizes features with the
    training mean/std, trains with AdamW + cosine LR schedule, and saves the
    best-validation-accuracy checkpoint to asl_<LETTER>_mlp.pt (or --out).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--letter", required=True, help="Target letter (A–Z)")
    ap.add_argument("--epochs", type=int, default=40)
    ap.add_argument("--batch", type=int, default=64)
    ap.add_argument("--lr", type=float, default=1e-3)
    ap.add_argument("--landmarks", default=None,
                    help="Landmarks folder (default: landmarks_<LETTER>)")
    ap.add_argument("--out", default=None,
                    help="Output filename (default: asl_<LETTER>_mlp.pt)")
    args = ap.parse_args()

    letter = args.letter.upper()
    landmarks_dir = args.landmarks or f"landmarks_{letter}"
    out_file = args.out or f"asl_{letter}_mlp.pt"

    # Load data (arrays written by prep_landmarks_binary.py)
    trX = np.load(os.path.join(landmarks_dir, "train_X.npy"))
    trY = np.load(os.path.join(landmarks_dir, "train_y.npy"))
    vaX = np.load(os.path.join(landmarks_dir, "val_X.npy"))
    vaY = np.load(os.path.join(landmarks_dir, "val_y.npy"))
    with open(os.path.join(landmarks_dir, "class_names.json")) as f:
        classes = json.load(f)

    print(f"Letter: {letter}")
    print(f"Loaded: train {trX.shape} val {vaX.shape} classes={classes}")

    # Standardize using train mean/std; the same stats are saved in the
    # checkpoint so inference can reproduce the normalization exactly.
    X_mean_np = trX.mean(axis=0, keepdims=True).astype(np.float32)
    X_std_np = (trX.std(axis=0, keepdims=True) + 1e-6).astype(np.float32)
    trXn = (trX - X_mean_np) / X_std_np
    vaXn = (vaX - X_mean_np) / X_std_np

    # Torch datasets
    tr_ds = TensorDataset(torch.from_numpy(trXn).float(), torch.from_numpy(trY).long())
    va_ds = TensorDataset(torch.from_numpy(vaXn).float(), torch.from_numpy(vaY).long())
    tr_dl = DataLoader(tr_ds, batch_size=args.batch, shuffle=True)
    va_dl = DataLoader(va_ds, batch_size=args.batch, shuffle=False)

    device = get_device()
    model = MLP(in_dim=trX.shape[1], num_classes=len(classes)).to(device)
    criterion = nn.CrossEntropyLoss()
    opt = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=1e-4)
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=args.epochs)

    best_acc, best_state = 0.0, None

    for epoch in range(1, args.epochs + 1):
        # Train: one pass over the shuffled training set
        model.train()
        tot, correct, loss_sum = 0, 0, 0.0
        for xb, yb in tr_dl:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad(set_to_none=True)
            logits = model(xb)
            loss = criterion(logits, yb)
            loss.backward()
            opt.step()
            # loss.item() is a per-sample mean; weight by batch size
            loss_sum += loss.item() * yb.size(0)
            correct += (logits.argmax(1) == yb).sum().item()
            tot += yb.size(0)
        tr_loss = loss_sum / max(1, tot)
        tr_acc = correct / max(1, tot)

        # Validate (no gradients, eval mode disables dropout)
        model.eval()
        vtot, vcorrect = 0, 0
        with torch.no_grad():
            for xb, yb in va_dl:
                xb, yb = xb.to(device), yb.to(device)
                logits = model(xb)
                vcorrect += (logits.argmax(1) == yb).sum().item()
                vtot += yb.size(0)
        va_acc = vcorrect / max(1, vtot)
        sched.step()

        print(f"Epoch {epoch:02d}: train_loss={tr_loss:.4f} train_acc={tr_acc:.3f} val_acc={va_acc:.3f}")

        # Checkpoint whenever validation accuracy improves.
        if va_acc > best_acc:
            best_acc = va_acc
            # Save stats as **tensors** (future-proof for torch.load safety)
            best_state = {
                "model": model.state_dict(),
                "classes": classes,
                "X_mean": torch.from_numpy(X_mean_np),  # tensor
                "X_std": torch.from_numpy(X_std_np),  # tensor
            }
            torch.save(best_state, out_file)
            print(f" ✅ Saved best → {out_file} (val_acc={best_acc:.3f})")

    print("Done. Best val_acc:", best_acc)
|
||||||
|
|
||||||
|
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||||
145
webcam_capture.py
Executable file
145
webcam_capture.py
Executable file
@@ -0,0 +1,145 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
capture_webcam.py
|
||||||
|
Show webcam preview and, given --letter L, count down 5s, then capture frames
|
||||||
|
every --interval seconds until --count images are saved.
|
||||||
|
Saves PNGs to ./captures as L001.PNG, L002.PNG, ...
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python capture_webcam.py --letter A
|
||||||
|
python capture_webcam.py --letter B --camera 1
|
||||||
|
python capture_webcam.py --letter C --count 10 --interval 1
|
||||||
|
|
||||||
|
# Default: 5 captures at 2s spacing, 640x480
|
||||||
|
python capture_webcam.py --letter A
|
||||||
|
|
||||||
|
# Ten captures, 1s apart
|
||||||
|
python capture_webcam.py --letter B --count 10 --interval 1
|
||||||
|
|
||||||
|
# USB camera index 1, HD override
|
||||||
|
python capture_webcam.py --letter C --camera 1 --width 1280 --height 720 --count 8 --interval 1.5
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
COUNTDOWN_SECONDS = 5  # On-screen countdown (seconds) shown before the first capture fires.
|
||||||
|
|
||||||
|
def next_sequence_number(captures_dir: Path, letter: str) -> int:
    """Return the next free sequence number for files named like 'A001.PNG'.

    Scans *captures_dir* for existing captures of *letter* (case-insensitive
    match on both the letter and the .PNG extension) and returns
    max(existing index) + 1, or 1 when the directory is missing or empty.

    Args:
        captures_dir: Directory that holds previously captured images.
        letter: Single target letter; used verbatim in the filename pattern.
    """
    pattern = re.compile(rf"^{re.escape(letter)}(\d{{3}})\.PNG$", re.IGNORECASE)
    max_idx = 0
    if captures_dir.exists():
        # Path.iterdir() replaces os.listdir(); we only need entry names.
        for entry in captures_dir.iterdir():
            m = pattern.match(entry.name)
            if m:
                # \d{3} guarantees int() succeeds, so no ValueError guard is needed.
                max_idx = max(max_idx, int(m.group(1)))
    return max_idx + 1
|
||||||
|
|
||||||
|
def draw_text(img, text, org, scale=1.4, color=(0, 255, 0), thickness=2):
    """Draw *text* on *img* at pixel position *org* (BGR color, antialiased).

    Thin wrapper over cv2.putText that fixes the font to HERSHEY_SIMPLEX and
    the line type to LINE_AA so all overlays in this script look consistent.
    Mutates *img* in place; returns None.
    """
    cv2.putText(img, text, org, cv2.FONT_HERSHEY_SIMPLEX, scale, color, thickness, cv2.LINE_AA)
|
||||||
|
|
||||||
|
def main():
    """Parse CLI args, show a countdown preview, then save --count frames.

    Opens the requested camera, displays a live preview with a
    COUNTDOWN_SECONDS countdown, then captures one frame at each scheduled
    absolute time (countdown end + i * --interval) and writes it to
    ./captures as <LETTER><NNN>.PNG. The preview stays open until the user
    presses 'q'. Exits via SystemExit on bad arguments or camera failure.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--letter", required=True, help="Target letter A–Z. Output files like A001.PNG")
    ap.add_argument("--camera", type=int, default=0, help="OpenCV camera index (default: 0)")
    ap.add_argument("--width", type=int, default=640, help="Requested capture width (default: 640)")
    ap.add_argument("--height", type=int, default=480, help="Requested capture height (default: 480)")
    ap.add_argument("--count", type=int, default=5, help="Number of captures to take (default: 5)")
    ap.add_argument("--interval", type=float, default=2.0, help="Seconds between captures (default: 2.0)")
    args = ap.parse_args()

    # Validate inputs up front; SystemExit keeps CLI error output concise.
    letter = args.letter.upper().strip()
    if not (len(letter) == 1 and "A" <= letter <= "Z"):
        raise SystemExit("Please pass a single letter A–Z to --letter (e.g., --letter A)")
    if args.count <= 0:
        raise SystemExit("--count must be >= 1")
    if args.interval <= 0:
        raise SystemExit("--interval must be > 0")

    captures_dir = Path("./captures")
    captures_dir.mkdir(parents=True, exist_ok=True)
    # Continue numbering after any files already present for this letter.
    start_idx = next_sequence_number(captures_dir, letter)

    cap = cv2.VideoCapture(args.camera)
    if not cap.isOpened():
        raise SystemExit(f"❌ Could not open camera index {args.camera}")

    # Try to set resolution (best-effort; drivers may ignore or clamp these).
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)

    window_title = f"Capture {letter} (press 'q' to quit)"
    print(f"Showing webcam. Countdown {COUNTDOWN_SECONDS}s, then capturing {args.count} frame(s) every {args.interval}s...")
    print(f"Saving to: {captures_dir.resolve()} as {letter}NNN.PNG starting at index {start_idx:03d}")

    countdown_done_at = time.time() + COUNTDOWN_SECONDS
    # Absolute wall-clock times when we want to capture (after countdown).
    # Using absolute times keeps spacing correct even if frame reads are slow.
    capture_times = [countdown_done_at + i * args.interval for i in range(args.count)]
    capture_taken = [False] * args.count
    captures_made = 0
    idx = start_idx

    while True:
        ok, frame = cap.read()
        if not ok:
            # Camera unplugged or driver error; stop gracefully.
            print("⚠️ Frame grab failed; ending.")
            break

        now = time.time()

        # Countdown overlay: before the deadline, only show the timer.
        if now < countdown_done_at:
            remaining = int(round(countdown_done_at - now))
            # Draw on a copy so the raw frame stays clean for later capture.
            overlay = frame.copy()
            draw_text(overlay, f"Starting in: {remaining}s", (30, 60), scale=2.0, color=(0, 255, 255), thickness=3)
            draw_text(overlay, f"Letter: {letter}", (30, 120), scale=1.2, color=(0, 255, 0), thickness=2)
            cv2.imshow(window_title, overlay)
        else:
            # Check if it's time for any pending captures. If the loop
            # stalled past several deadlines, all of them fire this frame.
            for i, tcap in enumerate(capture_times):
                if (not capture_taken[i]) and now >= tcap:
                    filename = f"{letter}{idx:03d}.PNG"
                    out_path = captures_dir / filename
                    cv2.imwrite(str(out_path), frame)
                    capture_taken[i] = True
                    captures_made += 1
                    idx += 1
                    print(f"📸 Saved {out_path.name}")

            # Overlay progress: time remaining until the last scheduled capture.
            elapsed_after = now - countdown_done_at
            total_duration = args.interval * (args.count - 1) if args.count > 1 else 0
            remaining_after = max(0.0, total_duration - elapsed_after)
            overlay = frame.copy()
            draw_text(overlay, f"Capturing {letter}… {captures_made}/{args.count}", (30, 60),
                      scale=1.5, color=(0, 255, 0), thickness=3)
            draw_text(overlay, f"Time left: {int(round(remaining_after))}s", (30, 110),
                      scale=1.2, color=(0, 255, 255), thickness=2)
            cv2.imshow(window_title, overlay)

            # If finished all captures, keep preview up until user quits.
            if captures_made >= args.count:
                draw_text(overlay, "Done! Press 'q' to close.", (30, 160),
                          scale=1.2, color=(0, 200, 255), thickness=2)
                cv2.imshow(window_title, overlay)

        # Quit on 'q' (waitKey also pumps the GUI event loop).
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
||||||
|
# Script entry point: run the capture loop when invoked directly.
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user