Initial commit: ASL handshape recognition project
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
137
prep_landmarks_binary.py
Executable file
137
prep_landmarks_binary.py
Executable file
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Prepare landmarks for a single binary task (Letter vs Not_Letter).
|
||||
|
||||
Data layout (per letter):
|
||||
data/asl/
|
||||
train/
|
||||
A/
|
||||
Not_A/
|
||||
val/
|
||||
A/
|
||||
Not_A/
|
||||
|
||||
Usage (no outdir needed):
|
||||
python prep_landmarks_binary.py --letter A
|
||||
# -> saves into landmarks_A/
|
||||
|
||||
Optional:
|
||||
python prep_landmarks_binary.py --letter B --data /path/to/dataset
|
||||
"""
|
||||
import os, argparse, json, math
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import cv2
|
||||
import mediapipe as mp
|
||||
|
||||
# ---------- geometry helpers ----------
|
||||
def _angle(v): return math.atan2(v[1], v[0])
|
||||
|
||||
def _rot2d(t):
|
||||
c, s = math.cos(t), math.sin(t)
|
||||
return np.array([[c, -s], [s, c]], dtype=np.float32)
|
||||
|
||||
def normalize_landmarks(pts, handed=None):
    """
    Canonicalize one hand's MediaPipe landmarks into a translation-,
    chirality-, rotation- and scale-invariant feature vector.

    pts: (21, 3) array in MediaPipe normalized image coordinates.
    handed: optional handedness label ("Left"/"Right"); left hands are
        mirrored across x so both map to the same canonical orientation.

    Pipeline:
      1) translate so the wrist (landmark 0) sits at the XY origin
      2) mirror left hands (x -> -x) to canonicalize chirality
      3) rotate so the wrist->middle-MCP direction points along +Y
      4) divide XY and Z by the largest pairwise XY distance

    returns: flattened (63,) float32 vector.
    """
    out = np.array(pts, dtype=np.float32)

    # 1) wrist to origin (XY only; z stays as MediaPipe's relative depth)
    out[:, :2] = out[:, :2] - out[0, :2]

    # 2) canonicalize left/right by mirroring left hands
    if handed and handed.lower().startswith("left"):
        out[:, 0] = -out[:, 0]

    # 3) rotate the wrist->middle-MCP (landmark 9) direction onto +Y
    mcp_xy = out[9, :2]
    theta = math.pi / 2 - math.atan2(mcp_xy[1], mcp_xy[0])
    cos_t, sin_t = math.cos(theta), math.sin(theta)
    rot = np.array([[cos_t, -sin_t], [sin_t, cos_t]], dtype=np.float32)
    out[:, :2] = out[:, :2] @ rot.T

    # 4) normalize by the hand's XY span; degenerate spans fall back to 1
    #    so we never divide by ~0
    xy = out[:, :2]
    span = float(np.linalg.norm(xy[None, :, :] - xy[:, None, :], axis=-1).max())
    if span < 1e-6:
        span = 1.0
    out[:, :2] /= span
    out[:, 2] /= span

    return out.reshape(-1)
|
||||
|
||||
# ---------- extraction ----------
|
||||
def collect(split_dir: Path, pos_name: str, neg_name: str, min_det_conf: float):
    """
    Extract normalized hand-landmark features for one dataset split.

    Walks split_dir/<pos_name> (label 1) and split_dir/<neg_name> (label 0),
    runs MediaPipe Hands on each image, and keeps only images where a hand
    was detected. Unreadable images and missing class directories are skipped.

    Args:
        split_dir: split root, e.g. data/asl/train
        pos_name: positive class folder name (label 1)
        neg_name: negative class folder name (label 0)
        min_det_conf: MediaPipe minimum detection confidence

    Returns:
        X: (n, 63) float32 feature matrix ((0, 63) when nothing was usable)
        y: (n,) int64 labels
        paths: list of source image paths, row-aligned with X
    """
    # Hoisted: suffix set built once, not per file.
    image_exts = {".jpg", ".jpeg", ".png", ".bmp", ".webp"}
    X, y, paths = [], [], []
    total, used = 0, 0

    hands = mp.solutions.hands.Hands(
        static_image_mode=True,
        max_num_hands=1,
        min_detection_confidence=min_det_conf
    )
    try:
        for label, cls in [(1, pos_name), (0, neg_name)]:
            cls_dir = split_dir / cls
            if not cls_dir.exists():
                continue
            for p in cls_dir.rglob("*"):
                if not p.is_file() or p.suffix.lower() not in image_exts:
                    continue
                total += 1
                bgr = cv2.imread(str(p))
                if bgr is None:  # unreadable / corrupt image
                    continue
                rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
                res = hands.process(rgb)
                if not res.multi_hand_landmarks:
                    continue  # no hand detected in this image

                ih = res.multi_hand_landmarks[0]
                handed = None
                if res.multi_handedness:
                    handed = res.multi_handedness[0].classification[0].label  # "Left"/"Right"

                pts = np.array([[lm.x, lm.y, lm.z] for lm in ih.landmark], dtype=np.float32)
                X.append(normalize_landmarks(pts, handed))
                y.append(label)
                paths.append(str(p))
                used += 1
    finally:
        # Fix: the Hands graph holds native resources and was never released.
        hands.close()

    X = np.stack(X) if X else np.zeros((0, 63), np.float32)
    y = np.array(y, dtype=np.int64)
    print(f"Split '{split_dir.name}': found {total}, used {used} (hands detected).")
    return X, y, paths
|
||||
|
||||
def main():
    """CLI entry point: build train/val landmark arrays for one binary letter task.

    Reads images from <data>/train and <data>/val (classes <L> and Not_<L>),
    extracts features via collect(), and writes .npy arrays plus metadata
    into the output directory.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--letter", required=True, help="Target letter (A–Z)")
    ap.add_argument("--data", default="data/asl", help="Root with train/ and val/ (default: data/asl)")
    ap.add_argument("--outdir", default=None, help="Output dir (default: landmarks_<LETTER>)")
    ap.add_argument("--min_det_conf", type=float, default=0.5, help="MediaPipe min detection confidence")
    args = ap.parse_args()

    letter = args.letter.upper()
    pos_name = letter
    neg_name = f"Not_{letter}"
    outdir = Path(args.outdir or f"landmarks_{letter}")
    outdir.mkdir(parents=True, exist_ok=True)

    root = Path(args.data)
    Xtr, ytr, ptr = collect(root / "train", pos_name, neg_name, args.min_det_conf)
    Xva, yva, pva = collect(root / "val", pos_name, neg_name, args.min_det_conf)

    # Save arrays + metadata
    np.save(outdir / "train_X.npy", Xtr)
    np.save(outdir / "train_y.npy", ytr)
    np.save(outdir / "val_X.npy", Xva)
    np.save(outdir / "val_y.npy", yva)

    # index 0: Not_<letter>, index 1: <letter>
    with open(outdir / "class_names.json", "w") as f:
        json.dump([neg_name, pos_name], f)

    # Fix: original used open(...).write(...) which never explicitly closes
    # the handle; write_text opens, writes, flushes and closes deterministically.
    (outdir / "train_paths.txt").write_text("\n".join(ptr))
    (outdir / "val_paths.txt").write_text("\n".join(pva))

    print(f"✅ Saved {letter}: train {Xtr.shape}, val {Xva.shape}, classes={[neg_name, pos_name]} → {outdir}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user