From 816e34cb171e8462e72cecc4a5b740a17719e4e9 Mon Sep 17 00:00:00 2001
From: jared
Date: Mon, 19 Jan 2026 22:27:20 -0500
Subject: [PATCH] Initial commit: handshapes multiclass project

Co-Authored-By: Claude Opus 4.5
---
 .gitignore                                    |  14 +
 README.md                                     | 255 ++++++++++++++
 capture_sequence.py                           | 259 ++++++++++++++
 doc/capture_sequence.py                       | 323 ++++++++++++++++++
 doc/eval_val.py                               |  70 ++++
 doc/infer_seq_webcam.py                       | 249 ++++++++++++++
 doc/prep_sequence_resampled.py                |  93 +++++
 doc/train_seq.py                              | 137 ++++++++
 eval_val.py                                   |  61 ++++
 first_attempt_landmark_hands/README.md        | 216 ++++++++++++
 .../capture_sequence.py                       | 176 ++++++++++
 first_attempt_landmark_hands/eval_val.py      |  60 ++++
 .../infer_seq_webcam.py                       | 198 +++++++++++
 first_attempt_landmark_hands/make_seq_dirs.sh |  19 ++
 .../prep_sequence_resampled.py                |  71 ++++
 first_attempt_landmark_hands/train_seq.py     | 136 ++++++++
 first_attempt_landmark_hands/what_to_do.txt   |  24 ++
 infer_seq_webcam.py                           | 227 ++++++++++++
 make_seq_dirs.sh                              |  19 ++
 prep_sequence_resampled.py                    |  77 +++++
 train_seq.py                                  | 120 +++++++
 what_to_do.txt                                |  16 +
 22 files changed, 2820 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100755 capture_sequence.py
 create mode 100644 doc/capture_sequence.py
 create mode 100644 doc/eval_val.py
 create mode 100644 doc/infer_seq_webcam.py
 create mode 100644 doc/prep_sequence_resampled.py
 create mode 100644 doc/train_seq.py
 create mode 100755 eval_val.py
 create mode 100644 first_attempt_landmark_hands/README.md
 create mode 100755 first_attempt_landmark_hands/capture_sequence.py
 create mode 100755 first_attempt_landmark_hands/eval_val.py
 create mode 100755 first_attempt_landmark_hands/infer_seq_webcam.py
 create mode 100755 first_attempt_landmark_hands/make_seq_dirs.sh
 create mode 100755 first_attempt_landmark_hands/prep_sequence_resampled.py
 create mode 100755 first_attempt_landmark_hands/train_seq.py
 create mode 100644 first_attempt_landmark_hands/what_to_do.txt
 create mode 100755 infer_seq_webcam.py
 create mode 100755 make_seq_dirs.sh
 create mode 100755 prep_sequence_resampled.py
 create mode 100755 train_seq.py
 create mode 100644 what_to_do.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2f3839e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,14 @@
+# Ignore everything
+*
+
+# But not directories (so git can traverse into them)
+!*/
+
+# Allow these file types
+!*.py
+!*.txt
+!*.md
+!*.sh
+
+# Don't ignore .gitignore itself
+!.gitignore

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..aaba275
--- /dev/null
+++ b/README.md
@@ -0,0 +1,255 @@
# Handshapes Multiclass (Holistic) — README

A small end-to-end pipeline that records MediaPipe **Holistic** landmarks, builds fixed-length sequences, trains a **bidirectional GRU** classifier, evaluates it, and runs a **live webcam demo** that recognizes classes such as words (“Mother”, “Father”, “Go”) or letters.

---

## Quick Start

```bash
# 0) Create class folders
./make_seq_dirs.sh Mother Father Go

# 1) Capture clips (per class; adjust counts as you like)
python capture_sequence.py --label Mother --split train --seconds 0.8 --count 100
python capture_sequence.py --label Mother --split val --seconds 0.8 --count 20
python capture_sequence.py --label Father --split train --seconds 0.8 --count 100
python capture_sequence.py --label Father --split val --seconds 0.8 --count 20
python capture_sequence.py --label Go --split train --seconds 0.8 --count 100
python capture_sequence.py --label Go --split val --seconds 0.8 --count 20

# 2) Build fixed-length dataset (32 frames/clip)
python prep_sequence_resampled.py --in sequences --out landmarks_seq32 --frames 32

# 3) Train, evaluate, and run live inference
python train_seq.py --landmarks landmarks_seq32 --out asl_seq32_gru_mother_father_go.pt
python eval_val.py --landmarks landmarks_seq32 --model asl_seq32_gru_mother_father_go.pt
python infer_seq_webcam.py --model asl_seq32_gru_mother_father_go.pt --threshold 0.35 --smooth 0.1
```

Folder layout after capture:

```
sequences/
  train/
    Mother/ clip_001.npz ...
    Father/ clip_001.npz ...
    Go/ clip_001.npz ...
  val/
    Mother/ ...
    Father/ ...
    Go/ ...
```

---

## Feature Representation (per frame)

From MediaPipe **Holistic**:

* **Right hand** 21×(x,y,z) → 63
* **Left hand** 21×(x,y,z) → 63
* **Face** 468×(x,y,z) → 1,404
* **Pose** 33×(x,y,z,visibility) → 132
* **Face-relative hand extras**: wrist (x,y) + index tip (x,y) for each hand, expressed in the face-normalized frame → 8

**Total** = **1,670 dims** per frame.

### Normalization (high level)

* Hands: translate to the wrist, mirror left → right, rotate so the middle-finger MCP points +Y, scale by the maximum pairwise landmark distance.
* Face: center at the eye midpoint, scale by the inter-ocular distance, rotate so the eye line is horizontal.
* Pose: center at the shoulder midpoint, scale by the shoulder width, rotate so the shoulders are horizontal.
* Extras: per-hand wrist/tip positions projected into the face frame, so the model retains *where* the hand is relative to the face (critical for signs like **Mother** vs **Father**).

---

## How the Pipeline Works

### 1) `make_seq_dirs.sh`

Creates the directory scaffolding under `sequences/` for any labels you pass (letters or words).

* **Usage:** `./make_seq_dirs.sh Mother Father Go`
* **Why:** Keeps data organized as `train/` and `val/` folders per class.

---

### 2) `capture_sequence.py`

Records short clips from your webcam and saves per-frame **feature vectors** into compressed `.npz` files.

**Key behaviors**

* Uses **MediaPipe Holistic** to extract right/left hands, the full face mesh, and pose.
* Computes normalized features plus the face-relative extras.
* Writes each clip as `sequences//
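
The hand-normalization recipe above is easiest to see in code. Below is a minimal NumPy sketch of those steps (translate to the wrist, mirror the left hand onto the right, rotate the wrist-to-middle-MCP vector onto +Y, scale by the maximum pairwise distance). It is an illustration under stated assumptions, not this repo's actual code: the name `normalize_hand` is hypothetical, and only the MediaPipe hand-model landmark indices (0 = wrist, 9 = middle-finger MCP) come from the real API.

```python
import numpy as np

def normalize_hand(pts: np.ndarray, is_left: bool) -> np.ndarray:
    """pts: (21, 3) MediaPipe hand landmarks -> normalized (21, 3)."""
    pts = pts.copy()
    if is_left:
        pts[:, 0] *= -1.0                 # mirror left hand onto the right
    pts -= pts[0]                         # translate: wrist -> origin
    # Rotate in the x/y plane so the wrist->middle-MCP vector points along +Y.
    dx, dy = pts[9, 0], pts[9, 1]
    theta = np.arctan2(dx, dy)            # angle of that vector away from +Y
    c, s = np.cos(theta), np.sin(theta)
    rot = np.array([[c, -s], [s, c]])
    pts[:, :2] = pts[:, :2] @ rot.T
    # Scale so the largest landmark-to-landmark distance is ~1.
    diffs = pts[:, None, :] - pts[None, :, :]
    scale = np.linalg.norm(diffs, axis=-1).max()
    return pts / max(scale, 1e-6)
```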
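
In the same spirit, here is a hypothetical sketch of how a per-frame vector and a saved clip could be assembled to match the 1,670-dim layout in the Feature Representation section. `normalize_face`, `normalize_pose`, and `face_frame_extras` are stand-in names for the other normalizers described above, and the `.npz` key name is a guess; none of these are guaranteed to match the repo.

```python
import numpy as np

def frame_features(right, left, face, pose) -> np.ndarray:
    """right/left: (21, 3), face: (468, 3), pose: (33, 4) -> (1670,) float32."""
    parts = [
        normalize_hand(right, is_left=False).ravel(),  # 63
        normalize_hand(left, is_left=True).ravel(),    # 63
        normalize_face(face).ravel(),                  # 1404
        normalize_pose(pose).ravel(),                  # 132
        face_frame_extras(right, left, face),          # 8: wrist + index tip (x, y) per hand
    ]
    return np.concatenate(parts).astype(np.float32)    # 63+63+1404+132+8 = 1670

def save_clip(path: str, frames: list) -> None:
    """frames: list of (1670,) vectors -> compressed (T, 1670) archive."""
    # The 'features' key name is an assumption, not taken from the repo.
    np.savez_compressed(path, features=np.stack(frames))
```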
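
Finally, step 2 of the Quick Start converts variable-length clips into fixed 32-frame sequences. One plausible implementation of that resampling, and only a guess at what `prep_sequence_resampled.py --frames` actually does, is linear interpolation along the time axis:

```python
import numpy as np

def resample_clip(features: np.ndarray, n_frames: int = 32) -> np.ndarray:
    """features: (T, D) -> (n_frames, D), linearly interpolated in time."""
    t_src = np.linspace(0.0, 1.0, num=len(features))
    t_dst = np.linspace(0.0, 1.0, num=n_frames)
    out = np.empty((n_frames, features.shape[1]), dtype=features.dtype)
    for d in range(features.shape[1]):     # interpolate each feature dim
        out[:, d] = np.interp(t_dst, t_src, features[:, d])
    return out
```

Resampling like this keeps the start and end of a gesture aligned across clips regardless of how many frames the webcam delivered during the 0.8-second capture window.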