#!/usr/bin/env python3
# prep_sequence_resampled.py
# Build a fixed-length (N frames) multiclass dataset from
# sequences/<split>/<class>/clip_*.npz
import argparse
import glob
import json
import os
from pathlib import Path

import numpy as np


def resample_sequence(X, N=32):
    """Resample a (T, D) sequence to exactly N frames.

    Frames are linearly interpolated along the frame index, independently
    for each of the D feature columns.

    Args:
        X: 2-D array-like of shape (T, D).
        N: Number of output frames.

    Returns:
        A float32 array of shape (N, D). An empty input (T == 0) yields all
        zeros; a single-frame input (T == 1) is repeated N times.
    """
    X = np.asarray(X)
    T = len(X)
    if T == 0:
        return np.zeros((N, X.shape[1]), np.float32)
    if T == 1:
        # Cast explicitly so this branch returns float32 like the others.
        return np.repeat(X, N, axis=0).astype(np.float32)

    src = np.arange(T, dtype=np.float64)   # original frame positions
    dst = np.linspace(0, T - 1, num=N)     # target positions, ends included
    out = np.empty((N, X.shape[1]), np.float32)
    # np.interp is 1-D only, so interpolate one feature column at a time.
    for d in range(X.shape[1]):
        out[:, d] = np.interp(dst, src, X[:, d])
    return out


def load_classes(seq_root: Path):
    """Return the sorted single-letter class names found under train/.

    Only sub-directories of ``seq_root/"train"`` whose name is exactly one
    character in "A".."Z" are kept.

    Raises:
        SystemExit: If no such directory exists.
    """
    # classes are subdirs in sequences/train/
    classes = sorted(p.name for p in (seq_root / "train").iterdir() if p.is_dir())
    classes = [c for c in classes if len(c) == 1 and "A" <= c <= "Z"]
    if not classes:
        raise SystemExit("No letter classes found in sequences/train/")
    return classes


def collect_split(seq_root: Path, split: str, classes, N):
    """Load every clip of one split and resample each to N frames.

    Args:
        seq_root: Root directory containing the split directories.
        split: Split directory name (``main`` passes "train" and "val").
        classes: Class names; a clip's label is the index of its class.
        N: Frames per clip after resampling.

    Returns:
        ``(X, y)`` where X is float32 of shape (num_clips, N, D) and y is
        int64 of shape (num_clips,). When no clip is found, X has shape
        (0, N, 63) and y has shape (0,).
    """
    Xs, ys = [], []
    for ci, cls in enumerate(classes):
        for f in sorted(glob.glob(str(seq_root / split / cls / "clip_*.npz"))):
            # NpzFile keeps the archive open until closed; use a context
            # manager so file handles are not leaked across many clips.
            with np.load(f) as d:
                Xi = d["X"].astype(np.float32)   # (T,63)
            Xs.append(resample_sequence(Xi, N))  # (N,63)
            ys.append(ci)

    if Xs:
        X = np.stack(Xs, 0)
        y = np.array(ys, np.int64)
    else:
        X = np.zeros((0, N, 63), np.float32)
        y = np.zeros((0,), np.int64)
    return X, y


def main():
    """Parse the CLI arguments, build the train/val arrays and save them.

    Writes train_X.npy, train_y.npy, val_X.npy, val_y.npy, class_names.json
    and meta.json into the output folder, creating it if needed.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--in", dest="in_dir", default="sequences",
                    help="Root sequences/ with train/ and val/")
    ap.add_argument("--out", default="landmarks_seq32",
                    help="Output folder with npy files")
    ap.add_argument("--frames", type=int, default=32,
                    help="Frames per clip after resampling (default: 32)")
    args = ap.parse_args()
    if args.frames < 1:
        ap.error("--frames must be a positive integer")

    seq_root = Path(args.in_dir)
    outdir = Path(args.out)
    outdir.mkdir(parents=True, exist_ok=True)

    classes = load_classes(seq_root)
    trX, trY = collect_split(seq_root, "train", classes, args.frames)
    vaX, vaY = collect_split(seq_root, "val", classes, args.frames)

    np.save(outdir / "train_X.npy", trX)
    np.save(outdir / "train_y.npy", trY)
    np.save(outdir / "val_X.npy", vaX)
    np.save(outdir / "val_y.npy", vaY)

    # Context managers make sure the JSON files are flushed and closed.
    with open(outdir / "class_names.json", "w", encoding="utf-8") as fh:
        json.dump(classes, fh)
    # Record the feature width actually saved instead of a literal; the
    # empty-split fallback in collect_split still reports 63.
    meta = {"frames": args.frames, "input_dim": trX.shape[-1]}
    with open(outdir / "meta.json", "w", encoding="utf-8") as fh:
        json.dump(meta, fh)

    print(f"Saved dataset → {outdir}")
    print(f" train {trX.shape}, val {vaX.shape}, classes={classes}")


if __name__ == "__main__":
    main()