72 lines
2.6 KiB
Python
Executable File
72 lines
2.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# prep_sequence_resampled.py
|
|
# Build a fixed-length (N frames) multiclass dataset from sequences/<split>/<CLASS>/clip_*.npz
|
|
import argparse, os, glob, json
|
|
from pathlib import Path
|
|
import numpy as np
|
|
|
|
def resample_sequence(X, N=32):
    """Resample a frame sequence to exactly ``N`` frames.

    Each feature column is linearly interpolated along the frame index, so
    the first and last output frames coincide with the first and last input
    frames.

    Args:
        X: 2-D array of shape ``(T, D)`` -- ``T`` frames of ``D`` features
            (``D`` is 63 in this pipeline).
        N: Number of frames in the output.

    Returns:
        ``float32`` array of shape ``(N, D)``. An empty input (``T == 0``)
        yields all zeros; a single-frame input is repeated ``N`` times.
    """
    T = len(X)
    if T == 0:
        return np.zeros((N, X.shape[1]), np.float32)
    if T == 1:
        # Cast explicitly: np.repeat keeps the input dtype, but every other
        # branch of this function returns float32.
        return np.repeat(X, N, axis=0).astype(np.float32)

    src = np.arange(T, dtype=np.float64)  # original frame positions 0..T-1
    dst = np.linspace(0, T - 1, num=N)    # N evenly spaced sample positions
    out = np.empty((N, X.shape[1]), np.float32)
    # np.interp is 1-D only, so interpolate one feature column at a time.
    for d in range(X.shape[1]):
        out[:, d] = np.interp(dst, src, X[:, d])
    return out
|
|
|
|
def load_classes(seq_root: Path):
    """Return the sorted list of letter classes found under ``seq_root/train``.

    A class is a sub-directory whose name is a single uppercase ASCII letter
    (``A``-``Z``); any other entry is ignored.

    Args:
        seq_root: Root folder that contains the ``train/`` split.

    Returns:
        Sorted list of class names.

    Raises:
        SystemExit: If ``seq_root/train`` is not a directory, or if it
            contains no letter-class sub-directories.
    """
    train_dir = seq_root / "train"
    # Fail with a readable message instead of a raw FileNotFoundError /
    # NotADirectoryError traceback from iterdir().
    if not train_dir.is_dir():
        raise SystemExit(f"Training directory not found: {train_dir}")

    classes = sorted(
        p.name
        for p in train_dir.iterdir()
        if p.is_dir() and len(p.name) == 1 and "A" <= p.name <= "Z"
    )
    if not classes:
        raise SystemExit("No letter classes found in sequences/train/")
    return classes
|
|
|
|
def collect_split(seq_root: Path, split: str, classes, N):
    """Load every clip of one split and resample each to ``N`` frames.

    Clips are read from ``seq_root/<split>/<class>/clip_*.npz`` in sorted
    filename order; the array stored under key ``"X"`` is cast to float32 and
    passed through ``resample_sequence``.

    Args:
        seq_root: Root folder containing the split directories.
        split: Split sub-directory name (``main`` passes "train" and "val").
        classes: Ordered class names; a clip's label is the index of its
            class in this sequence.
        N: Number of frames per clip after resampling.

    Returns:
        Tuple ``(X, y)``: ``X`` is the float32 stack of resampled clips
        (one ``(N, D)`` slice per clip) and ``y`` the int64 label vector.
        When no clips are found, ``X`` has shape ``(0, N, 63)`` and ``y``
        shape ``(0,)``.
    """
    Xs, ys = [], []
    for ci, cls in enumerate(classes):
        pattern = str(seq_root / split / cls / "clip_*.npz")
        for f in sorted(glob.glob(pattern)):
            # np.load on an .npz returns an NpzFile that keeps the archive
            # open; the context manager closes it so a large dataset does
            # not leak one file handle per clip.
            with np.load(f) as d:
                Xi = d["X"].astype(np.float32)  # (T,63)
            Xs.append(resample_sequence(Xi, N))  # (N,63)
            ys.append(ci)

    if not Xs:
        # Keep the empty result 3-D so downstream shape handling still works.
        return np.zeros((0, N, 63), np.float32), np.zeros((0,), np.int64)
    return np.stack(Xs, 0), np.array(ys, np.int64)
|
|
|
|
def main():
    """Command-line entry point: build the fixed-length dataset on disk.

    Reads clips from ``<in>/train`` and ``<in>/val``, resamples each to
    ``--frames`` frames, and writes ``train_X.npy``, ``train_y.npy``,
    ``val_X.npy``, ``val_y.npy``, ``class_names.json`` and ``meta.json``
    into the output folder (created if missing).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--in", dest="in_dir", default="sequences", help="Root sequences/ with train/ and val/")
    ap.add_argument("--out", default="landmarks_seq32", help="Output folder with npy files")
    ap.add_argument("--frames", type=int, default=32, help="Frames per clip after resampling (default: 32)")
    args = ap.parse_args()
    # A zero or negative frame count cannot produce a usable dataset; reject
    # it up front rather than failing deep inside NumPy.
    if args.frames < 1:
        ap.error("--frames must be a positive integer")

    seq_root = Path(args.in_dir)
    outdir = Path(args.out)
    outdir.mkdir(parents=True, exist_ok=True)

    classes = load_classes(seq_root)
    trX, trY = collect_split(seq_root, "train", classes, args.frames)
    vaX, vaY = collect_split(seq_root, "val", classes, args.frames)

    np.save(outdir / "train_X.npy", trX)
    np.save(outdir / "train_y.npy", trY)
    np.save(outdir / "val_X.npy", vaX)
    np.save(outdir / "val_y.npy", vaY)

    # Use context managers so the JSON files are flushed and closed
    # deterministically instead of relying on garbage collection.
    with open(outdir / "class_names.json", "w", encoding="utf-8") as fh:
        json.dump(classes, fh)
    with open(outdir / "meta.json", "w", encoding="utf-8") as fh:
        json.dump({"frames": args.frames, "input_dim": 63}, fh)

    print(f"Saved dataset → {outdir}")
    print(f" train {trX.shape}, val {vaX.shape}, classes={classes}")
|
|
|
|
# Run the dataset builder only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|