"""Video target preprocessing pipeline for roop-unleashed. Discovers video files in an input folder, runs scene-cut detection, samples frames within each scene, runs face detection + embedding via Windows DML worker, stitches per-frame detections into face tracks, applies quality gates, cuts approved segments out with ffmpeg stream-copy, and writes a report. Output clips have generic UUID names + a sidecar JSON with full provenance. Subcommands: scan list input videos, run ffprobe, write per-video index scenes PySceneDetect AdaptiveDetector per video; write scenes_.json stage write frame queue.json (sampled @ 2 fps within scenes) merge ingest worker results.json into per-video frame_results track IoU+embedding stitching of per-frame detections into tracks score track-level quality gating + segment plan cut ffmpeg -c copy each accepted segment to /.mp4 report HTML preview with thumbnails + identity tags """ from __future__ import annotations import argparse import json import math import re import shutil import subprocess import sys import time import uuid from collections import defaultdict from pathlib import Path import numpy as np DEFAULT_INPUT = Path("/mnt/x/src/vd") DEFAULT_OUTPUT = Path("/mnt/x/src/vd/ct") WORK_DIR = Path("/opt/face-sets/work/video_preprocess") # defaults — first set was strict-portrait; second set loosened for side-profile + segment merging SAMPLE_FPS = 2.0 QUALITY_YAW_MAX = 75.0 # was 25; allow full 3/4 + profile (face-sets handle it) QUALITY_PITCH_MAX = 45.0 # was 30 QUALITY_FACE_MIN = 80 # was 96 QUALITY_BLUR_MIN = 50.0 QUALITY_DET_MIN = 0.5 # was 0.6 TRACK_GATE_FRAC = 0.7 # >=70% of frames in track must pass per-frame gates SEGMENT_MIN_S = 1.0 SEGMENT_MAX_S = 30.0 # was 10 SEGMENT_BRIDGE_S = 3.0 # was 1.0 — within-track pose-failure bridging SEGMENT_MERGE_GAP_S = 2.0 # NEW — across-track merge if same scene + within this gap TRACK_IOU_MIN = 0.3 TRACK_EMB_MIN = 0.5 CACHES = [ Path("/opt/face-sets/work/cache/nl_full.npz"), Path("/opt/face-sets/work/cache/immich_peter.npz"), Path("/opt/face-sets/work/cache/immich_nic.npz"), ] FACESETS_ROOT = Path("/mnt/e/temp_things/fcswp/nl_sorted/facesets_swap_ready") IDENTITY_TAG_THRESHOLD = 0.6 # cosine sim to faceset centroid def wsl_to_win(p: str) -> str: s = str(p) if s.startswith("/mnt/"): return f"{s[5].upper()}:\\{s[7:].replace('/', chr(92))}" return s # ----------------------------- ffprobe / scan ----------------------------- def ffprobe(video: Path) -> dict: cmd = [ "ffprobe", "-v", "error", "-print_format", "json", "-show_format", "-show_streams", str(video), ] r = subprocess.run(cmd, capture_output=True, text=True, timeout=60) if r.returncode != 0: return {"error": r.stderr.strip()} return json.loads(r.stdout) def parse_video_meta(probe: dict) -> dict: if "error" in probe: return {"error": probe["error"]} fmt = probe.get("format", {}) duration = float(fmt.get("duration", 0)) vstream = next((s for s in probe.get("streams", []) if s.get("codec_type") == "video"), None) if vstream is None: return {"error": "no video stream"} fps_str = vstream.get("avg_frame_rate", "0/1") try: num, den = (int(x) for x in fps_str.split("/")) fps = num / den if den else 0.0 except Exception: fps = 0.0 nb_frames = int(vstream.get("nb_frames", 0)) or int(round(duration * fps)) return { "duration_s": duration, "fps": fps, "frames": nb_frames, "width": int(vstream.get("width", 0)), "height": int(vstream.get("height", 0)), "codec": vstream.get("codec_name"), } def cmd_scan(args): in_dir = Path(args.input) out = Path(args.out) out.parent.mkdir(parents=True, exist_ok=True) extensions = {".mp4", ".mov", ".mkv", ".m4v", ".avi", ".webm"} out_root = Path(args.output_dir).resolve() videos = [] for p in sorted(in_dir.iterdir() if not args.recursive else in_dir.rglob("*")): if not p.is_file(): continue if out_root in p.parents or p.resolve() == out_root: continue # never include the output dir if p.suffix.lower() not in extensions: continue videos.append(p) print(f"[scan] {len(videos)} candidate videos", file=sys.stderr) inventory = [] for p in videos: meta = parse_video_meta(ffprobe(p)) meta["path"] = str(p) meta["win_path"] = wsl_to_win(str(p)) meta["size"] = p.stat().st_size inventory.append(meta) if "error" not in meta: print(f" {p.name}: {meta['duration_s']:.1f}s @ {meta['fps']:.1f}fps " f"{meta['width']}x{meta['height']} {meta['codec']}", file=sys.stderr) else: print(f" {p.name}: ERROR {meta['error']}", file=sys.stderr) out.write_text(json.dumps({"input": str(in_dir), "videos": inventory}, indent=2)) print(f"[scan] inventory -> {out}", file=sys.stderr) # ----------------------------- scenes ----------------------------- def cmd_scenes(args): from scenedetect import open_video, SceneManager from scenedetect.detectors import AdaptiveDetector inv = json.loads(Path(args.inventory).read_text()) out_dir = Path(args.out_dir) out_dir.mkdir(parents=True, exist_ok=True) only = set(args.only.split(",")) if args.only else None for v in inv["videos"]: if "error" in v: continue path = Path(v["path"]) if only and path.name not in only: continue out_file = out_dir / (path.stem + ".scenes.json") if out_file.exists() and not args.force: continue print(f"[scenes] {path.name} ...", file=sys.stderr, flush=True) t0 = time.time() try: video = open_video(str(path)) sm = SceneManager() sm.add_detector(AdaptiveDetector(min_scene_len=int(round(v.get("fps", 30) or 30) * 0.5))) sm.detect_scenes(video, show_progress=False) scenes = sm.get_scene_list() entries = [] for s, e in scenes: entries.append({ "start_frame": s.frame_num, "end_frame": e.frame_num, "start_s": s.get_seconds(), "end_s": e.get_seconds(), "duration_s": e.get_seconds() - s.get_seconds(), }) # if no cuts found, treat the whole video as one scene if not entries: entries = [{ "start_frame": 0, "end_frame": v["frames"], "start_s": 0.0, "end_s": v["duration_s"], "duration_s": v["duration_s"], }] out_file.write_text(json.dumps({"video": str(path), "scenes": entries}, indent=2)) print(f" {len(entries)} scenes in {time.time()-t0:.1f}s -> {out_file.name}", file=sys.stderr) except Exception as e: print(f" ERROR: {e}", file=sys.stderr) # ----------------------------- stage ----------------------------- def cmd_stage(args): inv = json.loads(Path(args.inventory).read_text()) scenes_dir = Path(args.scenes_dir) queue = [] qid = 0 sample_every = 1.0 / args.sample_fps for v in inv["videos"]: if "error" in v: continue p = Path(v["path"]) sf = scenes_dir / (p.stem + ".scenes.json") if not sf.exists(): print(f"[warn] no scenes file for {p.name}; skipping", file=sys.stderr) continue scenes = json.loads(sf.read_text()).get("scenes", []) fps = v.get("fps", 30) or 30 for sc in scenes: t = sc["start_s"] while t < sc["end_s"] - 0.01: fidx = int(round(t * fps)) if fidx >= v["frames"]: break queue.append({ "queue_id": f"q{qid:08d}", "video_path": str(p), "win_video_path": v["win_path"], "frame_idx": fidx, "time_s": t, }) qid += 1 t += sample_every out = Path(args.out) out.parent.mkdir(parents=True, exist_ok=True) out.write_text(json.dumps(queue, indent=2)) print(f"[stage] {len(queue)} sampled frames @ {args.sample_fps} fps -> {out}", file=sys.stderr) print(f"[stage] win path for worker: {wsl_to_win(str(out))}", file=sys.stderr) # ----------------------------- merge + track ----------------------------- def cmd_merge(args): """Read worker output and group by video_path. Supports either JSONL (one record per line, the new format) or legacy JSON (results.json with `results` list).""" src_path = Path(args.results) records = [] # try JSONL first (sister .jsonl file or .results passed directly) jsonl_candidate = src_path.with_suffix(".jsonl") if jsonl_candidate.exists(): with open(jsonl_candidate) as f: for line in f: line = line.strip() if line: records.append(json.loads(line)) elif src_path.suffix == ".jsonl": with open(src_path) as f: for line in f: line = line.strip() if line: records.append(json.loads(line)) else: # legacy: monolithic JSON src = json.loads(src_path.read_text()) records = src.get("results", []) by_video: dict[str, list] = {} for r in records: by_video.setdefault(r["video_path"], []).append(r) for v in by_video: by_video[v].sort(key=lambda x: x["frame_idx"]) out = Path(args.out) out.parent.mkdir(parents=True, exist_ok=True) out.write_text(json.dumps({"by_video": by_video}, indent=2)) print(f"[merge] {sum(len(v) for v in by_video.values())} frames across {len(by_video)} videos " f"-> {out}", file=sys.stderr) def _iou(a, b): ax1, ay1, ax2, ay2 = a bx1, by1, bx2, by2 = b ix1 = max(ax1, bx1); iy1 = max(ay1, by1) ix2 = min(ax2, bx2); iy2 = min(ay2, by2) iw = max(ix2 - ix1, 0); ih = max(iy2 - iy1, 0) inter = iw * ih ua = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter return inter / ua if ua > 0 else 0.0 def cmd_track(args): """Stitch per-frame face detections into tracks within each scene of each video. Track = list of (frame_idx, face_idx) where adjacent samples have IoU>=0.3 OR cosine(emb)>=0.5. New face → new track. No cross-scene merging.""" fr = json.loads(Path(args.frames).read_text()) scenes_dir = Path(args.scenes_dir) inv = json.loads(Path(args.inventory).read_text()) inv_by_path = {v["path"]: v for v in inv["videos"]} all_video_tracks: dict[str, list] = {} for video_path, frames in fr["by_video"].items(): v = inv_by_path.get(video_path, {}) sf = scenes_dir / (Path(video_path).stem + ".scenes.json") scenes = json.loads(sf.read_text()).get("scenes", []) if sf.exists() else [] # group frames by scene scene_for_frame = {} for si, sc in enumerate(scenes): for f in frames: if f["frame_idx"] >= sc["start_frame"] and f["frame_idx"] < sc["end_frame"]: scene_for_frame.setdefault(si, []).append(f) video_tracks = [] for si, scene_frames in scene_for_frame.items(): scene_frames.sort(key=lambda x: x["frame_idx"]) # tracks = list of dict{ "members": [(frame_idx, face_idx, face_dict)], "last_bbox", "last_emb" } tracks = [] for f in scene_frames: claimed = set() for face_idx, face in enumerate(f.get("faces", [])): bbox = face["bbox"] emb = np.array(face.get("embedding", []), dtype=np.float32) if face.get("embedding") else None best_track = None best_score = 0.0 for ti, tr in enumerate(tracks): if ti in claimed: continue # staleness in TIME (sample period independent of source fps) last_time = tr["members"][-1][3] if f["time_s"] - last_time > 1.5: # stale if >1.5s gap (3 sample periods @ 2fps) continue score = _iou(tr["last_bbox"], bbox) if emb is not None and tr.get("last_emb") is not None: score = max(score, float(np.dot(tr["last_emb"], emb))) if score > best_score: best_score = score best_track = ti if best_track is not None and best_score >= min(TRACK_IOU_MIN, TRACK_EMB_MIN): tr = tracks[best_track] tr["members"].append((f["frame_idx"], face_idx, face, f["time_s"])) tr["last_bbox"] = bbox if emb is not None: tr["last_emb"] = emb claimed.add(best_track) else: tracks.append({ "members": [(f["frame_idx"], face_idx, face, f["time_s"])], "last_bbox": bbox, "last_emb": emb, }) for tr in tracks: if len(tr["members"]) < 2: continue video_tracks.append({ "scene_idx": si, "members": [ {"frame_idx": m[0], "face_idx": m[1], "time_s": m[3], "face": m[2]} for m in tr["members"] ], }) all_video_tracks[video_path] = video_tracks print(f"[track] {Path(video_path).name}: {sum(len(s) for s in scene_for_frame.values())} frames " f"-> {len(video_tracks)} tracks across {len(scene_for_frame)} scenes", file=sys.stderr) out = Path(args.out) out.parent.mkdir(parents=True, exist_ok=True) out.write_text(json.dumps({"by_video": all_video_tracks}, indent=2)) print(f"[track] -> {out}", file=sys.stderr) # ----------------------------- score (quality gates) ----------------------------- def _track_passes(track, cfg): """Per-frame quality gating; return list of bool (does each member pass) + aggregate stats. cfg: dict with yaw_max, pitch_max, face_min, det_min.""" passes = [] yaws, pitches, sizes, dets = [], [], [], [] for m in track["members"]: f = m["face"] yaw = abs(f.get("pose", [0, 0, 0])[1]) if f.get("pose") else 0 pitch = abs(f.get("pose", [0, 0, 0])[0]) if f.get("pose") else 0 size = f.get("face_short", 0) det = f.get("det_score", 0) ok = (yaw <= cfg["yaw_max"] and pitch <= cfg["pitch_max"] and size >= cfg["face_min"] and det >= cfg["det_min"]) passes.append(ok) yaws.append(yaw); pitches.append(pitch); sizes.append(size); dets.append(det) return passes, { "n": len(passes), "n_pass": sum(passes), "frac_pass": sum(passes) / max(1, len(passes)), "yaw_med": float(np.median(yaws)) if yaws else None, "pitch_med": float(np.median(pitches)) if pitches else None, "size_med": float(np.median(sizes)) if sizes else None, "det_med": float(np.median(dets)) if dets else None, } def _build_segments(track, cfg): """Return list of (start_s, end_s) accepted sub-segments of this track: contiguous runs of passing frames meeting min/max duration. Pose-failure spans <= cfg['bridge_s'] long get bridged across (handles momentary head turns / detection misses).""" passes, stats = _track_passes(track, cfg) members = track["members"] if not members: return [], stats # bridge gaps of failing frames (any width) up to cfg["bridge_s"] seconds bridged = list(passes) n = len(bridged) i = 0 while i < n: if bridged[i]: i += 1 continue # find run of consecutive False starting at i j = i while j < n and not bridged[j]: j += 1 # bridge if surrounded by True on both sides AND time gap <= bridge_s if i > 0 and j < n and bridged[i - 1] and bridged[j]: t_left = members[i - 1]["time_s"] t_right = members[j]["time_s"] if t_right - t_left <= cfg["bridge_s"]: for k in range(i, j): bridged[k] = True i = j # find runs of True runs = [] i = 0 while i < n: if not bridged[i]: i += 1; continue j = i while j + 1 < n and bridged[j + 1]: j += 1 s = members[i]["time_s"] # end is the time of the last passing sample plus one sample-period e = members[j]["time_s"] + 1.0 / max(SAMPLE_FPS, 1e-3) runs.append((s, e)) i = j + 1 return runs, stats def _merge_close_segments(segs_with_meta, merge_gap_s: float): """Merge segments within the same scene that are within merge_gap_s of each other. segs_with_meta: list of dicts with start_s, end_s, scene_idx, track_idx, stats. Returns list of merged dicts (one per merged group). Identity-tag and stats aggregation happen later.""" by_scene: dict[int, list] = {} for s in segs_with_meta: by_scene.setdefault(s["scene_idx"], []).append(s) merged_all = [] for scene_idx, segs in by_scene.items(): segs.sort(key=lambda x: x["start_s"]) cur = None for s in segs: if cur is None: cur = {**s, "track_idxs": [s["track_idx"]], "member_count": s["stats"]["n"], "pass_count": s["stats"]["n_pass"]} continue gap = s["start_s"] - cur["end_s"] if gap <= merge_gap_s: # merge cur["end_s"] = max(cur["end_s"], s["end_s"]) cur["track_idxs"].append(s["track_idx"]) cur["member_count"] += s["stats"]["n"] cur["pass_count"] += s["stats"]["n_pass"] # take the better-quality stats for display if s["stats"]["n_pass"] > cur["stats"]["n_pass"]: cur["stats"] = s["stats"] else: merged_all.append(cur) cur = {**s, "track_idxs": [s["track_idx"]], "member_count": s["stats"]["n"], "pass_count": s["stats"]["n_pass"]} if cur is not None: merged_all.append(cur) return merged_all def _split_long_segments(segs_with_meta, min_s: float, max_s: float): """Apply min/max duration: drop too-short, split too-long evenly.""" out = [] for s in segs_with_meta: dur = s["end_s"] - s["start_s"] if dur < min_s: continue if dur <= max_s: out.append(s) continue n = int(math.ceil(dur / max_s)) chunk = dur / n base_start = s["start_s"] for k in range(n): piece = dict(s) piece["start_s"] = base_start + k * chunk piece["end_s"] = base_start + (k + 1) * chunk out.append(piece) return out # identity tagging via cached arcface centroids def load_caches_index(): rec_index = {} alias_map = {} for c in CACHES: if not c.exists(): continue d = np.load(c, allow_pickle=True) emb = d["embeddings"] meta = json.loads(str(d["meta"])) face_records = [m for m in meta if not m.get("noface")] if "path_aliases" in d.files: paliases = json.loads(str(d["path_aliases"])) for canon, alist in paliases.items(): alias_map.setdefault(canon, canon) for a in alist: alias_map[a] = canon for i, rec in enumerate(face_records): v = emb[i].astype(np.float32) n = float(np.linalg.norm(v)) if n > 0: v = v / n rec_index[(rec["path"], tuple(int(x) for x in rec["bbox"]))] = v alias_map.setdefault(rec["path"], rec["path"]) return rec_index, alias_map def load_faceset_centroids(): """Return dict faceset_name -> normalized centroid embedding.""" rec_index, alias_map = load_caches_index() centroids = {} for fs_dir in sorted(FACESETS_ROOT.iterdir()): if not fs_dir.is_dir() or fs_dir.name.startswith("_"): continue # exclude era splits to avoid double-tagging within a family if re.match(r"^faceset_\d+_(?:\d{4}-\d{2,4}|\d{4}|undated)", fs_dir.name): continue mp = fs_dir / "manifest.json" if not mp.exists(): continue m = json.loads(mp.read_text()) vecs = [] for f in m.get("faces", []): src = f.get("source"); bbox = f.get("bbox") if not src or not bbox: continue canon = alias_map.get(src, src) v = rec_index.get((canon, tuple(int(x) for x in bbox))) if v is None and canon != src: v = rec_index.get((src, tuple(int(x) for x in bbox))) if v is not None: vecs.append(v) if len(vecs) < 3: continue c = np.stack(vecs).mean(axis=0) n = float(np.linalg.norm(c)) if n > 0: c = c / n centroids[fs_dir.name] = c return centroids def _track_centroid(track): embs = [m["face"].get("embedding") for m in track["members"] if m["face"].get("embedding")] if not embs: return None arr = np.array(embs, dtype=np.float32) c = arr.mean(axis=0) n = float(np.linalg.norm(c)) return c / n if n > 0 else c def cmd_score(args): tr = json.loads(Path(args.tracks).read_text()) inv = json.loads(Path(args.inventory).read_text()) inv_by_path = {v["path"]: v for v in inv["videos"]} cfg = { "yaw_max": args.max_yaw, "pitch_max": args.max_pitch, "face_min": args.min_face, "det_min": args.min_det, "bridge_s": args.bridge_gap, } centroids = {} if not args.no_identity: print("[score] loading faceset centroids ...", file=sys.stderr) t0 = time.time() centroids = load_faceset_centroids() print(f"[score] {len(centroids)} active faceset centroids loaded in {time.time()-t0:.1f}s", file=sys.stderr) n_total_tracks = 0 n_accepted_tracks = 0 # collect per-track candidate segments first; merging happens per-video below per_video_candidates: dict[str, list] = {} track_centroids_by_video: dict[str, dict] = {} for video_path, tracks in tr["by_video"].items(): per_video_candidates.setdefault(video_path, []) track_centroids_by_video.setdefault(video_path, {}) for ti, track in enumerate(tracks): n_total_tracks += 1 runs, stats = _build_segments(track, cfg) if stats["frac_pass"] < args.track_gate_frac: continue if not runs: continue n_accepted_tracks += 1 track_centroids_by_video[video_path][ti] = _track_centroid(track) for (s, e) in runs: per_video_candidates[video_path].append({ "video_path": video_path, "track_idx": ti, "scene_idx": track["scene_idx"], "start_s": s, "end_s": e, "stats": stats, }) plan = [] for video_path, segs in per_video_candidates.items(): if not segs: continue # merge across tracks within the same scene if gap <= merge_gap_s merged = _merge_close_segments(segs, args.merge_gap) # apply min/max duration (split long, drop short) merged = _split_long_segments(merged, args.min_dur, args.max_dur) for s in merged: tag = None tag_sim = None # identity from union of contributing tracks' centroids if centroids: track_centroid_list = [ track_centroids_by_video[video_path].get(ti) for ti in s.get("track_idxs", [s.get("track_idx")]) ] track_centroid_list = [c for c in track_centroid_list if c is not None] if track_centroid_list: union = np.stack(track_centroid_list).mean(axis=0) nm = float(np.linalg.norm(union)) if nm > 0: union = union / nm sims = {name: float(np.dot(c, union)) for name, c in centroids.items()} best = max(sims, key=sims.get) if sims[best] >= IDENTITY_TAG_THRESHOLD: tag = best; tag_sim = round(sims[best], 4) plan.append({ "video_path": video_path, "track_idxs": s.get("track_idxs", [s.get("track_idx")]), "scene_idx": s["scene_idx"], "start_s": round(s["start_s"], 3), "end_s": round(s["end_s"], 3), "duration_s": round(s["end_s"] - s["start_s"], 3), "member_count": s.get("member_count", s["stats"]["n"]), "pass_count": s.get("pass_count", s["stats"]["n_pass"]), "stats": s["stats"], "identity_tag": tag, "identity_sim": tag_sim, "uuid": uuid.uuid4().hex[:12], }) plan.sort(key=lambda p: (p["video_path"], p["start_s"])) out = Path(args.out) out.parent.mkdir(parents=True, exist_ok=True) out.write_text(json.dumps({ "thresholds": { "yaw_max": args.max_yaw, "pitch_max": args.max_pitch, "face_min": args.min_face, "blur_min": QUALITY_BLUR_MIN, "det_min": args.min_det, "track_gate_frac": args.track_gate_frac, "bridge_s": args.bridge_gap, "merge_gap_s": args.merge_gap, "min_dur_s": args.min_dur, "max_dur_s": args.max_dur, "identity_tag_threshold": IDENTITY_TAG_THRESHOLD, }, "totals": { "tracks_total": n_total_tracks, "tracks_accepted": n_accepted_tracks, "segments": len(plan), }, "plan": plan, }, indent=2)) print(f"[score] {n_accepted_tracks}/{n_total_tracks} tracks accepted -> {len(plan)} segments " f"-> {out}", file=sys.stderr) # ----------------------------- cut ----------------------------- def cmd_cut(args): plan = json.loads(Path(args.plan).read_text()) out_dir = Path(args.output_dir) out_dir.mkdir(parents=True, exist_ok=True) if args.clean: # remove only existing UUID-named clips + sidecars (12-char hex), keeping any other files import re as _re uuid_pat = _re.compile(r"^[0-9a-f]{12}\.(mp4|json)$") n_removed = 0 for child in out_dir.iterdir(): if child.is_file() and uuid_pat.match(child.name): child.unlink() n_removed += 1 elif child.is_dir() and _re.match(r"^[A-Za-z0-9_.-]+$", child.name): # subfolder of prior runs — clear UUID files inside, then remove if empty for inner in child.iterdir(): if inner.is_file() and uuid_pat.match(inner.name): inner.unlink() n_removed += 1 try: child.rmdir() except OSError: pass if n_removed: print(f"[clean] removed {n_removed} prior UUID clips/sidecars", file=sys.stderr) n_done = 0 n_err = 0 sidecars = [] for seg in plan["plan"]: sub = Path(seg["video_path"]).stem seg_dir = out_dir / sub seg_dir.mkdir(parents=True, exist_ok=True) out_video = seg_dir / f"{seg['uuid']}.mp4" if out_video.exists() and not args.force: continue s = seg["start_s"]; d = seg["duration_s"] cmd = [ "ffmpeg", "-y", "-loglevel", "error", "-ss", f"{s}", "-i", seg["video_path"], "-t", f"{d}", "-c", "copy", "-avoid_negative_ts", "make_zero", str(out_video), ] r = subprocess.run(cmd, capture_output=True, text=True, timeout=120) if r.returncode != 0 or not out_video.exists() or out_video.stat().st_size < 1024: print(f"[cut-err] {seg['uuid']} {seg['video_path']}@{s}+{d}: {r.stderr.strip()[:200]}", file=sys.stderr) n_err += 1 if out_video.exists() and out_video.stat().st_size < 1024: out_video.unlink() continue # sidecar (alongside the clip in the source-named subfolder) sidecar = seg_dir / f"{seg['uuid']}.json" sidecar.write_text(json.dumps({ "uuid": seg["uuid"], "source_video": seg["video_path"], "source_basename": Path(seg["video_path"]).name, "start_s": s, "end_s": seg["end_s"], "duration_s": d, "scene_idx": seg["scene_idx"], "track_idxs": seg.get("track_idxs", [seg.get("track_idx")]), "member_count": seg.get("member_count"), "pass_count": seg.get("pass_count"), "stats": seg["stats"], "identity_tag": seg["identity_tag"], "identity_sim": seg["identity_sim"], "thresholds": plan["thresholds"], }, indent=2)) sidecars.append(sidecar) n_done += 1 print(f"[cut] {n_done} clips written, {n_err} errors -> {out_dir}", file=sys.stderr) # ----------------------------- report ----------------------------- def cmd_report(args): plan = json.loads(Path(args.plan).read_text()) out_dir = Path(args.out) out_dir.mkdir(parents=True, exist_ok=True) thumbs_dir = out_dir / "thumbs" thumbs_dir.mkdir(exist_ok=True) output_dir = Path(args.output_dir) # group by video by_video: dict[str, list] = {} for seg in plan["plan"]: by_video.setdefault(seg["video_path"], []).append(seg) # generate thumbs from each segment's first frame via ffmpeg print(f"[report] generating thumbs for {len(plan['plan'])} segments", file=sys.stderr) for seg in plan["plan"]: thumb = thumbs_dir / f"{seg['uuid']}.jpg" if thumb.exists(): continue s = seg["start_s"] + 0.1 cmd = [ "ffmpeg", "-y", "-loglevel", "error", "-ss", f"{s}", "-i", seg["video_path"], "-frames:v", "1", "-vf", "scale=240:-1", str(thumb), ] subprocess.run(cmd, capture_output=True, timeout=30) # render rows = [] rows.append("

Video target preprocessing — review

") t = plan["totals"] th = plan["thresholds"] rows.append(f"

Tracks accepted: {t['tracks_accepted']}/{t['tracks_total']}; " f"segments emitted: {t['segments']}.
" f"Thresholds: pose ≤{th['yaw_max']}°yaw / {th['pitch_max']}°pitch, " f"face_short ≥{th['face_min']}px, det ≥{th['det_min']}, " f"track-gate ≥{int(100*th['track_gate_frac'])}%, " f"duration {th['min_dur_s']}–{th['max_dur_s']}s. " f"Output dir: {output_dir}

") nav = " · ".join(f"{Path(v).name}" for i, v in enumerate(by_video.keys())) rows.append(f"

{nav}

") for vi, (video_path, segs) in enumerate(by_video.items()): rows.append(f"

") rows.append(f"

{Path(video_path).name} ({len(segs)} segments)

") rows.append("

") for seg in sorted(segs, key=lambda x: x["start_s"]): stats = seg["stats"] tag = seg["identity_tag"] or "" tag_sim = seg["identity_sim"] tag_html = (f"{tag} ({tag_sim:.2f})" if tag else "untagged") sub_name = Path(seg['video_path']).stem rows.append( f"

" f"

" f"{sub_name}/{seg['uuid']}.mp4
" f"{seg['start_s']:.1f}s → {seg['end_s']:.1f}s ({seg['duration_s']:.1f}s)
" f"yaw={stats['yaw_med']:.0f}° size={stats['size_med']:.0f}px det={stats['det_med']:.2f}
" f"pass {stats['n_pass']}/{stats['n']}
" f"{tag_html}" f"

" ) rows.append("

") html = f""" Video targets review {''.join(rows)} """ out_html = out_dir / "index.html" out_html.write_text(html) print(f"[report] -> {out_html}", file=sys.stderr) # ----------------------------- main ----------------------------- def main(): ap = argparse.ArgumentParser() sub = ap.add_subparsers(dest="cmd", required=True) s = sub.add_parser("scan") s.add_argument("--input", default=str(DEFAULT_INPUT)) s.add_argument("--output-dir", default=str(DEFAULT_OUTPUT)) s.add_argument("--recursive", action="store_true") s.add_argument("--out", required=True) s.set_defaults(func=cmd_scan) sc = sub.add_parser("scenes") sc.add_argument("--inventory", required=True) sc.add_argument("--out-dir", required=True) sc.add_argument("--only", default=None, help="comma-separated basenames to limit run") sc.add_argument("--force", action="store_true") sc.set_defaults(func=cmd_scenes) st = sub.add_parser("stage") st.add_argument("--inventory", required=True) st.add_argument("--scenes-dir", required=True) st.add_argument("--sample-fps", type=float, default=SAMPLE_FPS) st.add_argument("--out", required=True) st.set_defaults(func=cmd_stage) m = sub.add_parser("merge") m.add_argument("--results", required=True) m.add_argument("--out", required=True) m.set_defaults(func=cmd_merge) tr = sub.add_parser("track") tr.add_argument("--frames", required=True) tr.add_argument("--scenes-dir", required=True) tr.add_argument("--inventory", required=True) tr.add_argument("--sample-fps", type=float, default=SAMPLE_FPS) tr.add_argument("--out", required=True) tr.set_defaults(func=cmd_track) sc2 = sub.add_parser("score") sc2.add_argument("--tracks", required=True) sc2.add_argument("--inventory", required=True) sc2.add_argument("--out", required=True) sc2.add_argument("--no-identity", action="store_true") sc2.add_argument("--max-yaw", type=float, default=QUALITY_YAW_MAX) sc2.add_argument("--max-pitch", type=float, default=QUALITY_PITCH_MAX) sc2.add_argument("--min-face", type=int, default=QUALITY_FACE_MIN) sc2.add_argument("--min-det", type=float, default=QUALITY_DET_MIN) sc2.add_argument("--track-gate-frac", type=float, default=TRACK_GATE_FRAC) sc2.add_argument("--bridge-gap", type=float, default=SEGMENT_BRIDGE_S, help="bridge within-track failure gaps up to this many seconds") sc2.add_argument("--merge-gap", type=float, default=SEGMENT_MERGE_GAP_S, help="merge across-track segments in same scene if within this gap") sc2.add_argument("--min-dur", type=float, default=SEGMENT_MIN_S) sc2.add_argument("--max-dur", type=float, default=SEGMENT_MAX_S) sc2.set_defaults(func=cmd_score) cu = sub.add_parser("cut") cu.add_argument("--plan", required=True) cu.add_argument("--output-dir", default=str(DEFAULT_OUTPUT)) cu.add_argument("--force", action="store_true") cu.add_argument("--clean", action="store_true", help="remove prior UUID-named clips before cutting (preserves non-UUID files)") cu.set_defaults(func=cmd_cut) rp = sub.add_parser("report") rp.add_argument("--plan", required=True) rp.add_argument("--output-dir", default=str(DEFAULT_OUTPUT)) rp.add_argument("--out", required=True) rp.set_defaults(func=cmd_report) args = ap.parse_args() args.func(args) if __name__ == "__main__": main()