Make per-clip sidecar JSONs opt-in (default off)

Previously, every `video_target_pipeline.py cut` run wrote a <uuid>.json
provenance sidecar alongside each <uuid>.mp4. The same provenance is
already recorded in the per-batch plan.json, so the per-clip sidecars are
redundant unless a downstream tool needs each clip to be self-describing
in isolation.

- video_target_pipeline.py cut: new --write-sidecar flag, default off.
- run_video_pipeline.sh: new SIDECAR env var (default "no"), passes
  --write-sidecar when SIDECAR=yes.
- README + docs/analysis/video-target-preprocessing.md updated.

The 1,984 already-emitted sidecars in /mnt/x/src/vd/ct/ct_src_*/ have
been deleted (1.5 MB in total).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-28 12:44:27 +02:00
parent 998fa79f81
commit 7960dec350
4 changed files with 29 additions and 22 deletions

View File

@@ -722,23 +722,23 @@ def cmd_cut(args):
if out_video.exists() and out_video.stat().st_size < 1024:
out_video.unlink()
continue
# sidecar (alongside the clip in the source-named subfolder)
sidecar = seg_dir / f"{seg['uuid']}.json"
sidecar.write_text(json.dumps({
"uuid": seg["uuid"],
"source_video": seg["video_path"],
"source_basename": Path(seg["video_path"]).name,
"start_s": s, "end_s": seg["end_s"], "duration_s": d,
"scene_idx": seg["scene_idx"],
"track_idxs": seg.get("track_idxs", [seg.get("track_idx")]),
"member_count": seg.get("member_count"),
"pass_count": seg.get("pass_count"),
"stats": seg["stats"],
"identity_tag": seg["identity_tag"],
"identity_sim": seg["identity_sim"],
"thresholds": plan["thresholds"],
}, indent=2))
sidecars.append(sidecar)
if args.write_sidecar:
sidecar = seg_dir / f"{seg['uuid']}.json"
sidecar.write_text(json.dumps({
"uuid": seg["uuid"],
"source_video": seg["video_path"],
"source_basename": Path(seg["video_path"]).name,
"start_s": s, "end_s": seg["end_s"], "duration_s": d,
"scene_idx": seg["scene_idx"],
"track_idxs": seg.get("track_idxs", [seg.get("track_idx")]),
"member_count": seg.get("member_count"),
"pass_count": seg.get("pass_count"),
"stats": seg["stats"],
"identity_tag": seg["identity_tag"],
"identity_sim": seg["identity_sim"],
"thresholds": plan["thresholds"],
}, indent=2))
sidecars.append(sidecar)
n_done += 1
print(f"[cut] {n_done} clips written, {n_err} errors -> {out_dir}", file=sys.stderr)
@@ -901,6 +901,8 @@ def main():
cu.add_argument("--force", action="store_true")
cu.add_argument("--clean", action="store_true",
help="remove prior UUID-named clips before cutting (preserves non-UUID files)")
cu.add_argument("--write-sidecar", action="store_true",
help="emit <uuid>.json provenance sidecar alongside each clip (default off)")
cu.set_defaults(func=cmd_cut)
rp = sub.add_parser("report")