Make per-clip sidecar JSONs opt-in (default off)
Previously every video_target_pipeline cut wrote a <uuid>.json provenance sidecar alongside each <uuid>.mp4. The same provenance is already in the per-batch plan.json, so the per-clip sidecars are redundant unless a downstream tool wants each clip self-describing in isolation.

- video_target_pipeline.py cut: new --write-sidecar flag, default off.
- run_video_pipeline.sh: new SIDECAR env var (default "no"), passes --write-sidecar when SIDECAR=yes.
- README + docs/analysis/video-target-preprocessing.md updated.

The 1,984 already-emitted sidecars in /mnt/x/src/vd/ct/ct_src_*/ have been deleted (1.5 MB).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
# SKIP_PATTERN regex of basenames to exclude (Python `re` syntax). Applied AFTER FILTER_FROM.
|
||||
# MAX_DUR score --max-dur (default 120)
|
||||
# IDENTITY "yes" to enable identity tagging; default "no"
|
||||
# SIDECAR "yes" to emit <uuid>.json provenance sidecars; default "no"
|
||||
|
||||
set -e
|
||||
|
||||
@@ -23,6 +24,7 @@ set -e
|
||||
: ${OUTPUT_DIR:=/mnt/x/src/vd/ct}
|
||||
: ${MAX_DUR:=120}
|
||||
: ${IDENTITY:=no}
|
||||
: ${SIDECAR:=no}
|
||||
|
||||
mkdir -p "$WORK" "$WORK/scenes"
|
||||
|
||||
@@ -37,7 +39,7 @@ log() { echo "[$(ts)] [$PHASE] $*"; }
|
||||
|
||||
PHASE="setup"
|
||||
log "STARTED — host=$(hostname) pid=$$ work=$WORK"
|
||||
log "config: input=$INPUT_DIR output=$OUTPUT_DIR filter_from=${FILTER_FROM:-<none>} skip_pattern=${SKIP_PATTERN:-<none>} max_dur=$MAX_DUR identity=$IDENTITY"
|
||||
log "config: input=$INPUT_DIR output=$OUTPUT_DIR filter_from=${FILTER_FROM:-<none>} skip_pattern=${SKIP_PATTERN:-<none>} max_dur=$MAX_DUR identity=$IDENTITY sidecar=$SIDECAR"
|
||||
|
||||
PHASE="inventory"
|
||||
log "building subset inventory"
|
||||
@@ -110,7 +112,9 @@ log "done in $(($(date +%s)-T0))s"
|
||||
PHASE="cut"
|
||||
log "ffmpeg stream-copy into per-source subfolders (no --clean)"
|
||||
T0=$(date +%s)
|
||||
$PY_WSL $PIPELINE cut --plan "$WORK/plan.json" --output-dir "$OUTPUT_DIR"
|
||||
SIDECAR_FLAG=""
|
||||
if [ "$SIDECAR" = "yes" ]; then SIDECAR_FLAG="--write-sidecar"; fi
|
||||
$PY_WSL $PIPELINE cut --plan "$WORK/plan.json" --output-dir "$OUTPUT_DIR" $SIDECAR_FLAG
|
||||
log "done in $(($(date +%s)-T0))s"
|
||||
|
||||
PHASE="report"
|
||||
|
||||
@@ -722,23 +722,23 @@ def cmd_cut(args):
|
||||
if out_video.exists() and out_video.stat().st_size < 1024:
|
||||
out_video.unlink()
|
||||
continue
|
||||
# sidecar (alongside the clip in the source-named subfolder)
|
||||
sidecar = seg_dir / f"{seg['uuid']}.json"
|
||||
sidecar.write_text(json.dumps({
|
||||
"uuid": seg["uuid"],
|
||||
"source_video": seg["video_path"],
|
||||
"source_basename": Path(seg["video_path"]).name,
|
||||
"start_s": s, "end_s": seg["end_s"], "duration_s": d,
|
||||
"scene_idx": seg["scene_idx"],
|
||||
"track_idxs": seg.get("track_idxs", [seg.get("track_idx")]),
|
||||
"member_count": seg.get("member_count"),
|
||||
"pass_count": seg.get("pass_count"),
|
||||
"stats": seg["stats"],
|
||||
"identity_tag": seg["identity_tag"],
|
||||
"identity_sim": seg["identity_sim"],
|
||||
"thresholds": plan["thresholds"],
|
||||
}, indent=2))
|
||||
sidecars.append(sidecar)
|
||||
if args.write_sidecar:
|
||||
sidecar = seg_dir / f"{seg['uuid']}.json"
|
||||
sidecar.write_text(json.dumps({
|
||||
"uuid": seg["uuid"],
|
||||
"source_video": seg["video_path"],
|
||||
"source_basename": Path(seg["video_path"]).name,
|
||||
"start_s": s, "end_s": seg["end_s"], "duration_s": d,
|
||||
"scene_idx": seg["scene_idx"],
|
||||
"track_idxs": seg.get("track_idxs", [seg.get("track_idx")]),
|
||||
"member_count": seg.get("member_count"),
|
||||
"pass_count": seg.get("pass_count"),
|
||||
"stats": seg["stats"],
|
||||
"identity_tag": seg["identity_tag"],
|
||||
"identity_sim": seg["identity_sim"],
|
||||
"thresholds": plan["thresholds"],
|
||||
}, indent=2))
|
||||
sidecars.append(sidecar)
|
||||
n_done += 1
|
||||
print(f"[cut] {n_done} clips written, {n_err} errors -> {out_dir}", file=sys.stderr)
|
||||
|
||||
@@ -901,6 +901,8 @@ def main():
|
||||
cu.add_argument("--force", action="store_true")
|
||||
cu.add_argument("--clean", action="store_true",
|
||||
help="remove prior UUID-named clips before cutting (preserves non-UUID files)")
|
||||
cu.add_argument("--write-sidecar", action="store_true",
|
||||
help="emit <uuid>.json provenance sidecar alongside each clip (default off)")
|
||||
cu.set_defaults(func=cmd_cut)
|
||||
|
||||
rp = sub.add_parser("report")
|
||||
|
||||
Reference in New Issue
Block a user