Add post-export corpus maintenance pipeline
Adds four new orchestration scripts that operate on an already-built facesets_swap_ready/ to clean it up over time: - filter_occlusions.py + clip_worker.py: CLIP zero-shot mask + sunglasses filter (open_clip ViT-L-14/dfn2b_s39b). WSL stages, Windows DML scores via new C:\clip_dml_venv. Image-level threshold 0.7; faceset-level quarantine at 40% domain dominance. - consolidate_facesets.py: duplicate-identity merger using complete-linkage centroid clustering on cached arcface embeddings. Single-linkage chains catastrophically (60-faceset clusters with min sim < 0); complete-linkage guarantees within-group sim >= edge. - age_extend_001.py: slots newly-added PNGs into existing era buckets of faceset_001 using the same anchor-fragment rule as age_split_001.py (dist <= 0.40 AND |year_delta| <= 5). Anchors not re-centered. - dedup_optimize.py + multiface_worker.py: corpus-wide cleanup with three passes — cross-family SHA256 byte-dedup (preserves intra-family era duplication), within-faceset near-dup at sim >= 0.95, and a multi-face audit (the load-bearing roop invariant). Multi-face worker hits ~19 img/s on AMD Vega — ~7x embed_worker because input is 512x512 crops. Same-day corpus evolution: 311 active / 0 masked / 68 thin / 0 merged → 181 / 51 / 71 / 74; 6,440 → 3,849 active PNGs. All quarantines and prunes preserved on disk (faces/_dropped/, _masked/, _merged/, _thin/) for full reversibility. Master manifest gains masked[], merged[], plus per-run provenance blocks. Three new docs/analysis/ writeups cover model choice, threshold rationale, and per-pass run results. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
221
work/clip_worker.py
Normal file
221
work/clip_worker.py
Normal file
@@ -0,0 +1,221 @@
|
||||
"""Windows / DirectML CLIP worker for occlusion scoring.
|
||||
|
||||
Reads a queue.json staged by /opt/face-sets/work/filter_occlusions.py (WSL side),
|
||||
runs open_clip ViT-L-14 (dfn2b_s39b) on each PNG via torch-directml on the AMD
|
||||
Vega, and writes a scores.json with mask + sunglasses softmax probabilities.
|
||||
|
||||
CLI:
|
||||
py -3.12 clip_worker.py <queue.json> <out_scores.json> [--limit N] [--batch 8]
|
||||
|
||||
queue.json shape: list of objects
|
||||
{"wsl_path": "...", "win_path": "E:\\...\\faceset_NNN\\faces\\NNNN.png",
|
||||
"faceset": "faceset_NNN", "file": "NNNN.png"}
|
||||
|
||||
scores.json shape:
|
||||
{"model": "ViT-L-14/dfn2b_s39b",
|
||||
"logit_scale": 100.0,
|
||||
"prompts": {...},
|
||||
"results": [{"wsl_path": "...", "faceset": "...", "file": "...",
|
||||
"mask": float, "sunglasses": float}],
|
||||
"processed": [wsl_path, ...]}
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
|
||||
# DML emits a verbose UserWarning per attention call -- silence at import time
|
||||
warnings.filterwarnings("ignore", category=UserWarning)
|
||||
|
||||
import torch
|
||||
import torch_directml
|
||||
import open_clip
|
||||
from PIL import Image
|
||||
|
||||
MODEL_NAME = "ViT-L-14"
PRETRAINED = "dfn2b_s39b"

# Zero-shot prompt ensembles, one per occlusion attribute.  Each attribute is
# scored as a 2-way softmax over [sim(img, mean_pos), sim(img, mean_neg)], so
# the "neg" side deliberately includes hard negatives (sunglasses pushed up on
# the forehead, clear prescription glasses, beards) to pull borderline images
# away from the positive class.
# kept in sync with /opt/face-sets/work/filter_occlusions.py PROMPTS
PROMPTS = {
    "mask": {
        "pos": [
            "a photo of a person wearing a surgical face mask",
            "a photo of a person wearing an FFP2 respirator covering mouth and nose",
            "a photo of a person wearing a cloth face mask",
            "a face partially covered by a medical mask",
            "a person whose mouth and nose are hidden by a face mask",
        ],
        "neg": [
            "a photo of a person's face with mouth and nose clearly visible",
            "a clear, unobstructed photo of a face",
            "a photo of a face without any mask or covering",
            "a portrait of a person showing their full face",
            "a photo of a person with a beard and visible mouth",
        ],
    },
    "sunglasses": {
        "pos": [
            "a face with dark sunglasses covering the eyes",
            "a portrait with the eyes hidden behind opaque sunglasses",
            "a person wearing dark sunglasses over their eyes, eyes not visible",
            "a face where the eyes are completely concealed by tinted lenses",
            "a close-up portrait wearing aviator sunglasses on the eyes",
        ],
        "neg": [
            "a portrait with both eyes clearly visible and uncovered",
            "a face with sunglasses pushed up on the forehead, eyes visible below",
            "a face with sunglasses resting on top of the head, eyes visible",
            "a person with sunglasses hanging from their shirt, eyes visible",
            "a face wearing clear prescription eyeglasses with visible eyes",
            "a portrait with no eyewear and visible eyes",
        ],
    },
}

# Persist partial scores roughly every this-many images so an interrupted run
# resumes cheaply (see the flush logic in main()).
FLUSH_EVERY = 100
|
||||
|
||||
|
||||
def load_existing(out_path: Path) -> tuple[dict | None, set[str]]:
    """Load a previously written scores.json for resume support.

    Returns ``(data, processed)`` where *data* is the parsed document (or
    ``None`` when the file is absent or unusable) and *processed* is the set
    of wsl_paths already scored.

    Only the failure modes resume legitimately tolerates are swallowed:
    read errors (OSError), malformed JSON (ValueError, which covers
    json.JSONDecodeError), and a JSON document that is not an object
    (AttributeError from .get).  Any other exception is a real bug and is
    allowed to propagate instead of silently restarting the run.
    """
    if not out_path.exists():
        return None, set()
    try:
        d = json.loads(out_path.read_text())
        processed = set(d.get("processed", []))
        return d, processed
    except (OSError, ValueError, AttributeError) as e:
        print(f"[warn] could not parse existing {out_path}: {e}; starting fresh", file=sys.stderr)
        return None, set()
|
||||
|
||||
|
||||
def save_atomic(out_path: Path, data: dict) -> None:
    """Atomically write *data* as indented JSON to *out_path*.

    Writes to a sibling ``*.tmp.json`` first, flushes and fsyncs it, then
    ``os.replace``s it over the target.  The fsync matters: without it a
    crash shortly after the rename could leave a truncated scores.json —
    the very resume file this worker depends on.
    """
    tmp = out_path.with_suffix(".tmp.json")
    with open(tmp, "w") as f:
        json.dump(data, f, indent=2)
        f.flush()
        os.fsync(f.fileno())  # make the bytes durable before the rename is
    os.replace(tmp, out_path)
|
||||
|
||||
|
||||
@torch.no_grad()
def build_text_features(model, tokenizer, device):
    """Encode the PROMPTS ensembles into one text feature per (attr, side).

    For each side ("pos"/"neg") of each attribute, every prompt is encoded,
    L2-normalized, averaged, and the mean is re-normalized — the standard
    CLIP prompt-ensembling recipe.  Returns ``{attr: (pos_feat, neg_feat)}``.
    """
    def _embed(prompts):
        # Encode a list of prompts and return the normalized mean embedding.
        emb = model.encode_text(tokenizer(prompts).to(device))
        emb = emb / emb.norm(dim=-1, keepdim=True)
        centroid = emb.mean(dim=0)
        return centroid / centroid.norm()

    return {
        attr: (_embed(sides["pos"]), _embed(sides["neg"]))
        for attr, sides in PROMPTS.items()
    }
|
||||
|
||||
|
||||
def main():
    """Score every pending queue entry with CLIP and write scores.json.

    Resume-safe: wsl_paths already present in an existing scores.json are
    skipped, and partial results are flushed atomically throughout the run.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("queue", type=Path)          # queue.json staged by the WSL side
    ap.add_argument("out", type=Path)            # scores.json destination
    ap.add_argument("--limit", type=int, default=None)   # cap entries this run (smoke tests)
    ap.add_argument("--batch", type=int, default=8)      # images per forward pass
    args = ap.parse_args()

    queue = json.loads(args.queue.read_text())
    print(f"[queue] {len(queue)} entries from {args.queue}")

    args.out.parent.mkdir(parents=True, exist_ok=True)
    existing, processed = load_existing(args.out)
    if existing:
        print(f"[resume] {len(processed)} entries already scored")
        results = existing.get("results", [])
    else:
        results = []

    # wsl_path is the stable identity key shared between the WSL stager and
    # this Windows worker, so it is what the processed-set is keyed on.
    pending = [e for e in queue if e["wsl_path"] not in processed]
    if args.limit is not None:
        pending = pending[: args.limit]
    print(f"[pending] {len(pending)} entries to score")

    if not pending:
        print("[done] nothing to do")
        return

    # Model load is deferred until we know there is work: it takes seconds
    # and allocates the DML device.
    device = torch_directml.device()
    print(f"[load] {MODEL_NAME}/{PRETRAINED} on {torch_directml.device_name(0)}")
    t0 = time.time()
    model, _, preprocess = open_clip.create_model_and_transforms(MODEL_NAME, pretrained=PRETRAINED)
    tokenizer = open_clip.get_tokenizer(MODEL_NAME)
    model = model.to(device).eval()
    logit_scale = float(model.logit_scale.exp().detach().cpu())
    print(f"[load] ready in {time.time()-t0:.1f}s logit_scale={logit_scale:.2f}")
    text_feats = build_text_features(model, tokenizer, device)

    def flush():
        # Full-document rewrite via atomic replace; sorted() keeps the
        # processed list diff-stable across runs.
        save_atomic(args.out, {
            "model": f"{MODEL_NAME}/{PRETRAINED}",
            "logit_scale": logit_scale,
            "prompts": PROMPTS,
            "results": results,
            "processed": sorted(processed),
        })

    n_done_this_run = 0
    n_load_err = 0
    last_flush = time.time()
    t_start = time.time()

    for i in range(0, len(pending), args.batch):
        chunk = pending[i:i + args.batch]
        imgs = []
        keep = []   # entries whose image actually loaded, aligned with imgs
        for entry in chunk:
            try:
                img = Image.open(entry["win_path"]).convert("RGB")
                imgs.append(preprocess(img))
                keep.append(entry)
            except Exception as e:
                # Unreadable/corrupt PNG: mark it processed so a resumed run
                # does not retry it, but record nothing in results.
                print(f"[skip] {entry['win_path']}: {e}", file=sys.stderr)
                n_load_err += 1
                processed.add(entry["wsl_path"])
        if not imgs:
            continue
        x = torch.stack(imgs).to(device)
        with torch.no_grad():
            feats = model.encode_image(x)
            feats = feats / feats.norm(dim=-1, keepdim=True)
        scores_per_attr = {}
        for attr, (pos, neg) in text_feats.items():
            # 2-way softmax over [positive, negative] prompt-ensemble
            # similarities; column 0 is P(attribute present).
            sims = torch.stack([feats @ pos, feats @ neg], dim=1) * logit_scale
            probs = sims.softmax(dim=1)[:, 0].detach().cpu().tolist()
            scores_per_attr[attr] = probs
        for j, entry in enumerate(keep):
            results.append({
                "wsl_path": entry["wsl_path"],
                "faceset": entry["faceset"],
                "file": entry["file"],
                "mask": round(scores_per_attr["mask"][j], 4),
                "sunglasses": round(scores_per_attr["sunglasses"][j], 4),
            })
            processed.add(entry["wsl_path"])
            n_done_this_run += 1

        # Flush roughly every FLUSH_EVERY images (the modulo window fires on
        # the first batch past each multiple) or every 30s of wall clock,
        # whichever comes first; progress is only printed on flush.
        if (n_done_this_run % FLUSH_EVERY < args.batch) or (time.time() - last_flush) > 30.0:
            flush()
            last_flush = time.time()
            elapsed = time.time() - t_start
            rate = n_done_this_run / max(0.1, elapsed)
            eta_min = (len(pending) - n_done_this_run) / max(0.1, rate) / 60.0
            print(f"[score] {n_done_this_run}/{len(pending)} "
                  f"rate={rate:.2f} img/s eta={eta_min:.1f}min "
                  f"load_err={n_load_err}", flush=True)

    # Final flush catches the tail batch that missed the periodic window.
    flush()
    elapsed = time.time() - t_start
    print(f"[done] {n_done_this_run} scored, {n_load_err} load errors, "
          f"{elapsed:.1f}s ({n_done_this_run/max(0.1,elapsed):.2f} img/s) -> {args.out}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user