Three-piece workflow that imports a self-hosted Immich library and emits new facesets without disturbing existing identity numbering:

- work/immich_stage.py (WSL): pages /search/metadata, parallel-fetches /faces?id= per asset, prefilters on face_short >= 90 (bbox scaled to original-image coordinates), downloads originals, and sha256-dedups against nl_full.npz and same-run staged files. An 8-worker ThreadPoolExecutor runs the full /faces -> filter -> /original chain per asset; resumable via state.json. The API URL and key come from the IMMICH_URL / IMMICH_API_KEY env vars, the label->UUID map from work/immich/users.json (gitignored).
- work/embed_worker.py (Windows venv at C:\face_embed_venv): runs insightface.FaceAnalysis(buffalo_l) with the DmlExecutionProvider on the AMD Radeon Vega via onnxruntime-directml. Produces a cache file in the same .npz schema as sort_faces.cmd_embed (loadable via load_cache). ~7.5x end-to-end speedup over CPU; embeddings match the CPU path at the measured precision (cosine similarity 1.0000 across 8 sample faces).
- work/cluster_immich.py (WSL): mirrors cluster_osrc.py against an immich_<user>.npz. Builds existing-identity centroids from the canonical faceset_NNN/ dirs in facesets_swap_ready/, drops matches at <= 0.45, clusters the rest at 0.55, applies the refine gates, and hands off to cmd_export_swap. Numbers new facesets past the existing maximum; see the sketch after this message.
- work/finalize_immich.sh: chains queue -> Windows embed -> cache copy -> cluster_immich, with logging.

The 2026-04-26 run on https://fotos.computerliebe.org (Immich v2.7.2) processed 53,842 admin-accessible assets, staged 10,261, embedded 19,462 face records on Vega DML in 64.6 min, matched 8,103 (42%) to existing identities, and emitted 185 new facesets (faceset_026..264, with gaps). facesets_swap_ready/ went from 31 to 216 substantive facesets.

One important caveat surfaced: /search/metadata's userIds filter is silently ignored when the API key is bound to a different user, so this run cannot enumerate other users' libraries from the admin key. A per-user API key would be required for nic.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
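For reference, a minimal sketch of cluster_immich.py's match-then-cluster split, assuming L2-normalized 512-d embeddings and that 0.45 is a cosine distance to the nearest existing-identity centroid (split_known_vs_new is a hypothetical name, not the script's actual API):

    import numpy as np

    def split_known_vs_new(emb, centroids, match_dist=0.45):
        """emb: (N, 512) L2-normalized faces; centroids: (K, 512) identity means.

        Faces within cosine distance <= match_dist of their nearest centroid
        are attributed to that identity; the rest go on to clustering at 0.55.
        """
        c = centroids / np.linalg.norm(centroids, axis=1, keepdims=True)
        dist = 1.0 - emb @ c.T                       # (N, K) cosine distances
        nearest = dist.argmin(axis=1)
        nearest_dist = dist[np.arange(len(emb)), nearest]
        matched = nearest_dist <= match_dist
        return np.where(matched)[0], nearest[matched], np.where(~matched)[0]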
"""Windows / DirectML embed worker.
|
|
|
|
Reads a queue.json staged by /opt/face-sets/work/immich_stage.py (WSL side),
|
|
runs InsightFace's FaceAnalysis on each image with the DmlExecutionProvider
|
|
backed by the AMD Vega, and writes a cache file in the schema produced by
|
|
sort_faces.py:cmd_embed (so it can be merged into nl_full.npz).
|
|
|
|
CLI:
|
|
py -3.12 embed_worker.py <queue.json> <out_cache.npz> [--limit N]
|
|
|
|
The queue.json entry shape (each item) is:
|
|
{
|
|
"asset_id": "...",
|
|
"sha256": "...",
|
|
"wsl_path": "/mnt/x/src/immich/<user>/<rel>", # canonical path stored
|
|
"win_path": "X:\\src\\immich\\<user>\\<rel>", # what we read from
|
|
"size_bytes": int,
|
|
"width": int, "height": int,
|
|
...
|
|
}
|
|
|
|
Per face record matches cmd_embed's schema:
|
|
path, face_idx, det_score, bbox, face_short, face_area, blur, noface=False, hash
|
|
plus landmark_2d_106, landmark_3d_68, pose (FaceAnalysis returns these for
|
|
free and the existing cache already carries them after `enrich`).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import numpy as np
|
|
from PIL import Image, ImageOps
|
|
from insightface.app import FaceAnalysis
|
|
|
|
MODEL_ROOT = r"C:\face_embed_venv\models"
|
|
MIN_DET_SCORE = 0.5
|
|
MIN_FACE_PIX = 40
|
|
FLUSH_EVERY = 50
|
|
|
|
|
|
def load_rgb_bgr(path: Path):
    """Load an image as (rgb, bgr) uint8 arrays, honoring EXIF orientation."""
    try:
        with Image.open(path) as im:
            im = ImageOps.exif_transpose(im)  # apply EXIF rotation before detection
            im = im.convert("RGB")
            rgb = np.array(im)
            bgr = rgb[:, :, ::-1].copy()  # FaceAnalysis expects BGR (OpenCV channel order)
        return rgb, bgr
    except Exception as e:
        print(f"[warn] failed to load {path}: {e}", file=sys.stderr)
        return None, None


def laplacian_variance(gray: np.ndarray) -> float:
    """Blur metric: variance of the 4-neighbour Laplacian over the valid
    interior, computed with numpy slicing (equivalent to convolving with
    [[0,1,0],[1,-4,1],[0,1,0]]) so the worker needs no cv2 dependency."""
    g = gray.astype(np.float32)
    lap = (
        -4.0 * g[1:-1, 1:-1]
        + g[:-2, 1:-1] + g[2:, 1:-1]
        + g[1:-1, :-2] + g[1:-1, 2:]
    )
    return float(lap.var())


def save_cache(out_path: Path, emb_chunks: list, meta: list, processed: set, src_root: str):
    emb = np.concatenate(emb_chunks) if emb_chunks else np.zeros((0, 512), dtype=np.float32)
    # Write to a temp file, then atomically swap it in, so an interrupted run
    # never leaves a truncated cache behind.
    tmp = out_path.with_suffix(".tmp.npz")
    np.savez(
        str(tmp),
        embeddings=emb,
        meta=json.dumps(meta),
        src_root=str(src_root),
        processed_paths=json.dumps(sorted(processed)),
        path_aliases=json.dumps({}),
        schema="v2",
    )
    os.replace(tmp, out_path)


def load_cache_if_exists(out_path: Path):
    """Resume helper. Returns (emb_chunks, meta, processed_set)."""
    if not out_path.exists():
        return [], [], set()
    data = np.load(out_path, allow_pickle=True)
    emb = data["embeddings"]
    meta = json.loads(str(data["meta"]))
    processed = set(json.loads(str(data["processed_paths"])))
    return [emb] if len(emb) else [], list(meta), processed


def main():
    p = argparse.ArgumentParser()
    p.add_argument("queue", type=Path)
    p.add_argument("out", type=Path)
    p.add_argument("--limit", type=int, default=None)
    args = p.parse_args()

    queue = json.loads(args.queue.read_text())
    print(f"queue: {len(queue)} entries from {args.queue}")

    args.out.parent.mkdir(parents=True, exist_ok=True)
    emb_chunks, meta, processed = load_cache_if_exists(args.out)
    n_existing_records = len(meta)
    n_existing_emb = sum(e.shape[0] for e in emb_chunks)
    if n_existing_records:
        print(f"resume: {n_existing_records} existing meta records "
              f"({n_existing_emb} embeddings, {len(processed)} processed paths)")

    print("initializing FaceAnalysis with DmlExecutionProvider")
    # onnxruntime tries providers in order, so this falls back to CPU if
    # DirectML is unavailable.
    app = FaceAnalysis(
        name="buffalo_l",
        root=MODEL_ROOT,
        providers=["DmlExecutionProvider", "CPUExecutionProvider"],
    )
    app.prepare(ctx_id=0, det_size=(640, 640))

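    # Sanity check (illustrative addition, not part of the original worker):
    # confirm this onnxruntime build actually exposes DirectML, so a silent
    # fallback to the CPU provider does not masquerade as a GPU run.
    import onnxruntime as ort
    if "DmlExecutionProvider" not in ort.get_available_providers():
        print("[warn] DmlExecutionProvider unavailable; using CPU", file=sys.stderr)
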
    # Cache records store the canonical WSL-side paths, not the X:\ ones.
    src_root = "/mnt/x/src/immich"

    n_done = 0
    n_face_records_added = 0
    n_noface_added = 0
    n_skipped = 0
    n_load_err = 0
    t0 = time.perf_counter()
    last_flush = time.perf_counter()
    new_emb_chunks: list[np.ndarray] = []
    new_meta: list[dict] = []

    def flush():
        # Fold the pending per-face chunks into the main lists and rewrite the
        # cache on disk (cheap enough at FLUSH_EVERY granularity).
        nonlocal new_emb_chunks, new_meta, last_flush
        if not new_emb_chunks and not new_meta:
            return
        if new_emb_chunks:
            emb_chunks.append(np.concatenate(new_emb_chunks))
            new_emb_chunks = []
        for r in new_meta:
            meta.append(r)
        new_meta = []
        save_cache(args.out, emb_chunks, meta, processed, src_root)
        last_flush = time.perf_counter()

    for i, entry in enumerate(queue):
        if args.limit is not None and n_done >= args.limit:
            break
        wsl_path = entry["wsl_path"]
        win_path = entry["win_path"]
        sha = entry["sha256"]

        if wsl_path in processed:
            n_skipped += 1
            continue

        rgb, bgr = load_rgb_bgr(Path(win_path))
        if bgr is None:
            new_meta.append({
                "path": wsl_path, "face_idx": -1, "noface": True,
                "hash": sha, "error": "load",
            })
            processed.add(wsl_path)
            n_load_err += 1
            n_done += 1
            continue

        faces = app.get(bgr)
        kept_any = False
        for j, f in enumerate(faces):
            if float(f.det_score) < MIN_DET_SCORE:
                continue
            # Clamp the detector bbox to image bounds before measuring.
            x1, y1, x2, y2 = [int(round(v)) for v in f.bbox]
            x1 = max(x1, 0); y1 = max(y1, 0)
            x2 = min(x2, rgb.shape[1]); y2 = min(y2, rgb.shape[0])
            w, h = x2 - x1, y2 - y1
            short = min(w, h)
            if short < MIN_FACE_PIX:
                continue
            crop = rgb[y1:y2, x1:x2]
            if crop.size == 0:
                continue
            gray = crop.mean(axis=2)
            blur = laplacian_variance(gray) if min(gray.shape) > 3 else 0.0

            emb = f.normed_embedding.astype(np.float32)
            new_emb_chunks.append(emb[None, :])
            rec = {
                "path": wsl_path,
                "face_idx": j,
                "det_score": float(f.det_score),
                "bbox": [x1, y1, x2, y2],
                "face_short": int(short),
                "face_area": int(w * h),
                "blur": blur,
                "noface": False,
                "hash": sha,
            }
            # Enrichment-equivalent fields (FaceAnalysis returns these for free)
            if hasattr(f, "landmark_2d_106") and f.landmark_2d_106 is not None:
                rec["landmark_2d_106"] = f.landmark_2d_106.astype(np.float32).tolist()
            if hasattr(f, "landmark_3d_68") and f.landmark_3d_68 is not None:
                rec["landmark_3d_68"] = f.landmark_3d_68.astype(np.float32).tolist()
            if hasattr(f, "pose") and f.pose is not None:
                rec["pose"] = [float(x) for x in f.pose]
            new_meta.append(rec)
            kept_any = True
            n_face_records_added += 1
        if not kept_any:
            new_meta.append({
                "path": wsl_path, "face_idx": -1, "noface": True, "hash": sha,
            })
            n_noface_added += 1

        processed.add(wsl_path)
        n_done += 1

        if (n_done % FLUSH_EVERY == 0) or (time.perf_counter() - last_flush) > 30.0:
            flush()
            elapsed = time.perf_counter() - t0
            rate = n_done / max(0.1, elapsed)
            print(
                f"[embed] done={n_done:5d}/{len(queue)} faces+={n_face_records_added:5d} "
                f"noface+={n_noface_added:4d} skipped={n_skipped:4d} "
                f"load_err={n_load_err:3d} rate={rate:.1f} img/s "
                f"({elapsed:.1f}s elapsed)"
            )

    flush()
    elapsed = time.perf_counter() - t0
    print()
    print("=== embed done ===")
    print(f" done: {n_done}")
    print(f" new face records: {n_face_records_added}")
    print(f" new noface records: {n_noface_added}")
    print(f" skipped (already done): {n_skipped}")
    print(f" load errors: {n_load_err}")
    print(f" elapsed: {elapsed:.1f}s ({n_done/max(0.1,elapsed):.1f} img/s)")
    print(f" cache: {args.out}")


if __name__ == "__main__":
    main()
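Downstream, the emitted .npz reads back the same way load_cache_if_exists above reads it. A minimal standalone sketch of consuming the cache, mirroring the schema="v2" layout save_cache writes (read_embed_cache and the immich_user.npz filename are illustrative, not part of the repo):

    import json
    import numpy as np

    def read_embed_cache(path):
        """Load a cache written by save_cache(); returns (embeddings, meta)."""
        data = np.load(path, allow_pickle=True)
        emb = data["embeddings"]              # (N, 512) float32, one row per kept face
        meta = json.loads(str(data["meta"]))  # per-face dicts plus noface placeholders
        # Embedding rows align, in order, with the meta records that carry a face.
        face_meta = [r for r in meta if not r.get("noface")]
        assert len(face_meta) == emb.shape[0]
        return emb, meta

    emb, meta = read_embed_cache("immich_user.npz")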