release: v0.6.7 — diacritics, apostrophe, and AKA normalization fixes
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
// ==UserScript==
|
||||
// @name UNIT3D Mod Queue Helper — DarkPeers
|
||||
// @namespace https://gitea.computerliebe.org/Procuria/dp-modq-helper
|
||||
// @version 0.6.6
|
||||
// @version 0.6.7
|
||||
// @description Quality-gate checks for DarkPeers — extended moderation rules, title validation, SRRDB & Prowlarr integrations
|
||||
// @author TQG Contributors
|
||||
// @updateURL https://gitea.computerliebe.org/Procuria/dp-modq-helper/raw/branch/main/modq-helper-darkpeers.user.js
|
||||
@@ -3854,6 +3854,16 @@ const RenameDetector = {
|
||||
return name.replace(/\s+AKA\s+.*/i, "").trim();
|
||||
},
|
||||
|
||||
/**
|
||||
* _stripDiacritics — Normalize accented characters to ASCII equivalents.
|
||||
* É→E, â→a, û→u, ñ→n, ö→o, etc. Uses Unicode NFD decomposition
|
||||
* followed by combining-mark removal.
|
||||
*/
|
||||
_stripDiacritics(str) {
|
||||
if (!str) return str;
|
||||
return str.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
|
||||
},
|
||||
|
||||
/** Codec aliases for fuzzy matching */
|
||||
_codecAliases: {
|
||||
"x264": "h.264", "h.264": "h.264", "avc": "h.264",
|
||||
@@ -3908,9 +3918,17 @@ const RenameDetector = {
|
||||
}
|
||||
const titleName = raw.slice(0, titleEnd).replace(/[.\-_]/g, " ").replace(/\s+/g, " ").trim();
|
||||
|
||||
// Extract AKA title from the structural portion (after first token, e.g. after year).
|
||||
// Only matches AKA that appears AFTER titleEnd — AKA in the title portion is
|
||||
// handled by _stripAka on titleName instead.
|
||||
const postTitle = normalized.slice(titleEnd);
|
||||
const akaMatch = postTitle.match(/\bAKA\s+(.+?)(?=[\s.]+\d{3,4}p\b|[\s.]+(?:Blu-?ray|BluRay|WEB[-.]?DL|WEB[-.]?Rip|HDTV|Remux|REMUX|BDRip|BRRip|DVDRip)\b)/i);
|
||||
const akaTitle = akaMatch ? akaMatch[1].replace(/[.\-_]/g, " ").replace(/\s+/g, " ").trim() : null;
|
||||
|
||||
return {
|
||||
raw: name,
|
||||
titleName,
|
||||
akaTitle,
|
||||
elements,
|
||||
positions,
|
||||
group: fieldOf("group"),
|
||||
@@ -3929,8 +3947,9 @@ const RenameDetector = {
|
||||
*/
|
||||
_jaccardWords(a, b) {
|
||||
if (!a || !b) return 0;
|
||||
const wordsA = new Set(a.toLowerCase().split(/\s+/).filter(Boolean));
|
||||
const wordsB = new Set(b.toLowerCase().split(/\s+/).filter(Boolean));
|
||||
const norm = s => this._stripDiacritics(s).toLowerCase().replace(/['\u2019]/g, "");
|
||||
const wordsA = new Set(norm(a).split(/\s+/).filter(Boolean));
|
||||
const wordsB = new Set(norm(b).split(/\s+/).filter(Boolean));
|
||||
if (wordsA.size === 0 && wordsB.size === 0) return 1;
|
||||
if (wordsA.size === 0 || wordsB.size === 0) return 0;
|
||||
let intersection = 0;
|
||||
@@ -3963,7 +3982,15 @@ const RenameDetector = {
|
||||
const fieldScores = {};
|
||||
|
||||
// titleName — Jaccard similarity (strip AKA for fair comparison)
|
||||
const titleScore = this._jaccardWords(this._stripAka(uploadTokens.titleName), this._stripAka(resultTokens.titleName));
|
||||
let titleScore = this._jaccardWords(this._stripAka(uploadTokens.titleName), this._stripAka(resultTokens.titleName));
|
||||
// AKA fallback: try mid-name AKA titles if primary comparison is weak
|
||||
if (titleScore < 0.5) {
|
||||
const akaTitleScores = [
|
||||
resultTokens.akaTitle ? this._jaccardWords(this._stripAka(uploadTokens.titleName), resultTokens.akaTitle) : 0,
|
||||
uploadTokens.akaTitle ? this._jaccardWords(uploadTokens.akaTitle, this._stripAka(resultTokens.titleName)) : 0,
|
||||
];
|
||||
titleScore = Math.max(titleScore, ...akaTitleScores);
|
||||
}
|
||||
fieldScores.titleName = titleScore;
|
||||
weightedSum += titleScore * weights.titleName;
|
||||
totalWeight += weights.titleName;
|
||||
@@ -4107,7 +4134,16 @@ const RenameDetector = {
|
||||
// Compare title name words — strip AKA portion (folders never include it)
|
||||
const titleA = this._stripAka(tokensA.titleName);
|
||||
const titleB = this._stripAka(tokensB.titleName);
|
||||
const titleSim = this._jaccardWords(titleA, titleB);
|
||||
let titleSim = this._jaccardWords(titleA, titleB);
|
||||
// AKA fallback: one side may use the foreign title while the other has the
|
||||
// English title in a mid-name AKA portion (e.g. "La.voie.du.serpent.2024.AKA.Serpents.Path...")
|
||||
if (titleSim < 0.5) {
|
||||
const akaSims = [
|
||||
tokensB.akaTitle ? this._jaccardWords(titleA, tokensB.akaTitle) : 0,
|
||||
tokensA.akaTitle ? this._jaccardWords(tokensA.akaTitle, titleB) : 0,
|
||||
];
|
||||
titleSim = Math.max(titleSim, ...akaSims);
|
||||
}
|
||||
if (titleSim < 0.5) return false;
|
||||
|
||||
// Compare structural fields — must match if both present
|
||||
|
||||
Reference in New Issue
Block a user