User:Surjection/catfix-regrouper.js
Jump to navigation
Jump to search
Note: You may have to bypass your browser’s cache to see the changes. In addition, after saving a sitewide CSS file such as MediaWiki:Common.css, it will take 5-10 minutes before the changes take effect, even if you clear your cache.
- Mozilla / Firefox / Safari: hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Command-R on a Macintosh);
- Konqueror and Chrome: click Reload or press F5;
- Opera: clear the cache in Tools → Preferences;
- Internet Explorer: hold Ctrl while clicking Refresh, or press Ctrl-F5.
- This script lacks a documentation subpage. Please create it.
- Useful links: root page • root page’s subpages • links • redirects • your own
/*
* dependencies: mediawiki.Title
*/
/** TODO: move this data somewhere else */
/** REGROUPER_DATA_LANG data format:
* IMPORTANT! The regrouper already assumes that the language sort keys
* are set up correctly. It does not protect against the same group
* heading being repeated *AT ALL*. Do not enable the regrouper for
* a language until sort keys are set up for it first (so that
* each would-be group is contiguous).
* The data is organized by language code.
* If the code is missing or the value is undefined, the
* regrouper is disabled. This is the default.
* If it is ``true``, default values are used.
* Otherwise it is an object, with the following fields:
* group
* A function which is given the following parameters,
* in this order:
* - page title (minus namespace and language prefixes),
* - language code,
* - script code,
* - namespace number.
* ``this`` will be the language data object (so that you
* can add your own variables, functions, etc.; but if you
* do so, please prefix their name with x_).
* It should return the group, i.e. the heading that the
* page should be categorized under. If the value returned
* is undefined, null or an empty string, its existing group
* will be kept.
* The default is the default grouping function, either of the
* script, or of ``defaultGroup``;
* see below for a description thereof.
* detectScript
* A function which is given the following parameters,
* in this order:
* - page title (minus namespace and language prefixes),
* - language code,
* - (default) script code,
* - namespace number.
* ``this`` will be the language data object (so that you
* can add your own variables, functions, etc.; but if you
* do so, please prefix their name with x_).
* It should return a script code, or undefined to use
* the default script.
* initials
* If the default grouping function is used, this can be a RegEx
* of initial letters/digraphs/etc. which are automatically mapped
* to that respective group. All variants of that same letter
* should be included (e.g. both uppercase and lowercase).
* initialFallback
* Only applies for the default grouping function and if initials
* is defined. If ``true``, the default grouping function falls
* back to the default logic when the initial doesn't match any
* specified in initials; if ``false`` (default), it just returns
* ``undefined`` (i.e. keeps the existing group).
* ignore
* Preceding characters to ignore instead of the default ones.
* This is in RegExp character class syntax.
* ignoreAdd
* Preceding characters to ignore in addition to the default ones.
* This is in RegExp character class syntax.
* unsupported
* If ``true``, unsupported titles are passed directly to
* ``group``. If ``false`` (default), they are ignored, and their
* existing groups are kept.
* dottedDotlessI
* Used in case conversion; ``true`` means the language has both
* dotted and dotless I as separate letters (like in Turkish),
* and ``false`` (default) means it doesn't.
*
* REGROUPER_DATA_SC data format:
* This data is organized by script. Note that these configurations are
* still only considered for languages that have regrouping enabled.
*
* If missing, defaults are used. Else, an object may override:
* group
* A function that works just like group in REGROUPER_DATA_LANG.
* If missing, the default grouping function is used.
* The priority of grouping functions is:
* - group in language data,
* - group in script data,
* - default grouping.
*
* The default grouping function:
* Checks initials and initialFallback.
* If there are initials, it matches them first.
* Initial matching ignores certain preceding characters,
* e.g. hyphens.
* If an initial is found, the matching portion is converted
* to title case with ``titleCase`` and returned.
* Otherwise, we fall back to 'fallback' logic only if
* `initialFallback` is `true`, and else return `undefined`
* to keep the existing group.
* The fallback logic (also used directly when no initials are defined):
* remove preceding characters (e.g. hyphens),
* take the first remaining Unicode character
* (or the first character in general if none would remain
* from the previous step),
* convert it to title case with ``titleCase`` and return it.
*/
// Per-language regrouper configuration, keyed by language code.
// A missing code means the regrouper is disabled for that language.
var REGROUPER_DATA_LANG = {
    "et": {
        initials: /[ŠšZzŽžÕõÄäÖöÜü]/,
    },
    "fi": {
        initials: /[Åå]/,
    },
    "hu": {
        // Digraphs/trigraphs (cs, zs, dz, dzs, gy, ly, ny, ty, sz) are grouped as single letters.
        initials: /(?:[ÁáÉéÍíÓóÖöŐőÚúÜüŰű]|[CcZz][Ss]|[Dd][Zz][Ss]?|[GgLlNnTt][Yy]|[Ss][Zz])/,
    },
};
// Per-script regrouper configuration, keyed by script code. Currently empty.
var REGROUPER_DATA_SC = {
};
/* end of regrouper data */

/**
 * Look up the regrouper configuration for a language.
 *
 * Only own properties of REGROUPER_DATA_LANG are considered, so codes that
 * happen to collide with Object.prototype members (e.g. "constructor") are
 * treated as "not configured" instead of returning an inherited function.
 *
 * @param {string} lang Language code.
 * @returns {Object|undefined} The language data object, or undefined when the
 *   regrouper is disabled for this language. A configured value of ``true``
 *   (shorthand for "enabled with defaults") is returned as a fresh empty
 *   object so that callers can safely attach derived properties to it.
 */
function getRegrouperLanguageData(lang) {
    if (!Object.prototype.hasOwnProperty.call(REGROUPER_DATA_LANG, lang))
        return undefined;
    var data = REGROUPER_DATA_LANG[lang];
    // Assigning properties to the primitive ``true`` throws in strict mode,
    // so hand out an object carrying no overrides instead.
    if (data === true)
        return {};
    return data;
}

/**
 * Look up the regrouper configuration for a script.
 *
 * @param {string} sc Script code.
 * @returns {Object|undefined} The script data object, or undefined if the
 *   script has no own entry in REGROUPER_DATA_SC.
 */
function getRegrouperScriptData(sc) {
    if (!Object.prototype.hasOwnProperty.call(REGROUPER_DATA_SC, sc))
        return undefined;
    return REGROUPER_DATA_SC[sc];
}
/**
 * Upper-case a string, optionally respecting the dotted/dotless I distinction.
 *
 * @param {string} text Text to convert.
 * @param {boolean} [dottedDotlessI] When truthy, every "i" is first mapped to
 *   "İ" so that it is not upper-cased to a plain "I".
 * @returns {string} The upper-cased text.
 */
function safeUpperCase(text, dottedDotlessI) {
    var prepared = dottedDotlessI ? text.replace(/i/g, "İ") : text;
    return prepared.toUpperCase();
}
/**
 * Lower-case a string, optionally respecting the dotted/dotless I distinction.
 *
 * @param {string} text Text to convert.
 * @param {boolean} [dottedDotlessI] When truthy, "I" is lower-cased to the
 *   dotless "ı" and "İ" to a plain "i".
 * @returns {string} The lower-cased text.
 */
function safeLowerCase(text, dottedDotlessI) {
    if (dottedDotlessI) {
        // Map "İ" by hand: the default lower-casing turns it into "i" followed
        // by a combining dot above (U+0307), which is not the plain dotted "i".
        // This must run before the "I" -> "ı" replacement below.
        return text.replace(/İ/g, "i").replace(/I/g, "ı").toLowerCase();
    }
    return text.toLowerCase();
}
/**
 * Convert text to title case: first character upper-cased, the rest lower-cased.
 *
 * ``this`` may be a language data object; its ``dottedDotlessI`` flag selects
 * the dotted/dotless I handling of safeUpperCase/safeLowerCase. When called
 * without such a ``this``, the default case mapping is used.
 *
 * @param {string} text Text to convert.
 * @param {string} lang Language code (currently unused).
 * @param {string} sc Script code (currently unused).
 * @returns {string} The title-cased text ("" for empty input).
 */
function titleCase(text, lang, sc) {
    // Guard ``this`` so that an unbound call from strict-mode code does not throw.
    var dotted = Boolean(this && this.dottedDotlessI);
    // Split on a code point boundary so that a leading character outside the
    // BMP (a surrogate pair) is upper-cased as a whole rather than being cut
    // into two lone surrogates that have no case mapping.
    var firstCodePoint = text.codePointAt(0);
    var head = firstCodePoint === undefined ? "" : String.fromCodePoint(firstCodePoint);
    return safeUpperCase(head, dotted)
        + safeLowerCase(text.substring(head.length), dotted);
}
// Default set of leading characters to ignore when grouping, in RegExp
// character class syntax (here: just the hyphen). Despite the name, this is
// not a list of initials; it is used when the language data has no ``ignore``.
var REGROUPER_INITIALS = "-";
/**
 * The default grouping function.
 *
 * Must be called with ``this`` set to the language data object, which is
 * expected to carry ``_clean_regex`` (and ``_initials_regex`` if ``initials``
 * is set) in addition to the documented fields.
 *
 * @param {string} title Page title without namespace and language prefixes.
 * @param {string} lang Language code.
 * @param {string} sc Script code.
 * @returns {string|undefined} The group heading, or undefined to keep the
 *   existing group.
 */
function defaultGroup(title, lang, sc) {
    if (title.length < 1) return undefined;
    // Strip ignored leading characters (e.g. hyphens) before matching.
    var cleaned = title.replace(this._clean_regex, "");
    if (this.initials) {
        var initialMatch = cleaned.match(this._initials_regex);
        if (initialMatch) {
            // Forward ``this`` so that titleCase sees the language's
            // dottedDotlessI setting.
            return titleCase.call(this, initialMatch[0], lang, sc);
        }
        if (!this.initialFallback) return undefined;
    }
    // If stripping removed everything, fall back to the raw title.
    var remaining = cleaned || title;
    // Take the first Unicode character (code point), not the first UTF-16
    // code unit, so that characters outside the BMP are not cut in half.
    var firstChar = String.fromCodePoint(remaining.codePointAt(0));
    return titleCase.call(this, firstChar, lang, sc);
}
/**
 * Build the DOM skeleton for one category group: a
 * ``div.mw-category-group`` containing an ``h3`` heading and an empty ``ul``.
 *
 * @param {string} groupText Text of the group heading.
 * @returns {Array} A two-element array: the wrapper div and its list element.
 */
function makeGroup(groupText) {
    var wrapper = document.createElement("div");
    wrapper.className = "mw-category-group";

    var heading = document.createElement("h3");
    heading.textContent = groupText;
    wrapper.appendChild(heading);

    var list = document.createElement("ul");
    wrapper.appendChild(list);

    return [wrapper, list];
}
/**
 * Get the text that identifies a category list item.
 *
 * Prefers the text of the first ``a`` or ``span`` descendant; if there is none
 * or its text is empty, falls back to the text of the item itself.
 *
 * @param {Element} el The list item element.
 * @returns {string} The item text.
 */
function getLiText(el) {
    var firstInner = $(el).find("a, span").first();
    var ownText = el.textContent || el.innerText;
    if (firstInner.length > 0) {
        var innerText = firstInner.text();
        if (innerText) {
            return innerText;
        }
    }
    return ownText;
}
/**
 * DOM-ready entry point of the regrouper.
 *
 * On a category page that carries a "catfix" element, and for a language that
 * has regrouper data, this recomputes the group heading of every listed page
 * and rebuilds the "mw-category-group" blocks so that consecutive items with
 * the same computed group share one heading. If anything goes wrong while
 * computing the groups, the page is left untouched.
 */
jQuery(function () {
'use strict';
var catfix;
// Apply only to pages in the Category namespace
// containing an element with the id "catfix".
// Set window.disableCatfixRegrouper to true to prevent this script from running.
if (!(!window.disableCatfixRegrouper
&& mw.config.get('wgNamespaceNumber') == 14
&& (catfix = document.getElementById("catfix"))))
return;
// Get the language name: the text following "CATFIX-" in the class attribute.
var langName = catfix.className.split("CATFIX-")[1];
// From here on, ``catfix`` is the first descendant element of #catfix
// (or an empty placeholder span, so the attribute lookups below cannot throw).
catfix = catfix.getElementsByTagName("*")[0] || document.createElement("span");
// Language code and default script code are read off that element.
var lang = catfix.getAttribute("lang");
var defaultSc = catfix.classList[0] || "None";
// Cache of script data lookups, keyed by script code (filled in getGroup).
var cachedScriptData = {};
if (!lang)
return;
var UNPREFIXED_NAMESPACES = ["", "Talk", "Citations"];
var PREFIXED_NAMESPACES = ["Appendix", "Appendix talk", "Reconstruction", "Reconstruction talk"];
/**
 * Decide whether a page counts as an entry of the current language.
 * @param {string} namespaceName Formatted namespace name of the page.
 * @param {string} pageName Page title without the namespace.
 * @returns {boolean}
 */
function isEntry(namespaceName, pageName) {
// main, Talk, Citations,
// Reconstruction/Appendix (Talk) if it starts with language name and "/"
return UNPREFIXED_NAMESPACES.indexOf(namespaceName) != -1
|| (PREFIXED_NAMESPACES.indexOf(namespaceName) != -1
&& pageName.slice(0, langName.length + 1) == langName + "/");
}
var formattedNamespaces = mw.config.get("wgFormattedNamespaces");
// No (truthy) language data means the regrouper is disabled for this language.
var regrouperData = getRegrouperLanguageData(lang);
if (!regrouperData) return;
// NOTE(review): the data format documents ``true`` as "use defaults". If the
// primitive ``true`` reaches this point, the property assignments below throw
// a TypeError, because this function runs in strict mode.
// set up stuff for the default regrouper
// _clean_regex matches a run of ignored characters at the start of a title.
// The character class is ignoreAdd followed by ignore (or the default set).
regrouperData._clean_regex = new RegExp("^[" + ((regrouperData.ignoreAdd || "") + (regrouperData.ignore || REGROUPER_INITIALS)) + "]+");
// _initials_regex is the configured initials pattern anchored to the start.
if (regrouperData.initials)
regrouperData._initials_regex = new RegExp("^" + regrouperData.initials.source);
var groupFunction = regrouperData.group;
var detectScriptFunction = regrouperData.detectScript;
/**
 * Compute the group heading for one listed page.
 * @param {string} pageTitle Full page title as shown in the listing.
 * @param {string} oldGroup Heading the page is currently listed under.
 * @returns {string} The new group, or oldGroup if the page is not an entry,
 *   is an ignored unsupported title, or the grouping function returned
 *   nothing.
 */
function getGroup(pageTitle, oldGroup) {
var titleobj = new mw.Title(pageTitle);
var namespaceId = titleobj.getNamespaceId();
var namespaceName = formattedNamespaces[namespaceId];
var pageName = titleobj.getMainText();
var formattedTitle = pageName;
var sc = defaultSc;
if (!isEntry(namespaceName, pageName))
return oldGroup;
// verify language prefix if the namespace should have one
// (isEntry has already checked this prefix, so the else branch below is
// only a defensive fallback)
var langPrefix = langName + "/";
if (PREFIXED_NAMESPACES.indexOf(namespaceName) != -1) {
if (formattedTitle.startsWith(langPrefix)) {
formattedTitle = formattedTitle.substring(langPrefix.length);
} else {
return oldGroup;
}
}
// ignore unsupported titles unless the language data requests otherwise
if (formattedTitle.startsWith("Unsupported titles/") && !regrouperData.unsupported)
return oldGroup;
// script detection
if (detectScriptFunction)
sc = detectScriptFunction.call(regrouperData, formattedTitle, lang, sc, namespaceId) || sc;
// Look up (and cache) the script data; an empty object stands for "no overrides".
var scData = cachedScriptData[sc];
if (!scData)
scData = cachedScriptData[sc] = getRegrouperScriptData(sc) || {};
// Priority: language-level group function, then script-level, then the default.
// All of them are called with the language data object as ``this``.
var newGroup;
if (groupFunction)
newGroup = groupFunction.call(regrouperData, formattedTitle, lang, sc, namespaceId);
else if (scData.group)
newGroup = scData.group.call(regrouperData, formattedTitle, lang, sc, namespaceId);
else
newGroup = defaultGroup.call(regrouperData, formattedTitle, lang, sc, namespaceId);
// A falsy result (undefined, null, "") keeps the existing group.
return newGroup || oldGroup;
}
var GROUP_QUERY = "#mw-pages > .mw-content-ltr .mw-category-group";
// Cleared as soon as any heading is missing or any item fails; when false,
// the listing is not modified at all.
var regroupOk = true;
// Maps the text of each list item to its newly computed group.
var regroupData = new Map();
// Process each group in the category listing.
jQuery(GROUP_QUERY)
.each(function () {
// Get the existing group.
var group = $(this).find("h3").first().text();
if (!group) {
// Failed to get group -- something has gone wrong.
regroupOk = false;
return;
}
$(this).find("li")
.each(function () {
try {
var liText = getLiText(this);
var newGroup = getGroup(liText, group);
regroupData.set(liText, newGroup);
} catch (e) {
// Any failure (e.g. an exception thrown while computing the group)
// is logged and disables regrouping for the whole page.
console.error(e);
regroupOk = false;
}
});
});
// Find the existing groups, which we will delete.
var groups = jQuery(GROUP_QUERY);
// Cannot regroup if there are no groups.
if (!groups.length) return;
// The rebuilt groups are appended to the parent of the first existing group.
var parent = groups.first().parent()[0];
if (!parent) return;
var fragment = document.createDocumentFragment();
if (regroupOk) {
var lastGroup, groupUl;
// Walk all items in document order and start a new group block whenever
// the computed group differs from that of the previous item. Groups are
// not merged across non-adjacent items, so sort keys must keep them contiguous.
jQuery(GROUP_QUERY + " li")
.each(function () {
var liText = getLiText(this);
var newGroup = regroupData.get(liText) || "";
if (lastGroup != newGroup) {
var elements = makeGroup(newGroup);
var groupDiv = elements[0];
fragment.appendChild(groupDiv);
groupUl = elements[1];
lastGroup = newGroup;
}
// Moves the item out of its old group into the new list.
groupUl.appendChild(this);
});
// Drop the old group blocks and insert the rebuilt ones.
groups.remove();
parent.appendChild(fragment);
}
});