MediaWiki:Gadget-OrangeLinks.js: difference between revisions

From Wiktionary, the free dictionary
Jump to navigation Jump to search
Content deleted Content added
debugLogger object, split parts of processLink into other functions, clearer variable names
regex patterns for headers to ignore need spaces rather than underscores because link fragment in mw.Title object has spaces
Line 50: Line 50:
// occurs at the beginning of a language name (refer to
// occurs at the beginning of a language name (refer to
// [[Special:PermaLink/51354043]]) and is not a recognized non-language header
// [[Special:PermaLink/51354043]]) and is not a recognized non-language header
var POSHeader = /^(Noun|Proper_noun|Verb|Adjective|Adverb|Participle|Interjection|Particle|Suffix|Prefix|Symbol|Phrase|Prepositional_phrase|Han_[ct]|Hanja|Hanzi|Kanji|Romanization|Gismu|Rafsi|Brivla|Idiom|Acronym|Initialism)/;
var POSHeader = /^(Noun|Proper noun|Verb|Adjective|Adverb|Participle|Interjection|Particle|Suffix|Prefix|Symbol|Phrase|Prepositional phrase|Han [ct]|Hanja|Hanzi|Kanji|Romanization|Gismu|Rafsi|Brivla|Idiom|Acronym|Initialism)/;
var otherHeader = /^(Etymology|Pronunciation|Alternative_forms|See_also|External_links|Derived_terms|Related_terms|Coordinate_terms|Descendants|Inflection|Declension|Conjugation|Usage_notes|Translations|References|Anagrams|Synonyms|Antonyms|Meronyms|Holonyms|Hyponyms|Hypernyms|Quotations|Statistics)/i;
var otherHeader = /^(Etymology|Pronunciation|Alternative forms|See also|External links|Derived terms|Related terms|Coordinate terms|Descendants|Inflection|Declension|Conjugation|Usage notes|Translations|References|Anagrams|Synonyms|Antonyms|Meronyms|Holonyms|Hyponyms|Hypernyms|Quotations|Statistics|Compounds)/i;
function maybeLanguageHeader(header) {
function maybeLanguageHeader(header) {
return header && /^[A-Z'ÀÁÖǀǁǂǃ]/.test(header)
return header && /^[A-Z'ÀÁÖǀǁǂǃ]/.test(header)

Revision as of 22:46, 4 February 2019

/**
 * General idea: for each bluelink with an anchor, the script fetches the categories of the link's target
 * page and checks whether they include a part-of-speech category for the language named by the anchor.
 * If a suitable category is found, the script assumes the anchor is valid. If not, the link is coloured orange.
 * 
 * Previous version by [[User:Yair rand]], based in turn on an idea by [[User:Hippietrail]].
 * This script is a complete rewrite.
 */

// <nowiki>
/* jshint esversion: 5, loopfunc: true, latedef: true, scripturl: true, undef: true, unused: true */
/* globals mw, jQuery */

(function () {
'use strict';

// Shared state for the whole gadget.
var api = new mw.Api();
var fresh = [], // list of titles to get categories of
	queue = {}, // map from title to list of links with that title
	catcache = {}; // map from page name to list of categories

// Regex matching a URL path built from wgArticlePath; the "$1" placeholder of
// the article path becomes the capture group holding the page name.
// mw.RegExp.escape is deprecated in favour of mw.util.escapeRegExp, so prefer
// the latter and only fall back to the old function where it is missing.
var escapedArticlePath = mw.util.escapeRegExp
	? mw.util.escapeRegExp(mw.config.get('wgArticlePath'))
	: mw.RegExp.escape(mw.config.get('wgArticlePath'));
var articlePathRx = new RegExp('^' + escapedArticlePath.replace('\\$1', '(.+)') + '$'); // hax

// Logging is only active when the URL has a "debug" parameter and a console exists.
var isDebug = mw.util.getParamValue('debug') && window.console;
var debugLogger = {};
// add methods that call the console method of the same name if "isDebug" is truthy
[ "error", "info" ].forEach(function (key) {
	debugLogger[key] = function () {
		if (isDebug)
			window.console[key].apply(window.console, arguments);
	};
});

// URL paths that are treated as index.php; for these the page name is read
// from the "title" query parameter instead of the path.
// see [[mw:Manual:index.php]]
// alternative: /^\/(?:w(?:\/(?:index\.php)?)?)?$/
var indexPhp = [ '/', '/w', '/w/', '/w/index.php' ];

/**
 * Work out which page a URL refers to.
 *
 * @param {mw.Uri} url
 * @return {string|undefined} the part of the path captured by articlePathRx,
 *  or the "title" query parameter when the path is one of the index.php
 *  paths, or undefined when neither applies
 * @throws {TypeError} if url is not an mw.Uri
 */
function getPageName(url) {
	if (!(url instanceof mw.Uri))
		throw new TypeError("Expected mw.Uri");

	var articleMatch = articlePathRx.exec(url.path);
	if (articleMatch)
		return articleMatch[1];

	// if path is index.php, page name is value of "title" parameter
	if (indexPhp.indexOf(url.path) !== -1)
		return url.query.title;

	return undefined;
}

// A header can only be a language header if it begins with a character that
// occurs at the beginning of a language name (refer to
// [[Special:PermaLink/51354043]]) and it does not start with one of the
// recognized non-language headers below.
// The patterns use spaces, not underscores, and are only anchored at the
// start, so numbered headers such as "Etymology 2" are matched as well.
var POSHeader = /^(Noun|Proper noun|Verb|Adjective|Adverb|Participle|Interjection|Particle|Suffix|Prefix|Symbol|Phrase|Prepositional phrase|Han [ct]|Hanja|Hanzi|Kanji|Romanization|Gismu|Rafsi|Brivla|Idiom|Acronym|Initialism)/;
var otherHeader = /^(Etymology|Pronunciation|Alternative forms|See also|External links|Derived terms|Related terms|Coordinate terms|Descendants|Inflection|Declension|Conjugation|Usage notes|Translations|References|Anagrams|Synonyms|Antonyms|Meronyms|Holonyms|Hyponyms|Hypernyms|Quotations|Statistics|Compounds)/i;

/**
 * Decide whether a section header could be the name of a language.
 *
 * @param {string} header
 * @return {boolean|*} the falsy header itself when it is empty or missing,
 *  otherwise true if the header may be a language name and false if not
 */
function maybeLanguageHeader(header) {
	// a missing or empty header is handed back unchanged
	if (!header)
		return header;

	if (!/^[A-Z'ÀÁÖǀǁǂǃ]/.test(header))
		return false;

	return !POSHeader.test(header) && !otherHeader.test(header);
}

/**
 * Get the page title if the link points to another page on this wiki.
 *
 * Nothing is returned for TabbedLanguages tabs, for links whose href
 * attribute is empty, a script URL or a local "#..." link, for links whose
 * authority differs from location.hostname, and for links from which
 * getPageName cannot extract a page name.
 *
 * @param {HTMLAnchorElement} link
 * @return {mw.Title|undefined} title built from the page name plus the link's
 *  fragment when it has one
 * @throws {TypeError} if link is not an HTMLAnchorElement
 */
function getTitle(link) {
	if (!(link instanceof HTMLAnchorElement))
		throw new TypeError("Expected HTMLAnchorElement");
	
	// XXX: is there no other way to skip TabbedLanguages tabs?
	// A link that is not attached to a parent has no classList to inspect,
	// so only run the tab check when there is one.
	var parentClasses = link.parentNode && link.parentNode.classList;
	if (parentClasses && (parentClasses.contains("unselectedTab")
	|| parentClasses.contains("selectedTab")))
		return;
	
	// skip empty hrefs, script URLs, local links
	var rawHref = link.getAttribute('href');
	if (!rawHref || rawHref.indexOf("javascript:") === 0 || rawHref[0] === "#")
		return;
	
	var url = new mw.Uri(link.href);
	// check that the domain is correct
	if (url.getAuthority() !== location.hostname)
		return;
	
	var pageName = getPageName(url);
	if (!pageName)
		return;
	
	// Only append a fragment when the URL has one: calling .replace on a
	// missing fragment used to throw for every fragment-less link and relied
	// on the caller's catch-all to recover.
	var titleText = pageName;
	if (url.fragment) {
		// turn ".XX" hex escapes into "%XX" so that decodeURIComponent
		// decodes them (presumably the legacy anchor encoding — TODO confirm)
		titleText += '#' + url.fragment.replace(/\.([0-9A-Fa-f][0-9A-Fa-f])/g, '%$1');
	}
	
	return new mw.Title(decodeURIComponent(titleText));
}

// XXX: save resources by caching something or other between calls to
// processLink on same link?
/**
 * Check one link and mark it with the class "partlynew" when the cached
 * categories of its target page contain no category for the language named
 * by the link's fragment.
 *
 * If the categories of the target page are not cached yet, the link is added
 * to the queue entry of that page, and the page is added to "fresh" the first
 * time it is seen. Links that are not in the main namespace, or whose
 * fragment cannot be a language name, are ignored.
 *
 * A link that was evaluated before and has since been pointed at a target
 * that is fine (or that is no longer eligible) loses a stale "partlynew"
 * class. Links this function has never evaluated are left untouched.
 *
 * Errors are not propagated; they are reported through debugLogger.error.
 *
 * @param {HTMLAnchorElement} link
 */
function processLink(link) {
	try {
		if (link._ORANGED === link.href) // XXX: eliminate this hack
			return;

		// _ORANGED is only ever set below, so this tells us whether a
		// "partlynew" class on the link can have been put there by us
		var evaluatedBefore = link._ORANGED !== undefined;

		var title = getTitle(link);
		
		// check that title is in main namespace and that the fragment could be
		// a language name
		if (!(title && title.getNamespaceId() === 0 && maybeLanguageHeader(title.fragment))) {
			// the href changed to something we do not check: drop our old mark
			if (evaluatedBefore && link.classList.contains('partlynew'))
				link.classList.remove('partlynew');
			return;
		}
		
		var prefixedText = title.getPrefixedText();
		var categories = catcache[prefixedText];
		if (!categories) {
			// categories unknown: remember the link until they have been fetched
			var entry = queue[prefixedText];
			if (!entry) {
				entry = queue[prefixedText] = [];
				fresh.push(prefixedText);
			}
			entry.push(link);
			return;
		}
		
		link._ORANGED = link.href;
		
		// XXX: {{senseid}} hack
		// assumes sense ids start with Basic Latin lowercase letter and
		// that language names do not contain hyphen followed by Basic Latin
		// lowercase letter, which is not always true:
		// search for ': hastemplate:"senseid" insource:/senseid\|[^\|]+\|[^a-z]/'
		// with single quotes removed and see [[Special:PermaLink/51356029]]
		var langName = title.fragment.replace(/-[a-z].*$/, '');
		
		// XXX: discount "German Low German", etc. but allow
		// "Vietnamese Han tu" and "Vietnamese Nom" to count as an
		// existing vi entry
		// XXX: use /^ (lemmas|non-lemma forms)$/ instead?
		var hasLanguageCategory = categories.some(function (category) {
			return category.indexOf(langName) === 0
			&& /^ ([a-z]|Han tu|Nom)/.test(category.substring(langName.length));
		});
		
		if (!hasLanguageCategory) {
			link.classList.add('partlynew');
			debugLogger.info('partlynew ', link, '; cats=', categories,
				' frag=', title.fragment, ' langName=', langName);
		} else if (evaluatedBefore && link.classList.contains('partlynew')) {
			// the link was flagged for an earlier href; the new target is fine
			link.classList.remove('partlynew');
		}
	} catch (e) {
		debugLogger.error(e, 'while processing', link, ' href=', link.href);
	}
}

/**
 * Fetch the categories of all titles in "fresh" (in batches of 25), store
 * them in catcache and then run processLink on the links queued for those
 * titles. "fresh" is emptied in the process.
 *
 * When a batch has been fetched, the queue entry of each title is replaced by
 * an empty list: the title stays known, but the processed link elements are
 * no longer held on to. When a batch fails, its titles are removed from the
 * queue so that a later link to one of them triggers a new request instead of
 * being parked on an entry that will never be fetched.
 *
 * @param {Object} queue map from title to list of links with that title
 */
function processQueue(queue) {
	// Resolves with the given titles once all of their categories (including
	// continuation results) have been stored in catcache.
	function collect(titles) {
		return jQuery.Deferred(function (d) {
			var query = {
				'action': 'query',
				'titles': titles.join('|'),
				'redirects': 1,
				'prop': 'categories',
				'cllimit': 100,
			};
			
			function fail(code, details, xhr) {
				d.reject(code, details, xhr);
			}
			
			function pluckResults(result) {
				debugLogger.info('result', result);
				var pages = result.query.pages, redirects = result.query.redirects;
				for (var pageid in pages) {
					var title = new mw.Title(pages[pageid].title).getPrefixedText();
					var categories = pages[pageid].categories || [];
					var cache = catcache[title];
					if (!cache)
						cache = catcache[title] = [];
					
					// 'Category:'.length === 9
					for (var i = 0; i < categories.length; ++i)
						cache.push(categories[i].title.substring(9));

					debugLogger.info('cache for', title, 'is', cache);
				}
				
				// a redirect shares the category list of its target
				if (redirects) {
					redirects.forEach(function (redirect) {
						var origin = new mw.Title(redirect.from).getPrefixedText();
						var target = new mw.Title(redirect.to).getPrefixedText();
						catcache[origin] = catcache[target];
					});
				}
				
				// more results pending: repeat the query with the continuation
				// parameters merged in
				if (result['continue']) {
					api.get(jQuery.extend(query, result['continue'])).then(pluckResults, fail);
				} else
					d.resolve(titles);
			}

			api.get(query).then(pluckResults, fail);
		});
	}
	
	// Handles one batch; being a function, it gives the failure handler
	// access to the titles of exactly this batch.
	function fetchBatch(titles) {
		collect(titles).then(function () {
			titles.forEach(function (title) {
				var links = queue[title] || [];
				// keep an entry so the title is not requested again, but let
				// go of the links that are processed now
				queue[title] = [];
				links.forEach(processLink);
			});
		}, function (code, details, xhr) {
			// forget the failed titles so that they can be requested again
			titles.forEach(function (title) {
				delete queue[title];
			});
			console.error(code, details, xhr);
		});
	}
	
	while (fresh.length > 0)
		fetchBatch(fresh.splice(0, 25));
}

Array.prototype.forEach.call(document.getElementsByTagName('a'), processLink);
processQueue(queue);

if (mw.util.getParamValue('@orangelinks.no_live')
|| typeof window.MutationObserver !== "function")
	return;

var mo = new MutationObserver(function (events) {
	for (var i = 0; i < events.length; ++i) {
		var event = events[i];
		if (event.type === 'childList') {
			event.addedNodes.forEach(function (addedNode) {
				if (addedNode instanceof Element) {
					if (addedNode.tagName === 'A')
						processLink(addedNode);
					else
						Array.prototype.forEach.call(
							addedNode.getElementsByTagName('a'), processLink);
				}
			});
		} else if (event.type === 'attributes' && event.target.tagName === 'A'
		&& (event.attributeName === 'href' || event.attributeName === 'class'))
			processLink(event.target);
	}
	processQueue(queue);
});

mo.observe(document, {
	attributes: true,
	childList: true,
	subtree: true
});

})();

// </nowiki>