User:Ioaxxere/auto-glossary.js

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Note: You may have to bypass your browser’s cache to see the changes. In addition, after saving a sitewide CSS file such as MediaWiki:Common.css, it will take 5-10 minutes before the changes take effect, even if you clear your cache.

  • Mozilla / Firefox / Safari: hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Command-R on a Macintosh);
  • Konqueror and Chrome: click Reload or press F5;
  • Opera: clear the cache in Tools → Preferences;
  • Internet Explorer: hold Ctrl while clicking Refresh, or press Ctrl-F5.

// The script works with the [[Template:auto-glossary]] template to automatically create a glossary for a particular category via scraping.

// Inject the stylesheet used by generated glossaries: numbering for empty external links
// (the counter is reset in each row's first cell), centred term cells, and compact
// definition lists. The variable is block-scoped so the script does not create a global.
{
	const glossaryStyles = document.createElement("style");
	glossaryStyles.textContent = `
	.mw-parser-output .auto-glossary a[rel~="mw:ExtLink"]:empty::after {
		content: '[' counter(mw-numbered-ext-link) ']';
		counter-increment: mw-numbered-ext-link;
	}
	.mw-parser-output .auto-glossary td:first-child {
		counter-reset: mw-numbered-ext-link; 
		text-align: center;
	}
	.mw-parser-output .auto-glossary ol {
		margin: 0 0 0 2em;
		padding: 0;
	}`;
	document.head.appendChild(glossaryStyles);
}

// Main routine. For every `.auto-glossary` placeholder on the page, in document order:
//   1. list the members of the category named by `data-category` (action API),
//   2. fetch each member's HTML (REST API, at most 200 requests in flight),
//   3. pull the matching definitions out of the `data-language` section,
//   4. replace the placeholder's content with a "Terms / Definitions" table.
// Entries that cannot be loaded or parsed are logged to the console and left out.
// After all glossaries are built, the defaultVisibilityToggles gadget is loaded.
(async () => {
	// Attributes that survive the HTML clean-up of copied definitions.
	const KEPT_ATTRIBUTES = new Set(["class", "href", "style", "title", "rel"]);

	// Titles starting with an unaccented Latin letter are sorted ahead of all others.
	const startsWithLatinLetter = (title) => /^[A-Za-z]/.test(title);

	for (const autoGlossary of document.querySelectorAll(".auto-glossary")) {
		const category = autoGlossary.getAttribute("data-category");
		const lang = autoGlossary.getAttribute("data-language");
		// Only tested for truthiness: an absent or empty attribute means "not inline-only".
		const inlineOnly = autoGlossary.getAttribute("data-inline-only");
		// The placeholder also carries a `data-label` attribute; it is currently unused.

		// Get a list of words in the given category.
		const actionAPI = new mw.Api({ ajax: { headers: { "Api-User-Agent": "Userscript developed by [[User:Ioaxxere]]" } } });
		const restAPI = new mw.Rest({ ajax: { headers: { "Api-User-Agent": "Userscript developed by [[User:Ioaxxere]]" } } });
		const wordlist = [];
		let continueParam = "";
		do {
			const response = await actionAPI.get({
				action: "query",
				list: "categorymembers",
				cmprop: "title",
				cmlimit: "max",
				cmtitle: "Category:" + category,
				cmcontinue: continueParam
			});

			for (const entry of response.query.categorymembers) {
				// NOTE(review): presumably the main, Appendix and Reconstruction namespaces — confirm.
				if (entry.ns === 0 || entry.ns === 100 || entry.ns === 118) {
					wordlist.push(entry.title);
				}
			}

			// Keep paging until the API stops returning a continuation token.
			continueParam = response.continue?.cmcontinue;
		} while (continueParam);

		// Latin-letter titles first (case- and accent-insensitive order); the remaining
		// titles keep the order in which the API returned them.
		wordlist.sort((a, b) => {
			const aIsLatin = startsWithLatinLetter(a);
			const bIsLatin = startsWithLatinLetter(b);
			if (aIsLatin && bIsLatin) return a.localeCompare(b, undefined, { sensitivity: "base" });
			if (aIsLatin) return -1;
			if (bIsLatin) return 1;
			return 0;
		});

		const loadingIndicator = document.createElement("div");
		let entriesLoaded = 0;
		autoGlossary.appendChild(loadingIndicator);

		let ongoingRequests = 0;
		const glossaryQueries = await Promise.all(wordlist.map(async (word) => {
			// Limit concurrent requests, otherwise the browser might run out of memory.
			while (ongoingRequests >= 200) await new Promise(resolve => setTimeout(resolve, 10));
			ongoingRequests++;
			let response = null;
			try {
				response = await restAPI.get("/v1/page/" + encodeURIComponent(word) + "/html");
			} catch {
				// A failed request drops only this entry instead of rejecting the whole glossary.
				console.log("auto-glossary.js could not load: " + word);
			} finally {
				// Always release the slot, otherwise failed requests would starve the queue.
				ongoingRequests--;
			}

			let row = null;

			if (response !== null) {
				try {
					const responseDocument = new DOMParser().parseFromString(response, "text/html");
					const langID = lang.replaceAll(" ", "_");
					const categoryHrefPrefix = "./Category:" + category.replaceAll(" ", "_");
					const linksToCategory = (links) => Array.from(links)
						.some(linkElement => linkElement.getAttribute("href").startsWith(categoryHrefPrefix));

					// Looked up by id rather than by a "#id" selector, so language names that are
					// not valid CSS identifiers still resolve. A missing section throws here and
					// is reported by the catch below.
					const L2_section = responseDocument.getElementById(langID).parentNode;
					let glossaryDefs = Array.from(L2_section.querySelectorAll("li")).filter(li => {
						// Check if the li element contains an inline category.
						if (linksToCategory(li.querySelectorAll(":scope > link[rel=\"mw:PageProp/Category\"], :scope > span > span > link[rel=\"mw:PageProp/Category\"]"))) {
							return true;
						}
						// Check if the headword element contains the category.
						const headwordLine = li.parentElement.previousElementSibling;
						return Boolean(!inlineOnly && headwordLine && headwordLine.matches("p") &&
							linksToCategory(headwordLine.querySelectorAll("link[rel=\"mw:PageProp/Category\"]")));
					});
					// If no definitions found: grab all top-level definitions.
					if (!inlineOnly && !glossaryDefs.length) {
						glossaryDefs = L2_section.querySelectorAll(".mw-parser-output section > ol > li");
					}
					// Insert a row if definitions were found.
					if (glossaryDefs.length) {
						row = document.createElement("tr");
						const entryCell = row.insertCell(0);
						const entryLink = document.createElement("a");
						entryLink.href = "https://en.wiktionary.org/wiki/" + encodeURIComponent(word) + "#" + langID;
						entryLink.textContent = word;
						entryCell.appendChild(entryLink);

						// Appending moves each definition out of the parsed document into the new list.
						const defCell = row.insertCell(1).appendChild(document.createElement("ol"));
						glossaryDefs.forEach(def => defCell.appendChild(def));

						// Convert URLs into absolute URLs.
						const entryURL = "https://en.wiktionary.org/wiki/" + encodeURIComponent(word);
						defCell.querySelectorAll("a[href]").forEach(link => {
							link.href = new URL(link.getAttribute("href"), entryURL).href;
						});

						// Clean HTML.
						defCell.querySelectorAll("link, .previewonly, .maintenance-line").forEach(elem => { elem.remove(); });
						defCell.querySelectorAll("*").forEach(elem => {
							// Iterate over a copy: removing attributes mutates elem.attributes.
							Array.from(elem.attributes).forEach(attr => {
								if (!KEPT_ATTRIBUTES.has(attr.name)) {
									elem.removeAttribute(attr.name);
								}
							});
						});
					}
				} catch {
					console.log("auto-glossary.js could not parse: " + word);
				}
			}

			entriesLoaded++;
			loadingIndicator.textContent = `Loaded entry: ${entriesLoaded}/${wordlist.length}`;
			return row;
		}));

		const glossaryTable = Object.assign(document.createElement("table"), {
			className: "wikitable",
			style: "width: 100%; table-layout: fixed; display: table;",
			innerHTML: "<thead><tr><th style='width:8em'>Terms</th><th>Definitions</th></tr></thead><tbody></tbody>"
		});

		// Rows are null for entries that yielded no definitions or failed; skip those.
		const tableBody = glossaryTable.tBodies[0];
		for (const row of glossaryQueries) {
			if (row) {
				tableBody.appendChild(row);
			}
		}

		autoGlossary.innerHTML = ""; // Clear everything (including the loading indicator).
		autoGlossary.appendChild(glossaryTable);
	}
	// Make sure this block executes after the for loop has completed.
	// Collapse quotes and nyms. TODO: Optimize this so the defaultVisibilityToggles gadget doesn't have to run twice.
	if (document.querySelector(".auto-glossary")) {
		mw.config.set({"wgNamespaceNumber": 0}); // To trick the gadget into thinking we're in mainspace.
		mw.loader.load("/w/index.php?title=MediaWiki:Gadget-defaultVisibilityToggles.js&action=raw&ctype=text/javascript");
	}
})();