Module:category tree/poscatboiler/data/affixes and compounds

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This data submodule defines part of Wiktionary's category structure.

For an introduction to the poscatboiler system and a description of how to add or modify categories, see Module:category tree/poscatboiler/data/documentation.


local labels = {}
local raw_categories = {}
local handlers = {}



-----------------------------------------------------------------------------
--                                                                         --
--                                  LABELS                                 --
--                                                                         --
-----------------------------------------------------------------------------


labels["alliterative compounds"] = {
	description = "{{{langname}}} noun phrases composed of two or more stems that alliterate.",
	parents = {"compound terms", "alliterative phrases"},
}

labels["antonymous compounds"] = {
	description = "{{{langname}}} compounds in which one part is an antonym of the other.",
	parents = {"dvandva compounds", sort = "antonym"},
}

labels["bahuvrihi compounds"] = {
	description = "{{{langname}}} compounds in which the first part (A) modifies the second (B), and whose meaning follows a [[metonymic]] pattern: “<person> having a B that is A.”",
	parents = {"compound terms", "exocentric compounds"},
}

-- Add "compound POS" categories for various parts of speech.

local compound_poses = {
	"adjectives",
	"adverbs",
	"conjunctions",
	"determiners",
	"interjections",
	"nouns",
	"numerals",
	"particles",
	"postpositions",
	"prefixes",
	"prepositions",
	"pronouns",
	"proper nouns",
	"suffixes",
	"verbs",
}

for _, pos in ipairs(compound_poses) do
	labels["compound " .. pos] = {
		description = "{{{langname}}} " .. pos .. " composed of two or more stems.",
		parents = {{name = "compound terms", sort = " "}, pos},
	}
end

labels["compound determinatives"] = {
	description = "{{{langname}}} determinatives composed of two or more stems.",
	parents = {"compound terms", "determiners"},
}

labels["compound terms"] = {
	description = "{{{langname}}} terms composed of two or more stems.",
	umbrella_parents = "Terms by etymology subcategories by language",
	parents = {"terms by etymology"},
}

labels["dvandva compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems whose stems could be connected by an 'and'.",
	parents = {"compound terms"},
}

labels["endocentric compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems, one of which is the [[w:head (linguistics)|head]] of that compound.",
	parents = {"compound terms"},
}

labels["endocentric noun-noun compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems, one of which is the [[w:head (linguistics)|head]] of that compound.",
	breadcrumb = "noun-noun",
	parents = {"endocentric compounds", "compound terms"},
}

labels["endocentric verb-noun compounds"] = {
	description = "{{{langname}}} compounds in which the first element is a verbal stem, the second a nominal stem and the head of the compound.",
	breadcrumb = "verb-noun",
	parents = {"endocentric compounds", "verb-noun compounds"},
}

labels["exocentric compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems, none of which is the [[w:head (linguistics)|head]] of that compound.",
	parents = {"compound terms"},
}

labels["exocentric verb-noun compounds"] = {
	description = "{{{langname}}} compounds in which the first element is a transitive verb, the second a noun functioning as its direct object, and whose referent is the person or thing doing the action.",
	breadcrumb = "verb-noun",
	parents = {"exocentric compounds", "verb-noun compounds"},
}

labels["karmadharaya compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems in which the main stem determines the case endings.",
	parents = {"tatpurusa compounds"},
}

labels["itaretara dvandva compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems whose stems could be connected by an 'and'.",
	breadcrumb = "itaretara",
	parents = {"dvandva compounds"},
}

labels["rhyming compounds"] = {
	description = "{{{langname}}} noun phrases composed of two or more stems that rhyme.",
	parents = {"compound terms", "rhyming phrases"},
}

labels["samahara dvandva compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems whose stems could be connected by an 'and'.",
	breadcrumb = "samahara",
	parents = {"dvandva compounds"},
}

labels["shitgibbons"] = {
	description = "{{{langname}}} terms that consist of a single-syllable [[expletive]] followed by a two-syllable [[trochee]] that serves as a [[nominalizer]] or [[intensifier]].",
	parents = {"endocentric compounds"},
}

labels["synonymous compounds"] = {
	description = "{{{langname}}} compounds in which one part is a synonym of the other.",
	parents = {"dvandva compounds", sort = "synonym"},
}

labels["tatpurusa compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems",
	parents = {"compound terms"},
}

labels["verb-noun compounds"] = {
	description = "{{{langname}}} compounds in which the first element is a transitive verb, the second a noun functioning as its direct object, and whose referent is the person or thing doing the action, or an adjective describing such a person or thing.",
	parents = {"verb-object compounds"},
}

labels["verb-object compounds"] = {
	description = "{{{langname}}} compounds in which the first element is a transitive verb, the second a term (usually but not always a noun) functioning as its (normally direct) object, and whose referent is the person or thing doing the action, or an adjective describing such a person or thing.",
	additional = "Examples in English are {{m|en|pickpocket|lit=someone who picks pockets}} and {{m|en|catch-all|lit=something that catches everything}}.",
	parents = {"compound terms"},
}

labels["verb-verb compounds"] = {
	description = "{{{langname}}} compounds composed of two or more verbs in apposition, often either synonyms or antonyms, and whose referent refers to the result of performing those actions.",
	parents = {"compound terms"},
}

labels["vrddhi derivatives"] = {
	description = "{{{langname}}} terms derived from a Proto-Indo-European root by the process of [[w:vṛddhi|vṛddhi]] derivation.",
	parents = {"terms by etymology"},
}

labels["vrddhi gerundives"] = {
	description = "{{{langname}}} [[gerundive]]s derived from a Proto-Indo-European root by the process of [[w:vṛddhi|vṛddhi]] derivation.",
	parents = {"vrddhi derivatives"},
}

labels["vyadhikarana compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems in which the non-main stem determines the case endings.",
	parents = {"tatpurusa compounds"},
}

for _, fixtype in ipairs({"circumfix", "infix", "interfix", "prefix", "suffix",}) do
	labels["terms by " .. fixtype] = {
		description = "{{{langname}}} terms categorized by their " .. fixtype .. "es.",
		umbrella_parents = "Terms by etymology subcategories by language",
		parents = {{name = "terms by etymology", sort = fixtype}, fixtype .. "es"},
	}
end


-- Add 'umbrella_parents' key if not already present.
for key, data in pairs(labels) do
	-- NOTE: umbrella.parents overrides umbrella_parents if both are given.
	if not data.umbrella_parents then
		data.umbrella_parents = "Types of compound terms by language"
	end
end



-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------


raw_categories["Types of compound terms by language"] = {
	description = "Umbrella categories covering topics related to types of compound terms.",
	additional = "{{{umbrella_meta_msg}}}",
	parents = {
		"Umbrella metacategories",
		{name = "compound terms", is_label = true, sort = " "},
		{name = "Terms by etymology subcategories by language", sort = " "},
	},
}


-----------------------------------------------------------------------------
--                                                                         --
--                                 HANDLERS                                --
--                                                                         --
-----------------------------------------------------------------------------

-----------------------------------------------------------------------------
------------------------------ Affix handlers -------------------------------
-----------------------------------------------------------------------------

table.insert(handlers, function(data)
	local labelpref, pos, affixtype, term_and_id = data.label:match("^(([a-z -]+) ([a-z]+fix)ed with )(.+)$")
	if affixtype then
		local term, id = term_and_id:match("^(.+) %(([^()]+)%)$")
		term = term or term_and_id

		-- Convert term/alt into affixes if needed
		local desc = {
			["prefix"]		= "beginning with the prefix",
			["suffix"]		= "ending with the suffix",
			["circumfix"]	= "bookended with the circumfix",
			["infix"]		= "spliced with the infix",
			["interfix"]	= "joined with the interfix",
			-- Transfixes not supported currently.
			-- ["transfix"]	= "patterned with the transfix",
		}
		if not desc[affixtype] then
			return nil
		end

		-- Here, {LANG} is replaced with the actual language, {TERM_AND_ID} with the actual term (or with 'TERM<id:ID>'
		-- if there is an ID), {BASE} with '<var>base</var>', {BASE2} with '<var>base2</var>', {BASE_EXPL} with an
		-- explanation of what "base" means, {BASE_BASE2_EXPL} with an explanation of what "base" and "base2" mean, and
		-- {POS} with '|pos=POS' if there is a `pos` other than "terms", otherwise a blank string.
		local what_categorizes = {
			["prefix"] = "{{tl|af|{LANG}|{TERM_AND_ID}|{BASE}{POS}}} or {{tl|affix|{LANG}|{TERM_AND_ID}|{BASE}{POS}}} (or the more specific and less-preferred equivalents {{tl|pre}} or {{tl|prefix}}), where {BASE_EXPL}",
			["suffix"] = "{{tl|af|{LANG}|{BASE}|{TERM_AND_ID}{POS}}} or {{tl|affix|{LANG}|{BASE}|{TERM_AND_ID}{POS}}} (or the more specific and less-preferred equivalents {{tl|suf}} or {{tl|suffix}}), where {BASE_EXPL}",
			["circumfix"] = "{{tl|af|{LANG}|{BASE}|{TERM_AND_ID}{POS}}} or {{tl|affix|{LANG}|{BASE}|{TERM_AND_ID}{POS}}}, where {BASE_EXPL}",
			["infix"] = "{{tl|infix|{LANG}|{BASE}|{TERM_AND_ID}{POS}}}, where {BASE_EXPL}",
			["interfix"] = "{{tl|af|{LANG}|{BASE}|{TERM_AND_ID}{POS}|{BASE2}}} or {{tl|affix|{LANG}|{BASE}|{TERM_AND_ID}|{BASE2}{POS}}}, where {BASE_BASE2_EXPL}",
		}

		local params = {
			["alt"] = {},
			["sc"] = {},
			["sort"] = {},
			["tr"] = {},
			["ts"] = {},
		}
		local args = require("Module:parameters").process(data.args, params, nil, "category tree/poscatboiler/data/terms by etymology")
		local sc = data.sc or args.sc and require("Module:scripts").getByCode(args.sc, "sc") or nil
		local m_affix = require("Module:affix")
		-- Call make_affix to add display hyphens if they're not already present.
		local _, display_term, lookup_term = m_affix.make_affix(term, data.lang, sc, affixtype, nil, true)
		local _, display_alt = m_affix.make_affix(args.alt, data.lang, sc, affixtype)
		local _, display_tr = m_affix.make_affix(args.tr, data.lang, require("Module:scripts").getByCode("Latn"), affixtype)
		local _, display_ts = m_affix.make_affix(args.ts, data.lang, require("Module:scripts").getByCode("Latn"), affixtype)
		local m_script_utilities = require("Module:script utilities")
		local id_text = id and " (" .. id .. ")" or ""

		-- Compute parents.
		local parents = {}
		if id then
			if pos == "words" then
				-- don't allow formerly-named categories with "words"
				return nil
			end
			if pos == "terms" then
				table.insert(parents, {name = labelpref .. term, sort = id, args = args})
			else
				table.insert(parents, {name = "terms " .. affixtype .. "ed with " .. term_and_id, sort = id .. ", " .. pos, args = args})
				table.insert(parents, {name = labelpref .. term, sort = id, args = args})
			end
		elseif pos == "words" then
			-- don't allow formerly-named categories with "words"
			return nil
		elseif pos ~= "terms" then
			table.insert(parents, {name = "terms " .. affixtype .. "ed with " .. term, sort = pos, args = args})
		end
		table.insert(parents, {name = "terms by " .. affixtype, sort = (data.lang:makeSortKey((data.lang:makeEntryName(args.sort or term))))})

		-- If other affixes are mapped to this one, show them.
		local additional

		if data.lang then
			local langcode = data.lang:getCode()
			if m_affix.langs_with_lang_specific_data[langcode] then
				local langdata = mw.loadData(m_affix.affix_lang_data_module_prefix .. langcode)
				local variants = {}
				if langdata.affix_mappings then
					for variant, canonical in pairs(langdata.affix_mappings) do
						-- Above, we converted the stripped link term as we received it to the lookup form, so we
						-- can look up the variants that are mapped to this term. Once we find them, map them to
						-- display form.
						local is_variant = false
						if type(canonical) == "table" then
							for _, canonical_v in pairs(canonical) do
								if canonical_v == lookup_term then
									is_variant = true
									break
								end
							end
						else
							is_variant = canonical == lookup_term
						end
						if is_variant then
							local _, display_variant = m_affix.make_affix(variant, data.lang, sc, affixtype)
							table.insert(variants, "{{m|" .. langcode .. "|" .. display_variant .. "}}")
						end
					end
					if #variants > 0 then
						table.sort(variants)
						additional = ("This category also includes terms %sed with %s."):format(affixtype,
							require("Module:table").serialCommaJoin(variants))
					end
				end
			end
		end

		if data.lang then
			local what_categorizes_msg = what_categorizes[affixtype]
			if not what_categorizes_msg then
				error(("Internal error: No what_categorizes value for affixtype '%s' for label '%s', lang '%s'"):
					format(affixtype, data.label, data.lang:getCode()))
			end
			what_categorizes_msg = "Terms are placed in this category using " .. (what_categorizes_msg
				:gsub("{LANG}", data.lang:getCode())
				:gsub("{TERM_AND_ID}", require("Module:string utilities").replacement_escape(
					id and ("%s<id:%s>"):format(term, id) or term))
				:gsub("{POS}", require("Module:string utilities").replacement_escape(
					pos == "terms" and "" or ("|pos=%s"):format(pos)))
				:gsub("{BASE}", "<var>base</var>")
				:gsub("{BASE2}", "<var>base2</var>")
				:gsub("{BASE_EXPL}", "<code><var>base</var></code> is the base lemma from which this term is derived")
				:gsub("{BASE_BASE2_EXPL}", "<code><var>base</var></code> and <code><var>base2</var></code> are the " ..
					"base lemmas from which this term is derived")
			) .. "."
			if additional then
				additional = additional .. "\n\n" .. what_categorizes_msg
			else
				additional = what_categorizes_msg
			end
		end

		return {
			description = "{{{langname}}} " .. pos .. " " .. desc[affixtype] .. " " .. require("Module:links").full_link({
				lang = data.lang, term = display_term, alt = display_alt, sc = sc, id = id, tr = display_tr, ts = display_ts}, "term") .. ".",
			additional = additional,
			breadcrumb = pos == "terms" and m_script_utilities.tag_text(display_alt or display_term, data.lang, sc, "term") .. id_text or pos,
			displaytitle = "{{{langname}}} " .. labelpref .. m_script_utilities.tag_text(term, data.lang, sc, "term") .. id_text,
			parents = parents,
			umbrella = false,
		}, true -- true = args handled
	end
end)


return {LABELS = labels, RAW_CATEGORIES = raw_categories, HANDLERS = handlers}