Module:category tree/poscatboiler/data/terms by grammatical category

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This data submodule defines part of Wiktionary's category structure.

For an introduction to the poscatboiler system and a description of how to add or modify categories, see Module:category tree/poscatboiler/data/documentation.


-- Containers populated throughout this file and used to build the module's
-- return value: label definitions, raw categories and handler functions.
local labels, raw_categories, handlers = {}, {}, {}



-----------------------------------------------------------------------------
--                                                                         --
--                                  LABELS                                 --
--                                                                         --
-----------------------------------------------------------------------------


-- Top-level label of this submodule. Its only parent is the raw
-- "{{{langcat}}}" category, and it sets `umbrella_parents` explicitly, so the
-- default-filling loop near the end of the file leaves that field untouched.
do
	local root = {}
	root.description = "{{{langname}}} terms categorized by their grammatical category."
	root.umbrella_parents = "Fundamental"
	root.parents = {{name = "{{{langcat}}}", raw = true}}
	labels["terms by grammatical category"] = root
end

------- GENDER -------

-- For each gender-bearing part of speech, register the "POS by gender"
-- grouping label plus one label per gender (or gender-related property).
for _, pos in ipairs({"nouns", "pronouns", "proper nouns", "suffixes"}) do
	local intro = "{{{langname}}} " .. pos
	local by_gender = pos .. " by gender"

	-- Registers one label whose description starts with "<langname> <pos>".
	-- A fresh `parents` table is passed in on every call so that no two
	-- labels share a table.
	local function add(label, breadcrumb, rest, parents)
		labels[label] = {
			description = intro .. rest,
			breadcrumb = breadcrumb,
			parents = parents,
		}
	end

	add(by_gender, "by gender",
		" organized by the gender they belong to.",
		{{name = pos, sort = "gender"}})

	add(pos .. " with irregular gender", "with irregular gender",
		" whose ending is not typical for " .. pos .. " of their gender.",
		{{name = "irregular " .. pos, sort = "irregular gender"}})

	add(pos .. " with multiple genders", "with multiple genders",
		" that belong to more than one gender.",
		{{name = by_gender, sort = "multiple genders"}})

	add("common-gender " .. pos, "common-gender",
		" of {{glossary|common gender}}, i.e. belonging to a gender category that combines the function of {{glossary|masculine}} and {{glossary|feminine}} and is opposed to the {{glossary|neuter}} gender.",
		{by_gender})

	add("feminine " .. pos, "feminine",
		" of {{glossary|feminine}} gender, i.e. belonging to a gender category that contains (among other things) female beings.",
		{by_gender})

	add("masculine " .. pos, "masculine",
		" of {{glossary|masculine}} gender, i.e. belonging to a gender category that contains (among other things) male beings.",
		{by_gender})

	add("masculine and feminine " .. pos .. " by sense", "masculine and feminine by sense",
		" that may be either {{glossary|masculine}} or {{glossary|feminine}} depending on whether they refer to male or female beings.",
		{by_gender})

	add("neuter " .. pos, "neuter",
		" of {{glossary|neuter}} gender, i.e. belonging to a gender category that does not usually contain male or female beings.",
		{by_gender})

	-- Gender-neutral terms additionally sit under the cross-POS
	-- "gender-neutral terms" label.
	add("gender-neutral " .. pos, "gender-neutral",
		" that are applicable to all people, independent of gender.",
		{by_gender, "gender-neutral terms"})
end

-- "epicene POS": one form shared by masculine and feminine. Filed under the
-- POS's inflection-type grouping rather than under "POS by gender".
for _, pos in ipairs {"adjectives", "suffixes"} do
	local entry = {breadcrumb = "epicene"}
	entry.description = "{{{langname}}} " .. pos
		.. " whose form is the same for both {{glossary|masculine}} and {{glossary|feminine}}, in languages whose "
		.. pos .. " normally distinguish gender."
	entry.parents = {pos .. " by inflection type"}
	labels["epicene " .. pos] = entry
end

------- NOUN CLASSES -------

-- Grouping label for noun classes; the "class N nouns" handler further down
-- uses it as the parent of every generated class category.
do
	local entry = {}
	entry.description = "{{{langname}}} nouns organized by the class they belong to."
	entry.breadcrumb = "by class"
	entry.parents = {{name = "nouns", sort = "class"}}
	labels["nouns by class"] = entry
end


------- ANIMACY -------

-- Animacy labels: "animate POS", "inanimate POS" and "POS with multiple
-- animacies", each parented directly to the part of speech.
for _, pos in ipairs {"nouns", "suffixes", "verbs"} do
	local intro = "{{{langname}}} " .. pos

	labels["animate " .. pos] = {
		breadcrumb = "animate",
		description = intro .. " that refer to humans or animals.",
		parents = {pos},
	}

	labels["inanimate " .. pos] = {
		breadcrumb = "inanimate",
		description = intro .. " that refer to inanimate objects (not humans or animals).",
		parents = {pos},
	}

	local multiple = "multiple animacies"
	labels[pos .. " with " .. multiple] = {
		breadcrumb = "with " .. multiple,
		description = intro .. " that belong to more than one animacy.",
		parents = {{name = pos, sort = multiple}},
	}
end

-- Finer-grained animacy labels for nouns and suffixes.
for _, pos in ipairs {"nouns", "suffixes"} do
	local lang_prefix = "{{{langname}}} "
	local animate = "animate " .. pos

	-- Intended particularly for languages that have grammatical distinctions
	-- related to animals, such as Ukrainian.
	labels["animal " .. pos] = {
		breadcrumb = "animal",
		description = lang_prefix .. pos .. " that refer to animals.",
		parents = {animate},
	}

	-- Intended particularly for languages that have grammatical distinctions
	-- related to men, such as Polish.
	labels["nonvirile " .. pos] = {
		breadcrumb = "nonvirile",
		description = lang_prefix .. "plural " .. pos .. " that refer to a group without male humans.",
		parents = {pos, "pluralia tantum"},
	}

	labels["personal " .. pos] = {
		breadcrumb = "personal",
		description = lang_prefix .. pos .. " that refer to humans.",
		parents = {animate},
	}

	-- Intended particularly for languages that have grammatical distinctions
	-- related to men, such as Polish.
	labels["virile " .. pos] = {
		breadcrumb = "virile",
		description = lang_prefix .. "plural " .. pos .. " that refer to a group with at least one male human.",
		parents = {pos, "pluralia tantum"},
	}
end

------- INFLECTED PARTS OF SPEECH -------

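-- Add "POS by inflection type", "irregular POS", "defective POS",
-- "suppletive POS", "POS by tone" and "POS by vowel harmony" categories for
-- (potentially) inflected parts of speech, plus "indeclinable POS", "POS with
-- multiple declensions" and "POS with multiple plurals" categories for every
-- such part of speech except verbs and adverbs.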

-- Parts of speech that can (potentially) inflect. Each entry gets the full
-- family of inflection-related labels generated by the loop below.
local inflected_poses = {
	"adjectives", "adverbs", "determiners", "nouns", "numerals",
	"participles", "pronouns", "proper nouns", "suffixes", "verbs",
}

for _, pos in ipairs(inflected_poses) do
	local intro = "{{{langname}}} " .. pos
	local by_inflection = pos .. " by inflection type"
	local irregular = "irregular " .. pos

	labels[by_inflection] = {
		breadcrumb = "by inflection type",
		description = intro .. " organized by the type of inflection they follow.",
		parents = {{name = pos, sort = "inflection"}},
	}

	labels[irregular] = {
		breadcrumb = "irregular",
		description = intro .. " that follow non-standard patterns of inflection.",
		parents = {by_inflection},
	}

	labels["defective " .. pos] = {
		breadcrumb = "defective",
		description = intro .. " that lack one or more forms in their inflections.",
		parents = {pos, irregular},
	}

	-- The only label in this loop with its own umbrella parent; all others
	-- receive the default one assigned near the end of the file.
	labels["suppletive " .. pos] = {
		breadcrumb = "suppletive",
		description = intro .. " that have inflected forms from different roots.",
		umbrella_parents = "Suppletion subcategories by language",
		parents = {irregular},
	}

	-- Declension-related labels are not generated for verbs and adverbs.
	if not (pos == "verbs" or pos == "adverbs") then
		labels["indeclinable " .. pos] = {
			breadcrumb = "indeclinable",
			description = intro .. " that do not display additional grammatical relations by means of declension.",
			parents = {by_inflection},
		}

		labels[pos .. " with multiple declensions"] = {
			breadcrumb = "with multiple declensions",
			description = intro .. " that follow more than one type of inflection.",
			parents = {{name = by_inflection, sort = "multiple declensions"}},
		}

		labels[pos .. " with multiple plurals"] = {
			breadcrumb = "with multiple plurals",
			description = intro .. " that have more than one possible plural (sometimes with distinct meanings).",
			parents = {{name = by_inflection, sort = "multiple plurals"}},
		}
	end

	labels[pos .. " by tone"] = {
		breadcrumb = "by tone",
		description = intro .. " organized by the tone they follow.",
		parents = {{name = by_inflection, sort = "tone"}},
	}

	labels[pos .. " by vowel harmony"] = {
		breadcrumb = "by vowel harmony",
		description = intro .. " organized by the vowel harmony they follow.",
		parents = {{name = by_inflection, sort = "vowel harmony"}},
	}
end


-- FIXME: Only used currently for Arabic; probably should be removed as a general category.
-- Elative adjectives with non-standard inflection, filed under
-- "adjectives by inflection type". Defined once: a second, identical
-- assignment to the same key would only overwrite this entry.
labels["irregular elative adjectives"] = {
	description = "{{{langname}}} elative adjectives that follow non-standard patterns of inflection.",
	parents = {"adjectives by inflection type"},
}

-- Noun-like parts of speech: "POS with unattested plurals" (directly under
-- the POS) and "definite POS" (under the POS's inflection-type grouping).
for _, pos in ipairs({"nouns", "proper nouns", "pronouns"}) do
	local intro = "{{{langname}}} " .. pos
	local unattested = "unattested plurals"

	labels[pos .. " with " .. unattested] = {
		breadcrumb = "with " .. unattested,
		description = intro .. " with " .. unattested .. ".",
		parents = {{name = pos, sort = unattested}},
	}

	labels["definite " .. pos] = {
		breadcrumb = "definite",
		description = intro .. " that are inherently definite and have definite concord.",
		parents = {pos .. " by inflection type"},
	}
end


------- GERMANIC VERB CLASSES -------

-- FIXME: Not clear this belongs among the general categories.

-- The three top-level Germanic verb types, all filed under
-- "verbs by inflection type". Each row is: breadcrumb, description tail.
-- The label is "<breadcrumb> verbs".
do
	local verb_types = {
		{"strong", " verbs that present different stem vowels in their typically regular conjugated forms."},
		{"weak", " verbs that display dental suffixes in their past tense conjugated forms."},
		{"preterite-present", " verbs that inflect in the present tense like the past tense of strong verbs."},
	}
	for _, row in ipairs(verb_types) do
		local kind, rest = row[1], row[2]
		labels[kind .. " verbs"] = {
			description = "{{{langname}}}" .. rest,
			breadcrumb = kind,
			parents = {"verbs by inflection type"},
		}
	end
end

-- Germanic strong and weak verb classes 1 through 4, registered in the order
-- listed. Each row is:
--   label, breadcrumb, parent label, sort key under the parent, description.
do
	local rows = {
		{"class 1 strong verbs", "class 1", "strong verbs", "1",
			"Verbs where the [[ablaut]] vowel was followed by ''-y-'' in Proto-Indo-European."},
		{"class 1 weak verbs", "class 1", "weak verbs", "1",
			"Weak verbs of the first class."},
		{"class 2 strong verbs", "class 2", "strong verbs", "2",
			"Verbs where the [[ablaut]] vowel was followed by ''-w-'' in Proto-Indo-European."},
		{"class 2a strong verbs", "class 2a", "class 2 strong verbs", "1",
			"Verbs where the [[ablaut]] vowel was *eu in Proto-Germanic."},
		{"class 2b strong verbs", "class 2b", "class 2 strong verbs", "2",
			"Verbs where the [[ablaut]] vowel was *ū in Proto-Germanic."},
		{"class 2 weak verbs", "class 2", "weak verbs", "2",
			"Weak verbs of the second class."},
		{"class 3 weak verbs", "class 3", "weak verbs", "3",
			"Weak verbs of the third class."},
		{"class 3 strong verbs", "class 3", "strong verbs", "3",
			"Verbs where the [[ablaut]] vowel was followed by a [[consonant cluster]] in Proto-Indo-European."},
		{"class 3a strong verbs", "class 3a", "class 3 strong verbs", "1",
			"Verbs where the [[consonant cluster]] begins with a nasal consonant."},
		{"class 3b strong verbs", "class 3b", "class 3 strong verbs", "2",
			"Verbs where the [[consonant cluster]] begins with a lateral consonant or velar fricative."},
		{"class 3c strong verbs", "class 3c", "class 3 strong verbs", "3",
			"Verbs where the [[consonant cluster]] begins with a rhotic consonant."},
		{"class 4 strong verbs", "class 4", "strong verbs", "4",
			"Verbs where the [[ablaut]] vowel was followed by a [[sonorant]] (''m'', ''n'', ''l'', ''r'') but no other consonant in Proto-Indo-European."},
		{"class 4 weak verbs", "class 4", "weak verbs", "4",
			"Weak verbs of the fourth class."},
	}

	for _, row in ipairs(rows) do
		labels[row[1]] = {
			description = row[5],
			breadcrumb = row[2],
			parents = {{name = row[3], sort = row[4]}},
		}
	end
end

-- Germanic class 5 strong verbs, sorted fifth under "strong verbs".
-- The description reads "followed by a [[consonant]]"; the article matters
-- because this text is shown to readers on the category page.
labels["class 5 strong verbs"] = {
	description = "Verbs where the [[ablaut]] vowel was followed by a [[consonant]] other than a [[sonorant]] in Proto-Indo-European.",
	breadcrumb = "class 5",
	parents = {{name = "strong verbs", sort = "5"}},
}

-- Germanic strong verb classes 6 and 7, plus the class 7 subclasses a-e,
-- registered in the order listed. Each row is:
--   label, breadcrumb, parent label, sort key under the parent, description.
do
	local rows = {
		{"class 6 strong verbs", "class 6", "strong verbs", "6",
			"The Proto-Indo-European origin of this class is not securely known. It contains verbs with the stem vowel ''-a-'', except those where it is followed by a sonorant and another consonant (this combination was considered a diphthong in PIE and therefore belonged to class 7)."},
		{"class 7 strong verbs", "class 7", "strong verbs", "7",
			"Verbs that retained their reduplication in the past tense in Proto-Germanic."},
		{"class 7a strong verbs", "class 7a", "class 7 strong verbs", "a",
			"Class 7 strong verbs where the root vowel was ''*ai'' in Proto-Germanic, analogous to class 1."},
		{"class 7b strong verbs", "class 7b", "class 7 strong verbs", "b",
			"Class 7 strong verbs where the root vowel was ''*au'' in Proto-Germanic, analogous to class 2."},
		{"class 7c strong verbs", "class 7c", "class 7 strong verbs", "c",
			"Class 7 strong verbs where the root vowel was ''*a'' followed by a [[consonant cluster]] in Proto-Germanic, analogous to class 3."},
		{"class 7d strong verbs", "class 7d", "class 7 strong verbs", "d",
			"Class 7 strong verbs where the root vowel was ''*ē'' in Proto-Germanic."},
		{"class 7e strong verbs", "class 7e", "class 7 strong verbs", "e",
			"Class 7 strong verbs where the root vowel was ''*ō'' in Proto-Germanic."},
	}

	for _, row in ipairs(rows) do
		labels[row[1]] = {
			description = row[5],
			breadcrumb = row[2],
			parents = {{name = row[3], sort = row[4]}},
		}
	end
end

------- TUPIAN LEMMA CLASSES -------

-- FIXME: Present in Old Tupi, Nheengatu, Guaraní and some other Tupian languages; not clear if this belongs among the general categories.

-- "pluriform POS" labels, each filed under "POS by inflection type".
-- NOTE(review): "postpositions" is not listed in `inflected_poses`, so this
-- file does not create a "postpositions by inflection type" label; confirm
-- that the parent is defined elsewhere.
for _, pos in ipairs({"adjectives", "nouns", "postpositions", "verbs"}) do
	labels["pluriform " .. pos] = {
		description = "{{{langname}}} " .. pos .. " that have a relational prefix added to their stem.",
		breadcrumb = "pluriform",
		parents = {pos .. " by inflection type"},
	}
end

-- Build the table returned as LABELS. It is a shallow copy of `labels`: the
-- per-label tables are shared rather than cloned, so the default written here
-- is visible through both tables.
local labels2 = {}
local default_umbrella_parent = "Terms by grammatical category subcategories by language"

for label, spec in pairs(labels) do
	-- Keep an explicitly set umbrella parent; otherwise fall back to the default.
	spec.umbrella_parents = spec.umbrella_parents or default_umbrella_parent
	labels2[label] = spec
end



-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------


-- Umbrella metacategory used as the default `umbrella_parents` value for the
-- labels in this file. It is parented both to "Umbrella metacategories" and
-- to the "terms by grammatical category" label.
do
	local umbrella = {}
	umbrella.description = "Umbrella categories covering topics related to grammatical categories, such as gender, animacy and noun and verb classes."
	umbrella.additional = "{{{umbrella_meta_msg}}}"
	umbrella.parents = {
		"Umbrella metacategories",
		{name = "terms by grammatical category", is_label = true, sort = " "},
	}
	raw_categories["Terms by grammatical category subcategories by language"] = umbrella
end

-- Umbrella metacategory named by the "suppletive POS" labels; it sits under
-- the general umbrella metacategory registered just above.
do
	local suppletion = {}
	suppletion.description = "Umbrella categories covering suppletive terms in specific part-of-speech categories."
	suppletion.additional = "{{{umbrella_meta_msg}}}"
	suppletion.parents = {
		"Umbrella metacategories",
		"Terms by grammatical category subcategories by language",
	}
	raw_categories["Suppletion subcategories by language"] = suppletion
end


-----------------------------------------------------------------------------
--                                                                         --
--                                 HANDLERS                                --
--                                                                         --
-----------------------------------------------------------------------------


-- Handler for labels of the form "class <id> nouns", where <id> consists of
-- digits and/or lowercase letters (e.g. "class 9 nouns", "class 1a nouns").
-- Returns nothing when the label does not match.
table.insert(handlers, function(data)
	local class = data.label:match("^class ([0-9a-z]+) nouns$")
	if not class then
		return
	end

	-- Split into a leading number and an optional letter suffix so that the
	-- number can be zero-padded to two digits in the sort key. Identifiers
	-- that do not start with digits are used as the sort key unchanged.
	local classnum, suffix = class:match("^([0-9]+)([a-z]*)$")
	local sortkey
	if classnum then
		sortkey = string.format("#%02d", classnum) .. suffix
	else
		sortkey = class
	end

	return {
		description = "{{{langname}}} nouns that belong to class " .. class .. ".",
		breadcrumb = class,
		umbrella = false,
		parents = {{name = "nouns by class", sort = sortkey}},
	}
end)

-- Handler for labels of the form "<POS> with tone <tone>", where <tone>
-- contains no spaces. Returns nothing when the label does not match.
table.insert(handlers, function(data)
	local pos, tone = data.label:match("^(.+) with tone ([^ ]+)$")
	if not pos then
		return
	end

	return {
		description = "{{{langname}}} " .. pos .. " with tone " .. tone .. ".",
		breadcrumb = tone,
		-- FIXME, should there be an umbrella category e.g. 'Adjectives with tone H by language'?
		umbrella = false,
		parents = {{
			name = pos .. " by tone",
			-- Sort key is the tone's length followed by the tone itself.
			sort = #tone .. tone,
		}},
	}
end)

-- Handler for labels of the form "<harmony>-harmonic <POS>". The pattern only
-- accepts a <POS> without spaces. Returns nothing when the label does not match.
table.insert(handlers, function(data)
	local harmony, pos = data.label:match("^(.+)-harmonic ([^ ]+)$")
	if not pos then
		return
	end

	return {
		description = "{{{langname}}} " .. pos .. " with vowel harmony in " .. harmony .. ".",
		breadcrumb = harmony,
		umbrella = false,
		parents = {{
			name = pos .. " by vowel harmony",
			-- Sort key is the harmony name's length followed by the name itself.
			sort = #harmony .. harmony,
		}},
	}
end)

-- Handler for labels of the form "nouns classified by <classifier>".
-- Returns nothing when the label does not match.
--
-- `data.lang` is treated as optional throughout: when it is absent the
-- classifier is shown as plain text and used directly as the sort key,
-- instead of indexing a nil language object.
table.insert(handlers, function(data)
	local pos, classifier = data.label:match("^(nouns) classified by (.+)$")
	if pos then
		local lang = data.lang
		local linktext, sortkey
		if lang then
			-- Chinese classifiers may take the form TRAD/SIMP. This will cause problems if passed directly to [[Module:links]],
			-- but the module can accept links of the form TRAD//SIMP and display them correctly.
			if lang:getCode() == "zh" then
				classifier = classifier:gsub("/", "//")
			end
			linktext = require("Module:links").full_link({ term = classifier, lang = lang }, "term")
			-- Parentheses keep only the first return value of makeSortKey.
			sortkey = (lang:makeSortKey(classifier))
		else
			linktext = classifier
			sortkey = classifier
		end
		return {
			description = "{{{langname}}} " .. pos .. " using " .. linktext .. " as their classifier.",
			breadcrumb = classifier,
			umbrella = false,
			parents = {{
				name = pos .. " by classifier",
				sort = sortkey,
			}},
		}
	end
end)


-- Module export: the label table with default umbrella parents applied, the
-- raw categories, and the handler functions.
return {
	LABELS = labels2,
	RAW_CATEGORIES = raw_categories,
	HANDLERS = handlers,
}