Module:grc-headword

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

This module generates the content of many Ancient Greek headword-line templates: {{grc-verb}}, {{grc-verb form}}, {{grc-noun}}, {{grc-noun form}}, {{grc-proper noun}}, {{grc-adj-1&2}}, {{grc-adj-2nd}}, {{grc-adj-1&3}}, {{grc-adj-3rd}}, {{grc-part-1&2}}, {{grc-part-1&3}}, {{grc-adverb}}, {{grc-num}}, {{grc-preposition}}, {{grc-particle}}.

This module tracks the monophthongs α, ι, υ (a, i, u) without macrons, breves, circumflexes, or iota subscripts (◌̄, ◌̆, ◌͂, ◌ͅ), so that length can be appropriately marked, and it categorizes all Ancient Greek words into categories for accent type, such as Ancient Greek oxytone terms.

Experimentation on new features is done in Module:grc-headword/sandbox.


local export = {}

local m_grc_utils = require("Module:grc-utilities")
local tokenize = m_grc_utils.tokenize
local find_ambig = m_grc_utils.findAmbig

local full_headword = require("Module:headword").full_headword
local get_accent_term = require("Module:grc-accent").get_accent_term
local serial_comma_join = require("Module:table").serialCommaJoin

local lang = require("Module:languages").getByCode("grc")
local canonical_name = lang:getCanonicalName()
local sc = require("Module:scripts").getByCode("polytonic")

-- Context of the page the module is being invoked on.
-- NAMESPACE is the namespace text of the current title and PAGENAME its
-- text. PAGENAME is reassigned by export.test.
local NAMESPACE = mw.title.getCurrentTitle().nsText
local PAGENAME = mw.title.getCurrentTitle().text
-- True when the namespace text is empty.
local MAINSPACE = NAMESPACE == ""

-- Short aliases for the mw.ustring functions used below.
local ufind = mw.ustring.find
local umatch = mw.ustring.match

-- Handlers keyed by part-of-speech name (and by internal subclass names such
-- as "adj-1&2"); export.show and export.test dispatch through this table.
local pos_functions = {}

-- Set of declension class names accepted by the noun handler.
local legal_declension = {}
for _, class_name in ipairs { "first", "second", "Attic", "third", "irregular" } do
	legal_declension[class_name] = true
end

-- Maps a gender code (optionally suffixed with -s, -d or -p) to the gender
-- name used in category names. Also used to validate genders.
local gender_names = {}
do
	local base_names = {
		m = "masculine",
		f = "feminine",
		n = "neuter",
		c = "common",
		["?"] = "unknown gender",
	}
	local number_suffixes = { "s", "d", "p" }
	for code, name in pairs(base_names) do
		gender_names[code] = name
		for _, number in ipairs(number_suffixes) do
			gender_names[code .. "-" .. number] = name
		end
	end
end

-- Wraps text in curly double quotation marks.
local function quote(text)
	local opening, closing = "“", "”"
	return opening .. text .. closing
end

-- Joins the items of array with concatenater and returns them italicised
-- after a semicolon; returns the empty string when array has no first item.
local function format(array, concatenater)
	if array[1] then
		local joined = table.concat(array, concatenater)
		return "; ''" .. joined .. "''"
	end
	return ""
end
	
-- Process arg the way [[Module:parameters]] would: trim whitespace and turn
-- the empty string into nil. Falsy input is returned unchanged.
local function process_arg(val)
	if not val then
		return val
	end
	local trimmed = mw.text.trim(val)
	if trimmed == "" then
		return nil
	end
	return trimmed
end

-- Returns true if text contains at least one character from the Greek and
-- Coptic or Greek Extended blocks, false otherwise. The patterns work on the
-- UTF-8 bytes of text.
local function contains_Greek(text)
	local byte_patterns = {
		-- Any character in the Greek and Coptic block except the first line
		-- (ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ).
		"[\206-\207][\128-\191]",
		-- Exactly the Greek Extended block.
		"\225[\188-\191][\128-\191]",
	}
	for _, patt in ipairs(byte_patterns) do
		if string.find(text, patt) then
			return true
		end
	end
	return false
end

-- Combining diacritics, built from their code points.
local U = mw.ustring.char
local macron, breve = U(0x304), U(0x306)
local rough, smooth = U(0x314), U(0x313)
local diaeresis = U(0x308)
local acute, grave = U(0x301), U(0x300)
local circumflex = U(0x342)
local subscript = U(0x345)
-- Character class matching any one of the diacritics above.
local diacritic_patt = "["
	.. macron .. breve
	.. rough .. smooth .. diaeresis
	.. acute .. grave .. circumflex
	.. subscript
	.. "]"

-- Returns true if any token of text is one of α, ι, υ followed only by
-- diacritics, none of which is a macron, breve, circumflex or iota subscript.
local function has_ambiguous_vowel(text)
	local length_marks = "[" .. macron .. breve .. circumflex .. subscript .. "]"
	local bare_vowel_patt = "^([αιυ])(" .. diacritic_patt .. "*)$"
	
	-- tokenize breaks the word into units.
	for _, unit in ipairs(tokenize(text)) do
		local vowel, marks = umatch(unit, bare_vowel_patt)
		-- An empty run of diacritics contains no length mark either, so one
		-- test covers both the bare vowel and the accented vowel.
		if vowel and not ufind(marks, length_marks) then
			return true
		end
	end
	
	return false
end

-- Process numbered parameters before using [[Module:parameters]], as
-- [[Module:parameters]] converts several named parameters into arrays, which
-- makes them more difficult to manipulate.
--
-- Moves each numbered argument into the named parameter it stands for and
-- returns a modified shallow clone of args.
--
-- args:            the template arguments (cloned, not modified in place).
-- Greek_params:    names for the leading numbered parameters that hold Greek
--                  text or "-"; the first name is expected to be "head".
--                  NOTE: this table may be modified (its first item removed).
-- nonGreek_params: names for the numbered parameters that follow the Greek
--                  ones; defaults to { false }, i.e. one unnamed slot.
--
-- Raises an error when there are too many Greek parameters, when a numbered
-- parameter duplicates a named one, when a numbered parameter has no name to
-- map to, or when the resulting head is "-".

local function process_numbered_params(args, Greek_params, nonGreek_params)
	if not nonGreek_params then
		nonGreek_params = { false }
	end
	
	-- Taken before Greek_params is possibly shortened below.
	local max_Greek_param_index = #Greek_params
	
	-- Clone args table so that its values can be modified.
	args = require("Module:table").shallowClone(args)
	
	if args.head then
		-- [[Special:WhatLinksHere/Template:tracking/grc-headword/head param]]
		require("Module:debug").track("grc-headword/head param")
	end
	
	-- Count the leading run of numbered arguments that are "-" or contain
	-- Greek characters.
	local last_Greek_param_index = 0
	for i, arg in ipairs(args) do
		if arg == "-" or contains_Greek(arg) then
			last_Greek_param_index = i
		else
			break
		end
	end
	
	-- NOTE(review): head_in_arg1 is assigned below but never read in this
	-- function.
	local head_in_arg1 = false
	
	-- Every Greek slot is filled, so parameter 1 is taken as the headword.
	-- In the main namespace this is only accepted when the pagename has
	-- ambiguous vowels.
	if last_Greek_param_index == max_Greek_param_index then
		if not MAINSPACE or has_ambiguous_vowel(PAGENAME) then
			head_in_arg1 = true
		else
			error("The pagename does not have ambiguous vowels, so there cannot be "
					.. max_Greek_param_index
					.. " numbered parameters. See template documentation for more details.")
		end
	
	elseif last_Greek_param_index > max_Greek_param_index then
		error("Too many numbered parameters containing Greek text or hyphens. There can be at most "
				.. max_Greek_param_index .. ".")
	
	-- For indeclinable nouns: {{grc-noun|Ἰσρᾱήλ|m}}
	-- First parameter is headword if equal to pagename when macrons and breves are removed.
	elseif args[1] and lang:makeEntryName(args[1]) == PAGENAME then
		if args.head then
			error("Parameter 1 appears to be the headword, so the head parameter " .. quote(args.head) .. " is not needed.")
		end
		args.head, args[1] = args[1], nil
	
	else
		-- Parameter 1 is not the headword, so the numbered parameters start
		-- at the second Greek parameter name.
		table.remove(Greek_params, 1) -- Remove "head" parameter.
	end
	
	-- Maps numbered parameters start_i..end_i onto param_names[1], [2], ...
	-- in order, moving each numbered value into its named parameter.
	local function process_params(start_i, end_i, param_names)
		local i = 1 -- Index in the table of parameter names.
		for numbered = start_i, end_i do
			local named = param_names[i]
			i = i + 1
			
			if named then
				-- Process parameters, as they have not been processed by [[Module:parameters]].
				args[numbered], args[named] =
					process_arg(args[numbered]), process_arg(args[named])
			
			-- This should not happen, because the number of Greek parameters
			-- has already been checked.
			elseif args[numbered] then
				error("No purpose for parameter " .. numbered .. ".")
			end
				
			if args[numbered] then
				if named then
					if args[named] then
						error("Parameter " .. numbered .. " is not needed when parameter " .. named .. " is present.")
					end
					
					args[named], args[numbered] = args[numbered], nil
				else
					-- Not reached: an unnamed, present parameter has already
					-- raised the error in the elseif branch above.
					error("Parameter " .. numbered .. ", " .. args[numbered] .. ", has no purpose.")
				end
			end
		end
	end
	
	process_params(1, last_Greek_param_index, Greek_params)
	process_params(last_Greek_param_index + 1, #Greek_params + #nonGreek_params, nonGreek_params)
	
	if args.head == "-" then
		error("The headword cannot be absent.")
	end
	
	return args
end

-- Validates the headwords in data.heads and adds categories for them.
--
-- data:   headword data table; reads data.heads, appends to data.categories
--         and may overwrite data.pos_category.
-- poscat: singular part-of-speech word used in the "-forming suffixes"
--         category name.
--
-- For every head, the accent term returned by get_accent_term (if any) is
-- turned into a "<language> <accent> terms" category. If the first head is a
-- suffix, every head must be one too (otherwise an error is raised), the
-- part-of-speech category becomes "suffixes", and a
-- "<language> <poscat>-forming suffixes" category is added.
local function process_heads(data, poscat)
	-- A head is a suffix when it starts with a hyphen, optionally preceded
	-- by an asterisk. The same pattern is used for detection and for the
	-- per-head check, so that a first head such as "*-x" does not fail its
	-- own check.
	local suffix_pattern = "^%*?%-"
	local suffix = data.heads[1]:find(suffix_pattern) and true or false
	for i, head in ipairs(data.heads) do
		if suffix and not head:find(suffix_pattern) then
			error("The first headword has a hyphen, so headword #" .. i ..
					", " .. quote(head) .. ", should as well.")
		end
		local accent = get_accent_term(head)
		if accent ~= nil then
			table.insert(data.categories, canonical_name .. ' ' .. accent .. ' terms')
		end
	end
	
	if suffix then
		data.pos_category = "suffixes"
		table.insert(data.categories, canonical_name .. " " .. poscat .. "-forming suffixes")
	end
end

-- Builds an inflection entry with the given label whose only form is an
-- unlinked dash.
local function unlinked_form(label)
	local dash_form = { nolink = true, term = "—" }
	return { dash_form, label = label }
end

-- Appends the feminine (only when total_forms is 2) and neuter forms found
-- in args.f and args.n to inflections. When allow_blank_forms is set and a
-- form list is exactly { "-" }, an unlinked dash is shown instead.
local function adj_and_part_forms(total_forms, args, inflections, allow_blank_forms)
	local function add_forms(forms, label)
		if not forms[1] then
			return
		end
		local is_blank = allow_blank_forms and not forms[2] and forms[1] == "-"
		if is_blank then
			table.insert(inflections, unlinked_form(label))
		else
			forms.label = label
			table.insert(inflections, forms)
		end
	end
	
	if total_forms == 2 then
		add_forms(args.f, 'feminine')
	end
	add_forms(args.n, 'neuter')
end

-- Returns "reconstructed " (with trailing space) on pages in the
-- Reconstruction namespace, and the empty string elsewhere.
local function get_reconstructed_prefix()
	if NAMESPACE == "Reconstruction" then
		return "reconstructed "
	end
	return ""
end

-- Template entry point. frame.args[1] is the part of speech and
-- frame.args[2] an optional subclass; the template's own arguments come from
-- the parent frame. Returns the headword line followed by the formatted
-- appendix notes.
function export.show(frame)
	local template_args = frame:getParent().args
	
	local poscat = frame.args[1]
	if not poscat then
		error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	end
	local sub_class = frame.args[2]
	
	local data = {
		lang = lang,
		sc = sc,
		pos_category = get_reconstructed_prefix() .. poscat,
		categories = {},
		heads = {},
		genders = {},
		inflections = {},
	}
	local appendix = {}
	
	local handler = pos_functions[poscat]
	if handler then
		handler(template_args, data, appendix, poscat, sub_class)
	end
	
	for index, head in ipairs(data.heads) do
		-- Track each ambiguous vowel reported for this head.
		local _, ambiguous_vowels = find_ambig(head, false)
		for vowel in pairs(ambiguous_vowels) do
			require("Module:debug").track({
				"grc-headword/ambig",
				"grc-headword/ambig/" .. vowel
			})
		end
		
		local is_single_word = not head:find(" ")
		if is_single_word and mw.ustring.toNFD(head):find(grave) then
			error("Head #" .. index .. ", " .. quote(head) ..
				", contained a grave accent, but no space. Grave accent can only be used in multi-word terms.")
		end
	end
	
	local headword_line = full_headword(data)
	return headword_line .. format(appendix, ", ")
end

-- Testing entry point: runs the part-of-speech handler with explicit
-- arguments and pagename, and returns the resulting data table instead of a
-- formatted headword line. Overwrites the module-level PAGENAME.
function export.test(frame_args, parent_args, pagename)
	PAGENAME = pagename
	
	local poscat = frame_args[1]
	if not poscat then
		error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	end
	local sub_class = frame_args[2]
	
	local data = {
		pos_category = get_reconstructed_prefix() .. poscat,
		categories = {},
		heads = {},
		genders = {},
		inflections = {},
	}
	local appendix = {}
	
	local handler = pos_functions[poscat]
	if handler then
		handler(parent_args, data, appendix, poscat, sub_class)
	end
	
	-- Track each ambiguous vowel reported for each head.
	for _, head in pairs(data.heads) do
		local _, ambiguous_vowels = find_ambig(head, false)
		for vowel in pairs(ambiguous_vowels) do
			require("Module:debug").track({
				"grc-headword/ambig",
				"grc-headword/ambig/" .. vowel
			})
		end
	end
	
	return data
end

-- Headword line for nouns; the same handler is registered for proper nouns.
--
-- args:     raw template arguments. Numbered parameters are mapped onto
--           head, gen, g and decl by process_numbered_params.
-- data:     headword data table; heads, genders, inflections and categories
--           are filled in.
-- appendix: list of declension notes shown after the headword line.
-- poscat:   plural part-of-speech name used in category names.
--
-- Raises an error for an unrecognized gender or declension class, and when
-- a declension class is given without a genitive.
pos_functions["nouns"] = function(args, data, appendix, poscat)
	args = process_numbered_params(args, { "head", "gen" }, { "g", "decl" })
	
	local params = {
		-- Numbered parameters 1, 2, 3, 4 handled above.
		head = { list = true, default = PAGENAME },
		gen = { list = true },
		g = { list = true, default = '?' },
		dim = { list = true },
		decl = { list = true },
	}
	args = require("Module:parameters").process(args, params)
	
	data.heads = args.head
	
	process_heads(data, "noun")
	
	-- Validate every gender before it is used in the category names below.
	for _, g in ipairs(args.g) do
		local gender_name = gender_names[g]
		if gender_name then
			table.insert(data.genders, g)
			table.insert(data.categories, canonical_name .. " " .. gender_name .. " " .. poscat)
		else
			error("Gender " .. quote(g) .. " is not a valid " .. canonical_name .. " gender.")
		end
	end
	
	if not args.gen[1] then
		-- No genitive given: the noun is treated as indeclinable.
		table.insert(data.inflections, { label = "[[Appendix:Glossary#indeclinable|indeclinable]]" })
		table.insert(data.categories, canonical_name .. " indeclinable " .. poscat)
		for _, g in ipairs(args.g) do
			table.insert(data.categories, canonical_name .. " " .. gender_names[g] .. " indeclinable " .. poscat)
		end
		if args.decl[1] then
			error("Declension class " .. quote(args.decl[1])
					.. " has been given, but no genitive form has been given, so the word cannot belong to a declension class.")
		end
	else
		-- A lone "-" is shown as an unlinked dash.
		if not args.gen[2] and args.gen[1] == "-" then
			table.insert(data.inflections, unlinked_form("genitive"))
		else
			args.gen.label = "genitive"
			table.insert(data.inflections, args.gen)
		end
		
		if args.decl[2] then
			table.insert(data.inflections, { label = 'variously declined' })
			table.insert(data.categories, canonical_name .. " " .. poscat .. " with multiple declensions")
		elseif not args.decl[1] then
			table.insert(appendix, "? declension")
		end
		
		for _, decl_class in ipairs(args.decl) do
			if legal_declension[decl_class] then
				table.insert(appendix, "[[Appendix:" .. canonical_name .. " " .. decl_class .. " declension|" .. decl_class .. " declension]]")
				-- "irregular" gets an appendix link but no "-declension" category.
				if decl_class ~= "irregular" then
					table.insert(data.categories, canonical_name .. " " .. decl_class .. "-declension " .. poscat)
				end
				
				for _, g in ipairs(args.g) do
					table.insert(data.categories,
						canonical_name .. " " .. gender_names[g] .. " " .. poscat .. " in the " .. decl_class .. " declension")
				end
			else
				-- The choices listed here mirror the keys of legal_declension.
				error("Declension " .. quote(decl_class) .. " is not a legal " ..
					canonical_name .. " declension. Choose “first”, “second”, “Attic”, “third”, or “irregular”.")
			end
		end
	end
	
	if args.dim[1] then
		args.dim.label = "diminutive"
		table.insert(data.inflections, args.dim)
	end
end

pos_functions["proper nouns"] = pos_functions["nouns"]

-- Headword line for verbs: only the headword(s) are processed. Numbered
-- parameter 1 may supply the head.
pos_functions["verbs"] = function(args, data)
	local mapped_args = process_numbered_params(args, { "head" })
	
	local processed = require("Module:parameters").process(mapped_args, {
		head = { list = true, default = PAGENAME }
	})
	
	data.heads = processed.head
	process_heads(data, "verb")
end

-- Headword line for adverbs. Parameters 1 and 2 are aliases of comp and
-- super. When only one of the two degrees is given, the other is shown as
-- "no comparative" / "no superlative"; when neither is given, nothing is
-- shown.
pos_functions["adverbs"] = function(args, data)
	args = require("Module:parameters").process(args, {
		[1] = { alias_of = 'comp' },
		[2] = { alias_of = 'super' },
		head = { list = true, default = PAGENAME },
		comp = { list = true },
		super = { list = true },
	})
	data.heads = args.head
	
	process_heads(data, "adverb")
	
	local comparative, superlative = args.comp, args.super
	local has_comparative, has_superlative = comparative[1], superlative[1]
	if not (has_comparative or has_superlative) then
		return
	end
	
	-- The comparative slot always comes first, the superlative slot second.
	if has_comparative then
		comparative.label = 'comparative'
		table.insert(data.inflections, comparative)
	else
		table.insert(data.inflections, { label = 'no comparative' })
	end
	
	if has_superlative then
		superlative.label = 'superlative'
		table.insert(data.inflections, superlative)
	else
		table.insert(data.inflections, { label = 'no superlative' })
	end
end

-- Headword line for numerals: feminine and neuter forms followed by any
-- cardinal, ordinal, adverbial and collective forms that were given.
pos_functions["numerals"] = function(args, data)
	args = process_numbered_params(args, { "head", "f", "n" })
	
	args = require("Module:parameters").process(args, {
		head = { list = true, default = PAGENAME },
		f = { list = true },
		n = { list = true },
		car = { list = true },
		ord = { list = true },
		adv = { list = true },
		coll = { list = true },
	})
	data.heads = args.head
	
	process_heads(data, "numeral")
	
	adj_and_part_forms(2, args, data.inflections, false)
	
	-- Parameter name and displayed label, in display order.
	local related_forms = {
		{ "car", 'cardinal' },
		{ "ord", 'ordinal' },
		{ "adv", 'adverbial' },
		{ "coll", 'collective' },
	}
	for _, entry in ipairs(related_forms) do
		local forms = args[entry[1]]
		if forms[1] then
			forms.label = entry[2]
			table.insert(data.inflections, forms)
		end
	end
end



-- Dispatches participles to the handler for the given declension subclass
-- ("1&2" or "1&3"); any other subclass raises an error.
pos_functions["participles"] = function(args, data, appendix, _, sub_class)
	if sub_class == "1&2" then
		pos_functions["part-1&2"](args, data, appendix)
	elseif sub_class == "1&3" then
		pos_functions["part-1&3"](args, data, appendix)
	else
		-- sub_class is nil when the invocation passes no second parameter;
		-- tostring() keeps the intended message instead of failing on the
		-- string concatenation inside quote().
		error('Participle subclass ' .. quote(tostring(sub_class)) .. ' not recognized.')
	end
end

-- Headword line for first-and-second-declension participles: masculine
-- head, required feminine and neuter forms, and an appendix note linking
-- both declensions.
pos_functions["part-1&2"] = function(args, data, appendix)
	local mapped_args = process_numbered_params(args, { "head", "f", "n" })
	
	-- Parameters 1, 2, and 3 handled above.
	local processed = require("Module:parameters").process(mapped_args, {
		head = { list = true, default = PAGENAME },
		f = { list = true, required = true },
		n = { list = true, required = true },
	})
	data.heads = processed.head
	
	process_heads(data, "participle")
	
	table.insert(data.genders, "m")
	
	local declension_note = table.concat {
		"[[Appendix:", canonical_name, " first declension|first]]",
		"/",
		"[[Appendix:", canonical_name, " second declension|second declension]]",
	}
	table.insert(appendix, declension_note)
	
	adj_and_part_forms(2, processed, data.inflections, false)
end

-- Headword line for first-and-third-declension participles: masculine head,
-- required feminine and neuter forms, and an appendix note linking both
-- declensions.
pos_functions["part-1&3"] = function(args, data, appendix)
	local mapped_args = process_numbered_params(args, { "head", "f", "n" })
	
	-- Parameters 1, 2, and 3 handled above.
	local processed = require("Module:parameters").process(mapped_args, {
		head = { list = true, default = PAGENAME },
		f = { list = true, required = true },
		n = { list = true, required = true },
	})
	data.heads = processed.head
	
	process_heads(data, "participle")
	
	table.insert(data.genders, "m")
	
	local declension_note = table.concat {
		"[[Appendix:", canonical_name, " first declension|first]]",
		"/",
		"[[Appendix:", canonical_name, " third declension|third declension]]",
	}
	table.insert(appendix, declension_note)
	
	adj_and_part_forms(2, processed, data.inflections, false)
end

-- Dispatches adjectives to the handler for the given declension subclass
-- ("1&2", "1&3", "2nd" or "3rd"); any other subclass raises an error.
pos_functions["adjectives"] = function(args, data, appendix, _, sub_class)
	if sub_class == "1&2" then
		pos_functions["adj-1&2"](args, data, appendix)
	elseif sub_class == "1&3" then
		pos_functions["adj-1&3"](args, data, appendix)
	elseif sub_class == "2nd" then
		pos_functions["adj-2nd"](args, data, appendix)
	elseif sub_class == "3rd" then
		pos_functions["adj-3rd"](args, data, appendix)
	else
		-- sub_class is nil when the invocation passes no second parameter;
		-- tostring() keeps the intended message instead of failing on the
		-- string concatenation inside quote().
		error('Adjective subclass ' .. quote(tostring(sub_class)) .. ' not recognized.')
	end
end

-- Headword line for first-and-second-declension adjectives: masculine head,
-- required feminine and neuter forms (a lone "-" is shown as a dash), and an
-- optional degree ("comp" or "super") that changes the part-of-speech
-- category.
pos_functions["adj-1&2"] = function(args, data, appendix)
	local mapped_args = process_numbered_params(args, { "head", "f", "n" })
	
	-- Parameters 1, 2, and 3 handled above.
	local processed = require("Module:parameters").process(mapped_args, {
		head = { list = true, default = PAGENAME },
		f = { list = true, required = true },
		n = { list = true, required = true },
		deg = {},
	})
	data.heads = processed.head
	
	process_heads(data, "adjective")
	
	table.insert(data.genders, "m")
	
	table.insert(appendix, table.concat {
		"[[Appendix:", canonical_name, " first declension|first]]",
		"/",
		"[[Appendix:", canonical_name, " second declension|second declension]]",
	})
	
	local degree = processed.deg
	if degree ~= nil then
		local degree_names = { comp = "comparative", super = "superlative" }
		local degree_name = degree_names[degree]
		if not degree_name then
			error('Adjective degree ' .. quote(degree) .. ' not recognized.')
		end
		data.pos_category = get_reconstructed_prefix() .. degree_name .. " adjectives"
	end
	
	adj_and_part_forms(2, processed, data.inflections, true)
end

-- Headword line for first-and-third-declension adjectives: masculine head
-- plus required feminine and neuter forms (a lone "-" is shown as a dash).
pos_functions["adj-1&3"] = function(args, data, appendix)
	local mapped_args = process_numbered_params(args, { "head", "f", "n" })
	
	-- Parameters 1, 2, and 3 handled above.
	local processed = require("Module:parameters").process(mapped_args, {
		head = { list = true, default = PAGENAME },
		f = { list = true, required = true },
		n = { list = true, required = true },
	})
	data.heads = processed.head
	
	process_heads(data, "adjective")
	
	table.insert(data.genders, "m")
	
	table.insert(appendix, table.concat {
		"[[Appendix:", canonical_name, " first declension|first]]",
		"/",
		"[[Appendix:", canonical_name, " third declension|third declension]]",
	})
	
	adj_and_part_forms(2, processed, data.inflections, true)
end

-- Headword line for second-declension adjectives: the head is both
-- masculine and feminine, and only a neuter form is required (a lone "-" is
-- shown as a dash).
pos_functions["adj-2nd"] = function(args, data, appendix)
	local mapped_args = process_numbered_params(args, { "head", "n" })
	
	-- Parameters 1 and 2 handled above.
	local processed = require("Module:parameters").process(mapped_args, {
		head = { list = true, default = PAGENAME },
		n = { list = true, required = true },
	})
	data.heads = processed.head
	
	process_heads(data, "adjective")
	
	for _, gender in ipairs { "m", "f" } do
		table.insert(data.genders, gender)
	end
	
	local declension_note = "[[Appendix:" .. canonical_name .. " second declension|second declension]]"
	table.insert(appendix, declension_note)
	
	adj_and_part_forms(1, processed, data.inflections, true)
end

-- Headword line for third-declension adjectives: the head is both masculine
-- and feminine, only a neuter form is required (a lone "-" is shown as a
-- dash), and an optional degree ("comp" or "super") changes the
-- part-of-speech category.
pos_functions["adj-3rd"] = function(args, data, appendix)
	local mapped_args = process_numbered_params(args, { "head", "n" })
	
	-- Parameters 1 and 2 handled above.
	local processed = require("Module:parameters").process(mapped_args, {
		head = { list = true, default = PAGENAME },
		n = { list = true, required = true },
		deg = {},
	})
	data.heads = processed.head
	
	process_heads(data, "adjective")
	
	for _, gender in ipairs { "m", "f" } do
		table.insert(data.genders, gender)
	end
	
	local declension_note = "[[Appendix:" .. canonical_name .. " third declension|third declension]]"
	table.insert(appendix, declension_note)
	
	local degree = processed.deg
	if degree ~= nil then
		local degree_names = { comp = "comparative", super = "superlative" }
		local degree_name = degree_names[degree]
		if not degree_name then
			error('Adjective degree ' .. quote(degree) .. ' not recognized.')
		end
		data.pos_category = get_reconstructed_prefix() .. degree_name .. " adjectives"
	end
	
	adj_and_part_forms(1, processed, data.inflections, true)
end

-- Maps the case abbreviations accepted by the preposition handler to full
-- case names.
local case_abbreviations = {
	["nom"] = "nominative",
	["gen"] = "genitive",
	["dat"] = "dative",
	["acc"] = "accusative",
	["voc"] = "vocative",
}

-- Headword line for prepositions. The numbered parameters are case
-- abbreviations (keys of case_abbreviations); each one adds a
-- "<language> <case> prepositions" category, and together they form a
-- "governs the ..." inflection label.
--
-- Raises an error for an unrecognized case abbreviation.
pos_functions["prepositions"] = function(args, data, appendix)
	local params = {
		[1] = { list = true },
		head = { list = true, default = PAGENAME },
	}
	local args = require("Module:parameters").process(args, params)
	data.heads = args.head
	
	process_heads(data, "preposition")
	
	if args[1][1] then
		local cases = {}
		for _, case in ipairs(args[1]) do
			if case_abbreviations[case] then
				table.insert(data.categories, canonical_name .. " " .. case_abbreviations[case] .. " prepositions")
				table.insert(cases, "[[Appendix:Glossary#" .. case_abbreviations[case] .. "|" .. case_abbreviations[case] .. "]]")
			else
				-- Build the list of suggestions as a string; a table of
				-- quoted items cannot be concatenated directly.
				local choices = {}
				for _, abbreviation in ipairs { "gen", "dat", "acc" } do
					table.insert(choices, quote(abbreviation))
				end
				error('Case abbreviation ' .. quote(case) ..
						' not recognized. Please choose from ' ..
						serial_comma_join(choices) .. '.')
			end
		end
		table.insert(data.inflections, { label = 'governs the ' .. serial_comma_join(cases) })
	end
end

-- Headword line for particles. Each boolean parameter that is set (disc,
-- mod, inter, neg) adds a "<language> <descriptor> particles" category and a
-- "<descriptor> particle" label.
pos_functions["particles"] = function(args, data)
	args = require("Module:parameters").process(args, {
		head = { list = true, default = PAGENAME },
		disc = { type = 'boolean' },
		mod = { type = 'boolean' },
		inter = { type = 'boolean' },
		neg = { type = 'boolean' },
	})
	data.heads = args.head
	
	-- NOTE(review): the other handlers pass a singular word here; with
	-- "particles" a suffix head would get the category
	-- "... particles-forming suffixes" -- confirm this is intended.
	process_heads(data, "particles")
	
	-- Parameter name and descriptor, in display order.
	local particle_types = {
		{ "disc", "discourse" },
		{ "mod", "modal" },
		{ "inter", "interrogative" },
		{ "neg", "negative" },
	}
	for index = 1, #particle_types do
		local param, descriptor = particle_types[index][1], particle_types[index][2]
		if args[param] then
			table.insert(data.categories, canonical_name .. " " .. descriptor .. " particles")
			table.insert(data.inflections, { label = descriptor .. ' particle' })
		end
	end
end

-- Headword line for noun forms: only the headword(s) are processed.
-- Numbered parameter 1 may supply the head.
pos_functions["noun forms"] = function(args, data)
	local mapped_args = process_numbered_params(args, { "head" })
	
	local processed = require("Module:parameters").process(mapped_args, {
		head = { list = true, default = PAGENAME },
	})
	
	data.heads = processed.head
	-- NOTE(review): for a suffix head this yields the category
	-- "... noun forms-forming suffixes" -- confirm this is intended.
	process_heads(data, "noun forms")
end

-- Headword line for verb forms: only the headword(s) are processed.
-- Numbered parameter 1 may supply the head.
pos_functions["verb forms"] = function(args, data)
	local mapped_args = process_numbered_params(args, { "head" })
	
	local processed = require("Module:parameters").process(mapped_args, {
		head = { list = true, default = PAGENAME },
	})
	
	data.heads = processed.head
	-- NOTE(review): for a suffix head this yields the category
	-- "... verb forms-forming suffixes" -- confirm this is intended.
	process_heads(data, "verb forms")
end

return export