Module:User:Erutuon/Module:grc-conj

From Wiktionary, the free dictionary
Jump to navigation Jump to search


Conjugation types in {{grc-conj}} in the 2019-10-20 dump
conjugation	count
aor-1 879
fut 735
imperf 673
perf 669
pres 628
plup 342
aor-2 254
pres-con-e 136
fut-ln 126
imperf-con-e 118
pres-con-a 72
imperf-con-a 63
pres-con-o 34
imperf-con-o 27
futp 27
imperf-ami 25
pres-numi 24
imperf-numi 22
aor-ami 21
pres-ami 20
pres-emi 18
pres-con-e-mono 17
imperf-emi 17
imperf-irreg 17
pres-irreg 16
imperf-con-e-mono 13
aor-emi 9
aor-omi 7
plup-ami 7
perf-ami 7
aor-numi 7
aor-wmi 5
pres-omi 4
pres-con-eta 4
imperf-con-eta 4
imperf-omi 3
perf-irreg 3
aor-hiemi-comp 3
aor-irreg 2
plup-irreg 2
fut-con-e 2
aor-hmi 2

local export = {}

local m_accent = require("Module:grc-accent")
local Array = require "Module:array"

--- Build a trimmed copy of an argument table.
-- Every value is passed through mw.text.trim; values that are empty after
-- trimming are left out of the result, so they read back as nil.
-- @param args table of string values (not modified)
-- @return a new table with the same keys and trimmed, non-empty values
local function trim_args(args)
	local trimmed = {}
	for key, value in pairs(args) do
		local text = mw.text.trim(value)
		if text ~= "" then
			trimmed[key] = text
		end
	end
	return trimmed
end

-- Maps the tense abbreviation that begins a conjugation code (for example
-- "aor" in "aor-1") to the full name of the tense.
local tense_names = {
	aor = "aorist",
	imperf = "imperfect",
	pres = "present",
	fut = "future",
	perf = "perfect",
	plup = "pluperfect",
	futp = "future perfect",
}

-- These lists were retrieved from template instances in the dump,
-- but they should probably be dynamically generated from the names
-- of the actual conjugation functions.

-- current list from "conjugations" in [[Module:grc-conj]]:
--[[
aor: "1", "2", "ami", "amiw", "emi", "hiemi-comp", "hmi", "irreg", "numi", "omi", "wmi"
fut: "", "con-a", "con-e"
futp: ""
imperf: "", "ami", "con-a", "con-e", "con-e-mono", "con-eta", "con-o", "con-omega", "emi", "irreg", "numi", "omi"
perf: "", "ami", "irreg"
plup: "", "ami", "irreg"
pres: "", "ami", "con-a", "con-e", "con-e-mono", "con-eta", "con-o", "con-omega", "emi", "irreg", "numi", "omi"
]]
-- code:
--[[
local suffixes = {}
local Array = require "Module:array"
for name in pairs(conjugations) do
	local tense, suffix = name:match("^(%a+)%-?(.*)")
	suffixes[tense] = suffixes[tense] or Array()
	suffixes[tense]:insert(suffix)
end

for tense, suffixes in require "Module:table".sortedPairs(suffixes) do
	mw.log(tense .. ": " .. suffixes:sort():map(function (suffix) return '"' .. suffix .. '"' end):concat ", ")
end
--]]

-- Conjugation-type suffixes accepted for both the present and the imperfect.
-- The same Array object is shared by the two tenses.
local pres_imperf_types = Array(
	"",
	"ami",
	"con-a",
	"con-e",
	"con-e-mono",
	"con-eta",
	"con-o",
	"emi",
	"irreg",
	"numi",
	"omi"
)

-- Conjugation-type suffixes accepted for both the perfect and the pluperfect.
local perf_plup_types = Array("", "ami", "irreg")

-- Valid suffixes per tense. Keys are full tense names with spaces replaced
-- by underscores; the empty string means "no suffix after the tense".
local suffixes = {
	present = pres_imperf_types,
	imperfect = pres_imperf_types,
	future = Array("", "con-e", "ln"),
	aorist = Array(
		"1",
		"2",
		"ami",
		"emi",
		"hiemi-comp",
		"hmi",
		"irreg",
		"numi",
		"omi",
		"wmi"
	),
	perfect = perf_plup_types,
	pluperfect = perf_plup_types,
	future_perfect = Array(""),
}

--- Split a conjugation code such as "aor-1" or "pres-con-e" into its parts.
--
-- The bare code "irreg" is not parsed: the returned tense, suffix and
-- contraction are nil and is_irregular is true.
--
-- @param conjugation the conjugation code (a string)
-- @return table with the fields
--   tense        full tense name looked up in tense_names
--   suffix       text after the tense abbreviation and optional hyphen
--   contraction  what follows "con-" in the suffix, or nil
--   is_irregular true only when the whole code is "irreg"
--   conjugation  the code as given
-- Raises an error when the code is missing, does not start with a known
-- tense abbreviation, or has a suffix that is not listed for the tense.
local function parse_conjugation(conjugation)
	-- Without this guard a missing parameter fails with an opaque
	-- "attempt to index a nil value" on the :match call below.
	if type(conjugation) ~= "string" then
		error("No tense provided.")
	end
	
	local tense, suffix, tense_name, contraction_type
	if conjugation ~= "irreg" then
		tense, suffix = conjugation:match("^(%a+)%-?(.*)")
		if not tense then
			error("No tense provided.")
		end
		
		tense_name = tense_names[tense]
		if not tense_name then
			error("The tense " .. tense .. " is not recognized; choose from "
				.. Array.keys(tense_names):concat ", "
				.. " (with hyphen and suffix added if necessary).")
		end
		
		-- Keys of `suffixes` use underscores ("future_perfect"). The extra
		-- parentheses discard gsub's second return value (the match count).
		local tense_suffixes = suffixes[(tense_name:gsub(" ", "_"))]
		if tense_suffixes and not tense_suffixes:contains(suffix) then
			if suffix == "" then
				error("Add a hyphen and a suffix for the type of " .. tense_name
					.. ": one of " .. tense_suffixes:concat ", " .. ".")
			else
				error("Invalid type of " .. tense_name .. ": " .. suffix
					.. "; choose from " .. tense_suffixes:concat ", " .. ".")
			end
		end
		
		contraction_type = suffix:match "^con%-(.+)$"
	end
	
	return {
		tense = tense_name,
		suffix = suffix,
		contraction = contraction_type,
		is_irregular = conjugation == "irreg",
		conjugation = conjugation,
	}
end

--- Validate the positional stems and give them names according to the tense.
--
-- @param conjugation table as returned by parse_conjugation; the fields
--   is_irregular and tense are read
-- @param stems table holding up to four stems at integer keys 1..4 (holes
--   are allowed)
-- @return table mapping stem names to stems; which names are used depends
--   on conjugation.tense, and the table is empty when the tense is not one
--   of the handled names
-- Raises an error when a checked stem is changed by m_accent.strip_tone, or
-- when a stem is present at an index above the number the tense accepts.
local function check_stems(conjugation, stems)
	-- NOTE(review): this accent check only runs for the bare "irreg"
	-- conjugation. It may have been meant for the regular conjugations
	-- instead (condition inverted) -- confirm against the live module.
	if conjugation.is_irregular then
		local malformed = {}
		for i = 1, 4 do
			if stems[i] and stems[i] ~= m_accent.strip_tone(stems[i]) then
				-- The caller builds `stems` from template parameters 2..5,
				-- so stem i is parameter i + 1 from the editor's viewpoint.
				table.insert(malformed, i + 1)
			end
		end
		if #malformed > 0 then
			local plural = #malformed > 1
			error("Malformed input in parameter" .. (plural and "s " or " ")
				.. require("Module:table").serialCommaJoin(malformed)
				.. " (contains extra accents)")
		end
	end
	
	local named_stems = {}
	local stem_count
	if conjugation.tense == "present" or conjugation.tense == "imperfect" then
		-- Only one stem is accepted; named_stems.augmented stays unset.
		named_stems.unaugmented = stems[1]
		stem_count = 1
	elseif conjugation.tense == "future" then
		named_stems.active_or_middle, named_stems.passive = unpack(stems, 1, 2)
		stem_count = 2
	elseif conjugation.tense == "aorist" then
		named_stems.unaugmented_active_or_middle,
		named_stems.augmented_active_or_middle,
		named_stems.unaugmented_passive,
		named_stems.augmented_passive = unpack(stems, 1, 4)
		stem_count = 4
	elseif conjugation.tense == "perfect" then
		named_stems.active, named_stems.mediopassive = unpack(stems, 1, 2)
		stem_count = 2
	elseif conjugation.tense == "pluperfect" then
		named_stems.active, named_stems.mediopassive, named_stems.unaugmented_mediopassive =
			unpack(stems, 1, 3)
		stem_count = 3
	elseif conjugation.tense == "future perfect" then
		named_stems.active_or_middle = stems[1]
		stem_count = 1
	end
	
	-- pairs rather than # because `stems` may contain holes.
	if stem_count then
		for k in pairs(stems) do
			if k > stem_count then
				error("Too many stems; expected at most " .. stem_count .. ".")
			end
		end
	end
	
	return named_stems
end

--- Ensure that a voice code, if one was given, is among the accepted codes.
-- @param voice voice code, or nil/false to skip the check
-- @param ... the voice codes that are acceptable
-- Raises an error listing the accepted codes when `voice` is not one of them.
local function check_voice(voice, ...)
	if not voice then
		return
	end
	local allowed = Array {...}
	if allowed:contains(voice) then
		return
	end
	error("Invalid voice; expected one of "
		.. allowed:concat ", " .. ".")
end

--- Expand a voice code into a list of voice names.
--
-- Only "mp" depends on the tense: it yields middle and passive separately
-- for the aorist and future, and a single mediopassive otherwise (present,
-- imperfect, perfect, pluperfect).
--
-- FIXME: Does "mid" ever mean "mediopassive"?
-- FIXME: Does "am" ever mean "active, mediopassive"?
-- FIXME: Does "mp" mean "middle, passive" or ever "mediopassive"?
--
-- @param voice_code one of "act", "mid", "pass", "am", "mp", "full"
-- @param tense full tense name
-- @return a fresh list of voice names, or nil for an unrecognized code
local function interpret_voice_code(voice_code, tense)
	if voice_code == "mp" then
		local has_separate_passive = tense == "aorist" or tense == "future"
		if has_separate_passive then
			return { "middle", "passive" }
		end
		return { "mediopassive" }
	end
	
	-- Built on every call so that callers always receive a new table.
	local tense_independent = {
		act = { "active" },
		mid = { "middle" },
		pass = { "passive" },
		am = { "active", "middle" },
		full = { "active", "middle", "mediopassive", "passive" },
	}
	return tense_independent[voice_code]
end

--- Work out which voices to show from the voice code, the tense and the
-- stems that were supplied.
--
-- Very convoluted.
-- This would probably throw an error on existing uses of {{grc-conj}},
-- because the current module does not check if the voice code is appropriate
-- for the tense.
--
-- @param voice voice code from the form parameter, or nil to pick a default
--   based on which stems are present
-- @param tense full tense name, or nil
-- @param stems table of named stems as returned by check_stems
-- @return list of voice names (see interpret_voice_code); all four voices
--   when no voice was given and the tense is not one of the handled names
-- Raises an error when the voice code is not allowed for the tense.
local function get_voices(voice, tense, stems)
	mw.logObject({ voice, tense, stems })
	
	if tense == "present" or tense == "imperfect"
	or tense == "perfect" or tense == "pluperfect" then
		check_voice(voice, "act", "mp", "full")
		if not voice then
			-- NOTE(review): check_stems names the present/imperfect stem
			-- "unaugmented", so for those two tenses neither field below is
			-- ever set and the default is always "act" -- confirm intent.
			if not stems.mediopassive then
				voice = "act"
			elseif not stems.active then
				voice = "mp"
			else
				voice = "full"
			end
		end
	elseif tense == "aorist" or tense == "future" then
		check_voice(voice, "act", "mid", "pass", "am", "mp", "full")
		if not voice then
			if tense == "future" then
				if not stems.passive then
					voice = "am"
				-- check_stems names the future stems active_or_middle and
				-- passive; testing a nonexistent "active" field made this
				-- branch always win, so "full" could never be chosen.
				elseif not stems.active_or_middle then
					voice = "pass"
				else
					voice = "full"
				end
			else
				if not stems.augmented_passive then
					voice = "am"
				elseif not stems.augmented_active_or_middle then
					voice = "pass"
				else
					voice = "full"
				end
			end
		end
	elseif tense == "future perfect" then
		if not voice then
			voice = "mid"
		elseif voice ~= "mid" then
			error("Future perfect only has middle forms.")
		end
	end
	
	-- Reached only when no voice was given and the tense matched none of
	-- the branches above (for example, tense is nil).
	if not voice then
		mw.log("Default voices")
		return { "active", "middle", "mediopassive", "passive" }
	end
	
	return interpret_voice_code(voice, tense)
end

--- Parse the |form= parameter into a contraction flag and a voice code.
--
-- This would throw an error on many existing uses of {{grc-conj}},
-- because the current module does not validate the |form= parameter.
--
-- @param form string of words separated by non-letter characters, or nil
--   when the parameter was omitted
-- @return table { contracted = boolean, voice = voice code or nil }
-- Raises an error when a word is not recognized or when more than one voice
-- code is given.
local function parse_form(form)
	-- The parameter is optional; without this guard mw.text.split would be
	-- called with nil.
	if form == nil then
		return { contracted = false, voice = nil }
	end
	
	-- Could restrict separators to hyphens rather than accepting all
	-- non-letter characters.
	-- Splitting yields empty strings for leading or trailing separators (and
	-- for an empty input); drop them so they are not reported as invalid.
	local words = Array(mw.text.split(form, "%A+"))
		:filter(function (word) return word ~= "" end)
	local contracted = words:contains "con"
	local valid = Array("con", "act", "mid", "pass", "am", "mp", "full")
	local invalid = words:filter(function (word) return not valid:contains(word) end)
	if #invalid > 0 then
		local singular = #invalid == 1
		error("The following word" .. (singular and "" or "s")
			.. " in the form parameter " .. (singular and "was" or "were")
			.. " not recognized: "
			.. invalid:concat ", " .. "; choose from "
			.. valid:concat ", " .. ".")
	end
	local voices = words:filter(function (word) return word ~= "con" end)
	if #voices > 1 then
		error("Too many voices specified.")
	end
	return { contracted = contracted, voice = voices[1] }
end

--- Validate the arguments of a conjugation table.
--
-- Parses parameter 1 as the conjugation code, names and checks the stems in
-- parameters 2-5, parses |form=, determines the voices, rejects stems that
-- contain a spacing breve, and records tracking for a few named parameters.
-- As a side effect args.prefix is replaced by its NFD normalization.
--
-- NOTE(review): the function currently returns nothing; the computed form
-- and voices are not used further. It looks unfinished -- confirm.
--
-- @param args table of already trimmed template arguments
function export.make_table_from_args(args)
	mw.logObject(args)
	local ok, conjugation = pcall(parse_conjugation, args[1])
	if not ok then
		-- On failure the second pcall result is the error message.
		error("Parameter 1 is incorrect: " .. conjugation)
	end
	local stems = check_stems(conjugation, { unpack(args, 2, 5) })
	if args.prefix then
		args.prefix = mw.ustring.toNFD(args.prefix)
	end
	-- |form= is optional; only parse it when it was supplied.
	local form = args.form and parse_form(args.form) or { contracted = false }
	mw.logObject(conjugation, "conjugation")
	local voices = get_voices(form.voice, conjugation.tense, stems)
	
	for _, stem in pairs(stems) do
		if mw.ustring.find(stem, '˘') then
			-- Throw error because no current instances have spacing breve?
			error("Manual breve")
			-- require('Module:debug').track('grc-conj/manual-breve')
		end
	end
	-- args.prefix was already normalized above, so this loop only tracks.
	for _, k in ipairs { "dial1", "titleapp", "prefix" } do
		if args[k] then
			require('Module:debug').track('grc-conj/' .. k)
		end
	end
end

--- Entry point for invocation with a frame object.
-- Uses the frame's own arguments when they include `child`; otherwise uses
-- the arguments of the parent frame. The chosen arguments are trimmed and
-- handed to export.make_table_from_args.
-- @param frame frame object
-- @return whatever export.make_table_from_args returns
function export.make_table(frame)
	local source_args
	if frame.args.child then
		source_args = frame.args
	else
		source_args = frame:getParent().args
	end
	local trimmed = trim_args(source_args)
	return export.make_table_from_args(trimmed)
end

return export