-- Module:he-headword/sandbox
--
-- From Wiktionary, the free dictionary
-- (Jump to navigation / Jump to search)


-- Shared headword formatter; export.show passes it the assembled data table.
local m_headword = require("Module:headword")
-- Hebrew helper routines used throughout this module (link generation, nikud handling).
local com = require("Module:he-common")

-- Table returned at the end of the file; holds the template-invocable entry point.
local export = {}
-- Per-part-of-speech handlers, keyed by the part-of-speech name given to export.show.
local pos_functions = {}

-- Language object looked up for the code "he".
local lang = require("Module:languages").getByCode("he")

-- Collects the names of a numbered parameter series that are present in `args`.
--
-- Starting with `argprefix` itself and continuing with `argprefix .. 2`,
-- `argprefix .. 3`, ..., each name is appended to the result for as long as
-- `args` holds a non-nil, non-empty value under it. The scan stops at the
-- first gap, so parameters that follow a missing number are ignored.
--
-- @param args       table of template arguments
-- @param argprefix  base parameter name (e.g. "head")
-- @return array of parameter NAMES (not values), in series order
local function get_params_list(args, argprefix)
	local names = {}
	-- These must be locals: a bare assignment would create/overwrite the
	-- globals `i` and `term` on every call.
	local i = 1
	local term = argprefix

	while args[term] ~= nil and args[term] ~= "" do
		table.insert(names, term)

		i = i + 1
		term = argprefix .. i
	end
	return names
end

-- Looks up every name listed in `params` inside `args` and gathers the values
-- in the same order. A name whose value is nil adds nothing, so the returned
-- array never contains holes.
local function make_params_array(args, params)
	local values = {}
	local idx = 1
	local name = params[idx]
	while name ~= nil do
		values[#values + 1] = args[name]
		idx = idx + 1
		name = params[idx]
	end
	return values
end

-- Builds one inflection entry from the template arguments.
--
-- The entry is a table carrying `label` (= name), `accel` and, in its array
-- part, one generated link per parameter that was supplied. When none of the
-- parameters is present, nil is returned explicitly so that callers can pass
-- the result straight to table.insert.
--
-- args:         template arguments
-- name:         label for the inflection
-- params:       a parameter name, a table of names, or (when `list` is
--               truthy) a prefix that is expanded through get_params_list
-- list:         whether `params` is a numbered-series prefix
-- accel:        stored unchanged in the entry's `accel` field
-- is_construct: when exactly `true`, links are produced with
--               com.gen_link_ending_with_makaf instead of com.gen_link
--
-- For each name, the plain parameter and its "wv" / "dwv" companions are read.
local function maybe_get_inflection(args, name, params, list, accel, is_construct)
	local names = params
	if list then
		names = get_params_list(args, names)
	end
	if type(names) ~= "table" then
		names = {names}
	end

	local entry = {}
	for _, key in ipairs(names) do
		local plain = args[key]
		local with_vowels = args[key .. "wv"]
		local defective = args[key .. "dwv"]

		if plain or with_vowels or defective then
			entry.label = name
			entry.accel = accel
			if is_construct == true then
				table.insert(entry, com.gen_link_ending_with_makaf(plain, with_vowels, defective))
			else
				table.insert(entry, com.gen_link(plain, with_vowels, defective))
			end
		end

		-- Tracking only; neither branch affects the returned entry.
		if plain == "" or with_vowels == "" or defective == "" then
			require('Module:debug').track("he-noun/empty-form-param")
		elseif plain ~= nil and with_vowels ~= nil then
			require('Module:debug').track("he-noun/param-and-paramwv")
		end
	end

	if next(entry) == nil then
		return nil
	end
	return entry
end

-- The main entry point.
-- This is the only function that can be invoked from a template.
--
-- frame.args[1] (from the #invoke itself) names the part of speech; an error
-- is raised when it is missing. All other parameters are read from the parent
-- (template) frame. The function assembles the data table for
-- Module:headword, lets the matching pos_functions handler add genders,
-- inflections and categories, and returns the formatted headword followed by
-- an optional pattern link.
function export.show(frame)
	-- Copy the template's arguments, dropping empty strings so that an empty
	-- parameter behaves like an absent one below.
	local args = {}
	for k, v in pairs(frame:getParent().args) do
		if v ~= "" then
			args[k] = v
		end
	end
	local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	
	-- An |auto= given on the #invoke acts as a default when the template call has none.
	if frame.args["auto"] and (args["auto"] == "" or not args["auto"]) then args["auto"] = frame.args["auto"] end
	
	-- heads / translits are filled from the numbered series head, head2, ... and tr, tr2, ...
	local data = {lang = lang, pos_category = poscat, categories = {}, heads = make_params_array(args, get_params_list(args, "head")), translits = make_params_array(args, get_params_list(args, "tr")), genders = {}, inflections = {}}
	local head = args["head"]; if head == "" then head = nil end; data.heads[1] = head
	
	data.sort_key = args["sort"]; if data.sort_key == "" then data.sort_key = nil end
	
	-- A supplied sort key gets "a" and the page's subpage text appended.
	if data.sort_key then
		data.sort_key = data.sort_key .. "a" .. mw.title.getCurrentTitle().subpageText
	end
	
	-- wv falls back to the page's subpage text when |wv= is absent; dwv has no fallback.
	local wv = args["wv"] or mw.title.getCurrentTitle().subpageText
	local dwv = args["dwv"]
	
	-- Without an explicit head, display wv, optionally followed by a linked dwv.
	if not data.heads[1] then
		data.no_redundant_head_cat = true
		if dwv then
			data.heads[1] = wv .. " / " .. '<span class="inflection-table">' .. "[[" .. dwv .. "]]" .. "</span>" -- not really an inflection table but for the black links
		else
			data.heads[1] = wv
		end
	end
	
	-- Tracking: |dwv= that already contains a wikilink.
	if dwv and mw.ustring.match(dwv, "%[%[") then
		require('Module:debug').track("he-noun/link-in-dwv")
	end
	
	-- Tracking: none of |wv=, |dwv=, |head= was supplied.
	if not (args["wv"] or args["dwv"] or args["head"]) then
		require('Module:debug').track("he-noun/no-nikud")
	end
	
	-- Categorise terms whose wv contains the character ׳ or ״.
	if mw.ustring.match(wv, "׳") then
		table.insert(data.categories, "Hebrew terms spelled with ׳")
	end
	
	if mw.ustring.match(wv, "״") then
		table.insert(data.categories, "Hebrew terms spelled with ״")
	end

	-- Character classes for the "unchangeable begedkefet" test below: one of
	-- the begedkefet letters, directly followed by one of the listed vowel
	-- marks or by ו / א.
	local begedkefet = "בגדכפת"
	local vowels = "ְֱֲֳִֵֶַָֹֻ"
	local begedkefet_regex = "[" .. begedkefet .. "]"
	local vowels_with_mater_lectionis_regex = "[" .. vowels .. "וא" .. "]"

	-- The test runs on the nikud-normalised dwv (preferred) or wv; the
	-- category can also be forced with |unchangeablebegedkefet=1.
	local fixed_nikud = com.fix_nikud(args["dwv"] or args["wv"] or "")
	if args["unchangeablebegedkefet"] == "1" 
		or mw.ustring.match(fixed_nikud, "^" .. begedkefet_regex .. vowels_with_mater_lectionis_regex) then
		table.insert(data.categories, "Hebrew terms beginning with an unchangeable begedkefet letter")
	end

	-- |pat= adds a pattern category and a small appendix link, except for verbs.
	local pattern_link = ""
	if args["pat"] and args["pat"] ~= "" and poscat ~= "verbs" then
		table.insert(data.categories, "Hebrew terms in the pattern " .. args["pat"])
		local patfmt = require("Module:links").full_link({lang = lang, alt = args["pat"], tr = "-"})
		pattern_link = " <small>[[Appendix:Hebrew patterns/" .. args["pat"] .. "|[pattern: " .. patfmt .. "] ]]</small>"
	end

	-- Part-of-speech specific processing, if a handler is registered for poscat.
	if pos_functions[poscat] then
		pos_functions[poscat](args, data, wv)
	end

	-- The pausal form is added after the handler's inflections, for every part of speech.
	table.insert(data.inflections, maybe_get_inflection(args, "Biblical Hebrew [[w:Pausa|pausal form]]", "pausal", false))
	
	-- The parentheses keep only the first return value of full_headword.
	return (m_headword.full_headword(data)) .. pattern_link
end

-- Maps each Hebrew final-form letter to its regular (non-final) counterpart.
local final_to_nonfinal = {
	["ך"] = "כ", ["ם"] = "מ", ["ן"] = "נ", ["ף"] = "פ", ["ץ"] = "צ",
}

-- Replacement callback for stemify's second substitution: swaps a final-form
-- letter for its regular form (any other letter is kept as is) and re-attaches
-- whatever followed it.
local function stemify_helper(letter, rest)
	local regular = final_to_nonfinal[letter]
	if not regular then
		regular = letter
	end
	return regular .. rest
end

-- Turns a word into a stem that suffixes can be attached to.
-- A nil/false input is returned unchanged. Otherwise two end-anchored
-- substitutions are applied in order:
--   1. the mark at the start of the first pattern (presumably a final sheva —
--      confirm) is dropped when only non-letter characters follow it;
--   2. a final-form letter followed only by non-letter characters is replaced
--      by its regular form via stemify_helper.
local function stemify(x)
	if not x then
		return x
	end
	local without_final_mark = mw.ustring.gsub(x, "ְ([^א-תﭏ]*)$", "%1")
	local stem = mw.ustring.gsub(without_final_mark, "([ךםןףץ])([^א-תﭏ]*)$", stemify_helper)
	return stem
end

-- Attaches the feminine ת suffix to a vocalised stem.
--
-- The nikud-normalised stem is split into: everything before the last vowel
-- mark, that vowel mark (one of the four listed in the pattern), the final
-- letter (optionally carrying one of the two dots in the pattern's [..]?
-- class) and an optional trailing dagesh. The vocalisation of the result
-- depends on the final letter:
--   * י  – only accepted with the specific vowel and dagesh checked below;
--          the dagesh is dropped and ת appended;
--   * א  – the last vowel is replaced and ת appended;
--   * ה / ח / ע – the same mark is placed before and after the final letter;
--   * anything else – likewise, with a different mark.
-- Any stem that does not fit raises an error asking to check the vocalisation.
local function gen_fem_t(stemwv)
	local bad_vocalization = "Error attaching feminine ת suffix, please check vocalization."
	local gutturals = {["ה"] = true, ["ח"] = true, ["ע"] = true}

	local stub, vowel, final, dagesh = mw.ustring.match(com.fix_nikud(stemwv), "^(.*)([ְֲִָ])([א-ת][ׁׂ]?)(ּ?)$")
	if not stub then
		error(bad_vocalization)
	end

	local result
	if final == "י" then
		if not (vowel == "ִ" and dagesh == "ּ") then
			error(bad_vocalization)
		end
		result = stub .. vowel .. final .. "ת"
	elseif vowel == "ִ" or dagesh ~= "" then
		error(bad_vocalization)
	elseif final == "א" then
		result = stub .. "ֵ" .. final .. "ת"
	elseif gutturals[final] then
		result = stub .. "ַ" .. final .. "ַ" .. "ת"
	else
		result = stub .. "ֶ" .. final .. "ֶ" .. "ת"
	end
	return result
end

-- Headword handler for adjectives.
--
-- Without |auto=: only the explicitly supplied forms are shown (feminine from
-- f / f2, masculine plural from mp / mp2, feminine plural from the fp series).
--
-- With |auto=: feminine, masculine-plural and feminine-plural forms that were
-- not supplied are generated from a stem. The stem comes from |stem=,
-- |stemwv=, |stemdwv= or, failing those, from the page title and |wv= / |dwv=
-- passed through stemify. Passing "-" for f, mp or fp suppresses that form;
-- passing "ת" for f or f2 (with no wv/dwv) requests the ת-suffixed feminine.
--
-- args: template arguments; data: headword data table whose `inflections`
-- list is appended to.
pos_functions["adjectives"] = function(args, data)
	-- table.insert(genders, "m")
	if args["auto"] == nil or args["auto"] == "" then
		-- Use only provided inflections
		table.insert(data.inflections, maybe_get_inflection(args, "feminine", {"f", "f2"}, false))
		table.insert(data.inflections, maybe_get_inflection(args, "masculine plural", {"mp", "mp2"}, false))
		table.insert(data.inflections, maybe_get_inflection(args, "feminine plural", "fp", true))
	else
		require("Module:debug").track("he-adj/auto")
		-- Automatically generate inflections
		local stem = args["stem"]
		local stemwv = args["stemwv"]
		local stemdwv = args["stemdwv"]
		-- `e` marks stems whose ending was stripped below; it selects the
		-- alternative ת-feminine ending further down.
		local e = false
		if not stem then
			if stemwv then
				stem = com.remove_nikud(stemwv)
			else
				stem = stemify(mw.title.getCurrentTitle().subpageText)
				if not stemdwv then
					stemwv = stemify(args["wv"])
					stemdwv = stemify(args["dwv"])
				end
				local fstemwv = stemwv and com.fix_nikud(stemwv)
				local fstemdwv = stemdwv and com.fix_nikud(stemdwv)
				-- Every stem variant that exists must carry the ending. The
				-- first clause used to read `stem ~= nil or ...`, which never
				-- checked the ending of a present stem and would have indexed
				-- nil for an absent one; it now mirrors the other two clauses.
				if (fstemwv or fstemdwv) and (stem == nil or stem:match("ה$")) and (fstemwv == nil or fstemwv:match("ֶה$")) and (fstemdwv == nil or fstemdwv:match("ֶה$")) then
					e = true
					stem = stem and stem:gsub("ה$", "")
					stemwv = fstemwv and fstemwv:gsub("ֶה$", "")
					stemdwv = fstemdwv and fstemdwv:gsub("ֶה$", "")
				end
			end
		end
		stem, stemwv, stemdwv = com.process_wv_triad(stem, stemwv, stemdwv)
		local f, fwv, fdwv = com.process_wv_triad(args["f"], args["fwv"], args["fdwv"])
		local f2, f2wv, f2dwv = com.process_wv_triad(args["f2"], args["f2wv"], args["f2dwv"])
		local mp, mpwv, mpdwv = com.process_wv_triad(args["mp"], args["mpwv"], args["mpdwv"])
		local mp2, mp2wv, mp2dwv = com.process_wv_triad(args["mp2"], args["mp2wv"], args["mp2dwv"])
		local fp, fpwv, fpdwv = com.process_wv_triad(args["fp"], args["fpwv"], args["fpdwv"])
		if f ~= "-" then
			if not (f or fwv or fdwv) then
				-- Nothing supplied: default feminine ending.
				f = stem and (stem .. "ה")
				fwv = stemwv and (stemwv .. "ָה")
				fdwv = stemdwv and (stemdwv .. "ָה")
			elseif f == "ת" and not (fwv or fdwv) then
				-- Shorthand: build the ת-suffixed feminine from the stem.
				f = stem and (stem .. (e and "ית" or "ת"))
				fwv = stemwv and (e and (stemwv .. "ֵית") or gen_fem_t(stemwv))
				fdwv = stemdwv and (e and (stemdwv .. "ֵית") or gen_fem_t(stemdwv))
			end
			if f2 == "ת" and not (f2wv or f2dwv) then
				f2 = stem and (stem .. (e and "ית" or "ת"))
				f2wv = stemwv and (e and (stemwv .. "ֵית") or gen_fem_t(stemwv))
				f2dwv = stemdwv and (e and (stemdwv .. "ֵית") or gen_fem_t(stemdwv))
			end
			-- `local` keeps this entry out of the global environment.
			local tbl = {label = "feminine", com.gen_link(f, fwv, fdwv)}
			if f2 or f2wv or f2dwv then
				table.insert(tbl, com.gen_link(f2, f2wv, f2dwv))
			end
			table.insert(data.inflections, tbl)
		end
		if mp ~= "-" then
			if not (mp or mpwv or mpdwv) then
				mp = stem and (stem .. "ים")
				mpwv = stemwv and (stemwv .. "ִים")
				mpdwv = stemdwv and (stemdwv .. "ִים")
			end
			local tbl = {label = "masculine plural", com.gen_link(mp, mpwv, mpdwv)}
			if mp2 or mp2wv or mp2dwv then
				table.insert(tbl, com.gen_link(mp2, mp2wv, mp2dwv))
			end
			table.insert(data.inflections, tbl)
		end
		if fp ~= "-" then
			if not (fp or fpwv or fpdwv) then
				fp = stem and (stem .. "ות")
				fpwv = stemwv and (stemwv .. "וֹת")
				fpdwv = stemdwv and (stemdwv .. "וֹת")
			end
			local tbl = {label = "feminine plural", com.gen_link(fp, fpwv, fpdwv)}
			table.insert(data.inflections, tbl)
		end
	end
end

-- Headword handler for determiners.
-- For a masculine gender (m, m-p) the genders g / g2 are shown together with
-- the feminine counterpart (read from the numbered f series); for a feminine
-- gender (f, f-p) the masculine counterpart is read from the single m
-- parameter. Any other value of |g= leaves data untouched.
pos_functions["determiners"] = function(args, data, wv)
	local g = args["g"]
	local label, param, is_series

	if g == "m" or g == "m-p" then
		label, param, is_series = "feminine counterpart", "f", true
	elseif g == "f" or g == "f-p" then
		label, param, is_series = "masculine counterpart", "m", false
	else
		return
	end

	table.insert(data.genders, g)
	table.insert(data.genders, args["g2"])
	table.insert(data.inflections, maybe_get_inflection(args, label, param, is_series))
end

-- Checks a gender parameter value and raises an error for anything that is
-- not nil, empty, or one of the accepted codes.
--
-- gender: the value to check
-- param:  the parameter name, used only in the error message
--
-- A value that mentions both "m" and "f" gets a dedicated message pointing
-- the editor to |g2=.
local function validate_gender(gender, param)
	local accepted = {
		[""] = true,
		["m"] = true, ["f"] = true,
		["m-d"] = true, ["f-d"] = true,
		["m-p"] = true, ["f-p"] = true,
		["-"] = true, -- for אַל "naught"
	}
	if gender == nil or accepted[gender] then
		return
	end

	if gender:find("m") and gender:find("f") then
		error("To indicate two genders, use |g2=; for instance, for masculine or feminine, |g=m|g2=f")
	end
	error("Gender in |" .. param .. "=" .. gender .. " is invalid; choose from m, f, m-d, f-d, m-p, f-p")
end

-- Headword handler for nouns.
--
-- Validates |g= / |g2=, then fills in, in this order: genders (plus the
-- dualia/pluralia tantum or gender-request categories), dual forms, plural
-- forms and their categories, construct forms (singular, dual, plural) and
-- their "missing" categories, masculine/feminine counterparts, and finally the
-- gender-based categories that depend on the lemma's last letter.
--
-- args: template arguments; data: headword data table; wv: the vocalised
-- lemma, used as the acceleration lemma and in the ־יים plural check.
pos_functions["nouns"] = function(args, data, wv)
	local g, g2 = args["g"], args["g2"]
	local pl, cons, plcons = args["pl"], args["cons"], args["plcons"]

	validate_gender(g, "g")
	validate_gender(g2, "g2")
	local g_given = g ~= nil and g ~= ""
	if g2 ~= nil and g2 ~= "" and not g_given then
		error("|g= parameter required if |g2= is present")
	end

	-- Genders and number-restricted (tantum) categories.
	local tantum_category = {
		["m-d"] = "Hebrew dualia tantum",
		["f-d"] = "Hebrew dualia tantum",
		["m-p"] = "Hebrew pluralia tantum",
		["f-p"] = "Hebrew pluralia tantum",
	}
	if g == "m" or g == "f" then
		table.insert(data.genders, g)
		table.insert(data.genders, g2)
		if args["sg"] ~= nil then
			require('Module:debug').track("he-noun/sg-for-head")
		end
	elseif g_given and tantum_category[g] then
		table.insert(data.genders, g)
		table.insert(data.genders, g2)
		table.insert(data.categories, tantum_category[g])
		table.insert(data.inflections, maybe_get_inflection(args, "singular indefinite", "sg", false))
	elseif not g_given then
		table.insert(data.genders, "?")
		table.insert(data.categories, "Requests for gender in Hebrew entries")
	end

	-- Dual forms rare enough that there's no need for without/missing categoristion
	table.insert(data.inflections, maybe_get_inflection(args, "dual indefinite", "dual", true))

	-- Plural: "-" means there is none; absent means it still has to be added.
	if pl == "-" then
		table.insert(data.inflections, {label = "no plural forms"})
		table.insert(data.categories, "Hebrew nouns without plural forms")
	elseif pl == "" or pl == nil then
		table.insert(data.categories, "Hebrew noun entries missing plural forms")
	else
		table.insert(data.inflections, maybe_get_inflection(args, "plural indefinite", "pl", true, {form = "p", lemma = wv}))

		local pl_plain = com.remove_nikud(pl)
		local last_two = mw.ustring.sub(pl_plain, -2, -1)
		if g == "m" and last_two == "ות" then
			table.insert(data.categories, "Hebrew masculine nouns with plurals ending in ־ות")
		elseif g == "f" and last_two == "ים" then
			table.insert(data.categories, "Hebrew feminine nouns with plurals ending in ־ים")
		end

		if mw.ustring.sub(pl_plain, -3, -1) == "יים" then
			-- Only counts when the lemma (brackets stripped) does not itself end in י.
			local lemma_plain = mw.ustring.gsub(com.remove_nikud(wv), "[%[%]]", "")
			if mw.ustring.sub(lemma_plain, -1, -1) ~= "י" then
				table.insert(data.categories, "Hebrew nouns with plurals ending in ־יים")
			end
		end
	end

	-- Construct forms: "-" switches the whole group off.
	if cons == "-" then
		table.insert(data.inflections, {label = "no construct forms"})
		table.insert(data.categories, "Hebrew nouns without construct forms")
	else
		local cons_missing = cons == "" or cons == nil
		if cons_missing then
			table.insert(data.categories, "Hebrew noun entries missing singular construct forms")
		else
			table.insert(data.inflections, maybe_get_inflection(args, "singular construct", "cons", true, {form = "s|cons", lemma = wv}, true))
		end

		table.insert(data.inflections, maybe_get_inflection(args, "dual construct", "dualcons", true , nil, true))

		local plcons_missing = plcons == "" or plcons == nil
		if plcons_missing and pl ~= "-" then
			table.insert(data.categories, "Hebrew noun entries missing plural construct forms")
		else
			table.insert(data.inflections, maybe_get_inflection(args, "plural construct", "plcons", true, {form = "p|cons", lemma = wv}, true))
		end
	end

	table.insert(data.inflections, maybe_get_inflection(args, "masculine counterpart", "m", true))
	table.insert(data.inflections, maybe_get_inflection(args, "feminine counterpart", "f", true))

	-- Gender categories; masculine nouns are split by the lemma's last letter.
	if g == "m" then
		local lemma_plain = com.remove_nikud(args["wv"] or args["dwv"] or mw.title.getCurrentTitle().subpageText)
		local last_letter = mw.ustring.sub(lemma_plain, -1)
		if last_letter == "ת" then
			table.insert(data.categories, "Hebrew masculine nouns ending in ־ת")
		elseif last_letter == "ה" then
			table.insert(data.categories, "Hebrew masculine nouns ending in ־ה")
		else
			table.insert(data.categories, "Hebrew masculine nouns")
		end
	elseif g == "f" then
		table.insert(data.categories, "Hebrew feminine nouns")
	end
end

-- Headword handler for numerals.
-- Adds the construct, dual, plural, feminine and feminine-construct forms (each
-- read from a numbered parameter series) in that order. The gender shown is
-- "m"; "f" is added as well when no separate feminine form was supplied.
pos_functions["numerals"] = function(args, data)
	-- Each row: label, parameter prefix, is_construct (absent = not a construct form).
	local forms = {
		{"construct", "cons", true},
		{"dual", "dual"},
		{"plural", "pl"},
		{"feminine", "f"},
		{"feminine construct", "fcons", true},
	}

	local feminine
	for _, form in ipairs(forms) do
		local inflection = maybe_get_inflection(args, form[1], form[2], true, nil, form[3])
		if form[2] == "f" then
			feminine = inflection
		end
		table.insert(data.inflections, inflection)
	end

	table.insert(data.genders, "m")
	if not feminine then
		table.insert(data.genders, "f")
	end
end

-- Headword handler for pronouns.
-- Shows g / g2 when |g= is one of m, f, m-p, f-p; otherwise merely tracks
-- pages that pass |n=.
pos_functions["pronouns"] = function(args, data)
	local g = args["g"]
	local shown = {["m"] = true, ["f"] = true, ["m-p"] = true, ["f-p"] = true}

	if g ~= nil and shown[g] then
		table.insert(data.genders, g)
		table.insert(data.genders, args["g2"])
	elseif args["n"] then
		require('Module:debug').track("he-pron/param-n")
	end
end

-- Headword handler for proper nouns.
-- A supplied plural (anything but "-" or empty) is shown and categorised by its
-- ending. Genders are then handled: m / f and the -d / -p variants are shown
-- (the latter with a tantum category), a missing gender adds a request
-- category, "-" adds the "without gender" category, and any other value shows
-- "?".
pos_functions["proper nouns"] = function(args, data, wv)
	local g, pl = args["g"], args["pl"]

	if pl and pl ~= "-" and pl ~= "" then
		table.insert(data.inflections, maybe_get_inflection(args, "plural indefinite", "pl", true, {form = "p", lemma = wv}))

		local pl_plain = com.remove_nikud(pl)
		local last_two = mw.ustring.sub(pl_plain, -2, -1)
		if g == "m" and last_two == "ות" then
			table.insert(data.categories, "Hebrew masculine nouns with plurals ending in ־ות")
		elseif g == "f" and last_two == "ים" then
			table.insert(data.categories, "Hebrew feminine nouns with plurals ending in ־ים")
		end

		if mw.ustring.sub(pl_plain, -3, -1) == "יים" then
			-- Only counts when the lemma (brackets stripped) does not itself end in י.
			local lemma_plain = mw.ustring.gsub(com.remove_nikud(wv), "[%[%]]", "")
			if mw.ustring.sub(lemma_plain, -1, -1) ~= "י" then
				table.insert(data.categories, "Hebrew nouns with plurals ending in ־יים")
			end
		end
	end

	local tantum_category = {
		["m-d"] = "Hebrew dualia tantum",
		["f-d"] = "Hebrew dualia tantum",
		["m-p"] = "Hebrew pluralia tantum",
		["f-p"] = "Hebrew pluralia tantum",
	}
	if g == "m" or g == "f" then
		table.insert(data.genders, g)
		table.insert(data.genders, args["g2"])
	elseif g ~= nil and tantum_category[g] then
		table.insert(data.genders, g)
		table.insert(data.genders, args["g2"])
		table.insert(data.categories, tantum_category[g])
	elseif g == "" or g == nil then
		table.insert(data.categories, "Requests for gender in Hebrew entries")
	elseif g == "-" then --surnames don't really have gender
		table.insert(data.categories, "Hebrew proper nouns without gender")
	else
		table.insert(data.genders, "?")
	end
end

-- Binyan codes accepted as the first positional parameter of the verb
-- template, mapped to the name used in labels and category names.
local binyanim = {
	pa = "pa'al",
	pi = "pi'el",
	hif = "hif'il",
	hit = "hitpa'el",
	huf = "huf'al",
	pu = "pu'al",
	nif = "nif'al",
	["hitpu'al"] = "hitpu'al", -- should this be included?
}

-- For each root position (פ, ע, ל): the set of letters that make a verb
-- "weak" in that position and therefore earn it a category.
local weakroots = {}
do
	local letters_by_position = {
		["פ"] = {"א", "ה", "ח", "י", "ל", "נ", "ע"},
		["ע"] = {"א", "ה", "ו", "ח", "י", "ע", "ר"},
		["ל"] = {"א", "ה", "ח", "נ", "ע"},
	}
	for position, letters in pairs(letters_by_position) do
		local set = {}
		for _, letter in ipairs(letters) do
			set[letter] = true
		end
		weakroots[position] = set
	end
end

-- Returns the weak-root category name for root position `pos`, or nil when
-- args[pos] is absent or not a weak letter for that position. When `binyan`
-- is given it is included in the category name.
local function maybe_get_weakcat(args, pos, binyan)
	local letter = args[pos]
	if not weakroots[pos][letter] then
		return nil -- perhaps track error if args[pos] ~= ""
	end

	local parts = {"Hebrew ", pos, "״", letter}
	if binyan then
		parts[#parts + 1] = " "
		parts[#parts + 1] = binyan
	end
	parts[#parts + 1] = " verbs"
	return table.concat(parts)
end

-- Headword handler for verbs.
-- The first positional parameter selects the binyan: a known code adds the
-- construction label and its category, "-" adds nothing, and anything else
-- (including nothing) puts the entry in the "lacking binyan" category.
-- Weak-root categories are then added for the three root positions, followed
-- by the conjugated forms read from their numbered parameter series. The
-- headword itself is glossed as the third-singular masculine past.
pos_functions["verbs"] = function(args, data)
	local code = args[1]
	local binyan = binyanim[code]

	if binyan then
		table.insert(data.inflections, { label="[[Appendix:Hebrew verbs|" .. binyan .. " construction]]" })
		table.insert(data.categories, "Hebrew " .. binyan .. " verbs")
	elseif code ~= "-" then
		table.insert(data.categories, "Hebrew verbs lacking binyan")
	end

	for _, position in ipairs({"פ", "ע", "ל"}) do
		table.insert(data.categories, maybe_get_weakcat(args, position, binyan))
	end

	-- Each row: label, parameter prefix.
	local forms = {
		{"infinitive", "inf"},
		{"present", "pres"},
		{"future", "fut"},
		{"imperative", "imp"},
		{"passive participle", "passp"},
		{"passive counterpart", "pass"},
		{"active counterpart", "act"},
	}
	for _, form in ipairs(forms) do
		table.insert(data.inflections, maybe_get_inflection(args, form[1], form[2], true))
	end

	data.gloss = "third-singular masculine past"
end

-- Hand the module table (containing `show`) back to whoever loads this module.
return export