Module:User:Benwing2/en-headword/angle-bracket

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Benwing2, for their own experimentation. Items in this module may be added and removed at Benwing2's discretion; do not rely on this module's stability.


local export = {}

-- Conjugate a verb specified in angle-bracket format and return its inflected forms.
--
-- The text in `par1` is parsed with `iut.parse_inflected_text()`; each `<...>` indicator spec in it
-- contains up to four comma-separated groups, in the order: third-person singular present (-s),
-- present participle (-ing), past tense (-ed) and past participle (-en). Each group consists of one
-- or more colon-separated alternatives, each optionally followed by bracketed qualifiers. Within an
-- alternative:
--   * an empty value or "+" requests the default form;
--   * "++" requests the form computed by the `compute_plusplus_s_form` /
--     `compute_double_last_cons_stem` callbacks;
--   * "-" suppresses the form;
--   * "~" is replaced with the lemma.
--
-- Parameters:
--   data: table with a `heads` list; if that list is empty, the (possibly linked) lemma(s) are
--     appended to it.
--   par1: the angle-bracket text to parse.
--   pagename: used as the lemma when a word's lemma is the empty string.
--   base_default_verb_forms: function(lemma) returning the default -s, -ing and -ed forms.
--   compute_plusplus_s_form: function(lemma, default_s_form) returning the -s form used for "++".
--   compute_double_last_cons_stem: function(lemma) returning the stem to which "ing"/"ed" is
--     appended for "++".
--
-- Returns five values: the lists of form objects for the -s, -ing, -ed and -en slots (each replaced
-- by `{{form = "-"}}` if the slot ended up with no forms), followed by `past_ptcs_given`, which is
-- always true here.
function export.conjugate_angle_bracket_spec(data, par1, pagename, base_default_verb_forms,
	compute_plusplus_s_form, compute_double_last_cons_stem)
	-- In the angle bracket format, we always copy the full past tense specs to the past participle
	-- specs if none of the latter are given, so act as if the past participle is always given.
	-- There is a separate check to see if the past tense and past participle are identical, in any case.
	local past_ptcs_given = true
	local iut = require("Module:inflection utilities")

	-- Return true if `specs` is a completely unspecified spec list, i.e. a single entry that has
	-- neither a form nor qualifiers. This is what fetch_specs() below produces for a missing or
	-- empty comma-separated group.
	local function is_unspecified(specs)
		return #specs == 1 and not specs[1].form and not specs[1].qualifiers
	end

	-- (1) Parse the indicator specs inside of angle brackets.

	-- Parse a single `<...>` indicator spec into a table with fields `s_specs`, `ing_specs`,
	-- `ed_specs` and `en_specs` (each a list of `{form = ..., qualifiers = ...}`) plus an empty
	-- `forms` table that is filled in during conjugation.
	local function parse_indicator_spec(angle_bracket_spec)
		local inside = angle_bracket_spec:match("^<(.*)>$")
		assert(inside)
		local segments = iut.parse_balanced_segment_run(inside, "[", "]")
		local comma_separated_groups = iut.split_alternating_runs(segments, ",")
		if #comma_separated_groups > 4 then
			error("Too many comma-separated parts in indicator spec: " .. angle_bracket_spec)
		end

		-- Collect the bracketed qualifiers from a run. The even-numbered elements are read as the
		-- qualifiers and the odd-numbered elements following them must be empty. Returns nil if
		-- there are no qualifiers.
		local function fetch_qualifiers(separated_group)
			local qualifiers
			for j = 2, #separated_group - 1, 2 do
				if separated_group[j + 1] ~= "" then
					error("Extraneous text after bracketed qualifiers: '" .. table.concat(separated_group) .. "'")
				end
				if not qualifiers then
					qualifiers = {}
				end
				table.insert(qualifiers, separated_group[j])
			end
			return qualifiers
		end

		-- Convert one comma-separated group into a list of specs, one per colon-separated
		-- alternative. A missing group yields a single empty spec.
		local function fetch_specs(comma_separated_group)
			if not comma_separated_group then
				return {{}}
			end
			local specs = {}

			local colon_separated_groups = iut.split_alternating_runs(comma_separated_group, ":")
			for _, colon_separated_group in ipairs(colon_separated_groups) do
				local form = colon_separated_group[1]
				if form == "*" or form == "++*" then
					error("* and ++* not allowed inside of indicator specs: " .. angle_bracket_spec)
				end
				if form == "" then
					form = nil
				end
				table.insert(specs, {form = form, qualifiers = fetch_qualifiers(colon_separated_group)})
			end
			return specs
		end

		local s_specs = fetch_specs(comma_separated_groups[1])
		local ing_specs = fetch_specs(comma_separated_groups[2])
		local ed_specs = fetch_specs(comma_separated_groups[3])
		local en_specs = fetch_specs(comma_separated_groups[4])

		-- If ++ is given for the -s form and BOTH the -ing and -ed specs are completely unspecified,
		-- propagate ++ to them. An explicitly given -ing or -ed spec must never be overwritten.
		if is_unspecified(ing_specs) and is_unspecified(ed_specs) then
			for _, spec in ipairs(s_specs) do
				if spec.form == "++" then
					ing_specs[1].form = "++"
					ed_specs[1].form = "++"
					break
				end
			end
		end

		return {
			forms = {},
			s_specs = s_specs,
			ing_specs = ing_specs,
			ed_specs = ed_specs,
			en_specs = en_specs,
		}
	end

	local parse_props = {
		parse_indicator_spec = parse_indicator_spec,
	}
	local alternant_multiword_spec = iut.parse_inflected_text(par1, parse_props)

	-- (2) Remove any links from the lemma, but remember the original form
	--     so we can use it below in the 'lemma_linked' form.

	iut.map_word_specs(alternant_multiword_spec, function(base)
		if base.lemma == "" then
			base.lemma = pagename
		end
		base.orig_lemma = base.lemma
		base.lemma = require("Module:links").remove_links(base.lemma)
	end)

	-- (3) Conjugate the verbs according to the indicator specs parsed above.

	local all_verb_slots = {
		lemma = "infinitive",
		lemma_linked = "infinitive",
		s_form = "3|s|pres",
		ing_form = "pres|ptcp",
		ed_form = "past",
		en_form = "past|ptcp",
	}

	-- Fill in `base.forms` for a single word spec from its parsed indicator specs.
	local function conjugate_verb(base)
		local def_s_form, def_ing_form, def_ed_form = base_default_verb_forms(base.lemma)

		-- Insert the forms for `slot` described by `specs`. `default_form` is used for a missing
		-- form or "+"; `canonicalize_plusplus` is called to compute the form for "++".
		local function process_specs(slot, specs, default_form, canonicalize_plusplus)
			for _, spec in ipairs(specs) do
				local form = spec.form
				if not form or form == "+" then
					form = default_form
				elseif form == "++" then
					form = canonicalize_plusplus()
				end
				-- If there's a ~ in the form, substitute it with the lemma,
				-- but make sure to first replace % in the lemma with %% so that
				-- it doesn't get interpreted as a capture replace expression.
				if form:find("~") then
					-- Assign to a var because gsub returns multiple values.
					local subbed_lemma = base.lemma:gsub("%%", "%%%%")
					form = form:gsub("~", subbed_lemma)
				end
				-- If the form is -, don't insert any forms, which will result
				-- in there being no overall forms (in fact it will be nil).
				-- We check for that down below and substitute a single "-" as
				-- the form, which in turn gets turned into special labels like
				-- "no present participle".
				if form ~= "-" then
					iut.insert_form(base.forms, slot, {form = form, footnotes = spec.qualifiers})
				end
			end
		end

		local function plusplus_ed_form()
			return compute_double_last_cons_stem(base.lemma) .. "ed"
		end

		process_specs("s_form", base.s_specs, def_s_form,
			function() return compute_plusplus_s_form(base.lemma, def_s_form) end)
		process_specs("ing_form", base.ing_specs, def_ing_form,
			function() return compute_double_last_cons_stem(base.lemma) .. "ing" end)
		process_specs("ed_form", base.ed_specs, def_ed_form, plusplus_ed_form)

		-- If the -en spec is completely missing, substitute the -ed spec in its entirety.
		-- Otherwise, if individual -en forms are missing or use +, we will substitute the
		-- default -ed form, as with the -ed spec.
		local en_specs = base.en_specs
		if is_unspecified(en_specs) then
			en_specs = base.ed_specs
		end

		process_specs("en_form", en_specs, def_ed_form, plusplus_ed_form)

		iut.insert_form(base.forms, "lemma", {form = base.lemma})
		-- Add linked version of lemma for use in head=. We write this in a general fashion in case
		-- there are multiple lemma forms (which isn't possible currently at this level, although it's
		-- possible overall using the ((...,...)) notation).
		iut.insert_forms(base.forms, "lemma_linked", iut.map_forms(base.forms.lemma, function(form)
			if form == base.lemma and base.orig_lemma:find("%[%[") then
				return base.orig_lemma
			else
				return form
			end
		end))
	end

	local inflect_props = {
		slot_table = all_verb_slots,
		inflect_word_spec = conjugate_verb,
	}
	iut.inflect_multiword_or_alternant_multiword_spec(alternant_multiword_spec, inflect_props)

	-- (4) Fetch the forms and put the conjugated lemmas in data.heads if not explicitly given.

	-- Return the list of form objects for `slot`, or a single "-" form if there are none.
	local function fetch_forms(slot)
		local forms = alternant_multiword_spec.forms[slot]
		-- See above. This should only occur if the user explicitly used -
		-- for a spec.
		if not forms or #forms == 0 then
			forms = {{form = "-"}}
		end
		return forms
	end

	local pres_3sgs = fetch_forms("s_form")
	local pres_ptcs = fetch_forms("ing_form")
	local pasts = fetch_forms("ed_form")
	local past_ptcs = fetch_forms("en_form")
	-- Use the "linked" form of the lemma as the head if no head= explicitly given.
	-- If no links in this form and it has multiple words, autolink the individual words.
	-- The user can override this using head=.
	if #data.heads == 0 then
		for _, lemma_obj in ipairs(alternant_multiword_spec.forms.lemma_linked) do
			local lemma = lemma_obj.form
			if not lemma:find("%[%[") then
				local m_headword = require("Module:headword")
				if m_headword.head_is_multiword(lemma) then
					lemma = m_headword.add_multiword_links(lemma)
				end
			end
			table.insert(data.heads, lemma)
		end
	end

	return pres_3sgs, pres_ptcs, pasts, past_ptcs, past_ptcs_given
end

return export