Module:frp-IPA

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Implements {{frp-IPA}}, {{frp-alt}} and {{frp-altform}}. Locality data is stored in Module:frp-IPA/data.


local m_link = require("Module:links")
local frp = require("Module:languages").getByCode("frp")
local m_IPA = require("Module:IPA")
local format_accent = require("Module:accent qualifier").format_qualifiers
local locs_data = mw.loadData("Module:frp-IPA/data")
local get_section = require("Module:utilities").get_section
local m_temp = require("Module:template parser")
local m_form_of = require("Module:form of")

-- Table returned by this module; the entry points and the two lists below are
-- attached to it.
local export = {}

-- Label data for Franco-Provençal. In this file it is only consulted by
-- parse_locs when building an "Invalid code" error message.
local accents = mw.loadData("Module:labels/data/lang/frp")

-- Lect codes accepted as template parameter names. gather_data and the show
-- functions iterate this list with ipairs, so its order is the output order.
local codes = { "nch", "onch", "vds", "ovds", "frb", "ofrb", "vls", "ovls", "vld",
	"pms", "apl", "svy", "osvy", "gnv", "ognv", "cmt", "bgs", "dph", "odph",
	"dmb", "brs", "obrs", "mcn", "bjl", "objl",
	"lyn", "olyn", "frz", "ofrz" }

-- Set of parameter names accepted by check_groups: every lect code plus the
-- three reference-related parameters.
local allowed_codes = require("Module:table/listToSet")(codes)
allowed_codes.sources = true
allowed_codes.cf = true
allowed_codes.note = true
export.codes = codes

-- Abbreviations accepted by parse as the trailing reference of a term.
local references = { "AIS", "ALF", "ALLy", "ALP", "ALV", "APV", "DFF", "DGL", "DLG", 
	"DPB", "DSV", "LPT", "MHN", "PhL", "PVA", "SSV", "TPh", "VFC", "VIV" }
export.references = references
-- Set form of `references`, used by parse for validation.
local allowed_refs = require("Module:table/listToSet")(references)

-- Builds a replacement callback for string.gsub: a non-empty value is stored
-- as data[field], and the matched text is always replaced by the empty string.
local function record(data, field)
	local function store(captured)
		if captured == "" then
			return ""
		end
		data[field] = captured
		return ""
	end
	return store
end

-- Parses one term written as "[pron]<ortho>(loc1,loc2,...)ref".
--   text:         the term specification
--   loc:          lect code, only used in error messages
--   require_pron: when truthy, a term without pronunciation is an error
-- Returns a table with the optional fields pron, ortho, locs and ref. The
-- pronunciation keeps its delimiters (slashes or square brackets) because the
-- pattern has no capture; ortho and locs are stored without their brackets.
local function parse(text, loc, require_pron)
	local parsed = {}
	-- The pronunciation may sit anywhere in the term; the other parts are
	-- anchored and therefore consumed from the left in a fixed order.
	local rest = text:gsub("[/%[][^/%[%]]+[/%]]", record(parsed, "pron"))
	if require_pron and not parsed.pron then
		error("Missing pronunciation in " .. loc)
	end
	rest = rest:gsub("^<([^<>]+)>", record(parsed, "ortho"))
	rest = rest:gsub("^%(([^%(%)]+)%)", record(parsed, "locs"))
	if rest == "" then
		return parsed
	end
	-- Whatever remains must be a known reference, which starts with a capital.
	if not rest:match("^[A-Z]") then
		error("Invalid input (" .. loc .. "): " .. rest)
	end
	if not allowed_refs[rest] then
		error("Invalid reference: " .. rest)
	end
	parsed.ref = rest
	return parsed
end

-- Returns the result of Module:links' full_link for `text` as a
-- Franco-Provençal term.
local function link(text)
	local term_data = { lang = frp, term = text }
	return m_link.full_link(term_data)
end

-- Returns the linked spelling wrapped in angle brackets with a leading space,
-- or the empty string when no spelling is given.
local function format_ortho(text)
	if not text then
		return ""
	end
	return " ⟨" .. link(text) .. "⟩"
end

-- Obtains data (i.e. location name, Wikipedia link, sortkey value) from one or more location
-- codes (separated by comma).
--   group: lect code whose locality table is looked up in the locality data
--   text:  comma-separated location codes
-- Returns a list of { wiki = ..., text = ..., sortkey = ... } tables sorted by sortkey.
-- Raises an error when the lect has no locality data or a code is unknown.
local function parse_locs(group, text)
	local group_data = locs_data[group]
	if not group_data then
		-- Without this guard the lookup below fails with "attempt to index a nil value".
		error("No locality data for accent code: " .. group)
	end
	local locs = mw.text.split(text, ",", true)
	for i, code in ipairs(locs) do
		local data = group_data[code]
		if not data then
			-- This file does not establish that accents[group] is a string; fall back
			-- to the raw code so that building the message cannot itself fail.
			local label = accents[group]
			if type(label) ~= "string" then
				label = group
			end
			error("Invalid code " .. code .. " for " .. label)
		end
		if type(data) == "string" then -- allow redirect once
			code = data
			data = group_data[data]
		end
		-- An entry without a `wiki` field links to the page named like the location.
		local wiki = (type(data) == "table") and data.wiki or code
		-- The parens around <frp:makeSortKey> keep only its first return value.
		-- TODO: Perhaps, in the sortkey, articles should be ignored, with an expression
		-- such as "^L[ea]?s?[ ']".
		locs[i] = {
			wiki = wiki,
			text = code,
			sortkey = (frp:makeSortKey(code))
		}
	end
	table.sort(locs, function(a, b) return a.sortkey < b.sortkey end)
	return locs
end

-- Format locations into links to Wikipedia.
--   locs:     list of tables with `wiki` (link target) and `text` (label), or nil
--   no_paren: when truthy, the list is returned bare; otherwise it is wrapped
--             as " (...)"
-- Returns the comma-separated links, or the empty string when `locs` is nil.
-- The input list is left untouched: the links are collected in a new table so
-- that callers can keep using the location tables afterwards.
local function format_locs(locs, no_paren)
	if not locs then
		return ""
	end
	local links = {}
	for i, loc in ipairs(locs) do
		links[i] = "[[w:" .. loc.wiki .. "|" .. loc.text .. "]]"
	end
	return (no_paren and "%s" or " (%s)"):format(table.concat(links, ", "))
end

-- Validates the parameter names of a template call: positional parameters are
-- rejected, and every named parameter must be an allowed code or a key of the
-- locality data. Raises an error on the first offending name.
local function check_groups(args)
	for name in pairs(args) do
		if type(name) == "number" then
			error("This template does not accept numbered arguments.")
		elseif not (allowed_codes[name] or locs_data[name]) then
			error("Invalid accent code: " .. name)
		end
	end
end

-- Parses the non-empty lect parameters of `args` into `data`.
-- For each lect code, data[code] becomes the list of parsed terms (the
-- parameter is split on ";"), flagged with has_pron / has_ortho when at least
-- one term carries a pronunciation / a spelling.
-- `require_pron` is handed to parse unchanged.
local function gather_data(data, args, require_pron)
	for _, code in ipairs(codes) do
		local raw = args[code]
		if raw and raw ~= "" then
			local entries = {}
			data[code] = entries
			for spec in mw.text.gsplit(raw, ";", true) do
				local parsed = parse(spec, code, require_pron)
				entries[#entries + 1] = parsed
				if parsed.pron then
					entries.has_pron = true
				end
				if parsed.ortho then
					entries.has_ortho = true
				end
			end
		end
	end
end

-- Tells whether `args` has no entry at all.
-- `next` doesn't seem to work on the argument table, hence the pairs loop.
local function is_empty(args)
	local empty = true
	for _ in pairs(args) do
		empty = false
		break
	end
	return empty
end

-- Fetches the arguments of the first {{frp-alt}} call found in the
-- "Alternative forms" section of the Franco-Provençal entry on `pagename`.
-- Returns the argument table of that template call.
-- Raises an error when the page name is missing, the page cannot be read, or
-- no such template call is found.
local function scrape(pagename)
	if not pagename or pagename == "" then
		error("Scraping failed: no page name given.")
	end
	-- mw.title.new yields nil for an invalid title and getContent yields nil for
	-- a page that does not exist; report both instead of indexing nil.
	local title = mw.title.new(pagename)
	local content = title and title:getContent()
	if not content then
		error("Scraping failed: cannot read page " .. pagename .. ".")
	end
	content = get_section(content, { "Franco-Provençal", "Alternative forms" }, 3)
	if content then
		for t_name, t_args in m_temp.findTemplates(content) do
			if t_name == "frp-alt" then
				return t_args
			end
		end
	end
	error("Scraping failed.")
end

-- Files `term` under each of its locations inside `target`.
-- `target` is both a list of location tables (array part) and a map from a
-- location's text to the list of terms attested there. A term without
-- locations is filed under the key "" and adds nothing to the array part.
-- `region_name` is the lect code used to resolve the location codes.
local function record_locs(target, term, region_name)
	local function file_under(loc)
		local key = ""
		if loc and loc.text then
			key = loc.text
		end
		local bucket = target[key]
		if not bucket then
			-- First term seen for this location: register the location itself.
			if loc then
				table.insert(target, loc)
			end
			bucket = {}
			target[key] = bucket
		end
		table.insert(bucket, term)
	end
	if term.locs then
		for _, loc in ipairs(parse_locs(region_name, term.locs)) do
			file_under(loc)
		end
	else
		file_under(nil)
	end
end

-- Orders the many transcriptions of a single wide region and minimises the lines needed
-- whenever possible.
-- The data in the input is the list of locations and the terms for each location:
-- the array part holds location tables (with `text` and `sortkey`), and
-- data[<location text>] holds the list of terms of that location. The array part
-- is sorted in place.
-- Returns a list of groups, each a list of locations sharing one display form,
-- followed by "" when terms without location exist. With no location at all the
-- result is {""}.
local function order_data(data)
	if #data == 0 then
		return {""}
	end
	-- STEP 1: Order alphabetically by location name.
	table.sort(data, function(a, b) return a.sortkey < b.sortkey end)
	-- STEP 2: Merge adjacent locations with the same content.
	-- Only one form is allowed: a location with multiple forms is not grouped.
	-- Declared local so that the helper does not leak into the global environment.
	local function shares_content(loc1, loc2)
		local terms1, terms2 = data[loc1.text], data[loc2.text]
		return #terms1 == 1 and #terms2 == 1
			and terms1[1].display == terms2[1].display
	end
	local groups = {}
	local current_group = {data[1]}
	for i = 2, #data do
		if shares_content(current_group[1], data[i]) then
			table.insert(current_group, data[i])
		else
			table.insert(groups, current_group)
			current_group = {data[i]}
		end
	end
	table.insert(groups, current_group)
	-- Terms without a location go last, represented by the empty string.
	if data[""] then
		table.insert(groups, "")
	end
	return groups
end

-- gather the unique <field>s (e.g. pronunciations) for the collapsed view
--   groups: list as returned by order_data (lists of locations, or "")
--   data:   map from location text (or "") to the terms of that location
--   field:  name of the term field to collect
-- Returns a plain list of the distinct values in first-seen order. For the
-- field "pron" each value is wrapped as { pron = value }.
-- The bookkeeping set is kept apart from the result so the returned list has
-- no stray keys, and nothing is defined in the global environment.
local function uniques(groups, data, field)
	local result = {}
	local seen = {}
	for _, locs in ipairs(groups) do
		-- Every location of a group shares the same terms, so the first one suffices.
		local key = locs ~= "" and locs[1].text or ""
		for _, term in ipairs(data[key]) do
			local val = term[field]
			if val ~= nil and not seen[val] then
				seen[val] = true
				result[#result + 1] = field == "pron" and { pron = val } or val
			end
		end
	end
	return result
end

-- Handles {{frp-IPA}}, the ==Pronunciation== section.
-- Reads the arguments of the parent frame; when there are none, the arguments
-- of the {{frp-alt}} call on the current page are used instead and terms
-- without pronunciation are tolerated.
-- Returns the wikitext of a vsSwitcher box (one summary line per lect when
-- collapsed, one line per location group when expanded) followed by the
-- category link.
function export.show(frame)
	local args = frame:getParent().args
	local require_pron = not is_empty(args)
	if not require_pron then
		args = scrape(mw.loadData("Module:headword/data").page.full_raw_pagename)
	end
	check_groups(args)
	local data = {}
	gather_data(data, args, require_pron)

	local summary_lines = {}
	local detail_lines = {}
	local ref_used = {}
	-- go through each lect in order
	for _, code in ipairs(codes) do
		local entries = data[code]
		if entries and entries.has_pron then
			-- Build the display form of every term and file it under each of its
			-- locations.
			local processed = {}
			for _, term in ipairs(entries) do
				if term.pron then
					local ref_mark = ""
					if term.ref then
						ref_mark = "<sup>" .. term.ref .. "</sup>"
					end
					term.display = m_IPA.format_IPA(frp, term.pron)
						.. format_ortho(term.ortho)
						.. ref_mark
					record_locs(processed, term, code)
					if term.ref then
						ref_used[term.ref] = true
					end
				end
			end
			-- Adjacent locations sharing a single form end up in one group, e.g.
			-- groups = { {Loc A}, {Loc B, Loc C}, {Loc D} } while processed maps
			-- each location text to its terms.
			local groups = order_data(processed)
			local accent = format_accent(frp, {code})
			table.insert(summary_lines, '* ' .. accent .. ': '
				.. m_IPA.format_IPA_multiple(frp, uniques(groups, processed, "pron"), nil, 1))
			table.insert(detail_lines, '* ' .. accent)
			for _, locs in ipairs(groups) do
				local key = locs ~= "" and locs[1].text or ""
				local displays = {}
				for i, term in ipairs(processed[key]) do
					displays[i] = term.display
				end
				local line = "** " .. table.concat(displays, ", ")
				if locs ~= "" then
					line = line .. format_locs(locs)
				end
				table.insert(detail_lines, line)
			end
		end
	end
	-- reference
	local ref_text = ""
	if args.sources or args.cf or args.note then
		ref_text = require("Module:frp-IPA/ref").make_ref(args.sources, args.cf, args.note, ref_used)
	end
	local collapsed = table.concat(summary_lines, '\n')
	local expanded = table.concat(detail_lines, '\n')
	local sortkey = frp:makeSortKey(mw.loadData("Module:headword/data").pagename)
	return '<div class="vsSwitcher" data-toggle-category="pronunciations">'
		.. '<span class="vsToggleElement"></span>'
		.. '<div class="vsShow">\n'
		.. collapsed
		.. '\n</div><div class="vsHide">\n'
		.. expanded
		.. ref_text
		.. '</div></div>[[Category:Franco-Provençal terms with IPA pronunciation|'
		.. sortkey .. ']]'
end

-- Handles {{frp-alt}}, the ==Alternative forms== section.
-- Returns the wikitext of a vsSwitcher box: collapsed, a single bullet listing
-- the distinct spellings per lect; expanded, one line per location group.
-- Only `sources` is passed on to the reference builder here, although `cf` or
-- `note` alone also trigger it.
-- TODO: A good portion of this function is repeated from <export.show>, perhaps it can be avoided.
function export.show_alt(frame)
	local args = frame:getParent().args
	check_groups(args)
	local data = {}
	gather_data(data, args)

	local summary_parts = {}
	local detail_lines = {}
	local ref_used = {}
	-- go through each lect in order
	for _, code in ipairs(codes) do
		local entries = data[code]
		if entries and entries.has_ortho then
			local processed = {}
			for _, term in ipairs(entries) do
				if term.ortho then
					local ref_mark = ""
					if term.ref then
						ref_mark = "<sup>" .. term.ref .. "</sup>"
					end
					term.link = link(term.ortho)
					term.display = term.link .. ref_mark
					record_locs(processed, term, code)
					if term.ref then
						ref_used[term.ref] = true
					end
				end
			end
			local groups = order_data(processed)
			local accent = format_accent(frp, {code})
			table.insert(summary_parts, accent .. ' '
				.. table.concat(uniques(groups, processed, "link"), ", "))
			table.insert(detail_lines, '* ' .. accent)
			for _, locs in ipairs(groups) do
				local key = locs ~= "" and locs[1].text or ""
				local displays = {}
				for i, term in ipairs(processed[key]) do
					displays[i] = term.display
				end
				local line = "** " .. table.concat(displays, ", ")
				if locs ~= "" then
					line = line .. format_locs(locs)
				end
				table.insert(detail_lines, line)
			end
		end
	end
	-- reference
	local ref_text = ""
	if args.sources or args.cf or args.note then
		ref_text = require("Module:frp-IPA/ref").make_ref(args.sources, nil, nil, ref_used)
	end
	local collapsed = table.concat(summary_parts, '; ')
	local expanded = table.concat(detail_lines, '\n')
	return '<div class="vsSwitcher" data-toggle-category="alternative forms">'
		.. '<span class="vsToggleElement"></span>'
		.. '<div class="vsShow">\n'
		.. '* ' .. collapsed
		.. '\n</div><div class="vsHide">\n'
		.. expanded
		.. ref_text
		.. '</div></div>'
end

-- Handles {{frp-altform}}, the alternative form's entry definition line.
-- Parameters of the parent frame:
--   1        = lemma (required); its {{frp-alt}} call is scraped for localities
--   2        = gloss (optional)
--   pagename = overrides the current page name; only accepted in the Template
--              namespace
-- Returns the "Alternative form of" definition line, followed by the
-- locations in which the current spelling is documented, if any.
-- Raises an error on unknown parameters, on a missing lemma, and when the
-- spelling is not listed at all in the lemma's {{frp-alt}} call.
function export.show_altform(frame)
	local args = frame:getParent().args
	local page_data = mw.loadData("Module:headword/data")
	local allowed_para = { [1] = true, [2] = true }
	if page_data.page.namespace == "Template" then
		allowed_para.pagename = true
	end
	for key in pairs(args) do
		if not allowed_para[key] then
			error("Parameter " .. key .. " not used by this template.")
		end
	end
	-- Fail early with a clear message: a missing or blank lemma would otherwise
	-- surface as an obscure error while loading the lemma's page.
	local lemma = args[1]
	if not lemma or mw.text.trim(lemma) == "" then
		error("Parameter 1 (the lemma) is required.")
	end
	local pagename = args.pagename or page_data.pagename
	local lemma_args = scrape(lemma)
	local data = {}
	gather_data(data, lemma_args)
	local locs = {}
	local loc_text
	local seen
	for _, code in ipairs(codes) do
		if data[code] then
			-- Collect the locations of every term spelled like this page.
			local sublocs = {}
			for _, term in ipairs(data[code]) do
				if term.ortho == pagename then
					seen = true
					if term.locs then
						for _, subloc in ipairs(parse_locs(code, term.locs)) do
							table.insert(sublocs, subloc)
						end
					end
				end
			end
			if #sublocs > 0 then
				-- Several terms may contribute locations, so sort the merged list.
				table.sort(sublocs, function(x, y) return x.sortkey < y.sortkey end)
				table.insert(locs, format_locs(sublocs, true))
			end
		end
	end
	if not seen then
		error("Gathering localities failed. Please specify manually.")
	end
	if #locs > 0 then
		loc_text = " documented in the following location(s): " .. table.concat(locs, "; ")
	end
	return m_form_of.format_form_of {
		lemma_face = "term",
		lemmas = { { lang = frp, term = lemma, gloss = args[2] } },
		text = "Alternative form of",
		posttext = loc_text
	}
end

return export