Module:doublet table

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Used to generate tables in lists of doublets, such as Appendix:English doublets. On pages with a single language, this module is much faster than a large number of {{l}} templates; on Appendix:Romance doublets, it is only a little faster, because of the number of languages in each table.


local export = {}

local fun = require "Module:fun"
local getByCode = require "Module:languages".getByCode
local get_by_name = require "Module:languages".getByCanonicalName

local auto_subtable = require "Module:auto-subtable"

-- U+0304 COMBINING MACRON, U+0306 COMBINING BREVE
local function remove_macron_breve(text)
	-- Decompose first so that the diacritics become separate combining
	-- characters, then delete the byte sequences for U+0304 and U+0306.
	-- Both gsub results (the string and the replacement count) are returned.
	local decomposed = mw.ustring.toNFD(text)
	return decomposed:gsub("\204[\132\134]", "")
end

-- U+0304 COMBINING MACRON, U+0306 COMBINING BREVE, U+0308 COMBINING DIAERESIS
local function remove_macron_breve_diaeresis(text)
	-- Work on the decomposed form so the three combining marks show up as
	-- stand-alone two-byte sequences that a byte pattern can delete.
	-- Both gsub results (the string and the replacement count) are returned.
	local decomposed = mw.ustring.toNFD(text)
	return decomposed:gsub("\204[\132\134\136]", "")
end

-- U+0304 COMBINING MACRON, U+0307 COMBINING DOT ABOVE, U+0323 COMBINING DOT BELOW
local function remove_macron_acute_dot(text)
	-- NOTE(review): despite the name, the pattern below does not touch the
	-- combining acute accent; it deletes the macron, dot above and dot below
	-- (bytes CC 84, CC 87, CC A3). Confirm which of name and pattern is intended.
	-- Both gsub results (the string and the replacement count) are returned.
	local decomposed = mw.ustring.toNFD(text)
	return decomposed:gsub("\204[\132\135\163]", "")
end

-- Diacritic-stripping functions used to build entry names, keyed by
-- language code.
local make_entry_name = {}
make_entry_name.ang = remove_macron_acute_dot
make_entry_name.grc = remove_macron_breve
make_entry_name.la = remove_macron_breve_diaeresis

-- Identity method: ignores the receiver and hands back every remaining
-- argument unchanged.
local function empty_method(_self, ...)
	return ...
end

local langs = require "Module:languages/cache"

-- Wraps word in curly double quotation marks.
local function quote(word)
	local opening, closing = "“", "”"
	return opening .. word .. closing
end

-- Returns word without its leading and trailing whitespace.
local function trim(word)
	local core = string.match(word, "%s*(.-)%s*$")
	return core
end

-- Lua pattern for one strip marker as recognised by this module:
-- DEL, apostrophe, any text (shortest match), apostrophe, DEL.
local strip_marker = "\127'.-'\127"

-- Deletes every strip marker from text.
-- Returns only the cleaned string. The gsub call is parenthesised so that the
-- replacement count is not passed on as an extra argument when this function
-- is the last argument of another call, e.g. get_by_name(remove_strip_markers(key)).
local function remove_strip_markers(text)
	return (string.gsub(text, strip_marker, ""))
end

-- Keep in sync with tag_text function in [[Module:script utilities]].
local function tag(text, lang_code, sc_code)
	-- The script code becomes the CSS class and the language code becomes
	-- the lang attribute of the wrapping span.
	local opening_tag = '<span class="' .. sc_code .. '" lang="' .. lang_code
			.. '">'
	return opening_tag .. text .. "</span>"
end

-- Keep in sync with anchor function in [[Module:senseid]]. Probably the sense ID format won't
-- ever have to change again though.
local function make_anchor(lang, sense_id)
	-- Without a sense ID nothing is appended to the link anchor.
	-- The lang parameter is accepted but not used here.
	if not sense_id then
		return ""
	end
	return ": " .. mw.uri.encode(sense_id, "WIKI")
end

-- Builds a tagged wikilink to a page in the Reconstruction namespace.
-- The link target is "Reconstruction:<language name>/<entry name>", the
-- anchor is the language name plus whatever make_anchor returns, and
-- link_text is the displayed text.
local function make_reconstructed_link(word, link_text, lang, sc_code, sense_id)
	local target = '[[Reconstruction:' .. lang:getCanonicalName() .. "/"
		.. (lang:makeEntryName(word))
	local anchor = "#" .. lang:getCanonicalName() .. make_anchor(lang, sense_id)
	local wikilink = target .. anchor .. "|" .. link_text .. "]]"
	return tag(wikilink, lang:getCode(), sc_code)
end

-- Builds a tagged wikilink to a main-namespace entry. The target is the
-- entry name of word, the anchor is the language name plus whatever
-- make_anchor returns, and link_text is the displayed text.
local function make_mainspace_link(word, link_text, lang, sc_code, sense_id)
	local target = '[[' .. (lang:makeEntryName(word))
	local anchor = "#" .. lang:getCanonicalName() .. make_anchor(lang, sense_id)
	local wikilink = target .. anchor .. "|" .. link_text .. "]]"
	return tag(wikilink, lang:getCode(), sc_code)
end

-- defined below
local format_qualifier

-- Turns one term specification into a language-tagged wikilink.
--
-- Call forms:
--   fast_link(word, lang, has_qualifier)  -> linked text
--   fast_link(lang, has_qualifier)        -> function(word) that links word
--
-- Recognised syntax in word:
--   ""              -> "&mdash;"
--   strip marker    -> only the text before the first marker is linked
--   "a and b"       -> both sides are linked separately
--   "[[a]] text"    -> only the bracketed parts are linked
--   "entry|text"    -> link to entry, displaying text
--   leading "*"     -> link into the Reconstruction namespace
--   "entry$sense"   -> sense is passed on as the sense ID of the link
--
-- Unless has_qualifier is truthy, the result of format_qualifier("", ...) is
-- appended after the link.
-- Raises an error for a malformed piped link or a malformed sense ID.
-- Some branches return the two results of gsub rather than a single string.
local function fast_link(word, lang, has_qualifier)
	if word == "" then
		return "&mdash;"
	end
	
	-- Curried call: the first argument is the language object and the second
	-- the has_qualifier flag; return a function suitable as a gsub callback.
	if type(lang) ~= "table" then
		has_qualifier = lang
		lang = word
		return function(word)
			return fast_link(word, lang, has_qualifier)
		end
	end
	
	if word:find("\127") then
		return word:gsub(
				"^(.-)( ?" .. strip_marker .. ")",
				function (text, space_and_strip_marker)
					return fast_link(text, lang, has_qualifier) .. space_and_strip_marker
				end)
	end
	
	if word:find(" and ") then
		return word:gsub(
			"(.+) and (.+)",
			function (first, second)
				return fast_link(first, lang, has_qualifier) .. " and " .. fast_link(second, lang, has_qualifier)
			end)
	end
	
	if word:find("[[", 1, true) then
		return word:gsub("%[%[([^%]]+)%]%]", fast_link(lang, has_qualifier))
	end
	
	local script = lang:findBestScript(word):getCode()
	
	local link_func = make_mainspace_link
	local entry, link_text, sense_id
	if word:find("|") then
		entry, link_text = word:match("^([^|]+)|(.+)$")
		
		if not entry then
			error("Malformed piped link: " .. word)
		end
		
		if link_text:find("^%*") then
			link_func = make_reconstructed_link
		end
	else
		entry = word
		
		if entry:find("^%*") then
			link_text = entry
			link_func = make_reconstructed_link
		end
	end
	
	-- The asterisk marks a reconstruction; it is not part of the page name.
	entry = entry:gsub("^%*", "")
	
	-- moule$mussel -> entry "moule" with sense ID "mussel"
	if entry:find("%$") then
		-- Keep the unsplit text: entry is nil after a failed match, so it
		-- cannot be used to build the error message.
		local entry_with_sense_id = entry
		entry, sense_id = entry_with_sense_id:match("([^$]+)$(.+)$")
		
		if not entry then
			error("Malformed sense id: " .. entry_with_sense_id)
		end
		
		-- NOTE(review): this replaces any piped link text and drops the
		-- leading asterisk from the displayed text; confirm that is intended.
		link_text = entry
	end
	
	link_text = link_text or entry
	
	return link_func(remove_strip_markers(entry), link_text, lang, script, sense_id)
		.. (not has_qualifier and format_qualifier("", link_text, lang) or "")
end

-- Like string.gsub, but returns nil when the pattern matched nowhere and
-- otherwise only the resulting string.
local function gsub_or_nil(str, pattern, repl)
	local replaced, hits = string.gsub(str, pattern, repl)
	if hits ~= 0 then
		return replaced
	end
	return nil
end

-- Cache of language objects keyed by language name.
-- Auto-creates language objects on first access:
-- langs_by_name.English -> language object for English.
-- Strip markers in the key are ignored for the lookup, but the object is
-- cached under the key exactly as given.
-- Raises an error when no language has the requested name.
local langs_by_name = setmetatable({}, {
	__index = function(self, key)
		-- Parenthesised so that only the cleaned name, not the gsub count,
		-- is passed to get_by_name.
		local lang = get_by_name((remove_strip_markers(key)))
			or error("No language with name " .. tostring(key) .. ".")
		
		-- make_entry_name is keyed by language code, whereas key is a
		-- language name, so the lookup has to go through the code.
		local strip_diacritics = make_entry_name[lang:getCode()]
		if strip_diacritics then
			lang.makeEntryName = function(self, text)
				return (strip_diacritics(text))
			end
		elseif lang._rawData.entry_name == nil then
			-- No entry-name data for this language: entry names are
			-- returned unchanged.
			lang.makeEntryName = empty_method
		end
		self[key] = lang
		return lang
	end
})

-- Replaces every [[language name]] in text with the result of
-- makeWikipediaLink() on the language object of that name.
-- Returns both gsub results (the string and the replacement count).
local function link_language_names(text)
	local function wikipedia_link(name)
		return langs_by_name[name]:makeWikipediaLink()
	end
	return text:gsub("%[%[([^%]]+)%]%]", wikipedia_link)
end

-- Control characters that stand in for punctuation that must not be treated
-- as a separator.
local comma_placeholder, semicolon_placeholder = "\1", "\2"

-- Two-way lookup: punctuation -> placeholder and placeholder -> punctuation.
local placeholder_convert = {
	[","] = comma_placeholder,
	[";"] = semicolon_placeholder,
	[comma_placeholder] = ",",
	[semicolon_placeholder] = ";",
}

-- Deletes everything between (and including) each pair of DEL characters.
-- Returns only the cleaned string.
-- This shadows the function of the same name defined earlier in the file.
local function remove_strip_markers(text)
	local without_markers = text:gsub("\127(.-)\127", "")
	return without_markers
end

-- Keep in sync with tag_translit function in [[Module:script utilities]].
-- Transliterates text (with strip markers removed) using lang and wraps the
-- result in a span. Returns nothing when there is no transliteration.
local function format_tr(text, lang)
	local plain_text = remove_strip_markers(text)
	local tr = (lang:transliterate(plain_text))
	if not tr then
		return
	end
	return '<span class="tr Latn" lang="' .. lang:getCode() .. '-Latn">' .. tr .. '</span>'
end

-- Formats a qualifier that contains no strip markers.
-- An empty qualifier yields " (transliteration)" or "". Otherwise the result
-- is "(transliteration, items)", where each comma- or semicolon-separated
-- item is italicised, except that text in straight double quotes is treated
-- as a gloss: it is put in curly quotes and left unitalicised.
local function format_plain_qualifier(qualifier_content, link_text, lang)
	local tr = format_tr(link_text, lang)
	if qualifier_content == "" then
		return tr and ' (' .. tr .. ")" or ""
	end
	
	local prefix = tr and tr .. ", " or ""
	-- Restore commas that were replaced by the placeholder before the cell
	-- was split into terms.
	local items = qualifier_content:gsub(comma_placeholder, placeholder_convert)
	if qualifier_content:find('"') then
		items = items
			-- Hide separators inside glosses so that a gloss stays one item.
			:gsub(
				'"([^"]+)"',
				function (gloss)
					return quote((gloss:gsub("[,;]", placeholder_convert)))
				end)
			:gsub(
				"[^,;]+",
				function (item)
					if item:find("“") then
						return item
					else
						return "''" .. item .. "''"
					end
				end)
			:gsub("[" .. comma_placeholder .. semicolon_placeholder .. "]", placeholder_convert)
	else
		items = items:gsub("[^,;]+", "''%1''")
	end
	
	return "(" .. prefix .. items .. ")"
end

-- declared as local above
-- Formats the parenthesised qualifier that follows a linked term.
--   qualifier_content: text between the parentheses ("" for none)
--   link_text, lang:   passed to format_tr to obtain a transliteration
-- Strip markers in qualifier_content are kept verbatim: they are swapped for
-- a placeholder while the surrounding text is formatted and put back
-- afterwards, so that the quotes and apostrophes inside a marker are never
-- mistaken for glosses or italics markup.
function format_qualifier(qualifier_content, link_text, lang)
	if not qualifier_content:find("\127") then
		return format_plain_qualifier(qualifier_content, link_text, lang)
	end
	
	local marker_placeholder = "\3"
	local markers = {}
	local protected = qualifier_content:gsub(
		strip_marker,
		function (marker)
			markers[#markers + 1] = marker
			return marker_placeholder
		end)
	
	local formatted = format_plain_qualifier(protected, link_text, lang)
	
	-- Formatting never reorders text, so the markers go back in the order in
	-- which they were taken out.
	local index = 0
	return (formatted:gsub(
		marker_placeholder,
		function ()
			index = index + 1
			return markers[index]
		end))
end

-- Converts the raw text of one table cell into linked terms with formatted
-- qualifiers. A missing cell yields "". Terms may be separated by commas or
-- slashes; a term may be followed by " (qualifier)".
-- Raises an error when a cell contains "(" but no well-formed qualifier.
local function link_and_make_qualifier(cell, lang)
	if not cell then
		return ""
	end
	
	if cell:find(",") then
		-- Swap commas inside parentheses for a placeholder, so that commas
		-- in qualifiers are not confused with commas that separate words.
		local protected = cell:gsub(
			"%([^%)]+%)",
			function (qualifier)
				return qualifier:gsub(",", placeholder_convert)
			end)
		return protected:gsub(
			"([^,]+)(,? ?)",
			function (term, separator)
				return link_and_make_qualifier(term, lang) .. separator
			end)
	end
	
	if cell:find("/") then
		return cell:gsub(
			"([^/]+)( ?/? ?)",
			function (term, separator)
				return link_and_make_qualifier(term, lang) .. separator
			end)
	end
	
	if cell:find("%(") then
		local function link_with_qualifier(link_text, qualifier_content)
			local qualifier = link_language_names(
				format_qualifier(qualifier_content, link_text, lang))
			return fast_link(link_text, lang, true) .. " " .. qualifier
		end
		
		return gsub_or_nil(cell, "(.-) %(([^%)]+)%)", link_with_qualifier)
			or error("Ill-formed qualifier in " .. quote(cell) .. " for " .. lang:getCanonicalName() .. ".")
	end
	
	return fast_link(cell, lang, false)
end

-- Links the terms in text: if text contains "[[", only the bracketed parts
-- are linked; otherwise every comma-separated piece is linked.
-- Returns both gsub results (the string and the replacement count).
local function link_term_list(text, lang)
	local has_brackets = text:find("[[", 1, true)
	local term_pattern
	if has_brackets then
		term_pattern = "%[%[([^%]]+)%]%]"
	else
		term_pattern = "([^,]+)"
	end
	return text:gsub(term_pattern, fast_link(lang))
end

-- Remove piped links
-- Replaces wikilinks in text with their displayed text:
-- [[target|display]] becomes "display" and [[target]] becomes "target".
-- Text without "[[" is returned unchanged. Always returns a single string.
local function remove_links(text)
	if not text:find("[[", 1, true) then
		return text
	end
	
	-- Piped links first, so that the second pattern only sees plain links.
	local without_piped = text:gsub("%[%[[^|%]]+|([^%]]+)%]%]", "%1")
	return (without_piped:gsub("%[%[([^%]]+)%]%]", "%1"))
end

-- Builds the wikitext of a sortable doublet table.
--   rows:                  rows[1] holds the header cells, rows[2] onwards
--                          the data cells
--   column_number_to_lang: language code for each column
--   arg_count:             total number of cells including the header row
--                          (nil is treated as 0)
-- When there are more than 10 rows, the header row is repeated at the bottom.
-- Raises an error when a column's language code yields no language object.
local function make_table(rows, column_number_to_lang, arg_count)
	local output = {}
	for i, header_cell in ipairs(rows[1]) do
		output[i] = ("! %s"):format(header_cell)
	end
	
	local row_count_for_headers_at_bottom = 10
	local headers_at_bottom = #rows > row_count_for_headers_at_bottom
	
	-- Must be built before the table-start line is inserted into output.
	local headers
	if headers_at_bottom then
		headers = "|-\n" .. table.concat(output, "\n")
	end
	
	table.insert(output, 1, '{| class="wikitable sortable"')
	
	local column_count = #column_number_to_lang
	-- Declared local so that the counters do not leak into the global
	-- environment.
	local column_number = column_count
	local row_number = 1 -- Header is row 1.
	table.insert(output, "|-")
	for _ = column_count + 1, arg_count or 0 do
		if column_number == column_count then
			column_number = 1
			row_number = row_number + 1
			table.insert(output, "|-")
		else
			column_number = column_number + 1
		end
		
		local lang_code = column_number_to_lang[column_number]
		local lang = langs[lang_code]
			or error("No language with code " .. tostring(lang_code) .. ".")
		local content = rows[row_number][column_number]
		-- The sort value is the entry name of the text before the first
		-- comma or parenthesis, with link markup removed.
		table.insert(output, ('| data-sort-value="%s" | %s'):format(
			remove_links((lang:makeEntryName(content:match("[^,(]+") or content))),
			(link_and_make_qualifier(content, lang))))
	end
	
	if headers_at_bottom then
		table.insert(output, headers)
	end
	
	table.insert(output, "|}")
	
	return table.concat(output, "\n")
end

-- Template entry point: builds a doublet table from the arguments of the
-- calling template.
--   |langs=  language codes, one per column, separated by commas and/or spaces
--   |1=, |2=, ...  cells, row by row; the first row holds the headers
-- Returns nothing when |langs= is absent.
function export.doublet_table(frame)
	local args = frame:getParent().args
	
	if not args.langs then
		return
	end
	
	local column_number_to_lang = {}
	local column_count = 0
	for lang in args.langs:gmatch("[^, ]+") do
		column_count = column_count + 1
		column_number_to_lang[column_count] = lang
	end
	
	local rows = auto_subtable()
	
	local column_number = 0
	local row_number = 1
	-- Starts at 0 so that make_table receives a number even when the
	-- template was called without any positional arguments.
	local arg_count = 0
	for i, arg in ipairs(args) do
		arg_count = i
		
		if column_number == column_count then
			column_number = 1
			row_number = row_number + 1
		else
			column_number = column_number + 1
		end
		
		rows[row_number][column_number] = trim(arg)
	end
	
	return make_table(rows, column_number_to_lang, arg_count)
end

-- Builds the wikitext of a table for a family of languages.
--   rows[1][1]     table caption; when it has the form "<Name> <terms>",
--                  the terms are linked in the language called <Name>
--   rows[1][2..]   column headers
--   rows[i][1]     language name of row i, or "See also" for a full-width
--                  separator row
--   rows[i][2..]   cells of row i
-- When there are more than 10 rows, the header row (without the caption) is
-- repeated at the bottom.
local function make_family_doublet_table(rows, column_count)
	local Array = require "Module:array"
	local output = Array()
	
	-- The header cells are collected separately from the caption, so that
	-- the copy placed at the bottom of the table does not repeat the "|+"
	-- caption line.
	local header_lines = {}
	for i, header_cell in ipairs(rows[1]) do
		if i == 1 then
			-- Assumes the language name is a single capitalized word.
			-- Works in [[Appendix:Romance doublets]].
			header_cell = header_cell:gsub(
				"^(%u%l+) (.+)$",
				function (language_name, terms)
					return language_name .. " "
						.. link_term_list(terms, langs_by_name[language_name])
				end)
			output:insert(("|+ %s"):format(header_cell))
			-- Empty header cell above the column of language names.
			header_lines[#header_lines + 1] = "!"
		else
			header_lines[#header_lines + 1] = ("! %s"):format(header_cell)
		end
	end
	
	for _, header_line in ipairs(header_lines) do
		output:insert(header_line)
	end
	
	local row_count_for_headers_at_bottom = 10
	local headers_at_bottom = #rows > row_count_for_headers_at_bottom
	
	local headers
	if headers_at_bottom then
		headers = "|-\n" .. table.concat(header_lines, "\n")
	end
	
	output:insert(1, '{| class="wikitable"')
	
	for i = 2, #rows do
		if rows[i][1] == "See also" then
			output:insert(('|-\n| colspan="%d" style="text-align: center; font-weight: bold;" | See also')
				:format(column_count))
		else
			local lang = langs_by_name[rows[i][1]]
			
			output:insert("|-\n! " .. rows[i][1]) -- link language name?
			
			for j = 2, column_count do
				output:insert("| " .. link_and_make_qualifier(rows[i][j], lang))
			end
		end
	end
	
	if headers_at_bottom then
		output:insert(headers)
	end
	
	output:insert("|}")
	
	return output:concat("\n")
end

-- Copies sequential numbered arguments and counts them (while ignoring "See also").
-- Returns a trimmed copy of the sequential arguments, followed by the number
-- of them that are not "See also".
local function process_args(args)
	local copied, cell_count = {}, 0
	for index, value in ipairs(args) do
		local trimmed = trim(value)
		copied[index] = trimmed
		if trimmed ~= "See also" then
			cell_count = cell_count + 1
		end
	end
	return copied, cell_count
end

-- Template entry point: builds a family doublet table from the arguments of
-- the calling template.
--   |cols=  number of columns (required)
--   |1=, |2=, ...  cells, row by row; a cell "See also" occupies a row of
--                  its own and is not counted as a cell
-- Raises an error when |cols= is missing or not a number, or when the number
-- of cells is not a multiple of the number of columns.
function export.family_doublets(frame)
	local parent_args = frame:getParent().args
	local column_count = tonumber(parent_args.cols)
		or error("Provide the number of columns in the |cols= parameter.")
	
	-- Only the sequential parameters survive this step.
	local cells, cell_count = process_args(parent_args)
	if cell_count % column_count ~= 0 then
		error(("There are %d cell parameters but %d columns. The number of cells should be a multiple of the number of columns.")
			:format(cell_count, column_count))
	end
	
	local rows = auto_subtable()
	local row_number, column_number = 1, 0
	for _, cell in ipairs(cells) do
		if column_number ~= column_count then
			column_number = column_number + 1
		else
			column_number = 1
			row_number = row_number + 1
		end
		
		rows[row_number][column_number] = cell
		
		-- "See also" ends its row immediately; the next cell starts a new one.
		if cell == "See also" then
			column_number = 0
			row_number = row_number + 1
		end
	end
	
	rows:un_auto_subtable() -- to avoid problems with below function
	
	return make_family_doublet_table(rows, column_count)
end

return export