Module:doublet table: difference between revisions

From Wiktionary, the free dictionary
Jump to navigation Jump to search
Content deleted Content added
mNo edit summary
check that the right number of cells has been input
Line 418: Line 418:
return table.concat(output, "\n")
return table.concat(output, "\n")
end

-- Copies sequential numbered arguments and counts them (while ignoring "See also").
local function process_args(args)
local count = 0
local new_args = {}
for i, v in ipairs(args) do
v = trim(v)
if v ~= "See also" then
count = count + 1
end
new_args[i] = v
end
return new_args, count
end
end


Line 423: Line 437:
local args = frame:getParent().args
local args = frame:getParent().args
local column_count = tonumber(args.cols) or error("Provide the number of columns in the |cols= parameter.")
local column_count = tonumber(args.cols) or error("Provide the number of columns in the |cols= parameter.")
local arg_count
args, arg_count = process_args(args) -- Warning! Removes named parameters!
if arg_count % column_count ~= 0 then
error(
string.format(
"There are %d cell parameters but %d columns. The number of cells should be a multiple of the number of columns.",
arg_count, column_count))
end
local rows = auto_subtable()
local rows = auto_subtable()
Line 435: Line 457:
end
end
rows[row_number][column_number] = arg
if arg then
arg = trim(arg)
if arg == "See also" then
rows[row_number][column_number] = arg
column_number = 0
row_number = row_number + 1
if arg == "See also" then
column_number = 0
row_number = row_number + 1
end
else
break
end
end
end
end

Revision as of 20:10, 9 March 2018

This module generates the tables used in lists of doublets, such as Appendix:English doublets. On pages with a single language, it is much faster than a series of {{l}} templates; on Appendix:Romance doublets, it is only slightly faster, because of the number of languages in each table.


local export = {}

local fun = require "Module:fun"
local getByCode = require "Module:languages".getByCode
local get_by_name = require "Module:languages".getByCanonicalName

-- Metatable whose __index handler creates, stores and returns an empty table
-- the first time a missing key is read.
-- NOTE(review): auto_subtable() below builds its own inline metatable rather
-- than using this one; confirm whether this table is still needed.
local auto_subtable_mt = {}

function auto_subtable_mt.__index(tbl, missing_key)
	local created = {}
	tbl[missing_key] = created
	return created
end

-- Returns a new table that creates (and stores) an empty subtable whenever a
-- missing key is read, so that t[row][column] = value works without first
-- creating t[row]. Every call builds a fresh metatable.
local function auto_subtable()
	local function create_missing(tbl, missing_key)
		local created = {}
		tbl[missing_key] = created
		return created
	end

	return setmetatable({}, { __index = create_missing })
end

-- Decomposes text to NFD and strips combining macrons (U+0304, UTF-8
-- "\204\132") and combining breves (U+0306, UTF-8 "\204\134").
-- Returns only the stripped string: the surrounding parentheses discard the
-- substitution count that gsub returns as a second value, so it cannot leak
-- into callers that forward all return values.
local function remove_macron_breve(text)
	return (mw.ustring.toNFD(text):gsub("\204[\132\134]", ""))
end

-- Decomposes text to NFD and strips combining macrons (U+0304, UTF-8
-- "\204\132"), combining breves (U+0306, "\204\134") and combining diaereses
-- (U+0308, "\204\136").
-- Returns only the stripped string: the surrounding parentheses discard the
-- substitution count that gsub returns as a second value.
local function remove_macron_breve_diaeresis(text)
	return (mw.ustring.toNFD(text):gsub("\204[\132\134\136]", ""))
end

-- Replacement entry-name functions, keyed by language code. The lookup tables
-- langs and langs_by_name consult this table when they create a language
-- object and, on a hit, install the function as that object's makeEntryName.
local make_entry_name = {
	la = remove_macron_breve_diaeresis,
	grc = remove_macron_breve,
}

-- Stand-in method that ignores its receiver and hands back the remaining
-- arguments untouched. Installed as makeEntryName on language objects whose
-- raw data has no entry_name field.
local function empty_method(_, ...)
	return ...
end

-- Cache of language objects keyed by language code. A code that is not cached
-- yet is fetched with getByCode on first access and stored:
-- langs.en -> language object for English.
local langs = setmetatable({}, {
	__index = function(cache, code)
		local lang = getByCode(code)
		if not lang then
			error("No language with code " .. code .. ".")
		end

		if make_entry_name[code] then
			-- Use the module's own entry-name function for this code.
			lang.makeEntryName = function(self, text)
				return make_entry_name[code](text)
			end
		elseif lang._rawData.entry_name == nil then
			-- No entry_name data: the entry name is the text itself.
			lang.makeEntryName = empty_method
		end

		cache[code] = lang
		return lang
	end,
})

-- Wraps word in typographic double quotation marks.
local function quote(word)
	return table.concat({ "“", word, "”" })
end

-- Returns word with leading and trailing whitespace removed.
local function trim(word)
	local trimmed = string.match(word, "%s*(.-)%s*$")
	return trimmed
end

-- Lua pattern for one strip marker: the shortest run of text that starts with
-- "\127'" and ends with "'\127".
local strip_marker = "\127'.-'\127"

-- Removes every strip marker from text.
-- Returns only the cleaned string: the parentheses discard gsub's substitution
-- count, which would otherwise be forwarded as an extra argument when this
-- call is the last argument of another call, as in
-- get_by_name(remove_strip_markers(key)).
local function remove_strip_markers(text)
	return (string.gsub(text, strip_marker, ""))
end

-- Wraps text in a <span> whose class is the script code and whose lang
-- attribute is the language code.
local function tag(text, lang_code, sc_code)
	local pieces = {
		'<span class="', sc_code,
		'" lang="', lang_code,
		'">', text, "</span>",
	}
	return table.concat(pieces)
end

-- Builds the suffix appended to a language-section anchor for a sense id:
-- "-" followed by the id encoded with mw.uri.encode(..., "WIKI"), or the
-- empty string when no sense id is given. The lang argument is not used.
local function make_anchor(lang, sense_id)
	if not sense_id then
		return ""
	end
	return "-" .. mw.uri.encode(sense_id, "WIKI")
end

-- Builds a language-tagged wikilink to a page in the Reconstruction namespace:
-- [[Reconstruction:<language name>/<entry name>#<language name><anchor>|<link_text>]]
-- wrapped by tag() with the language code and the given script code.
local function make_reconstructed_link(word, link_text, lang, sc_code, sense_id)
	local wikilink = table.concat({
		'[[Reconstruction:', lang:getCanonicalName(), "/",
		lang:makeEntryName(word), "#", lang:getCanonicalName(),
		make_anchor(lang, sense_id),
		"|", link_text, "]]",
	})
	return tag(wikilink, lang:getCode(), sc_code)
end

-- Builds a language-tagged wikilink to a main-namespace entry:
-- [[<entry name>#<language name><anchor>|<link_text>]]
-- wrapped by tag() with the language code and the given script code.
local function make_mainspace_link(word, link_text, lang, sc_code, sense_id)
	local wikilink = table.concat({
		'[[', lang:makeEntryName(word), "#", lang:getCanonicalName(),
		make_anchor(lang, sense_id),
		"|", link_text, "]]",
	})
	return tag(wikilink, lang:getCode(), sc_code)
end

local find_best_script = require("Module:scripts").findBestScript
-- Links a term, or the terms inside a piece of cell text, in one language.
--
-- word: text to link. Recognised forms, checked in this order:
--   ""                  -> returns "&mdash;"
--   text with "\127"    -> the text before the first strip marker is linked
--                          and the marker is kept after it
--   "a and b"           -> both sides are linked separately
--   text with "[[...]]" -> only the bracketed parts are linked
--   "entry|text"        -> links to entry, displaying text
--   "*entry"            -> reconstructed term (Reconstruction namespace); a
--                          piped link is treated as reconstructed when its
--                          display text starts with "*"
--   "entry$sense"       -> links to the sense-id anchor, e.g.
--                          moule$mussel -> moule#French-mussel for French
-- lang: language object. When it is nil, the first argument is taken to be
--   the language object and a one-argument linking function bound to that
--   language is returned (used as a gsub replacement function).
--
-- Raises an error for a malformed piped link or a malformed sense id.
local function fast_link(word, lang)
	if word == "" then
		return "&mdash;"
	end
	
	-- Curried form: fast_link(lang) returns function(term).
	if lang == nil then
		lang = word
		return function(term)
			return fast_link(term, lang)
		end
	end
	
	if word:find("\127") then
		return word:gsub(
				"^(.-)( ?" .. strip_marker .. ")",
				function (text, marker)
					return fast_link(text, lang) .. marker
				end)
	end
	
	if word:find(" and ") then
		return word:gsub(
			"(.+) and (.+)",
			function (first, second)
				return fast_link(first, lang) .. " and " .. fast_link(second, lang)
			end)
	end
	
	if word:find("[[", 1, true) then
		return word:gsub("%[%[([^%]]+)%]%]", fast_link(lang))
	end
	
	local script = find_best_script(word, lang):getCode()
	
	local link_func = make_mainspace_link
	local entry, link_text, sense_id
	if word:find("|") then
		entry, link_text = word:match("^([^|]+)|(.+)$")
		
		if not entry then
			error("Malformed piped link: " .. word)
		end
		
		if link_text:find("^%*") then
			link_func = make_reconstructed_link
		end
	else
		entry = word
		
		if entry:find("^%*") then
			link_text = entry
			link_func = make_reconstructed_link
		end
	end
	
	entry = entry:gsub("^%*", "")
	
	-- moule$mussel -> moule#French-mussel (assuming lang is French)
	if entry:find("%$") then
		-- Match into temporaries so that entry is still available for the
		-- error message when the match fails.
		local base, id = entry:match("([^$]+)$(.+)$")
		
		if not base then
			error("Malformed sense id: " .. entry)
		end
		
		entry, sense_id = base, id
		link_text = entry
	end
	
	return link_func(remove_strip_markers(entry), link_text or entry or word, lang, script, sense_id)
end

-- Like string.gsub, but returns only the resulting string, and returns nil
-- when the pattern did not match anywhere.
local function gsub_or_nil(str, pattern, repl)
	local replaced, hits = string.gsub(str, pattern, repl)
	return hits ~= 0 and replaced or nil
end

-- Cache of language objects keyed by canonical language name. A name that is
-- not cached yet is looked up with get_by_name on first access and stored
-- under the key exactly as given:
-- langs_by_name.English -> language object for English.
-- Strip markers in the key are removed before the lookup.
-- Raises an error when no language has that name.
local langs_by_name = {}
setmetatable(langs_by_name, {
	__index = function(self, key)
		-- Assigning to a local keeps only the cleaned name, so a possible
		-- second return value of remove_strip_markers is not forwarded to
		-- get_by_name as an extra argument.
		local name = remove_strip_markers(key)
		local lang = get_by_name(name) or error("No language with name " .. tostring(key) .. ".")
		
		-- make_entry_name is keyed by language code, not by name, so it has
		-- to be indexed with the code of the language that was found.
		local custom_entry_name = make_entry_name[lang:getCode()]
		if custom_entry_name then
			lang.makeEntryName = function(self, text)
				return custom_entry_name(text)
			end
		elseif lang._rawData.entry_name == nil then
			lang.makeEntryName = empty_method
		end
		self[key] = lang
		return lang
	end
})

-- Replaces every [[Language name]] in text with the Wikipedia link produced by
-- that language object's makeWikipediaLink method.
-- Returns gsub's results (the new text and the number of replacements).
local function link_language_names(text)
	local function to_wikipedia_link(name)
		return langs_by_name[name]:makeWikipediaLink()
	end

	return text:gsub("%[%[([^%]]+)%]%]", to_wikipedia_link)
end

-- Placeholder control characters that temporarily stand in for a comma
-- (random_character) and a semicolon (random_character2) while text is being
-- split on those separators.
local random_character = "\1"
local random_character2 = "\2"

-- Formats the content of a qualifier (the text that was inside parentheses).
--
-- * If the content contains "\127", every run of characters other than "\127"
--   (with an optional trailing space) is passed through format_qualifier
--   separately, and gsub's results are returned.
-- * If the content contains a straight double quote, each "..." section is
--   turned into a gloss in curly quotation marks, and every other item
--   separated by commas or semicolons is wrapped in '' (wiki italics).
-- * Otherwise every item separated by commas or semicolons is wrapped in ''.
--
-- In the last two cases the result is wrapped in parentheses.
local function format_qualifier(qualifier_content)
	if qualifier_content:find("\127") then
		return qualifier_content:gsub("[^\127]+ ?", format_qualifier)
	elseif qualifier_content:find('"') then
		return "("
			.. qualifier_content
				-- Turn comma placeholders (inserted by
				-- link_and_make_qualifier) back into real commas.
				:gsub(random_character, ",")
				-- Inside each quoted gloss, hide commas and semicolons behind
				-- placeholders so the next step does not split the gloss.
				:gsub(
					'"([^"]+)"',
					function (gloss)
						return quote(gloss:gsub(",", random_character):gsub(";", random_character2))
					end)
				-- Italicize every item that is not a gloss; glosses are
				-- recognized by their opening curly quotation mark.
				:gsub(
					"[^,;]+",
					function (item)
						if item:find("“") then
							return item
						else
							return "''" .. item .. "''"
						end
					end)
				-- Restore the commas and semicolons inside glosses.
				:gsub(random_character, ",")
				:gsub(random_character2, ";")
			.. ")"
	else
		return "("
			.. qualifier_content
				-- Turn comma placeholders back into real commas, then
				-- italicize each item.
				:gsub(random_character, ",")
				:gsub("[^,;]+", "''%1''")
			.. ")"
	end
end

-- Turns the raw text of one table cell into wikitext.
--
-- * nil cell: returns "".
-- * Cell containing commas: commas inside parenthesized qualifiers are first
--   replaced with the placeholder random_character, so they are not mistaken
--   for the commas that separate terms; each comma-separated piece is then
--   processed recursively and the separators are kept.
-- * Cell containing slashes: each slash-separated piece is processed
--   recursively and the separators are kept.
-- * Cell containing "(": every "term (qualifier)" is replaced by the linked
--   term followed by the formatted qualifier; raises an error if the cell does
--   not have that shape.
-- * Anything else is handed to fast_link.
local function link_and_make_qualifier(cell, lang)
	if not cell then
		return ""
	end

	-- Processes one piece and re-attaches the separator that followed it.
	local function link_piece(text, separator)
		return link_and_make_qualifier(text, lang) .. separator
	end

	if cell:find(",") then
		local protected = cell:gsub(
			"%([^%)]+%)",
			function (qualifier)
				return qualifier:gsub(",", random_character)
			end)
		return protected:gsub("([^,]+)(,? ?)", link_piece)
	end

	if cell:find("/") then
		return cell:gsub("([^/]+)( ?/? ?)", link_piece)
	end

	if cell:find("%(") then
		local linked = gsub_or_nil(
			cell,
			"(.-) %(([^%)]+)%)",
			function (link_text, qualifier_content)
				return fast_link(link_text, lang) .. " "
					.. link_language_names(format_qualifier(qualifier_content))
			end)
		if not linked then
			error("Ill-formed qualifier in " .. quote(cell) .. " for " .. lang:getCanonicalName() .. ".")
		end
		return linked
	end

	return fast_link(cell, lang)
end

-- Links the terms in text for the given language. If text contains "[[", only
-- the [[...]] parts are linked; otherwise every comma-separated piece is.
-- Returns gsub's results (the new text and the number of replacements).
local function link_term_list(text, lang)
	local has_brackets = text:find("[[", 1, true)
	if has_brackets then
		return text:gsub("%[%[([^%]]+)%]%]", fast_link(lang))
	end
	return text:gsub("([^,]+)", fast_link(lang))
end

-- Builds the wikitext of a doublet table.
--
-- rows: table of rows, each a table of cell strings; rows[1] holds the header
--   cells, which are output unchanged.
-- column_number_to_lang: array mapping each column number to a language code;
--   its length is the number of columns.
-- arg_count: total number of cells, header cells included. nil is treated
--   as 0.
--
-- Every body cell is passed through link_and_make_qualifier with the language
-- of its column. When there are more than 10 rows, the header row is repeated
-- at the bottom of the table.
-- Returns the table as a single string.
local function make_table(rows, column_number_to_lang, arg_count)
	local output = {}
	for i, header_cell in ipairs(rows[1]) do
		output[i] = ("! %s"):format(header_cell)
	end
	
	local row_count_for_headers_at_bottom = 10
	local headers_at_bottom = #rows > row_count_for_headers_at_bottom
	
	-- Capture the header lines before the table opening is inserted in front
	-- of them.
	local headers
	if headers_at_bottom then
		headers = "|-\n" .. table.concat(output, "\n")
	end
	
	table.insert(output, 1, '{| class="wikitable"')
	
	local column_count = #column_number_to_lang
	-- These counters are local so they do not leak into the global
	-- environment.
	local column_number = 0
	local row_number = 2 -- Header is row 1.
	table.insert(output, "|-")
	-- The first column_count cells are the header; walk over the rest.
	for _ = column_count + 1, arg_count or 0 do
		if column_number == column_count then
			column_number = 1
			row_number = row_number + 1
			table.insert(output, "|-")
		else
			column_number = column_number + 1
		end
		
		table.insert(output, ("| %s"):format(
			link_and_make_qualifier(rows[row_number][column_number],
				langs[column_number_to_lang[column_number]])))
	end
	
	if headers_at_bottom then
		table.insert(output, headers)
	end
	
	table.insert(output, "|}")
	
	return table.concat(output, "\n")
end

-- Entry point: generates a doublet table from the arguments of the parent
-- frame.
--
-- |langs= is a list of language codes separated by commas and/or spaces, one
-- per column. The positional arguments are the cells, row by row; the first
-- row is the header.
--
-- Returns nothing when |langs= is absent. Raises an error when |langs=
-- contains no language code.
function export.doublet_table(frame)
	local args = frame:getParent().args
	
	if not args.langs then
		return
	end
	
	local column_number_to_lang = {}
	local column_count = 0
	for lang in args.langs:gmatch("[^, ]+") do
		column_count = column_count + 1
		column_number_to_lang[column_count] = lang
	end
	
	-- Without this check an empty |langs= fails later with an unrelated error.
	if column_count == 0 then
		error("Provide at least one language code in the |langs= parameter.")
	end
	
	local rows = auto_subtable()
	
	local column_number = 0
	local row_number = 1
	-- Starts at 0 rather than nil so that a call without positional arguments
	-- still passes a number to make_table.
	local arg_count = 0
	for i, arg in ipairs(args) do
		arg_count = i
		
		if column_number == column_count then
			column_number = 1
			row_number = row_number + 1
		else
			column_number = column_number + 1
		end
		
		rows[row_number][column_number] = trim(arg)
	end
	
	return make_table(rows, column_number_to_lang, arg_count)
end

-- Builds the wikitext of a language-family doublet table.
--
-- rows: table of rows, each a table of cell strings. rows[1] is the header
--   row. In every later row the first cell is a language name and the
--   remaining cells hold terms in that language, except that a row whose
--   first cell is "See also" becomes a centered, bold cell spanning all
--   columns.
-- column_count: number of columns.
--
-- When there are more than 10 rows, the header row is repeated at the bottom.
-- Returns the table as a single string.
local function make_family_doublet_table(rows, column_count)
	local output = {}
	for index, header_cell in ipairs(rows[1]) do
		if index == 1 then
			-- Assumes the language name is a single capitalized word.
			-- Works in [[Appendix:Romance doublets]].
			header_cell = header_cell:gsub(
				"^(%u%l+) (.+)$",
				function (language_name, terms)
					local linked_terms = link_term_list(terms, langs_by_name[language_name])
					return language_name .. " " .. linked_terms
				end)
		end

		output[index] = ("! %s"):format(header_cell)
	end

	-- Capture the header lines before anything else is added to output.
	local row_count_for_headers_at_bottom = 10
	local headers
	if #rows > row_count_for_headers_at_bottom then
		headers = "|-\n" .. table.concat(output, "\n")
	end

	table.insert(output, 1, '{| class="wikitable"')

	for i = 2, #rows do
		local row = rows[i]
		local first_cell = row[1]
		if first_cell == "See also" then
			output[#output + 1] =
				('|-\n| colspan="%d" style="text-align: center; font-weight: bold;" | See also')
					:format(column_count)
		else
			local lang = langs_by_name[first_cell]

			output[#output + 1] = "|-\n| " .. first_cell -- link language name?

			for j = 2, column_count do
				local cell_text = link_and_make_qualifier(row[j], lang)
				output[#output + 1] = "| " .. cell_text
			end
		end
	end

	if headers then
		output[#output + 1] = headers
	end

	output[#output + 1] = "|}"

	return table.concat(output, "\n")
end

-- Copies the sequential positional arguments into a new plain table, trimming
-- each one, and counts them without counting cells equal to "See also".
-- Returns the new table and the count.
local function process_args(args)
	local cell_count = 0
	local copied = {}
	for index, value in ipairs(args) do
		local trimmed = trim(value)
		copied[index] = trimmed
		if trimmed ~= "See also" then
			cell_count = cell_count + 1
		end
	end
	return copied, cell_count
end

-- Entry point: generates a language-family doublet table from the arguments
-- of the parent frame.
--
-- |cols= gives the number of columns and must be a positive whole number.
-- The positional arguments are the cells, row by row; the first row is the
-- header. A cell equal to "See also" occupies a row of its own and is not
-- counted as a cell.
--
-- Raises an error when |cols= is missing or invalid, or when the number of
-- cells is not a multiple of the number of columns.
function export.family_doublets(frame)
	local args = frame:getParent().args
	local column_count = tonumber(args.cols) or error("Provide the number of columns in the |cols= parameter.")
	-- Zero, negative and fractional values would otherwise either produce a
	-- misleading cell-count error or silently put every cell in one row.
	if column_count < 1 or column_count % 1 ~= 0 then
		error("The number of columns in the |cols= parameter must be a positive whole number.")
	end
	
	local arg_count
	args, arg_count = process_args(args) -- Warning! Removes named parameters!
	if arg_count % column_count ~= 0 then
		error(
			string.format(
				"There are %d cell parameters but %d columns. The number of cells should be a multiple of the number of columns.",
				arg_count, column_count))
	end
	
	local rows = auto_subtable()
	local column_number = 0
	local row_number = 1
	for _, arg in ipairs(args) do
		if column_number == column_count then
			column_number = 1
			row_number = row_number + 1
		else
			column_number = column_number + 1
		end
		
		rows[row_number][column_number] = arg
		
		-- "See also" takes up a whole row: start the next cell on a new row.
		if arg == "See also" then
			column_number = 0
			row_number = row_number + 1
		end
	end
	
	return make_family_doublet_table(rows, column_count)
end

return export