Module:User:Erutuon/lang stuff

Definition from Wiktionary, the free dictionary
Jump to navigation Jump to search

Generates the tables of information about Wiktionary languages in User:Erutuon/language stuff and User:Erutuon/languages with no scripts.


-- Table of entry points. Every `export.*` function defined below takes a
-- frame argument and returns a string of wikitext; the table itself is
-- returned at the end of the module.
local export = {}

-- Lazily loaded contents of "Module:scripts/data"; nil until the first call
-- to _link_script.
local script_data

-- Returns a wikilink to the category of the script with the given code, with
-- the script code itself as the link text.
--
-- The category title is the script's canonical name, followed by " script"
-- unless the name already ends in "script", "code" or "semaphore" (with the
-- first letter in either case).
--
-- script_code: key into the table loaded from "Module:scripts/data".
-- Raises an error if the script data has no entry for script_code.
local function _link_script (script_code)
	script_data = script_data or mw.loadData "Module:scripts/data"
	local data = script_data[script_code]
	if not data then
		error("Unknown script code: " .. tostring(script_code))
	end
	local name = data.canonicalName
	-- Note the pattern is "[Ss]cript$": names that already end in "script"
	-- must not get a second " script" appended.
	local has_suffix = name:find("[Ss]cript$")
		or name:find("[Cc]ode$")
		or name:find("[Ss]emaphore$")
	local category = has_suffix and name or name .. " script"
	return "[[:Category:" .. category .. "|" .. script_code .. "]]"
end

-- Script code -> category link, filled in as codes are first requested.
local script_link_cache = {}

-- Memoizing front end for _link_script: the link for each script code is
-- built once and then served from the cache on every later call.
local function link_script (script_code)
	if not script_link_cache[script_code] then
		script_link_cache[script_code] = _link_script(script_code)
	end
	return script_link_cache[script_code]
end

-- Wraps a level-2 heading (==title==) in a zero-height, overflow-hidden div
-- and returns the resulting wikitext.
local function ToC_item(title)
	local wrapper_open = '<div style="overflow: hidden; height: 0; margin: 0; padding: 0;">'
	local heading = '==' .. title .. '=='
	return wrapper_open .. '\n' .. heading .. '\n</div>'
end

-- Builds a sortable wikitable of every canonical name that belongs both to a
-- language (per "Module:languages/canonical names") and to a language family
-- (per "Module:families/data").
--
-- Each row shows the language category link, the language code, the family
-- category link and the family code. Rows are ordered by canonical name with
-- combining characters ignored.
--
-- frame: not used.
-- Returns the wikitext of a hidden heading followed by the table.
function export.lang_and_fam_name(frame)
	local language_names = mw.loadData("Module:languages/canonical names")
	local family_data = mw.loadData("Module:families/data")
	local sortedPairs = require("Module:table").sortedPairs
	local is_combining = require("Module:Unicode data").is_combining
	local fun = require("Module:fun")

	-- Category link for a language name, or for a family name when is_family
	-- is truthy. The " language"/" languages" suffix is added only if the
	-- name does not already end with it.
	local function category_link(name, is_family)
		if is_family then
			local category = name
			if not name:find("[Ll]anguages$") then
				category = name .. " languages"
			end
			return "[[:Category:" .. category .. "|" .. category .. "]]"
		end

		local category = name
		if not name:find("[Ll]anguage$") then
			category = name .. " language"
		end
		return "[[:Category:" .. category .. "|" .. name .. "]]"
	end

	-- Canonical name -> { language link, language code, family link, family code }
	local shared_names = {}
	local pair_count = 0
	for family_code, family in sortedPairs(family_data) do
		local shared_name = family.canonicalName
		local language_code = language_names[shared_name]
		if language_code then
			pair_count = pair_count + 1
			shared_names[shared_name] = {
				category_link(shared_name),
				language_code,
				category_link(shared_name, true),
				family_code,
			}
		end
	end

	-- Sort value: the NFD form of the name with every combining character
	-- removed. Non-combining characters are left as they are (the callback
	-- returns nothing for them, so gsub keeps the match).
	local sort_value = fun.memoize(function (name)
		local function drop_if_combining(char)
			if is_combining(mw.ustring.codepoint(char)) then
				return ""
			end
		end
		return mw.ustring.toNFD(name):gsub("[\194-\244][\128-\191]+", drop_if_combining)
	end)

	local function name_before(name1, name2)
		return sort_value(name1) < sort_value(name2)
	end

	local function render_row(cells)
		return ("|-\n| %s || <code>%s</code> || %s || <code>%s</code>\n"):format(unpack(cells))
	end

	local rows = fun.mapIter(render_row, sortedPairs(shared_names, name_before))

	return ToC_item("Languages and language families with the same name")
		.. '\n{| class="wikitable sortable"\n|+ ' .. pair_count
		.. ' pairs of languages and language families have the same canonical name\n! language !! code !! family !! code\n'
		.. table.concat(rows)
		.. '|}'
end

-- Builds a sortable wikitable of the languages in "Module:languages/alldata"
-- whose script list has at least a given number of entries.
--
-- frame.args[1]: the minimum number of scripts. An error is raised if it
-- cannot be converted with tonumber.
--
-- Each row shows the language's category link, its code, the number of
-- scripts and the list of script links.
-- Returns the wikitext of a hidden heading followed by the table.
function export.number_of_scripts(frame)
	local fun = require "Module:fun"
	local m_table = require "Module:table"
	local number_of_scripts = tonumber(frame.args[1]) or error("Supply a number in parameter 1.")
	local get_length = fun.memoize(m_table.length)
	
	-- Keep only the languages that have a long enough script list.
	local languages = fun.filter(
		function (data)
			return data.scripts and get_length(data.scripts) >= number_of_scripts
		end,
		mw.loadData "Module:languages/alldata")
	
	local count = m_table.size(languages)

	local function make_row(data, code)
		local name = data[1]
		-- Append " language" only when the name does not already end in
		-- "language"/"Language", the same rule lang_and_fam_name applies.
		-- An unanchored, lower-case-only test would turn a name ending in
		-- "Language" into "... Language language".
		local category = name:find("[Ll]anguage$") and name or name .. " language"
		return ('|-\n| [[:Category:%s|%s]] || <code style="white-space: nowrap;">%s</code> || %d || %s\n')
			:format(
				category,
				name,
				code,
				get_length(data.scripts),
				table.concat(fun.map(link_script, data.scripts), ", "))
	end

	return ToC_item("Number of scripts")
		.. '\n{| class="wikitable sortable"\n|+ ' .. count .. ' languages use ' .. number_of_scripts
		.. ' or more scripts\n! canonical name !! code !! script<br>count !! style="width: 8em;" | scripts\n'
		.. table.concat(fun.mapIter(make_row, m_table.sortedPairs(languages)))
		.. "|}"
end

-- Counts the language codes in "Module:languages/alldata" by the data module
-- they would belong to, judging by the code's length and first letter, and
-- returns the counts as a sortable wikitable.
--
-- Keys used in the tally:
--   2              two-letter codes
--   3              all three-letter codes together
--   "3" .. letter  three-letter codes starting with that letter
--   "exceptional"  codes of any other length
--
-- frame: not used.
-- Returns the wikitext of a hidden heading followed by the table.
function export.census(frame)
	local alldata = require "Module:languages/alldata"
	local fun = require "Module:fun"
	local m_table = require "Module:table"
	
	-- While counting, a missing counter reads as 0 and is created on the spot.
	local count = {}
	local count_metatable = {
		__index = function(self, key)
			self[key] = 0
			return 0
		end
	}
	
	setmetatable(count, count_metatable)
	
	for code in pairs(alldata) do
		if #code == 2 then
			count[2] = count[2] + 1
		elseif #code == 3 then
			local key = 3 .. code:sub(1, 1)
			count[key] = count[key] + 1
			count[3] = count[3] + 1
		else
			count.exceptional = count.exceptional + 1
		end
	end
	
	-- Counting is finished: stop creating counters on access.
	count_metatable.__index = nil
	
	-- Running row number, written into data-sort-value so that the table can
	-- be sorted back into its initial order. Kept local so that it does not
	-- leak into the global environment.
	local sortkey = 0
	
	local function make_row(total, module)
		sortkey = sortkey + 1
		
		if module == 3 then
			-- The combined three-letter total has no module page to link to.
			return ('|-\n| data-sort-value="%02d" | three-letter codes || %d\n'):format(sortkey, total)
		end
		
		-- The numeric key 2 must be handled before any string method is
		-- called on `module`.
		local subpage
		if module == 2 then
			subpage = "data2"
		elseif module:sub(1, 1) == "3" then
			subpage = "data3/" .. module:sub(2, 2)
		else
			subpage = "datax"
		end
		
		return ('|-\n| data-sort-value="%02d" | [[Module:languages/%s]] || %d\n'):format(sortkey, subpage, total)
	end
	
	return ToC_item('Languages in each module')
		.. '\n{| class="wikitable sortable"\n|+ '
		.. 'Total number of codes in each language data module\n! module !! count\n'
		.. table.concat(fun.mapIter(make_row, m_table.sortedPairs(count)))
		.. '|}'
end

-- Tallies the formats of the language codes in "Module:languages/datax" and
-- returns the tally as a sortable wikitable.
--
-- A code's format is the code with every character other than "-" replaced
-- by "a". Rows are ordered by comparing the formats after each run of
-- non-hyphen characters has been replaced by its length (for example
-- "aaa-aaa" is compared as "3-3").
--
-- frame: not used.
-- Returns the wikitext of a hidden heading followed by the table.
function export.exceptional_code_formats(frame)
	local language_data = mw.loadData "Module:languages/datax"
	local sortedPairs = require "Module:table".sortedPairs
	local fun = require "Module:fun"

	-- Format -> number of codes that have it.
	local tally = {}
	for code in sortedPairs(language_data) do
		local shape = code:gsub("[^-]", "a")
		tally[shape] = (tally[shape] or 0) + 1
	end

	-- Replaces every run of non-hyphen characters by the run's length.
	local function segment_lengths(shape)
		return shape:gsub("[^-]+", string.len)
	end

	local function shape_before(shape1, shape2)
		return segment_lengths(shape1) < segment_lengths(shape2)
	end

	local function render_row(total, shape)
		return ("|-\n| <code>%s</code> || %d\n"):format(shape, total)
	end

	local rows = fun.mapIter(render_row, sortedPairs(tally, shape_before))

	return ToC_item('Exceptional code formats')
		.. '\n{| class="wikitable sortable"\n|+ '
		.. 'Code formats in [[Module:languages/datax]]\n! format !! count\n'
		.. table.concat(rows)
		.. '|}'
end

-- Groups the languages in "Module:languages/alldata" by the combination of
-- scripts they list and returns the groups as a sortable wikitable.
--
-- A combination is identified by its script codes, sorted and joined with
-- ", "; languages with no `scripts` field are grouped under "None". Each row
-- shows the linked script codes, the number of scripts and the number of
-- languages, with the names of the first languages (by code) in the cell's
-- title attribute.
--
-- frame: not used.
-- Returns the wikitext of a hidden heading followed by the table.
function export.script_combinations(frame)
	local fun = require "Module:fun"
	local m_table = require "Module:table"
	local shallow_copy = m_table.shallowcopy
	local language_data = mw.loadData "Module:languages/alldata"
	
	-- Largest number of language names listed in one title attribute.
	local max_listed = 80
	
	-- Appends lang_code to the list stored at t[k], creating the list first
	-- if necessary.
	local function add(t, k, lang_code)
		local subtable = t[k]
		if not subtable then
			subtable = {}
			t[k] = subtable
		end
		table.insert(subtable, lang_code)
	end
	
	local script_combinations = {}
	
	for code, data in pairs(language_data) do
		local script_list = data.scripts
		if script_list == nil then
			add(script_combinations, "None", code)
		elseif type(script_list) == "table" then
			-- Sort a copy so that the same set of scripts always yields the
			-- same key; tables from mw.loadData are read-only and cannot be
			-- sorted in place.
			script_list = shallow_copy(script_list)
			table.sort(script_list)
			add(script_combinations, table.concat(script_list, ", "), code)
		end
	end
	
	local count = m_table.size(script_combinations)
	
	-- Sorts t in place and iterates over it.
	local function sorted_ipairs(t)
		table.sort(t)
		return ipairs(t)
	end
	
	-- Wraps the iterator triplet f, s, i in an iterator that yields at most
	-- n (index, value) pairs. If the wrapped iterator has further items
	-- after that, a single `false` is yielded as an "and more" marker (a
	-- generic `for` only stops on nil, so the loop body still runs for it),
	-- and then the iteration ends. A list of n items or fewer is yielded in
	-- full without a marker.
	local function take(n, f, s, i)
		local taken = 0
		local finished = false
		return function ()
			if finished then
				return nil
			end
			local v
			i, v = f(s, i)
			if i == nil then
				finished = true
				return nil
			end
			if taken == n then
				finished = true
				return false
			end
			taken = taken + 1
			return i, v
		end
	end
	
	-- Title-attribute entry for one language code; a missing code is the
	-- "and more" marker produced by take.
	local function describe_language(code)
		if not code then return "..." end
		return language_data[code][1] .. " (" .. code .. ")"
	end
	
	local function make_row(languages, script_list)
		-- gsub also returns the number of replacements, which is the number
		-- of script codes in the combination.
		local script_count
		script_list, script_count = script_list:gsub("[^, ]+", link_script)
		local language_count = #languages
		return ('|-\n| %s || %d || title="%s" | %d\n')
			:format(
				script_list,
				script_count,
				table.concat(
					fun.mapIter(
						describe_language,
						take(max_listed, sorted_ipairs(languages))), -- Sort list of languages by language code.
					", "),
				language_count)
	end
	
	return ToC_item('Script combinations')
.. [[

{| class="wikitable sortable"
|+ ]] .. count .. [[ script combinations (sorted alphabetically) and the number of languages that use them
! style="width: 8em;" | script list !! script<br>count !! languages
]]
		.. table.concat(fun.mapIter(make_row, m_table.sortedPairs(script_combinations)))
		.. '|}'
end

-- Counts, over all languages in "Module:languages/alldata", how many
-- language tables contain each key, and returns the counts as a sortable
-- wikitable. A "total" row with the number of languages comes first; the
-- other rows are ordered by descending count.
--
-- frame: not used.
-- Returns the wikitext of a hidden heading followed by the table.
function export.count_data_items(frame)
	-- While counting, a key that has not been seen yet reads as 0.
	local counts = setmetatable(
		{},
		{
			__index = function (self, k)
				return 0
			end,
		})
	
	for _, data in pairs(mw.loadData "Module:languages/alldata") do
		for k in pairs(data) do
			counts[k] = counts[k] + 1
		end
		counts.total = counts.total + 1
	end
	
	-- Counting is finished: missing keys read as nil again.
	getmetatable(counts).__index = nil
	
	-- Descriptions shown next to the positional keys 1, 2 and 3.
	local info = { "canonical name", "Wikidata item", "family" }
	
	-- Strict ordering for the rows: "total" before everything else (but not
	-- before itself, as table.sort requires a comparator that is false for
	-- equal elements), then larger counts first. Equal counts are ordered by
	-- the string form of the key so that the output is deterministic.
	local function row_before(a, b)
		if a == "total" or b == "total" then
			return a == "total" and b ~= "total"
		end
		local count_a, count_b = counts[a], counts[b]
		if count_a ~= count_b then
			return count_a > count_b
		end
		return tostring(a) < tostring(b)
	end
	
	local function make_row(count, data_key)
		if data_key == "total" then
			return ("| total<br>languages || %d"):format(count)
		elseif info[data_key] then
			return ("| <code>%s</code> (%s) || %d"):format(data_key, info[data_key], count)
		else
			return ("| <code>%s</code> || %d"):format(data_key, count)
		end
	end
	
	return ToC_item('Data item census')
.. [[

{| class="wikitable sortable"
|+ Number of languages with each data item in their table
! data item !! count
|-
]]
		.. table.concat(
		require "Module:fun".mapIter(
			make_row,
			require "Module:table".sortedPairs(counts, row_before)),
		"\n|-\n")
	.. "\n|}"
end

-- Builds a sortable wikitable of the languages in "Module:languages/alldata"
-- that have no `scripts` field. Each row shows the language code, a link to
-- a Wikipedia article for the language, and a link to the data module the
-- code belongs to (judging by the code's length and first letter).
--
-- frame: not used.
-- Returns the wikitext of a hidden heading followed by the table.
function export.no_scripts(frame)
	local fun = require "Module:fun"
	
	local langs = fun.filter(
		function (data)
			return not data.scripts
		end,
		mw.loadData "Module:languages/alldata")
	
	-- Subpage of Module:languages for a code: "data2" for two-letter codes,
	-- "data3/<first letter>" for three-letter codes, "datax" otherwise.
	local get_module = fun.memoize(function (code)
		if #code == 3 then
			return "data3/" .. code:sub(1, 1)
		elseif #code == 2 then
			return "data2"
		else
			return "datax"
		end
	end)
	
	-- Value for the module cell's data-sort-value attribute.
	local function get_module_sortkey(code)
		if #code == 3 then
			return 3 .. code:sub(1, 1)
		elseif #code == 2 then
			return 2
		else
			return "x"
		end
	end
	
	-- Links the language name to a Wikipedia article. The article title is,
	-- in order of preference: data.wikipedia_article, the enwiki sitelink of
	-- data.wikidata_item, or the name itself with " language" appended
	-- unless it already contains "language"/"Language".
	-- Declared local so that it does not leak into the global environment.
	local function link_language_name(data)
		local name = data[1]
		local article = data.wikipedia_article
			or data.wikidata_item and mw.wikibase.sitelink(data.wikidata_item, 'enwiki')
			or name:find("[Ll]anguage") and name
			or name .. " language"
		return "[[w:" .. article .. "|" .. name .. "]]"
	end
	
	local function make_row(data, code)
		return ('| %s || %s || data-sort-value="%s" | [[Module:languages/%s|%s]]')
			:format(code, link_language_name(data), get_module_sortkey(code),
				get_module(code), get_module(code))
	end
	
	return ToC_item('Languages with no scripts') .. 
[[

{| class="wikitable sortable"
|+ Languages with no scripts
! code !! name !! module
|-
]] .. table.concat(
		fun.mapIter(
			make_row,
			require "Module:table".sortedPairs(langs)),
		"\n|-\n")
	.. "\n|}"
end

-- Builds a sortable wikitable of the languages in "Module:languages/alldata"
-- that have an `entry_name` field, showing for each one the contents of
-- entry_name.from above the contents of entry_name.to.
--
-- frame: not used.
-- Returns the wikitext of a hidden heading followed by the table.
function export.entry_name_replacements(frame)
	local fun = require "Module:fun"
	local m_table = require "Module:table"
	local m_Unicode_data = require "Module:Unicode data"
	-- Loaded once here rather than once per row.
	local m_scripts = require "Module:scripts"
	local m_languages = require "Module:languages"
	
	-- Joins the items of a `from` or `to` list with ", " and passes the
	-- result through add_dotted_circle. The list is copied first;
	-- NOTE(review): presumably because tables from mw.loadData cannot be
	-- handed to table.concat directly - confirm.
	local function show_from_or_to(from_or_to)
		return m_Unicode_data.add_dotted_circle(table.concat(
			m_table.shallowcopy(from_or_to), ", "))
	end
	
	local header = 'Languages with entry name replacements'
	
	-- One table row, without a leading "|-": the row separators come from
	-- the table header and from the "\n|-\n" used to join the rows, so that
	-- no separator is emitted twice.
	local function make_row(data, code)
		-- The script detected for the concatenated `from` items supplies
		-- the class of the span around the replacements.
		local script = m_scripts.findBestScript(
			table.concat(m_table.shallowcopy(data.entry_name.from)),
			m_languages.getByCode(code))
		return ('| %s (<code>%s</code>) || <span class="%s">%s<br>&darr;<br>%s</span>')
			:format(data[1], code, script:getCode(),
				show_from_or_to(data.entry_name.from),
				show_from_or_to(data.entry_name.to))
	end
	
	local languages = fun.filter(
		function (data)
			return data.entry_name ~= nil
		end,
		mw.loadData "Module:languages/alldata")
	
	return ToC_item(header)
		.. '\n{| class="wikitable sortable"\n|+ ' .. header
		.. '\n! language !! replacements\n|-\n'
		.. table.concat(
			fun.mapIter(make_row, m_table.sortedPairs(languages)),
			"\n|-\n")
		.. "\n|}"
end

-- Builds a sortable wikitable that lists, for every Wikimedia language code
-- found in the `wikimedia_codes` fields of "Module:languages/alldata", the
-- Wiktionary languages that name it. Each Wiktionary language is shown as
-- its code followed by a link to its category.
--
-- frame: not used.
-- Returns the wikitext of a hidden heading followed by the table.
function export.wikimedia_languages(frame)
	local fun = require "Module:fun"
	
	-- Wikimedia code -> list of Wiktionary codes; a list is created the
	-- first time its key is accessed.
	local languages_with_Wikimedia_code = setmetatable({},
		{
			__index = function (self, key)
				local val = {}
				self[key] = val
				return val
			end,
		})
	
	local language_data = require "Module:languages/alldata"
	
	for code, data in pairs(language_data) do
		if data.wikimedia_codes then
			for _, wikimedia_code in pairs(data.wikimedia_codes) do
				table.insert(languages_with_Wikimedia_code[wikimedia_code],
					code)
			end
		end
	end
	
	-- Lists with a single entry need no sorting.
	for _, codes in pairs(languages_with_Wikimedia_code) do
		if codes[2] then
			table.sort(codes)
		end
	end
	
	-- "code ([[:Category:<name> language|<name>]])" for one Wiktionary code.
	-- " language" is appended only when the canonical name does not already
	-- end in "language"/"Language", the same rule lang_and_fam_name uses for
	-- its language category links.
	local function describe_language(code)
		local name = language_data[code][1] -- canonical name
		local category = name:find("[Ll]anguage$") and name or name .. " language"
		return ("%s ([[:Category:%s|%s]])"):format(code, category, name)
	end
	
	local function make_row(Wiktionary_codes, Wikimedia_code)
		return ("|-\n| [https://%s.wiktionary.org %s] || %s"):format(
			Wikimedia_code,
			Wikimedia_code,
			table.concat(fun.map(describe_language, Wiktionary_codes), ", "))
	end
	
	return ToC_item("Wiktionary languages by Wikimedia language")
		.. [[

{| class="wikitable sortable"
|+ Languages by their Wikimedia language
! Wikimedia language !! Wiktionary language
]] .. table.concat(
		fun.mapIter(
			make_row,
			require "Module:table".sortedPairs(languages_with_Wikimedia_code)),
		"\n")
		.. "\n|}"
end

return export