Module:Mymr-sortkey

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a UCA-like sortkey module that uses Private Use Area (PUA) code points to rearrange characters into collation order. It focuses on the Myanmar (Burmese) script, which many languages use in inconsistent ways.


local export = {}
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local find = mw.ustring.find
local sub = mw.ustring.sub

-- Collation table: maps each supported Myanmar-script character to a single
-- Private Use Area code point (U+E000 upwards). The numeric order of the PUA
-- values is the intended sort order, so characters given adjacent values sort
-- next to each other regardless of where they sit in Unicode.
-- makeSortKey applies this table one character at a time through
-- gsub(text, ".", glyphTable); a character with no entry here is left as-is.
-- The gaps between groups (e.g. U+E003-U+E00F) are unassigned.
-- NOTE(review): the group labels below are a reading of the glyphs, not
-- something the code states -- confirm before relying on them.
local glyphTable = {

	-- U+E000-U+E002: lowest keys, so these sort before everything else.
	["ံ"] = u(0xE000), ["း"] = u(0xE001), ["့"] = u(0xE002),

	-- U+E010-U+E01C: presumably punctuation and symbols.
	["၊"] = u(0xE010), ["။"] = u(0xE011), ["၌"] = u(0xE012), ["၍"] = u(0xE013),
	["၎"] = u(0xE014), ["၏"] = u(0xE015), ["႞"] = u(0xE016), ["႟"] = u(0xE017),
	["꩷"] = u(0xE018), ["꩸"] = u(0xE019), ["꩹"] = u(0xE01A), ["ꧦ"] = u(0xE01B), ["ꩰ"] = u(0xE01C),

	-- U+E020-U+E03D: presumably digits 0-9; three consecutive keys per digit
	-- keep the variant forms of the same digit adjacent.
	["၀"] = u(0xE020), ["႐"] = u(0xE021), ["꧰"] = u(0xE022), ["၁"] = u(0xE023), ["႑"] = u(0xE024), ["꧱"] = u(0xE025),
	["၂"] = u(0xE026), ["႒"] = u(0xE027), ["꧲"] = u(0xE028), ["၃"] = u(0xE029), ["႓"] = u(0xE02A), ["꧳"] = u(0xE02B),
	["၄"] = u(0xE02C), ["႔"] = u(0xE02D), ["꧴"] = u(0xE02E), ["၅"] = u(0xE02F), ["႕"] = u(0xE030), ["꧵"] = u(0xE031),
	["၆"] = u(0xE032), ["႖"] = u(0xE033), ["꧶"] = u(0xE034), ["၇"] = u(0xE035), ["႗"] = u(0xE036), ["꧷"] = u(0xE037),
	["၈"] = u(0xE038), ["႘"] = u(0xE039), ["꧸"] = u(0xE03A), ["၉"] = u(0xE03B), ["႙"] = u(0xE03C), ["꧹"] = u(0xE03D),

	-- U+E040-U+E04C: presumably the first consonant series and its variants.
	["က"] = u(0xE040), ["ၵ"] = u(0xE041), ["ခ"] = u(0xE042), ["ၶ"] = u(0xE043),
	["ဂ"] = u(0xE044), ["ၷ"] = u(0xE045), ["ꩠ"] = u(0xE046), ["ꧩ"] = u(0xE047),
	["ဃ"] = u(0xE048), ["ꧠ"] = u(0xE049), ["ꧪ"] = u(0xE04A), ["င"] = u(0xE04B), ["ၚ"] = u(0xE04C),

	-- U+E050-U+E067: presumably the second consonant series and its variants.
	["စ"] = u(0xE050), ["ၸ"] = u(0xE051), ["ꩡ"] = u(0xE052),
	["ဆ"] = u(0xE053), ["ꧡ"] = u(0xE054), ["ꩢ"] = u(0xE055), ["ꩾ"] = u(0xE056),
	["ဇ"] = u(0xE057), ["ꩣ"] = u(0xE058), ["ꧫ"] = u(0xE059), ["ၹ"] = u(0xE05A), ["ꩲ"] = u(0xE05B),
	["ဈ"] = u(0xE05C), ["ၛ"] = u(0xE05D), ["ꧢ"] = u(0xE05E), ["ꩤ"] = u(0xE05F), ["ꧬ"] = u(0xE060),
	["ၡ"] = u(0xE061), ["ꩿ"] = u(0xE062),
	["ဉ"] = u(0xE063), ["ၺ"] = u(0xE064), ["ꩥ"] = u(0xE065), ["ꧧ"] = u(0xE066), ["ည"] = u(0xE067),

	-- U+E070-U+E07D: presumably the third consonant series and its variants.
	["ဋ"] = u(0xE070), ["ꩦ"] = u(0xE071), ["ဌ"] = u(0xE072), ["ꩧ"] = u(0xE073),
	["ဍ"] = u(0xE074), ["ꩨ"] = u(0xE075), ["ꧭ"] = u(0xE076), ["ဎ"] = u(0xE077), ["ꩩ"] = u(0xE078), ["ꧮ"] = u(0xE079),
	["ဏ"] = u(0xE07A), ["ၮ"] = u(0xE07B), ["ꧣ"] = u(0xE07C), ["ꧯ"] = u(0xE07D),

	-- U+E080-U+E08B: presumably the fourth consonant series and its variants.
	["တ"] = u(0xE080), ["ထ"] = u(0xE081), ["ဒ"] = u(0xE082), ["ၻ"] = u(0xE083), ["ꧻ"] = u(0xE084),
	["ဓ"] = u(0xE085), ["ꩪ"] = u(0xE086), ["ꧼ"] = u(0xE087),
	["န"] = u(0xE088), ["ၼ"] = u(0xE089), ["ꩫ"] = u(0xE08A), ["ၞ"] = u(0xE08B),

	-- U+E090-U+E09E: presumably the fifth consonant series and its variants.
	["ပ"] = u(0xE090), ["ဖ"] = u(0xE091), ["ၽ"] = u(0xE092),
	["ၾ"] = u(0xE093), ["ꩯ"] = u(0xE094), ["ႎ"] = u(0xE095), ["ꧨ"] = u(0xE096),
	["ဗ"] = u(0xE097), ["ၿ"] = u(0xE098), ["ꧽ"] = u(0xE099),
	["ဘ"] = u(0xE09A), ["ꧤ"] = u(0xE09B), ["ꧾ"] = u(0xE09C), ["မ"] = u(0xE09D), ["ၟ"] = u(0xE09E),

	-- U+E0A0-U+E0AA: each base letter is followed directly by what appear to
	-- be its medial (combining) forms, so the two sort together.
	["ယ"] = u(0xE0A0), ["ျ"] = u(0xE0A1), ["ရ"] = u(0xE0A2), ["ꩳ"] = u(0xE0A3), ["ꩺ"] = u(0xE0A4), ["ြ"] = u(0xE0A5),
	["လ"] = u(0xE0A6), ["ၠ"] = u(0xE0A7), ["ဝ"] = u(0xE0A8), ["ွ"] = u(0xE0A9), ["ႂ"] = u(0xE0AA),

	-- U+E0B0-U+E0C2: presumably the remaining consonants and their variants.
	["ႀ"] = u(0xE0B0), ["ၐ"] = u(0xE0B1), ["ၑ"] = u(0xE0B2), ["ၥ"] = u(0xE0B3), ["သ"] = u(0xE0B4), ["ꩬ"] = u(0xE0B5),
	["ဟ"] = u(0xE0B6), ["ႁ"] = u(0xE0B7), ["ꩭ"] = u(0xE0B8), ["ှ"] = u(0xE0B9), ["ꩮ"] = u(0xE0BA), ["ꩱ"] = u(0xE0BB),
	["ဠ"] = u(0xE0BC), ["ꧺ"] = u(0xE0BD), ["ၜ"] = u(0xE0BE), ["ၝ"] = u(0xE0BF),
	["ၯ"] = u(0xE0C0), ["ၰ"] = u(0xE0C1), ["ၦ"] = u(0xE0C2),

	-- U+E0D0-U+E0DD: presumably independent vowel letters.
	["အ"] = u(0xE0D0), ["ဢ"] = u(0xE0D1), ["ဣ"] = u(0xE0D2), ["ဤ"] = u(0xE0D3), ["ဥ"] = u(0xE0D4), ["ဦ"] = u(0xE0D5),
	["ၒ"] = u(0xE0D6), ["ၓ"] = u(0xE0D7), ["ၔ"] = u(0xE0D8), ["ၕ"] = u(0xE0D9),
	["ဧ"] = u(0xE0DA), ["ဨ"] = u(0xE0DB), ["ဩ"] = u(0xE0DC), ["ဪ"] = u(0xE0DD),

	-- U+E0E0-U+E0EC: presumably dependent vowel signs (first group).
	["ာ"] = u(0xE0E0), ["ါ"] = u(0xE0E1), ["ႃ"] = u(0xE0E2), ["ၲ"] = u(0xE0E3), ["ႜ"] = u(0xE0E4),
	["ိ"] = u(0xE0E5), ["ၱ"] = u(0xE0E6), ["ီ"] = u(0xE0E7), ["ဳ"] = u(0xE0E8),
	["ု"] = u(0xE0E9), ["ၳ"] = u(0xE0EA), ["ၴ"] = u(0xE0EB), ["ူ"] = u(0xE0EC),

	-- U+E0F0-U+E0FE: presumably dependent vowel signs (second group).
	["ၖ"] = u(0xE0F0), ["ၗ"] = u(0xE0F1), ["ၘ"] = u(0xE0F2), ["ၙ"] = u(0xE0F3),
	["ေ"] = u(0xE0F4), ["ႄ"] = u(0xE0F5), ["ဵ"] = u(0xE0F6), ["ႅ"] = u(0xE0F7),
	["ဲ"] = u(0xE0F8), ["ႝ"] = u(0xE0F9), ["ႆ"] = u(0xE0FA),
	["ဴ"] = u(0xE0FB), ["ၢ"] = u(0xE0FC), ["ၧ"] = u(0xE0FD), ["ၨ"] = u(0xE0FE),

	-- U+E100-U+E119: highest keys, so these sort last; presumably the
	-- virama/asat signs and tone marks.
	["ꧥ"] = u(0xE100), ["္"] = u(0xE101), ["်"] = u(0xE102), ["ၣ"] = u(0xE103), ["ၤ"] = u(0xE104),
	["ၩ"] = u(0xE105), ["ၪ"] = u(0xE106), ["ၫ"] = u(0xE107), ["ၬ"] = u(0xE108), ["ၭ"] = u(0xE109),
	["ႇ"] = u(0xE10A), ["ႋ"] = u(0xE10B), ["ႈ"] = u(0xE10C), ["ႌ"] = u(0xE10D), ["ႍ"] = u(0xE10E),
	["ႉ"] = u(0xE10F), ["ႊ"] = u(0xE110), ["ႏ"] = u(0xE111), ["ႚ"] = u(0xE112), ["ႛ"] = u(0xE113),
	["ꩻ"] = u(0xE114), ["ꩼ"] = u(0xE115), ["ꩽ"] = u(0xE116), ["ꩴ"] = u(0xE117), ["ꩵ"] = u(0xE118), ["ꩶ"] = u(0xE119),

}

--- Build a sort key for a term written in the Myanmar script.
-- Each character that has an entry in glyphTable is replaced by its Private
-- Use Area code point, so a plain code-point comparison of two keys yields the
-- collation order encoded in that table.
-- @param text  the term to convert; nil is passed through as nil
-- @param lang  language code; "shn", "tjl" and "kht" get a special ordering
--              for the vowel sign "ၢ" (see below)
-- @param sc    script code; when given and not "Mymr", the text is simply
--              upper-cased instead of being remapped
-- @return the sort key string, or nil when text is nil
function export.makeSortKey(text, lang, sc)

	-- Guard against a missing text before anything touches it: previously the
	-- non-Mymr branch ran first and called mw.ustring.upper(nil), which raises
	-- an error instead of returning nil.
	if not text then
		return nil
	end

	-- Other scripts are not handled by this module; fall back to upper-casing.
	if sc and sc ~= "Mymr" then
		return mw.ustring.upper(text)
	end

	-- Normalise spellings so that equivalent sequences get the same key:
	-- the two-character sequence "ဥ" + "ီ" is folded into the single letter "ဦ",
	text = gsub(text, "ဥ".."ီ", "ဦ")
	-- and "ဿ" is expanded to the stacked sequence "သ္သ".
	text = gsub(text, "ဿ", "သ္သ")

	if lang == "shn" or lang == "tjl" or lang == "kht" then
		-- vowel aa with final: for these languages "ၢ" is given key U+E0E3
		-- (among the aa-like vowel signs in glyphTable) instead of its default
		-- key U+E0FC. The PUA character produced here has no glyphTable entry,
		-- so the final pass below leaves it untouched.
		text = gsub(text, "ၢ", u(0xE0E3))
	end

	-- Per-character table lookup; characters without an entry are kept as-is.
	text = gsub(text, ".", glyphTable)

	return text

end

--- Demonstration entry point: sort the positional arguments of the invoking
-- frame by their sort keys and render them as a wikitext bullet list, each
-- term followed by its sort key inside <code> tags.
-- @param frame  frame object whose positional args are the terms to sort
-- @return the concatenated wikitext list
function export.showSorting(frame)

	-- Copy the positional arguments into a plain array that can be sorted.
	local entries = {}
	for _, arg in ipairs(frame.args) do
		entries[#entries + 1] = arg
	end

	-- Memoised so each term's key is computed once, however many times the
	-- sort compares it and again when it is displayed.
	local keyOf = require("Module:fun").memoize(export.makeSortKey)

	table.sort(entries, function(left, right)
		return keyOf(left) < keyOf(right)
	end)

	-- Replace every term in place with its rendered list item.
	for idx = 1, #entries do
		local entry = entries[idx]
		entries[idx] = "\n* " .. entry .. " (<code>" .. keyOf(entry) .. "</code>)"
	end

	return table.concat(entries)

end

return export