Module:vi-sortkey

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

This module takes Vietnamese words and generates sortkeys.

Alphabetic order: a ă â b c d đ e ê g h i k l m n o ô ơ p q r s t u ư v x y.

Order of tonal diacritics: the module currently uses a, à, ả, ã, á, ạ; an alternative order is a, á, à, ả, ã, ạ.

See the discussion in the Beer Parlour.

Examples

  • 八04口05囗08
共和國
  • tuye₂n ngo₂n toan1 the₂4 gio₃i4 ve₂1 nha₂n quye₂n1 cua2 lie₂n ho₃p5 quo₂c4
Tuyên ngôn toàn thế giới về nhân quyền của Liên Hợp Quốc
  • co₂ng ba₁ng1
công bằng
  • d₁ai5
Đại
  • ac4 si met4
Ác-si-mét
  • ta₂m ly4
tâm lý


  • nghề (nghe₂1)
  • nghệ (nghe₂5)
  • ngon (ngon)
  • ngón (ngon4)
  • ngọn (ngon5)
  • ngoy (ngoy)
  • ngôi (ngo₂i)
  • ngôn (ngo₂n)
  • ngôn (ngo₂n)
  • a (a)
  • à (a1)
  • ả (a2)
  • ã (a3)
  • á (a4)
  • ạ (a5)
  • ac (ac)
  • àc (ac1)
  • ảc (ac2)
  • ãc (ac3)
  • ác (ac4)
  • ạc (ac5)
  • an (an)
  • àn (an1)
  • ản (an2)
  • ãn (an3)
  • án (an4)
  • ạn (an5)
  • ă (a₁)
  • ằ (a₁1)
  • ẳ (a₁2)
  • ẵ (a₁3)
  • ắ (a₁4)
  • ặ (a₁5)
  • ăc (a₁c)
  • ằc (a₁c1)
  • ẳc (a₁c2)
  • ẵc (a₁c3)
  • ắc (a₁c4)
  • ặc (a₁c5)
  • ăn (a₁n)
  • ằn (a₁n1)
  • ẳn (a₁n2)
  • ẵn (a₁n3)
  • ắn (a₁n4)
  • ặn (a₁n5)
  • â (a₂)
  • ầ (a₂1)
  • ẩ (a₂2)
  • ẫ (a₂3)
  • ấ (a₂4)
  • ậ (a₂5)
  • âc (a₂c)
  • ầc (a₂c1)
  • ẩc (a₂c2)
  • ẫc (a₂c3)
  • ấc (a₂c4)
  • ậc (a₂c5)
  • ân (a₂n)
  • ần (a₂n1)
  • ẩn (a₂n2)
  • ẫn (a₂n3)
  • ấn (a₂n4)
  • ận (a₂n5)
  • b (b)
  • bc (bc)
  • bn (bn)
  • c (c)
  • cc (cc)
  • cn (cn)
  • d (d)
  • dc (dc)
  • dn (dn)
  • đ (d₁)
  • đc (d₁c)
  • đn (d₁n)
  • e (e)
  • è (e1)
  • ẻ (e2)
  • ẽ (e3)
  • é (e4)
  • ẹ (e5)
  • ec (ec)
  • èc (ec1)
  • ẻc (ec2)
  • ẽc (ec3)
  • éc (ec4)
  • ẹc (ec5)
  • en (en)
  • èn (en1)
  • ẻn (en2)
  • ẽn (en3)
  • én (en4)
  • ẹn (en5)
  • ê (e₂)
  • ề (e₂1)
  • ể (e₂2)
  • ễ (e₂3)
  • ế (e₂4)
  • ệ (e₂5)
  • êc (e₂c)
  • ềc (e₂c1)
  • ểc (e₂c2)
  • ễc (e₂c3)
  • ếc (e₂c4)
  • ệc (e₂c5)
  • ên (e₂n)
  • ền (e₂n1)
  • ển (e₂n2)
  • ễn (e₂n3)
  • ến (e₂n4)
  • ện (e₂n5)
  • g (g)
  • gc (gc)
  • gn (gn)
  • h (h)
  • hc (hc)
  • hn (hn)
  • i (i)
  • ì (i1)
  • ỉ (i2)
  • ĩ (i3)
  • í (i4)
  • ị (i5)
  • ic (ic)
  • ìc (ic1)
  • ỉc (ic2)
  • ĩc (ic3)
  • íc (ic4)
  • ịc (ic5)
  • in (in)
  • ìn (in1)
  • ỉn (in2)
  • ĩn (in3)
  • ín (in4)
  • ịn (in5)
  • k (k)
  • kc (kc)
  • kn (kn)
  • l (l)
  • lc (lc)
  • ln (ln)
  • m (m)
  • mc (mc)
  • mn (mn)
  • n (n)
  • nc (nc)
  • nn (nn)
  • o (o)
  • ò (o1)
  • ỏ (o2)
  • õ (o3)
  • ó (o4)
  • ọ (o5)
  • oc (oc)
  • òc (oc1)
  • ỏc (oc2)
  • õc (oc3)
  • óc (oc4)
  • ọc (oc5)
  • on (on)
  • òn (on1)
  • ỏn (on2)
  • õn (on3)
  • ón (on4)
  • ọn (on5)
  • ô (o₂)
  • ồ (o₂1)
  • ổ (o₂2)
  • ỗ (o₂3)
  • ố (o₂4)
  • ộ (o₂5)
  • ôc (o₂c)
  • ồc (o₂c1)
  • ổc (o₂c2)
  • ỗc (o₂c3)
  • ốc (o₂c4)
  • ộc (o₂c5)
  • ôn (o₂n)
  • ồn (o₂n1)
  • ổn (o₂n2)
  • ỗn (o₂n3)
  • ốn (o₂n4)
  • ộn (o₂n5)
  • ơ (o₃)
  • ờ (o₃1)
  • ở (o₃2)
  • ỡ (o₃3)
  • ớ (o₃4)
  • ợ (o₃5)
  • ơc (o₃c)
  • ờc (o₃c1)
  • ởc (o₃c2)
  • ỡc (o₃c3)
  • ớc (o₃c4)
  • ợc (o₃c5)
  • ơn (o₃n)
  • ờn (o₃n1)
  • ởn (o₃n2)
  • ỡn (o₃n3)
  • ớn (o₃n4)
  • ợn (o₃n5)
  • p (p)
  • pc (pc)
  • pn (pn)
  • q (q)
  • qc (qc)
  • qn (qn)
  • r (r)
  • rc (rc)
  • rn (rn)
  • s (s)
  • sc (sc)
  • sn (sn)
  • t (t)
  • tc (tc)
  • tn (tn)
  • u (u)
  • ù (u1)
  • ủ (u2)
  • ũ (u3)
  • ú (u4)
  • ụ (u5)
  • uc (uc)
  • ùc (uc1)
  • ủc (uc2)
  • ũc (uc3)
  • úc (uc4)
  • ục (uc5)
  • un (un)
  • ùn (un1)
  • ủn (un2)
  • ũn (un3)
  • ún (un4)
  • ụn (un5)
  • ư (u₃)
  • ừ (u₃1)
  • ử (u₃2)
  • ữ (u₃3)
  • ứ (u₃4)
  • ự (u₃5)
  • ưc (u₃c)
  • ừc (u₃c1)
  • ửc (u₃c2)
  • ữc (u₃c3)
  • ức (u₃c4)
  • ực (u₃c5)
  • ưn (u₃n)
  • ừn (u₃n1)
  • ửn (u₃n2)
  • ữn (u₃n3)
  • ứn (u₃n4)
  • ựn (u₃n5)
  • v (v)
  • vc (vc)
  • vn (vn)
  • x (x)
  • xc (xc)
  • xn (xn)
  • y (y)
  • ỳ (y1)
  • ỷ (y2)
  • ỹ (y3)
  • ý (y4)
  • ỵ (y5)
  • yc (yc)
  • ỳc (yc1)
  • ỷc (yc2)
  • ỹc (yc3)
  • ýc (yc4)
  • ỵc (yc5)
  • yn (yn)
  • ỳn (yn1)
  • ỷn (yn2)
  • ỹn (yn3)
  • ýn (yn4)
  • ỵn (yn5)

-- Table of functions returned by this module.
local export = {}

-- Shorthand for building a string from Unicode code points.
local U = mw.ustring.char

-- Combining marks that form separate vowel letters.
local breve, circum, horn = U(0x306), U(0x302), U(0x31B)

-- Combining marks that indicate tone.
local grave, hook, tilde, acute, dot =
	U(0x300), U(0x309), U(0x303), U(0x301), U(0x323)

--[[
local diacritics = breve .. circum .. horn .. grave .. hook .. tilde .. acute .. dot
local diacritic = "[" .. diacritics .. "]"
--]]

-- Lookup table applied to the characters of the decomposed text. It fixes the
-- sort order of the diacritics: the subscript digits stay immediately after
-- the vowel, while the full-size digits end up at the end of the syllable.
local replacements = {}

-- Diacritics that form separate vowel letters.
replacements[breve] = "₁"
replacements[circum] = "₂"
replacements[horn] = "₃"

-- Tonal diacritics, in the order given by Fumiko Take.
-- (Alternative order given by Stephen G. Brown:
--  acute = 1, grave = 2, hook = 3, tilde = 4, dot = 5.)
replacements[grave] = 1
replacements[hook] = 2
replacements[tilde] = 3
replacements[acute] = 4
replacements[dot] = 5

-- Remaining single-character substitutions.
replacements["đ"] = "d₁"
replacements["-"] = " "
	
-- Value that [[Module:languages]] returns for the code "vi" (presumably the
-- Vietnamese language object); fetched once when the module is loaded.
local vi = require("Module:languages").getByCode("vi")

-- Hands `text` and `vi` to tag_text in [[Module:script utilities]] and
-- returns whatever that function returns.
local function tag(text)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, vi)
end

-- Generates a sortkey for Vietnamese text.
--
-- Parameters:
--   text  the term to make a sortkey for; nil yields nil
--   lang  language code; if given and not "vi", text is returned unchanged
--   sc    script code; when omitted it is detected from text
--
-- Returns the sortkey. Text whose script code is "Hani" is delegated to
-- [[Module:zh-sortkey]]; text in any other script except "Latn" is returned
-- as is.
--
-- For Latin-script text, each diacritic that forms a separate vowel letter
-- becomes a subscript digit directly after its vowel, each tone mark becomes
-- a full-size digit at the end of its syllable, "đ" becomes "d₁" and "-"
-- becomes a space.
function export.makeSortKey(text, lang, sc)
	-- Nothing to sort. Checking this first keeps a nil text away from
	-- [[Module:zh-sortkey]] when the caller passes sc == "Hani".
	if not text then
		return nil
	end
	
	if lang and lang ~= "vi" then
		return text
	end
	
	if not sc then
		sc = require("Module:scripts").findBestScript(text, vi):getCode()
	end
	
	if sc then
		if sc == "Hani" then
			return require("Module:zh-sortkey").makeSortKey(text, lang, sc)
		elseif sc ~= "Latn" then
			return text
		end
	end
	
	local sortkey = text
	
	--[=[
		[[Module:languages]] currently converts text to lowercase
		before applying changes, then to uppercase before outputting
		the result.
	]=]
	-- Lowercasing is therefore only done here when the current page is in
	-- the Module namespace.
	if mw.title.getCurrentTitle().nsText == "Module" then
		sortkey = mw.ustring.lower(text)
	end
	
	-- Decompose so that every diacritic becomes its own combining character
	-- and can be looked up individually.
	sortkey = mw.ustring.toNFD(sortkey)
	
	-- The byte pattern matches one whole UTF-8 character at a time; characters
	-- without an entry in `replacements` are left unchanged.
	sortkey = sortkey:gsub("[%z\1-\127\194-\244][\128-\191]*", replacements)
	
	-- Put the subscript digit directly after the vowel when a tone digit got
	-- in between. The class must cover all five tone digits: canonical
	-- ordering in NFD places the dot below (tone 5) before a breve or
	-- circumflex, so "5" is the digit that regularly precedes a subscript.
	sortkey = mw.ustring.gsub(sortkey, "([1-5])([₀-₃])", "%2%1")
	
	-- move tone number to end of syllable
	sortkey = mw.ustring.gsub(sortkey, '([1-5])([^%s]+)', '%2%1')
	
	return sortkey
end

-- For each positional argument of `frame`, builds a list item holding the
-- word's sortkey inside <code> tags, followed by a line with the tagged word.
-- Returns all items concatenated into one string.
function export.showSortkey(frame)
	local lines = {}
	
	for index, word in ipairs(frame.args) do
		local key = export.makeSortKey(word)
		local tagged = tag(word)
		lines[index] = "\n* <code>" .. key .. "</code>\n: " .. tagged
	end
	
	return table.concat(lines)
end

-- Sorts the positional arguments of `frame` by their sortkeys and returns
-- them as one string of list items, each showing the tagged term followed by
-- its sortkey inside <code> tags.
function export.showSorting(frame)
	-- Copy the arguments into a plain table so that it can be sorted.
	local terms = {}
	for index, term in ipairs(frame.args) do
		terms[index] = term
	end
	
	-- Wrapped with memoize from [[Module:fun]]; presumably this caches the
	-- key per term so that repeated comparisons do not recompute it.
	local makeSortKey = require("Module:fun").memoize(export.makeSortKey)
	
	table.sort(terms, function(term1, term2)
		return makeSortKey(term1) < makeSortKey(term2)
	end)
	
	-- Replace every term in place with its formatted list item.
	for index = 1, #terms do
		local term = terms[index]
		terms[index] = "\n* " .. tag(term) .. " (<code>" .. makeSortKey(term) .. "</code>)"
	end
	
	return table.concat(terms)
end

-- Module result: the table holding makeSortKey, showSortkey and showSorting.
return export