Module:vi-sortkey
- The following documentation is located at Module:vi-sortkey/documentation. [edit]
- Useful links: subpage list • transclusions • testcases
This module takes Vietnamese words and generates sortkeys.
Alphabetic order: a ă â b c d đ e ê g h i k l m n o ô ơ p q r s t u ư v x y.
Order of tonal diacritics: the module currently uses a, à, ả, ã, á, ạ (the order given by Fumiko Take); an alternative order is a, á, à, ả, ã, ạ (the order given by Stephen G. Brown).
See the discussion in the Beer Parlour.
Examples[edit]
八04口05囗08
- 共和國
tuye₂n ngo₂n toan1 the₂4 gio₃i4 ve₂1 nha₂n quye₂n1 cua2 lie₂n ho₃p5 quo₂c4
- Tuyên ngôn toàn thế giới về nhân quyền của Liên Hợp Quốc
co₂ng ba₁ng1
- công bằng
d₁ai5
- Đại
ac4 si met4
- Ác-si-mét
ta₂m ly4
- tâm lý
- nghề (
nghe₂1
) - nghệ (
nghe₂5
) - ngon (
ngon
) - ngón (
ngon4
) - ngọn (
ngon5
) - ngoy (
ngoy
) - ngôi (
ngo₂i
) - ngôn (
ngo₂n
) - ngôn (
ngo₂n
)
- a (
a
) - à (
a1
) - ả (
a2
) - ã (
a3
) - á (
a4
) - ạ (
a5
) - ac (
ac
) - àc (
ac1
) - ảc (
ac2
) - ãc (
ac3
) - ác (
ac4
) - ạc (
ac5
) - an (
an
) - àn (
an1
) - ản (
an2
) - ãn (
an3
) - án (
an4
) - ạn (
an5
) - ă (
a₁
) - ằ (
a₁1
) - ẳ (
a₁2
) - ẵ (
a₁3
) - ắ (
a₁4
) - ặ (
a₁5
) - ăc (
a₁c
) - ằc (
a₁c1
) - ẳc (
a₁c2
) - ẵc (
a₁c3
) - ắc (
a₁c4
) - ặc (
a₁c5
) - ăn (
a₁n
) - ằn (
a₁n1
) - ẳn (
a₁n2
) - ẵn (
a₁n3
) - ắn (
a₁n4
) - ặn (
a₁n5
) - â (
a₂
) - ầ (
a₂1
) - ẩ (
a₂2
) - ẫ (
a₂3
) - ấ (
a₂4
) - ậ (
a₂5
) - âc (
a₂c
) - ầc (
a₂c1
) - ẩc (
a₂c2
) - ẫc (
a₂c3
) - ấc (
a₂c4
) - ậc (
a₂c5
) - ân (
a₂n
) - ần (
a₂n1
) - ẩn (
a₂n2
) - ẫn (
a₂n3
) - ấn (
a₂n4
) - ận (
a₂n5
) - b (
b
) - bc (
bc
) - bn (
bn
) - c (
c
) - cc (
cc
) - cn (
cn
) - d (
d
) - dc (
dc
) - dn (
dn
) - đ (
d₁
) - đc (
d₁c
) - đn (
d₁n
) - e (
e
) - è (
e1
) - ẻ (
e2
) - ẽ (
e3
) - é (
e4
) - ẹ (
e5
) - ec (
ec
) - èc (
ec1
) - ẻc (
ec2
) - ẽc (
ec3
) - éc (
ec4
) - ẹc (
ec5
) - en (
en
) - èn (
en1
) - ẻn (
en2
) - ẽn (
en3
) - én (
en4
) - ẹn (
en5
) - ê (
e₂
) - ề (
e₂1
) - ể (
e₂2
) - ễ (
e₂3
) - ế (
e₂4
) - ệ (
e₂5
) - êc (
e₂c
) - ềc (
e₂c1
) - ểc (
e₂c2
) - ễc (
e₂c3
) - ếc (
e₂c4
) - ệc (
e₂c5
) - ên (
e₂n
) - ền (
e₂n1
) - ển (
e₂n2
) - ễn (
e₂n3
) - ến (
e₂n4
) - ện (
e₂n5
) - g (
g
) - gc (
gc
) - gn (
gn
) - h (
h
) - hc (
hc
) - hn (
hn
) - i (
i
) - ì (
i1
) - ỉ (
i2
) - ĩ (
i3
) - í (
i4
) - ị (
i5
) - ic (
ic
) - ìc (
ic1
) - ỉc (
ic2
) - ĩc (
ic3
) - íc (
ic4
) - ịc (
ic5
) - in (
in
) - ìn (
in1
) - ỉn (
in2
) - ĩn (
in3
) - ín (
in4
) - ịn (
in5
) - k (
k
) - kc (
kc
) - kn (
kn
) - l (
l
) - lc (
lc
) - ln (
ln
) - m (
m
) - mc (
mc
) - mn (
mn
) - n (
n
) - nc (
nc
) - nn (
nn
) - o (
o
) - ò (
o1
) - ỏ (
o2
) - õ (
o3
) - ó (
o4
) - ọ (
o5
) - oc (
oc
) - òc (
oc1
) - ỏc (
oc2
) - õc (
oc3
) - óc (
oc4
) - ọc (
oc5
) - on (
on
) - òn (
on1
) - ỏn (
on2
) - õn (
on3
) - ón (
on4
) - ọn (
on5
) - ô (
o₂
) - ồ (
o₂1
) - ổ (
o₂2
) - ỗ (
o₂3
) - ố (
o₂4
) - ộ (
o₂5
) - ôc (
o₂c
) - ồc (
o₂c1
) - ổc (
o₂c2
) - ỗc (
o₂c3
) - ốc (
o₂c4
) - ộc (
o₂c5
) - ôn (
o₂n
) - ồn (
o₂n1
) - ổn (
o₂n2
) - ỗn (
o₂n3
) - ốn (
o₂n4
) - ộn (
o₂n5
) - ơ (
o₃
) - ờ (
o₃1
) - ở (
o₃2
) - ỡ (
o₃3
) - ớ (
o₃4
) - ợ (
o₃5
) - ơc (
o₃c
) - ờc (
o₃c1
) - ởc (
o₃c2
) - ỡc (
o₃c3
) - ớc (
o₃c4
) - ợc (
o₃c5
) - ơn (
o₃n
) - ờn (
o₃n1
) - ởn (
o₃n2
) - ỡn (
o₃n3
) - ớn (
o₃n4
) - ợn (
o₃n5
) - p (
p
) - pc (
pc
) - pn (
pn
) - q (
q
) - qc (
qc
) - qn (
qn
) - r (
r
) - rc (
rc
) - rn (
rn
) - s (
s
) - sc (
sc
) - sn (
sn
) - t (
t
) - tc (
tc
) - tn (
tn
) - u (
u
) - ù (
u1
) - ủ (
u2
) - ũ (
u3
) - ú (
u4
) - ụ (
u5
) - uc (
uc
) - ùc (
uc1
) - ủc (
uc2
) - ũc (
uc3
) - úc (
uc4
) - ục (
uc5
) - un (
un
) - ùn (
un1
) - ủn (
un2
) - ũn (
un3
) - ún (
un4
) - ụn (
un5
) - ư (
u₃
) - ừ (
u₃1
) - ử (
u₃2
) - ữ (
u₃3
) - ứ (
u₃4
) - ự (
u₃5
) - ưc (
u₃c
) - ừc (
u₃c1
) - ửc (
u₃c2
) - ữc (
u₃c3
) - ức (
u₃c4
) - ực (
u₃c5
) - ưn (
u₃n
) - ừn (
u₃n1
) - ửn (
u₃n2
) - ữn (
u₃n3
) - ứn (
u₃n4
) - ựn (
u₃n5
) - v (
v
) - vc (
vc
) - vn (
vn
) - x (
x
) - xc (
xc
) - xn (
xn
) - y (
y
) - ỳ (
y1
) - ỷ (
y2
) - ỹ (
y3
) - ý (
y4
) - ỵ (
y5
) - yc (
yc
) - ỳc (
yc1
) - ỷc (
yc2
) - ỹc (
yc3
) - ýc (
yc4
) - ỵc (
yc5
) - yn (
yn
) - ỳn (
yn1
) - ỷn (
yn2
) - ỹn (
yn3
) - ýn (
yn4
) - ỵn (
yn5
)
local export = {}

local U = mw.ustring.char

-- Combining marks that create separate vowel letters.
local breve, circum, horn = U(0x306), U(0x302), U(0x31B)
-- Combining marks that indicate tone.
local grave, hook, tilde, acute, dot = U(0x300), U(0x309), U(0x303), U(0x301), U(0x323)

--[[
local diacritics = breve .. circum .. horn .. grave .. hook .. tilde .. acute .. dot
local diacritic = "[" .. diacritics .. "]"
--]]

-- Lookup table used when building a sortkey: each key is a single character
-- and each value is what that character is replaced with.
-- It determines the sort order of diacritics. The full-size numbers are placed
-- at the end of the syllable, the subscript numbers immediately after the vowel.
local replacements = {
	["đ"] = "d₁",
	["-"] = " ",
}

-- Diacritics that form separate vowel letters.
replacements[breve] = "₁"
replacements[circum] = "₂"
replacements[horn] = "₃"

-- Tonal diacritics: each mark is mapped to its position (1-5) in the list.
-- The list below is the order given by Fumiko Take.
-- The order given by Stephen G. Brown would be:
--     { acute, grave, hook, tilde, dot }
for tone_number, tone_mark in ipairs({ grave, hook, tilde, acute, dot }) do
	replacements[tone_mark] = tone_number
end
-- Language object for Vietnamese, shared by the functions below.
local vi = require("Module:languages").getByCode("vi")

-- Passes `text` and the Vietnamese language object to tag_text in
-- [[Module:script utilities]] and returns whatever that call returns.
local function tag(text)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, vi)
end
-- Generates a sortkey for a Vietnamese term.
--
-- Parameters:
--   text: the term to convert.
--   lang: optional language code; it is compared against the string "vi", and
--         for any other non-nil value `text` is returned unchanged.
--   sc:   optional script code; when it is absent and `text` is given, the
--         script is detected with findBestScript from [[Module:scripts]].
--
-- Returns:
--   * `text` unchanged when `lang` is not "vi" or the script is neither
--     "Hani" nor "Latn";
--   * the result of makeSortKey in [[Module:zh-sortkey]] when the script is "Hani";
--   * nil when `text` is nil (and no script was supplied);
--   * otherwise the Latin-script sortkey: vowel-quality marks become subscript
--     digits directly after the vowel, tone marks become a digit 1-5 at the
--     end of the syllable, "đ" becomes "d₁" and "-" becomes a space.
function export.makeSortKey(text, lang, sc)
	if lang and lang ~= "vi" then
		return text
	end

	-- Detect the script only when there is text to inspect; for nil text
	-- `sc` stays nil and the nil check further down returns.
	if not sc then
		sc = text and require("Module:scripts").findBestScript(text, vi):getCode()
	end

	if sc then
		if sc == "Hani" then
			return require("Module:zh-sortkey").makeSortKey(text, lang, sc)
		elseif sc ~= "Latn" then
			return text
		end
	end

	if not text then
		return nil
	end

	local sortkey = text

	--[=[
		[[Module:languages]] currently converts text to lowercase
		before applying changes, then to uppercase before outputting
		the result.
	]=]
	-- Lowercasing is therefore only done here when the current page is in the
	-- Module namespace.
	if mw.title.getCurrentTitle().nsText == "Module" then
		sortkey = mw.ustring.lower(text)
	end

	-- Decompose so that every diacritic is a separate combining character.
	sortkey = mw.ustring.toNFD(sortkey)

	-- Look up each character in `replacements`; characters without an entry
	-- are kept as they are.
	sortkey = sortkey:gsub("[%z\1-\127\194-\244][\128-\191]*", replacements) -- pattern for UTF-8 character

	-- Put the vowel-quality subscript directly after the vowel when a tone
	-- digit precedes it. Tone digits run from 1 to 5, and 5 (dot below) is the
	-- tone that NFD orders before breve and circumflex (combining class 220
	-- versus 230), so it has to be included in the class.
	sortkey = mw.ustring.gsub(sortkey, "([1-5])([₀-₃])", "%2%1")

	-- move tone number to end of syllable
	sortkey = mw.ustring.gsub(sortkey, '([1-5])([^%s]+)', '%2%1')

	return sortkey
end
-- Builds a wikitext list showing, for every positional argument of `frame`,
-- the sortkey inside <code> tags followed by the tagged word on a ":" line.
-- Returns the list as one string.
function export.showSortkey(frame)
	local lines = {}
	for position, word in ipairs(frame.args) do
		local sortkey = export.makeSortKey(word)
		local tagged_word = tag(word)
		lines[position] = "\n* <code>" .. sortkey .. "</code>\n: " .. tagged_word
	end
	return table.concat(lines)
end
-- Sorts the positional arguments of `frame` by their sortkeys and returns a
-- wikitext list in which each tagged term is followed by its sortkey in
-- parentheses, inside <code> tags.
function export.showSorting(frame)
	-- Copy the arguments into a plain table that can be sorted in place.
	local terms = {}
	for position, term in ipairs(frame.args) do
		terms[position] = term
	end

	-- Wrapped with memoize from [[Module:fun]]; presumably this caches the
	-- sortkey of each term so it is not recomputed on every comparison.
	local makeSortKey = require("Module:fun").memoize(export.makeSortKey)

	table.sort(terms, function(left, right)
		return makeSortKey(left) < makeSortKey(right)
	end)

	-- Replace each sorted term with its formatted list item.
	for position = 1, #terms do
		local term = terms[position]
		terms[position] = "\n* " .. tag(term) .. " (<code>" .. makeSortKey(term) .. "</code>)"
	end

	return table.concat(terms)
end
-- Module table holding makeSortKey, showSortkey and showSorting.
return export