Module:vi

Definition from Wiktionary, the free dictionary
Jump to: navigation, search
The following documentation is located at Module:vi/documentation. [edit]
Useful links: subpage list • transclusions • testcases

This module provides lexicographic tools for Vietnamese language text.

Usage[edit]

In a template:

  • {{#invoke:vi|toReformedTones|xóa}} → xoá
  • {{#invoke:vi|toTraditionalTones|xoá}} → xóa
  • {{#invoke:vi|removeDiacritics|thay đổi gần đây}} → thay doi gan day
  • {{#invoke:vi|removeDiacritics|thay đổi gần đây|tones=0}} → thay dỏi gàn day
  • {{#invoke:vi|removeDiacritics|thay đổi gần đây|accents=0}} → thay dôi gân dây
  • {{#invoke:vi|removeDiacritics|thay đổi gần đây|đ=0}} → thay đoi gan đay

In another module:

viet = require "Module:vi"
 
t = {"an ninh", "bóng rổ", "Ả Rập", "bóng đá", "ăn", "Á Châu"}
table.sort(t, viet.comp)

causes t to hold: (removed due to script error)

For best results, call toTraditionalTones() or toReformedTones() on each string before sorting them using comp().


---Lexicographic tools for Vietnamese language text.
local lang = require("Module:languages").getByCode("vi")
 
-- Export table: the module's public functions and constants are attached to it.
local p = {}
 
---Converts the given text to traditional tone marks.
-- For words ending in "oa", "oe" or "uy", the tone mark is moved from the
-- second vowel to the first (e.g. "xoá" becomes "xóa").
-- @param text  The string to convert, or a frame object whose first positional
--              argument is the string (when called through #invoke).
-- @return      The converted string.
function p.toTraditionalTones(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	return (mw.ustring.gsub(text, "%a+", function (word)
		-- Words made of "qu" + y keep the tone on the "y". The test is done on
		-- the lower-cased word so that capitalised forms such as "Quý" are
		-- protected too; otherwise they would be rewritten to "Qúy".
		if mw.ustring.match(mw.ustring.lower(word), "^qu[yýỳỷỹỵ]$") then return word end
		-- Only the last two letters of the word are looked up; pairs that are
		-- not keys of the table are left unchanged by gsub.
		return (mw.ustring.gsub(word, "%a%a$", {
			["oá"] = "óa", ["oà"] = "òa", ["oả"] = "ỏa", ["oã"] = "õa", ["oạ"] = "ọa",
			["oé"] = "óe", ["oè"] = "òe", ["oẻ"] = "ỏe", ["oẽ"] = "õe", ["oẹ"] = "ọe",
			["uý"] = "úy", ["uỳ"] = "ùy", ["uỷ"] = "ủy", ["uỹ"] = "ũy", ["uỵ"] = "ụy"
		}))
	end))
end
 
---Rewrites the given text so that tone marks use the reformed placement.
-- For words ending in "oa", "oe" or "uy", the tone mark is moved from the
-- first vowel to the second (e.g. "xóa" becomes "xoá").
-- @param text  The string to convert, or a frame object whose first positional
--              argument is the string (when called through #invoke).
-- @return      The converted string.
function p.toReformedTones(text)
	-- Word-final letter pairs and their reformed spelling.
	local reformed = {
		["óa"] = "oá", ["òa"] = "oà", ["ỏa"] = "oả", ["õa"] = "oã", ["ọa"] = "oạ",
		["óe"] = "oé", ["òe"] = "oè", ["ỏe"] = "oẻ", ["õe"] = "oẽ", ["ọe"] = "oẹ",
		["úy"] = "uý", ["ùy"] = "uỳ", ["ủy"] = "uỷ", ["ũy"] = "uỹ", ["ụy"] = "uỵ"
	}

	-- Looks up the last two letters of one word; unknown pairs stay as they are.
	local function respell(word)
		local respelled = mw.ustring.gsub(word, "%a%a$", reformed)
		return respelled
	end

	local source = text
	if type(source) == "table" then
		source = source.args[1]
	end

	local converted = mw.ustring.gsub(source, "%a+", respell)
	return converted
end
 
---Lists the distinct spellings of a word under both tone-placement conventions.
-- @param main_spelling  The spelling to start from, or a frame object; with a
--                       frame, the word is args[1] and makeLinks is args.link.
-- @param makeLinks      When truthy, each spelling is replaced by the result of
--                       full_link from Module:links.
-- @return  With a frame: the spellings joined by "/". Otherwise the table of
--          spellings (it also carries the original spellings as string keys
--          set to true, used for de-duplication).
function p.allSpellings(main_spelling, makeLinks)
	local frame
	if type(main_spelling) == "table" then
		frame = main_spelling
		makeLinks = frame.args.link
		main_spelling = frame.args[1]
	end

	local spellings = {}
	for _, convert in ipairs({ p.toTraditionalTones, p.toReformedTones }) do
		local variant = convert(main_spelling)
		-- String keys mark the variants already collected.
		if not spellings[variant] then
			spellings[#spellings + 1] = variant
			spellings[variant] = true
		end
	end

	if makeLinks then
		local m_links = require("Module:links") -- [[Module:links]]
		for idx = 1, #spellings do
			spellings[idx] = m_links.full_link(spellings[idx], nil, lang, nil, nil, nil, {}, false)
		end
	end

	if frame then
		return table.concat(spellings, "/")
	end
	return spellings
end
 
---String made of the combining characters used for Vietnamese tone marks.
-- In order: U+0300 (as in à), U+0301 (á), U+0303 (ã), U+0309 (ả), U+0323 (ạ).
p.combiningToneMarks = mw.ustring.char(0x300, 0x301, 0x303, 0x309, 0x323)
 
---String made of the combining characters used for Vietnamese accent marks.
-- In order: U+0302 (as in â), U+0306 (ă), U+031B (ơ).
p.combiningAccentMarks = mw.ustring.char(0x302, 0x306, 0x31b)
 
---Removes Vietnamese diacritical marks from the given text.
-- Called from Lua, the three flags are plain truthy/falsy switches.
-- Called through #invoke (first argument is a frame), the text is args[1] and
-- each switch is on when its template parameter is absent or numerically 1:
-- @param tones     Template parameter; set to “0” to leave tone marks intact.
-- @param accents   Template parameter; set to “0” to leave accent marks intact.
-- @param đ         Template parameter; set to “0” to leave “Đ” and “đ” intact.
-- @return          The stripped text, recomposed with toNFC.
function p.removeDiacritics(text, toneMarks, accentMarks, stroke)
	if type(text) == "table" then
		local args = text.args
		local function enabled(value)
			return not value or tonumber(value) == 1
		end
		text = args[1]
		toneMarks = enabled(args.tones)
		accentMarks = enabled(args.accents)
		stroke = enabled(args["đ"])
	end

	-- Decompose first so the marks become separate combining characters.
	local decomposed = mw.ustring.toNFD(text)

	if toneMarks then
		local tonePattern = "[" .. p.combiningToneMarks .. "]"
		decomposed = mw.ustring.gsub(decomposed, tonePattern, "")
	end

	if accentMarks then
		local accentPattern = "[" .. p.combiningAccentMarks .. "]"
		decomposed = mw.ustring.gsub(decomposed, accentPattern, "")
	end

	if stroke then
		decomposed = mw.ustring.gsub(decomposed, "[Đđ]", {["Đ"] = "D", ["đ"] = "d"})
	end

	return mw.ustring.toNFC(decomposed)
end
 
---Vietnamese letters for use in comp().
-- The position of a letter in this string is its sort rank in compWord().
-- The string is also spliced into "[...]" character classes elsewhere in this
-- module to recognise Vietnamese letters.
p.letters = "aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ"
 
---Compare two syllables according to Vietnamese dictionary sorting order.
-- Letters are ranked by their position in p.letters; characters that are not
-- in p.letters are skipped during the letter-by-letter comparison.
-- @param word1  First syllable.
-- @param word2  Second syllable.
-- @return       true if word1 sorts before word2, false otherwise.
function p.compWord(word1, word2)
	-- A word that begins with the other word sorts after it. ustring.find
	-- returns a 1-based index (or nil), so a prefix match is reported as 1;
	-- the previous comparison against 0 could never succeed.
	if mw.ustring.find(word1, word2, 1, true) == 1 then return false end
	if mw.ustring.find(word2, word1, 1, true) == 1 then return true end

	local letterClass = "[" .. p.letters .. "]"
	local nextLetter1 = mw.ustring.gmatch(word1, letterClass)
	local nextLetter2 = mw.ustring.gmatch(word2, letterClass)
	while true do
		local c1 = nextLetter1()
		local c2 = nextLetter2()
		-- Stop as soon as either word runs out of letters.
		if c1 == nil or c2 == nil then break end

		local idx1 = mw.ustring.find(p.letters, c1, 1, true)
		local idx2 = mw.ustring.find(p.letters, c2, 1, true)
		if idx1 and idx2 then
			if idx1 < idx2 then return true end
			if idx1 > idx2 then return false end
		end
	end

	-- No difference found among the compared letters: fall back to the
	-- built-in string ordering.
	return word1 < word2
end
 
---Compare two strings according to Vietnamese dictionary sorting order.
-- Suitable as the comparator for table.sort. The texts are compared word by
-- word ("%a+" runs); the first pair of differing words decides the order.
-- @param text1  First string.
-- @param text2  Second string.
-- @return       true if text1 sorts before text2, false otherwise.
function p.comp(text1, text2)
	if text1 == text2 then return false end

	local nextWord1 = mw.ustring.gmatch(text1, "%a+")
	local nextWord2 = mw.ustring.gmatch(text2, "%a+")
	while true do
		local word1 = nextWord1()
		local word2 = nextWord2()

		-- Both texts ran out of words at the same time: they consist of the
		-- same words and differ only in what lies between them. Leave the loop
		-- and use the fallback below; returning true here would make
		-- comp(a, b) and comp(b, a) both true, which is not a valid order
		-- function for table.sort.
		if word1 == nil and word2 == nil then break end

		-- The text with fewer words sorts first.
		if word1 == nil then return true end
		if word2 == nil then return false end

		if word1 ~= word2 then
			local lower1 = mw.ustring.lower(word1)
			local lower2 = mw.ustring.lower(word2)
			local noTones1 = p.removeDiacritics(lower1, true, false, false)
			local noTones2 = p.removeDiacritics(lower2, true, false, false)

			-- Compare base letters.
			if noTones1 ~= noTones2 then
				return p.compWord(noTones1, noTones2)
			end

			-- Compare letters case-insensitively.
			if lower1 ~= lower2 then
				return p.compWord(lower1, lower2)
			end

			-- Compare letters including tones.
			assert(word1 ~= word2)
			return p.compWord(word1, word2)
		end
	end

	-- Same words in both texts: order by the built-in string comparison.
	return text1 < text2
end
 
---[[Template:vi-readings]]
-- Produces one wikitext bullet per reading style (Hán Việt, Nôm) that has
-- readings, each followed by its category link.
-- @param hanviet     Comma-separated Hán Việt readings, or a frame object; with
--                    a frame, values come from the parent frame's arguments
--                    (hanviet/hv, nom/n, rs, phienthiet/phth/fanqie).
-- @param nom         Comma-separated Nôm readings.
-- @param rs          Category sort key; the current page title is used if absent.
-- @param phienthiet  Comma-separated fanqie entries, matched by position to the
--                    Hán Việt readings.
-- @return            The bullets joined by newlines.
function p.readings(hanviet, nom, rs, phienthiet)
	if type(hanviet) == "table" then
		local args = hanviet:getParent().args
		hanviet = args.hanviet or args.hv
		nom = args.nom or args.n
		rs = args.rs
		phienthiet = args.phienthiet or args.phth or args.fanqie
	end

	-- Splits a comma-separated value; passes a missing value through untouched.
	local function splitList(value)
		return value and mw.text.split(value, "%s*,%s*")
	end

	local styles = {
		{
			link = "Hán Việt",
			cat = "Vietnamese Han tu",
			list = splitList(hanviet),
			phienthiet = splitList(phienthiet)
		},
		{
			link = "chữ Nôm|Nôm",
			cat = "Vietnamese Nom",
			list = splitList(nom),
		},
	}

	local lines = {}
	for _, style in ipairs(styles) do
		local readings = style.list
		if readings and #readings > 0 and #readings[1] > 0 then
--			table.sort(readings, p.comp)
			for j = 1, #readings do
				-- Show every spelling variant of the reading, linked.
				readings[j] = table.concat(p.allSpellings(readings[j], true), "/")

				-- Fanqie
				local fanqie = style.phienthiet and style.phienthiet[j]
				if fanqie then
					-- Expected shape: characters, spaces, then their readings.
					local ruby = p.ruby(mw.ustring.match(mw.text.trim(fanqie),
						"(%a+) +(.+)"))
					if ruby then
						local suffix = p.ruby("切", "thiết")
						readings[j] = mw.ustring.format("%s (%s[[w:Fanqie|%s]])",
							readings[j], ruby, suffix)
					end
				end
			end
			if #readings > 0 then
				local sortkey = rs or mw.title.getCurrentTitle().text
				local joined = table.concat(readings, ", ")
				lines[#lines + 1] = mw.ustring.format("* '''[[%s]]''': %s[[Category:%s|%s]]",
					style.link, joined, style.cat, sortkey)
			end
		end
	end

	return table.concat(lines, "\n")
end
 
---[[Template:vi-ruby]]
-- Wraps characters in ruby markup, pairing them in order with the readings.
-- @param characters  The text to annotate, or a frame object; with a frame the
--                    values come from the parent frame's arguments (1, 2,
--                    mark, and alts or ids split on whitespace).
-- @param readings    The readings as one string; it is split on runs of
--                    characters that are not in p.letters. If nil or false,
--                    characters is returned unchanged.
-- @param mark        Optional character to wrap in <mark>, together with its
--                    reading. With a frame it defaults to the page title.
-- @param alts        Optional list of strings; each "*" in characters is
--                    replaced by the next entry while entries remain.
-- @return            The annotated text inside a <span lang='vi'> wrapper.
function p.ruby(characters, readings, mark, alts)
	if type(characters) == "table" then
		local args = characters:getParent().args
		characters, readings, mark, alts =
			args[1] or "",
			args[2] or "",
			args.mark or mw.title.getCurrentTitle().text,
			((args.alts and mw.text.split(args.alts, "%s+")) or
				(args.ids and mw.text.split(args.ids, "%s+")) or {})
	end

	if not readings then
		return characters
	end

	-- Direct Lua callers may omit alts; without a default, a "*" in
	-- characters would index a nil value.
	alts = alts or {}

	readings = mw.text.split(readings, "[^" .. p.letters .. "]+")

	local result = {}
	local character_idx = 1
	local alt_idx = 1
	for character in mw.ustring.gmatch(characters, ".") do
		local is_alt = false
		if character == "*" and alts[alt_idx] then
			character = alts[alt_idx]
			is_alt = true
			alt_idx = alt_idx + 1
		end
		-- Annotate substituted alternatives, and characters that ustring
		-- classifies as letters but that are not a single-byte alphanumeric.
		if is_alt or (mw.ustring.match(character, "^%a$") and not character:match("^%w$")) then
			-- When there are more characters than readings, use an empty
			-- reading instead of handing nil to the %s formats below.
			local reading = readings[character_idx] or ""
			if mark and character == mark then
				character = mw.ustring.format("<mark>%s</mark>", character)
				reading = mw.ustring.format("<mark>%s</mark>", reading)
			end
			character = mw.ustring.format(
				"<ruby><rb><span class='Hani'>%s</span></rb><rp>(</rp><rt><span style='padding: 0 0.25em;'>%s</span></rt><rp>)</rp></ruby>",
				character, reading)
			character_idx = character_idx + 1
		end
		table.insert(result, character)
	end

	return mw.ustring.format("<span lang='vi' style='font-size: 137%%;'>%s</span>", table.concat(result))
end
 
---Builds the right-aligned "Hán tự in this word" table for the current page.
-- Characters of the page title outside the range 一–鿌 are dropped; every
-- remaining character gets its own cell linking to its Vietnamese section.
-- @return  The table as an HTML/wikitext string.
function p.hantutab()
	local title = mw.title.getCurrentTitle().text
	local hantu = mw.ustring.gsub(title, '[^一-鿌]', '')
	local cells = mw.ustring.gsub(hantu, '(.)', '<td><span lang="vi">[[%1#Vietnamese|%1]]</span></td>')

	return table.concat({
		'<table align="right" border="0" cellpadding="5" cellspacing="0" style="border:1px solid #aaaaaa; margin-left:5px; margin-right:10px; font-size:8pt; text-align:center; margin: 0 0 1em 1em;" rules="all" frame="box"><tr><td colspan="',
		-- The header cell spans one column per character.
		mw.ustring.len(hantu),
		'" bgcolor="#f9f9f9">[[Hán tự]] in this word</td></tr><tr style="font-size: 2em">',
		cells,
		'</table>',
	})
end
 
---Returns the categories indicated by the given wikitext.
-- Tags ("<...>") are removed from frame.args[1]; every remaining run of
-- letters from p.letters, apart from a few skipped words, yields one
-- "Vietnamese ... class nouns" category link.
-- @param frame  Frame object; the wikitext is its first positional argument.
-- @return       The category links concatenated without separator.
function p.classifierCategories(frame)
	-- Words that must not produce a category.
	local skipped = {
		["l"] = true,
		["vi"] = true,
		["vi-l"] = true,
		["Vietnamese"] = true,
	}

	local stripped = mw.ustring.gsub(frame.args[1], "<[^>]->", "")
	local categories = {}
	for word in mw.ustring.gmatch(stripped, "[" .. p.letters .. "]+") do
		if not skipped[word] then
			categories[#categories + 1] = mw.ustring.format(
				"[[Category:Vietnamese %s class nouns]]", word)
		end
	end
	return table.concat(categories)
end
 
-- Hand the export table to require() / #invoke.
return p