Module:zh

From Wiktionary, the free dictionary
Jump to navigation Jump to search

General-purpose utility module for Chinese. Its data is stored in subpages of Module:zh/data (for example Module:zh/data/ts and Module:zh/data/st).

Functions

  1. ts
    {{#invoke:zh|ts|試試我吧!}} → 试试我吧!
  2. st
    {{#invoke:zh|st|试试我吧!}} → 試試我吧!
  3. ts_determ
    {{#invoke:zh|ts_determ|試試我}} → trad
  4. py
    {{#invoke:zh|py|試}} → shì
  5. check_pron
    {{#invoke:zh|check_pron|學|cmn}} → xué
  6. check_pron (for Module:zh-usex)
    {{#invoke:zh|check_pron|嘅|yue|1}} → ge3
  7. check_pron (for Module:zh-new)
    {{#invoke:zh|check_pron|三|yue}} → saam3,saam1
  8. check_pron
    {{#invoke:zh|check_pron|學生|yue}} → hok6 saang1
  9. check_pron
    {{#invoke:zh|check_pron|學生|hak}} → ho̍k-sâng
  10. check_pron
    {{#invoke:zh|check_pron|學生|nan-hbl}} → ha̍k-seng

Substitution-only functions have been moved to Module:zh-new.


-- Shared string helpers; the functions aliased below all come from this module.
local m_str_utils = require("Module:string utilities")
-- Replacement tables handed to gsub by export.ts and export.st respectively.
local m_ts = mw.loadData("Module:zh/data/ts")
local m_st = mw.loadData("Module:zh/data/st")
-- Language object for the code "zh"; export.ts_determ uses it for script detection.
local lang = require("Module:languages").getByCode("zh")

-- Local aliases for the string helpers used throughout this module.
local codepoint = m_str_utils.codepoint
local find = m_str_utils.find
local len = m_str_utils.len
local split = m_str_utils.split
local sub = m_str_utils.sub

-- Table of public functions; it is returned at the end of the module.
local export = {}

--- Classify text by the script that `lang` considers the best match.
-- Accepts either the text itself or a frame-like table whose `args[1]`
-- holds the text (if `args[1]` is missing, the table itself is used).
-- @param f text, or a frame-like table
-- @return "both" for script code "Hani", "trad" for "Hant", otherwise "simp"
function export.ts_determ(f)
	local text = f
	if type(f) == "table" then
		text = f.args[1] or f
	end

	local script_code = lang:findBestScript(text):getCode()
	if script_code == "Hani" then
		return "both"
	elseif script_code == "Hant" then
		return "trad"
	end
	return "simp"
end

--- Replace every multi-byte character of the text that has an entry in the
-- `m_ts` table with that entry (e.g. 試試我吧! → 试试我吧!).
-- Accepts either the text itself or a frame-like table whose `args[1]`
-- holds the text (if `args[1]` is missing, the table itself is used).
-- @param f text, or a frame-like table
-- @return the converted text only; gsub's replacement count is discarded
function export.ts(f)
	local text = f
	if type(f) == "table" then
		text = f.args[1] or f
	end

	-- One lead byte followed by any continuation bytes; characters with no
	-- entry in the table are left unchanged by gsub.
	local converted = text:gsub("[\194-\244][\128-\191]*", m_ts)
	return converted
end

--- Replace every multi-byte character of the text that has an entry in the
-- `m_st` table with that entry (e.g. 试试我吧! → 試試我吧!).
-- Accepts either the text itself or a frame-like table whose `args[1]`
-- holds the text (if `args[1]` is missing, the table itself is used).
-- @param f text, or a frame-like table
-- @return the converted text only; gsub's replacement count is discarded
function export.st(f)
	local text = f
	if type(f) == "table" then
		text = f.args[1] or f
	end

	-- One lead byte followed by any continuation bytes; characters with no
	-- entry in the table are left unchanged by gsub.
	local converted = text:gsub("[\194-\244][\128-\191]*", m_st)
	return converted
end

--- Convert Chinese text to Pinyin using the `py` table of
-- Module:zh/data/cmn-pron.
--
-- May be called from Lua with positional arguments, or with a frame-like
-- table as the first argument, in which case the five values are read from
-- `args[1]` .. `args[5]`.
--
-- @param text     the text to convert (or the frame-like table)
-- @param comp     optional string of digits; each digit is the number of
--                 characters in one segment, and a space is emitted after
--                 each segment except at the very end. Ignored when it is
--                 '', '12' or '21', when `pos` is 'cy'/'Idiom'/'idiom' and
--                 the text has four characters, or when `is_erhua` is set.
-- @param pos      optional part-of-speech code; 'pn' and 'propn' capitalise
--                 the first letter of every space-separated segment
-- @param p        optional table of per-position pronunciations; a non-empty
--                 `p[i]` is used verbatim instead of looking up character i
-- @param is_erhua if truthy, the last character of the text is dropped, no
--                 apostrophes or spaces are inserted, and 'r' is appended
-- @return the romanised string
function export.py(text, comp, pos, p, is_erhua)
	require("Module:debug").track("zh/py")
	local m_cmn_pron = mw.loadData("Module:zh/data/cmn-pron")
	if type(text) == "table" then
		text, comp, pos, p, is_erhua = text.args[1], text.args[2], text.args[3], text.args[4], text.args[5]
	end
	comp = comp or ''
	p = p or {}

	local source = text
	local length = len(source)
	if is_erhua then length = length - 1 end

	-- boundary[i] is set when a space has to follow character i.
	local boundary = {}
	local four_char_idiom = (pos == 'cy' or pos == 'Idiom' or pos == 'idiom') and length == 4
	if comp ~= '' and comp ~= '12' and comp ~= '21' and not four_char_idiom and not is_erhua then
		local sum = 0
		for i = 1, len(comp) do
			sum = sum + tonumber(sub(comp, i, i))
			boundary[sum] = true
		end
	end

	-- Collect the output pieces and join them once at the end.
	local parts = {}
	-- `initial` is true while no syllable has been emitted since the start of
	-- the text or since the last character that was copied through unchanged.
	local initial = true
	for i = 1, length do
		local supplied = p[i]
		if supplied and supplied ~= '' then --pronunciation supplied
			parts[#parts + 1] = supplied
		else
			local original = sub(source, i, i)
			-- Try the character itself, then its `ts` conversion; characters
			-- with no reading (punctuation, markup) are copied unchanged.
			local syllable = m_cmn_pron.py[original] or m_cmn_pron.py[export.ts(original)] or original
			if not is_erhua and not initial and find(syllable, '^[aoeāōēáóéǎǒěàòè]') then
				parts[#parts + 1] = "'"
			end
			parts[#parts + 1] = syllable

			initial = syllable == original
				and sub(source, i-3, i) ~= "</b>" --checks for closing bold tag
				and (i-2 == 1 or sub(source, i-2, i) ~= "<b>" or sub(source, i-3, i) == "^<b>") --checks for opening bold tag
				and (i-3 == 1 or sub(source, i-3, i) ~= "^<b>") --checks for opening bold tag with capitalization
		end
		if boundary[i] and i ~= length and not is_erhua then
			parts[#parts + 1] = ' '
		end
	end
	local result = table.concat(parts)

	--fix bolding of apostrophe: move it in front of the opening bold tag.
	-- The loop above emits a literal apostrophe, so that form must be handled;
	-- the entity form is kept for text that already contains "&#39;".
	result = result:gsub("<b>&#39;", "&#39;<b>")
	result = result:gsub("<b>'", "'<b>")

	if is_erhua then result = result .. 'r' end
	if pos == 'pn' or pos == 'propn' then
		local content_lang = mw.language.getContentLanguage()
		local segments = split(result, ' ')
		for i = 1, #segments do
			segments[i] = content_lang:ucfirst(segments[i])
		end
		result = table.concat(segments, ' ')
	end
	return result
end

do
	--- Look `text` up in a paged data module.
	-- The page is chosen by formatting `address` with
	-- (first code point of text - startpoint) / 1000; this relies on `%d`
	-- accepting a fractional number, which Lua 5.1 does by truncating it.
	-- @return the entry for `text`, or false if there is no first code point,
	--         the page cannot be loaded, or the page has no such entry
	local function get_pron(text, startpoint, address)
		local first = codepoint(text)
		if not first then
			-- Presumably only happens for an empty string; there is nothing
			-- to look up, so report "not found" instead of failing below.
			return false
		end
		local success, data = pcall(mw.loadData, ("Module:zh/data/" .. address)
			:format((first - startpoint) / 1000)
		)
		return success and data[text] or false
	end

	--- Return the stored pronunciation of `text` in the given variety.
	--
	-- May be called from Lua with positional arguments, or with a frame-like
	-- table as the first argument; in that case text, variety and usex are
	-- read from `args[1]` .. `args[3]`, the length is computed from the text,
	-- and usex counts as set only when it is non-empty.
	--
	-- @param text    the text to look up (or the frame-like table)
	-- @param variety "cmn" (single characters only), "yue", "hak" or "nan-hbl"
	-- @param length  number of characters in `text`
	-- @param usex    for single-character "yue" lookups, selects the
	--                yue-pron jyutping table instead of the Jyutping
	--                character table
	-- @return the pronunciation, false when none is found, or nothing when
	--         no text was given
	function export.check_pron(text, variety, length, usex)
		if type(text) == "table" then
			text, variety, usex = text.args[1], text.args[2], text.args[3]
			-- Only measure the text when there is one, so that a missing
			-- first argument reaches the guard below instead of failing here.
			length = text and len(text)
			usex = usex and usex ~= ""
		end
		if not text then
			return
		elseif variety == "cmn" and length == 1 then
			local py = mw.loadData("Module:zh/data/cmn-pron").py
			return py[text] or py[m_ts[text]] or false
		elseif variety == "yue" then
			if length == 1 then
				return (usex and mw.loadData("Module:zh/data/yue-pron").jyutping or -- used for {{zh-x}}
					mw.loadData("Module:zh/data/Jyutping character"))[text] or -- used for {{subst:zh-new}}
					false
			end
			local result = get_pron(text, 51, "yue-word/%03d")
			-- Track entries that Module:yue-pron/check rejects; the result is
			-- returned either way.
			if result and not require("Module:yue-pron/check")(text, result) then
				require("Module:debug").track("zh/yue")
				require("Module:debug").track("zh/yue/"..text)
			end
			return result
		elseif variety == "hak" then
			return get_pron(text, 19968, "hak-pron/%02d")
		elseif variety == "nan-hbl" then
			return get_pron(text, 19968, "nan-pron/%03d")
		end
		return false
	end
end

return export