Module:zh-usex

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

This module implements the template {{zh-x}}; see that template's documentation for more information.

Data for this module is found in Module:zh-usex/data.


-- [[Module:zh]]; the functions used from it in this file are ts_determ, ts, py and check_pron.
local m_zh = require("Module:zh")
-- Shorthands for the mw.ustring string functions used throughout the module.
local gsub = mw.ustring.gsub
local match = mw.ustring.match
-- NOTE(review): no use of `sub` is visible in this file.
local sub = mw.ustring.sub
-- Title text of the current page, unless a global PAGENAME is already defined.
local PAGENAME = PAGENAME or mw.title.getCurrentTitle().text

-- Table returned at the end of the module; export.show is attached to it.
local export = {}

-- Data tables loaded from Module:zh-usex/data.
local data = mw.loadData("Module:zh-usex/data")
-- Indexed by variety name; show() reads [1] (name shown in tags), [2] (variety code) and [3] (shown in the romanisation tag).
local variety_list = data.variety_list
-- Indexed by a word; show() uses a truthy value as that word's transcription.
local punctuation = data.punctuation
-- Indexed by the `ref` argument; show() reads [1] (default variety) and [2] (text shown after "From:").
local ref_list = data.ref_list
-- Indexed by variety code; the entry is passed to gsub as the per-character replacement.
local pron_correction = data.pron_correction
-- Indexed by variety code; the entry's key/value pairs are applied as gsub pattern/replacement.
local polysyllable_pron_correction = data.polysyllable_pron_correction

-- Wrappers put around the Chinese text and around the whole rendered example.
local zh_format_start = "<span lang=\"zh\" class=\"Hani\" style=\"font-size:110%;\">"
local zh_format_end = "</span>"
local bg_format_start = '<div style="background-color:#FCFEFB">'
local bg_format_end = '</div>'

-- Wrap one word of the example in wikilink brackets.
--
-- word: a single token of the example; it may carry <b>…</b> tags and use
--       "-" to separate parts that should be linked individually.
-- Returns the wikitext for the linked word.
--
-- If the word contains a complete <b>…</b> pair, every "-"-separated segment
-- becomes a piped link whose target is the segment without its <b> tags.
-- Otherwise the whole word is linked, and it is split into several links only
-- when a "-" sits directly between two Han characters.
local function make_link(word)
	if match(word, "<b>.+</b>") then
		local segments = mw.text.split(word, "-", true)
		-- A bold span may run across several segments ("<b>A-B-C</b>"), so
		-- remember whether we are inside one. If the very first tag of the
		-- word is a closing one, the word started inside a bold span.
		local in_bold = match(word, "</?b>") == "</b>"
		for i, segment in ipairs(segments) do
			local starts_in_bold = in_bold
			-- The last tag of the segment decides the state at its end; a
			-- segment without tags keeps the state it started with.
			local last_tag = match(segment, ".*(</?b>)")
			if last_tag then
				in_bold = last_tag == "<b>"
			end
			-- Re-open / close the span locally so each link's display text is
			-- balanced HTML, including segments in the middle of the span.
			segments[i] = "[[" .. gsub(segment, "</?b>", "") .. "|" ..
				(starts_in_bold and "<b>" or "") .. segment ..
				(in_bold and "</b>" or "") .. "]]"
		end
		word = table.concat(segments)
	else
		word = "[[" .. word .. "]]"
		if match(word, "([一-鿌㐀-䶵𠀀-𬺯])%-([一-鿌㐀-䶵𠀀-𬺯])") then
			word = gsub(word, "%-", "]][[")
		end
	end
	-- Keep a leading line break outside of the link brackets.
	word = gsub(word, "(%[%[)(<br/?>)", "%2%1")
	return word
end

-- Entry point for {{zh-x}}: renders one Chinese usage example.
--
-- frame: the Scribunto frame; the template arguments are read from
--        frame:getParent().args and are:
--   1             the example text (required)
--   2             the translation (a request notice and category are emitted if missing)
--   3             the variety name, a key of data.variety_list
--                 (default: the variety of `ref`, else "MSC")
--   lit           literal translation (only rendered in the inline layout)
--   tr            manual transcription, replacing the automatic one
--   ref / r       source; a key of data.ref_list or free text
--   display_type / type   "ruby" selects the ruby layout
--   inline        boolean; a true value selects the block (<dd>) layout
--   audio / a     file name shown via [[File:…]]
--   collapsed     boolean; wraps secondary rows in a collapsible span
--   link / l      boolean, default true; link the words of the example
--
-- Returns the rendered wikitext/HTML string. Raises an error when the example
-- is missing or the variety is not found in data.variety_list.
function export.show(frame)
	local params = {
		[1] = { required = true },	-- example
		[2] = {},					-- translation
		[3] = {},					-- variety
		lit = {},
		tr = {},
		
		ref = {}, r = { alias_of = "ref" },
		
		display_type = {}, type = { alias_of = "display_type" },
		
		inline = { type = "boolean" },
		
		audio = {}, a = { alias_of = "audio" },
		
		collapsed = { type = "boolean" },
		
		link = { type = "boolean", default = true }, l = { alias_of = "link" },
	}
	
	local args, unrecognized_args = require("Module:parameters").process(frame:getParent().args, params, true)
	
	local example = args[1] or error("Example unspecified.")
	local translation = args[2]
	local literal = args["lit"]
	local reference = args["ref"]
	local manual_tr = args["tr"]
	local display = args["display_type"]
	local inline = args["inline"]
	local audio_file = args["audio"]
	local collapsed = args["collapsed"]
	local link = args["link"]
	local phonetic = ""
	
	-- Built from code points so the literals cannot be silently folded to
	-- their ASCII look-alikes ("." and " ") by text normalisation.
	local fullwidth_full_stop = mw.ustring.char(0xFF0E)
	local ideographic_space = mw.ustring.char(0x3000)
	-- Runs of Han characters and CJK punctuation (U+3000-U+303F).
	local han_run_pattern = "[一-鿌㐀-䶵" .. ideographic_space .. "-〿𠀀-𬺯]+"
	
	-- Number of Han characters in the example as entered (before any markup is added).
	local original_length = mw.ustring.len((gsub(example, "[^一-龯㐀-䶵]", "")))
	
	local ref_data = reference and ref_list[reference]
	local variety = args[3] or (ref_data and ref_data[1]) or "MSC"
	local variety_data = variety_list[variety] or error("variety " .. variety .. " not recognized.")
	local variety_code = variety_data[2]
	
	if next(unrecognized_args) then
		local unrecognized_list = require("Module:fun").mapIter(
			function(param_value, param_name)
				return "|" .. param_name .. "=" .. param_value
			end,
			require("Module:table").sortedPairs(unrecognized_args))
		
		mw.log("Unrecognized parameters in {{zh-x}}: " .. table.concat(unrecognized_list, ", "))
		
		-- [[Special:WhatLinksHere/Template:tracking/zh-usex/unrecognized parameters]]
		require("Module:debug").track("zh-usex/unrecognized parameters")
	end

	if not translation or translation == '' then -- per standard [[Module:usex]]
		translation = '<small>(please add an English translation of this example)</small> [[Category:Chinese usage examples with the translation missing]]'
	end
	
	-- automatically boldify pagetitle if nothing is in bold
	-- (local: a global here would stay set for later calls in the same environment)
	local boldify = not match(example, "'''")
	if boldify and not punctuation[PAGENAME] then
		-- The page title is literal text: escape pattern magic characters, and
		-- use a function replacement so "%" in the title is not read as a
		-- capture reference.
		local pagename_pattern = gsub(PAGENAME, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0")
		example = gsub(example, pagename_pattern, function(found)
			return "'''" .. found .. "'''"
		end)
		example = gsub(example, "''''''", "")
	end
	
	-- tidying up the example, making it ready for transcription
	-- NOTE(review): the class below contains ASCII "." and " ", so every "."
	-- becomes its own token before the per-word "%." handling further down
	-- runs. Confirm this is intended (these may have been full-width
	-- characters originally).
	example = gsub(example, "([?!,。、“”…;:‘’|()「」『』—《》· .~])", " %1 ")
	example = gsub(example, " —  — ", " —— ") -- double em-dash (to be converted to single em-dash later)
	example = gsub(example, "^ *", "")
	example = gsub(example, " *$", "")
	example = gsub(example, " +", " ")
	example = gsub(example, "%'%'%'([^%']+)%'%'%'", "<b>%1</b>")
	-- move a "[simp]" annotation or "{reading}" that follows a bold span inside the span
	example = gsub(example, "(.)</b>%[([^%[%]]+)%]", function(first, second)
		return "<b>"..first.."</b>" ~= second and first.."["..second.."]</b>" or first.."["..first.."]</b>" end)
	example = gsub(example, "</b>({[^{}]+})", "%1</b>")
	
	local ruby_start, ruby_mid, ruby_end = "<big><ruby><span class=\"Hani\">", "</span><rp>&nbsp;(</rp><rt><big>", "</big></rt><rp>)</rp></ruby></big>"
	local ruby_words = {}
	local trad_words, simp_words, tr_words = {}, {}, {}
	
	-- Whether a separate simplified form is produced.
	local simp_exist = (m_zh.ts_determ(gsub(example, "(.)%[%1%]", "")) == "trad" or (match(example, "%[[^%[%]]+%]") and not match(example, "(.)%[%1%]"))) and variety_code ~= "vi"
	-- Becomes true once the first non-punctuation word has been transcribed
	-- and stays true for the rest of this call.
	local real_word = false
	
	for word in mw.text.gsplit(example, " ", true) do
		if gsub(gsub(word, "%{[^%}]+%}", ""), "%.", "") == PAGENAME and boldify then
			word = "<b>" .. word .. "</b>"
		end
		local trad_word, simp_word, tr_word, ruby_word = word, false, false, ""
		
		-- various tricks for linking and display in trad. and simp.
		trad_word = gsub(trad_word, "(.)%[(.)%]", "%1")
		trad_word = gsub(trad_word, "{[^{}]*}", "")
		trad_word = gsub(trad_word, "[%^%.]", "")
		trad_word = gsub(trad_word, "\\", "|")
		-- Full-width full stop -> ASCII period. (A bare "." pattern here would
		-- match, and overwrite, every character of the word.)
		trad_word = gsub(trad_word, fullwidth_full_stop, ".")
		
		if simp_exist then
			simp_word = match(word, "%[") and gsub(gsub(word .. "占[位]", "([^%[%]]*).%[(.)%]", function(a, b) return m_zh.ts(a) .. b end), "位$", "") or m_zh.ts(word)
			simp_word = gsub(simp_word, "{[^{}]*}", "")
			simp_word = gsub(simp_word, "[%^%.]", "")
			simp_word = gsub(simp_word, "\\", "|")
			simp_word = gsub(simp_word, fullwidth_full_stop, ".")
		end
		
		-- produce links
		local contain_pagename = (gsub(gsub(gsub(trad_word, "</?b>", ""), "%^", ""), "-", "") == PAGENAME) and not punctuation[PAGENAME]
		if match(trad_word, "|") or (link and not match(trad_word, "@") and not punctuation[word] and not contain_pagename) then
			trad_word = make_link(trad_word)
			if simp_exist then
				simp_word = make_link(simp_word)
			end
		end
		
		trad_word = gsub(trad_word, "@", "")
		simp_word = simp_exist and gsub(simp_word, "@", "")
		
		-- same tricks applied to transcription
		if not manual_tr and (variety_code == "cmn" or variety_code == "yue" or variety_code == "nan" or variety_code == "hak") then
			if punctuation[word] then
				tr_word = punctuation[word]
			else
				real_word = true
				local hyphen = variety_code == "nan" or variety_code == "hak"
				tr_word = gsub(word, "@", "")
				tr_word = gsub(tr_word, "%.", " ")
				tr_word = gsub(tr_word, ".+\\", "")
				tr_word = gsub(tr_word, "%[[^%[%]]+%]", "")
				tr_word = gsub(tr_word, ".</b>(%{[^%}]+%})", "%1</b>")
				-- "X{reading}" -> the given reading replaces character X
				tr_word = gsub(tr_word, "(.){([^{}]*)}",function(a, b)
						if hyphen and not mw.ustring.find(a, "[a-zA-Z]") then
							return "-" .. b .. "-"
						else
							return b
						end
					end)
				for key,val in pairs(polysyllable_pron_correction[variety_code]) do
					tr_word = gsub(tr_word, key, val)
				end
				tr_word = gsub(tr_word, ".", pron_correction[variety_code])
				if variety_code == "cmn" then
					tr_word = gsub(tr_word, "%-", "")
					tr_word = m_zh.py(tr_word)
				elseif variety_code == "yue" then
					local m_yue_pron = mw.loadData("Module:zh/data/yue-pron")
					tr_word = gsub(tr_word, ".", m_yue_pron.jyutping)
					tr_word = gsub(tr_word, "([a-z])([1-9])(-?)([1-9]?)", "%1%2%3%4 ")
				elseif hyphen then
					tr_word = gsub(tr_word, han_run_pattern, function(text)
						-- try the whole run first, then fall back to single characters
						local whole = m_zh.check_pron(text, variety_code, 1)
						if whole then
							return (gsub(whole, "/.+$", ""))
						end
						text = gsub(text, ".", function(ch)
							local single = m_zh.check_pron(ch, variety_code, 1)
							if single then
								return gsub(single, "/.+$", "") .. "-"
							end
							return ch
						end)
						return (gsub(text, "-$", ""))
					end)
					tr_word = gsub(tr_word, "%-+", "-")
					tr_word = gsub(tr_word, "%-([^ⁿa-záíúéóḿńàìùèòǹâîûêôāīūēōṳA-ZÁÍÚÉÓḾŃÀÌÙÈÒǸÂÎÛÊÔĀĪŪĒŌṲ])", "%1")
					tr_word = gsub(tr_word, "([^ⁿa-záíúéóḿńàìùèòǹâîûêôāīūēōoóòôōṳA-ZÁÍÚÉÓḾŃÀÌÙÈÒǸÂÎÛÊÔĀĪŪĒŌOÓÒÔŌṲ̄̀́̂̍͘])%-", "%1")
					tr_word = gsub(tr_word, "<b>", "-<b>")
					tr_word = gsub(tr_word, "</b>", "</b>-")
					tr_word = gsub(tr_word, "%^%-<b>", "<b>^")
					tr_word = gsub(tr_word, "^%-+", "")
					tr_word = gsub(tr_word, "%-+$", "")
					tr_word = gsub(tr_word, "%%%-?", "--")
				end
			end
		end
		
		if variety_code == "nan" then
			trad_word = gsub(trad_word, "%%", "")
			simp_word = simp_exist and gsub(simp_word, "%%", "")
		end
		
		if display == "ruby" then
			ruby_word = ruby_start .. trad_word .. (simp_exist and "<br>" .. simp_word or "") .. ruby_mid .. (real_word and tr_word or "") .. ruby_end
			table.insert(ruby_words, ruby_word)
		else
			table.insert(trad_words, trad_word)
			-- false means "no such form for this word": nothing is appended
			if simp_word then
				table.insert(simp_words, simp_word)
			end
			if tr_word then
				table.insert(tr_words, tr_word)
			end
		end
	end
	
	local tag_start = " <span style=\"color:darkgreen; font-size:x-small;\">&#91;" -- HTML entity since "[[[w:MSC|MSC]]" is interpreted poorly
	local tag_end = "&#93;</span>"
	
	if display == "ruby" then
		local tag = " <ruby><rb><big>" ..
				tag_start .. variety_data[1] .. 
					(simp_exist
						and ", ''[[Traditional Chinese|trad.]]''↑ + ''[[Simplified Chinese|simp.]]''↓"
						or ", ''[[Traditional Chinese|trad.]]'' and ''[[Simplified Chinese|simp.]]''") .. tag_end .. 
				
				tag_start .. "''rom.'': " .. variety_data[3] .. tag_end ..
					"</rb></big></ruby>"
			
		return table.concat(ruby_words, "") .. tag .. "<dl><dd><i>" .. translation .. "</i></dd></dl>"
	end
	
	-- put a space between two adjacent links that meet on Latin letters
	local trad_text = gsub(table.concat(trad_words), "([a-zA-Z]%]%])(%[%[[a-zA-Z])", "%1 %2")
	local simp_text = simp_exist and gsub(table.concat(simp_words), "([a-zA-Z]%]%])(%[%[[a-zA-Z])", "%1 %2") or false
	phonetic = manual_tr or (#tr_words > 0 and table.concat(tr_words, " ") or false)

	-- overall transcription formatting
	if phonetic then
		phonetic = gsub(phonetic, " </b>", "</b> ")
		phonetic = gsub(phonetic, "  ", " ")
		if variety_code == "yue" or variety_code == "zhx-tai" or variety_code == "zhx-teo" or variety_code == "cmn-sze" then
			phonetic = gsub(phonetic, "([a-zê]+)([1-9%-]+)", "%1<sup>%2</sup>") -- superscript tones
		end
		phonetic = gsub(phonetic, " ([,%.?!;:’”)])", "%1") -- remove excess spaces from punctuation
		phonetic = gsub(phonetic, "([‘“(]) ", "%1")
		if not manual_tr then
			phonetic = gsub(phonetic, "%'([^%'])", "%1") -- allow bolding for manual translit
			if variety_code == "nan" then
				phonetic = gsub(phonetic, " +%-%-", "--")
			end
		end
		
		-- capitalisation: "^" marks the character to be upper-cased
		if match(example, "[。?!]") then
			phonetic = "^" .. gsub(phonetic, "([%.?!]) ", "%1 ^")
		end
		phonetic = gsub(phonetic, "([%.%?%!][”’]) (.)", "%1 ^%2")
		phonetic = gsub(phonetic, "<br>(.)", "<br>^%1")
		phonetic = gsub(phonetic, ": ([“‘])(.)", ": %1^%2")
		phonetic = gsub(phonetic, "%^<b>", "<b>^")
		phonetic = gsub(phonetic, "%^+.", mw.ustring.upper)
		phonetic = gsub(phonetic, "%^", "")
			
		if variety_code == "wuu" then
			local wuu_pron = require("Module:wuu-pron") 
			phonetic = "<span class=\"IPA\">[" .. wuu_pron.ipa_conv(phonetic) .. "]</span>"
		
		elseif variety_code == "cmn-wuh" then
			phonetic = "<span class=\"IPA\">[" .. phonetic .. "]</span>"
		
		elseif variety_code == "cdo" then
			local cdo_pron = require("Module:cdo-pron")
			phonetic = "<i>" .. phonetic .. "</i>" .. 
				(not match(phonetic, "-[^ ]+-[^ ]+-[^ ]+-")
					and " / <span class=\"IPA\"><small>[" .. cdo_pron.sentence(phonetic) .. "]</small></span>"
					or "")
			
		else
			phonetic = "<i>" .. phonetic .. "</i>"
		end
		phonetic = "<span style=\"color:#404D52\">" .. phonetic .. "</span>"
	end
	
	local collapse_start, collapse_end, collapse_tag = '', '', ''
	if collapsed then
		collapse_start = '<span class="mw-collapsible mw-collapsed" id="mw-customcollapsible-zhexample">'
		collapse_end = '</span>'
		collapse_tag = '<span class="mw-customtoggle-zhexample" style="color:darkgreen; font-size:x-small;padding-left:10px">[▼ expand/hide]</span>'
	end
	
	-- Optional output pieces; they stay nil unless filled in below.
	local audio, tr_tag
	
	-- indentation, font and identity tags
	-- (`inline` alone is equivalent to the former `(inline or "" ~= "")`,
	-- in which `"" ~= ""` always evaluated to false)
	local use_block_layout =
		(variety_code == "cmn" and original_length > 7)
		or (variety_code ~= "cmn" and original_length > 5)
		or reference
		or (match(example, "[,。?!、:; ]") and variety_code == "wuu")
		or (variety_code == "cdo" and original_length > 3)
		or inline
	
	if use_block_layout then
		local simp_tag
		
		trad_text = "<dd>" .. zh_format_start .. trad_text .. zh_format_end
		
		if phonetic then
			phonetic = "<dl><dd>" .. collapse_start .. phonetic
			translation = "<dd>" .. translation .. "</dd></dl>"
			tr_tag = tag_start .. variety_data[3] .. tag_end .. collapse_end .. "</dd>"
		else
			translation = "<dl>" .. "<i>" .. translation .. "</i>" .. "</dl>"
		end
		
		if audio_file then
			audio = "<dd>[[File:" .. audio_file .. "]]</dd>"
		end
		
		local trad_tag = collapse_start .. tag_start .. variety_data[1] .. ", <i>[[w:Traditional Chinese|trad.]]" .. 
			((simp_exist or variety_code == "vi") and "" or " and [[w:Simplified Chinese|simp.]]") .. "</i>" .. tag_end .. collapse_end .. collapse_tag .. "</dd>"
		
		if simp_exist then
			simp_text = "<dd>" .. collapse_start .. zh_format_start .. simp_text .. zh_format_end
			simp_tag = tag_start .. variety_data[1] .. ", <i>[[w:Simplified Chinese|simp.]]</i>" .. tag_end .. collapse_end .. "</dd>"
		end
		
		local reference_row
		if reference then
			-- known references show their registered text, anything else is shown verbatim
			reference_row = "<dd>" .. collapse_start .. "<small><i>From:</i> " .. 
				(ref_data and ref_data[2] or reference) .. "</small>" .. collapse_end .. "</dd>"
		end
		
		return bg_format_start ..trad_text .. trad_tag .. (simp_text or "") .. (simp_tag or "") .. (reference_row or "") .. 
			(phonetic and phonetic .. tr_tag or "") .. (audio or "") .. translation .. bg_format_end
		
	else
		local ts_tag
		local divider = "&nbsp; ―&nbsp; "
		
		trad_text = zh_format_start .. trad_text .. zh_format_end
		
		-- the variety is only spelled out when it is not the default
		if variety ~= "MSC" then
			ts_tag = tag_start .. variety_data[1] .. tag_end
			tr_tag = tag_start .. variety_data[3] .. tag_end
		end
		
		if not phonetic then
			translation = "<i>" .. translation .. "</i>"
		end
		
		if simp_exist then
			simp_text = " / " .. zh_format_start .. simp_text .. zh_format_end
		end
		
		if audio_file then
			audio = " [[File:" .. audio_file .. "]]"
		end
		
		return bg_format_start ..
			trad_text .. (simp_text or "") .. (ts_tag or "") .. divider .. 
			(phonetic and phonetic .. (tr_tag or "") .. (audio or "") .. divider or "") .. translation .. (literal and " (literally, “" .. literal .. "”)" or "") ..
			bg_format_end
	end
end

return export