Module:zh-usex

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

This module does the work for {{zh-x}}; see that template's documentation for more information.


-- Shared Chinese helpers; this file calls its ts, ts_determ, py and check_pron functions.
local m_zh = require("Module:zh")
-- Unicode-aware string functions from mw.ustring, aliased for brevity.
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local sub = mw.ustring.sub
-- Title text of the current page; an already-defined global PAGENAME takes precedence.
local PAGENAME = PAGENAME or mw.title.getCurrentTitle().text

-- Table of functions exported by this module (returned at the end of the file).
local export = {}

-- Varieties known to this module, keyed by variety code.
-- Each value is a three-element array:
--   [1] wikitext label shown in the green tag after the example,
--   [2] language code used to choose the transcription logic,
--   [3] name of the romanisation shown in the transcription tag.
local variety_list = {
	-- Mandarin
	["MSC"]   = { "[[w:Standard Chinese|MSC]]", "cmn", "Pinyin" },
	["M-BJ"]  = { "[[w:Beijing dialect|Beijing Mandarin]]", "cmn", "Pinyin" },
	["M-TW"]  = { "[[w:Taiwanese Mandarin|Taiwanese Mandarin]]", "cmn", "Pinyin" },
	["M-TJ"]  = { "[[w:Tianjin dialect|Tianjin Mandarin]]", "cmn", "Pinyin" },
	["M-NE"]  = { "[[w:Northeastern Mandarin|Northeastern Mandarin]]", "cmn", "Pinyin" },
	["M-S"]   = { "[[w:Sichuanese dialects|Sichuanese]]", "cmn-sze", "Sichuanese Pinyin" },
	["M-W"]   = { "[[w:Wuhan dialect|Wuhanese]]", "cmn-wuh", "IPA" },

	-- Classical Chinese
	["CL"]    = { "[[w:Classical Chinese|Classical Chinese]]", "cmn", "Pinyin" },
	["CL-VN"] = { "[[w:Classical Chinese|Classical Chinese]]", "vi", "[[w:Sino-Vietnamese vocabulary|Sino-Vietnamese]]" },

	-- Written vernacular Chinese
	["WVC"]   = { "[[w:Written vernacular Chinese|Written Vernacular Chinese]]", "cmn", "Pinyin" },

	-- Cantonese (Yue)
	["C"]     = { "[[w:Cantonese|Cantonese]]", "yue", "Jyutping" },
	["C-GZ"]  = { "[[w:Cantonese|Guangzhou Cantonese]]", "yue", "Jyutping" },
	["C-LIT"] = { "[[w:Cantonese|Literary Cantonese]]", "yue", "Jyutping" },
	["C-T"]   = { "[[w:Taishanese|Taishanese]]", "yue-tai", "Wiktionary" },

	-- Min Dong
	["MD"]    = { "[[w:Min Dong|Min Dong]]", "cdo", "[[w:Bàng-uâ-cê|Bàng-uâ-cê]] / IPA" },

	-- Min Nan
	["MN"]    = { "[[w:Min Nan|Min Nan]]", "nan", "[[w:Pe̍h-ōe-jī|Pe̍h-ōe-jī]]" },
	["TW"]    = { "[[w:Taiwanese|Taiwanese]]", "nan", "[[w:Pe̍h-ōe-jī|Pe̍h-ōe-jī]]" },
	["MN-T"]  = { "[[w:Teochew dialect|Teochew]]", "nan-teo", "[[w:Peng'im|Peng'im]]" },

	-- Wu
	["W"]     = { "[[w:Wu Chinese|Wu]]", "wuu", "IPA" },
	["SH"]    = { "[[w:Shanghainese|Shanghainese]]", "wuu", "IPA" },

	-- Hakka
	["H"]     = { "[[w:Hakka Chinese|Hakka]]", "hak", "[[w:Pha̍k-fa-sṳ|Pha̍k-fa-sṳ]]" },
}

-- Maps a punctuation token found in the example text to the mark used in the
-- romanised transcription. export.show looks tokens up here to decide whether
-- a word is punctuation (not linked, transcribed by this table) or a real word.
--
-- Chinese text uses the full-width forms of , ? ! ( ) ; : | ~ . and the
-- ideographic space, so those forms must be keys as well; with only the ASCII
-- look-alikes the lookup never matches real Chinese punctuation. The
-- full-width keys are written as UTF-8 byte escapes so that width-folding
-- editors or converters cannot silently collapse them into the ASCII keys.
-- The ASCII keys are kept so existing behaviour for ASCII input is unchanged.
local punctuation = {
	[","] = ",",   ["。"] = ".",   ["、"] = ",",
	["?"] = "?",   ["!"] = "!",
	["\239\188\140"] = ",",   -- U+FF0C FULLWIDTH COMMA
	["\239\188\159"] = "?",   -- U+FF1F FULLWIDTH QUESTION MARK
	["\239\188\129"] = "!",   -- U+FF01 FULLWIDTH EXCLAMATION MARK
	
	["《"] = "“",   ["》"] = "”",
	["『"] = "‘",   ["』"] = "’",
	["「"] = "“",   ["」"] = "”",
	
	["("] = "(",   [")"] = ")",
	[";"] = ";",   [":"] = ":",
	["|"] = "|",    ["—"] = "-",	["~"] = "~",
	["·"] = " ",    ["…"] = "...",
	["."] = ".",
	["\239\188\136"] = "(",   -- U+FF08 FULLWIDTH LEFT PARENTHESIS
	["\239\188\137"] = ")",   -- U+FF09 FULLWIDTH RIGHT PARENTHESIS
	["\239\188\155"] = ";",   -- U+FF1B FULLWIDTH SEMICOLON
	["\239\188\154"] = ":",   -- U+FF1A FULLWIDTH COLON
	["\239\189\156"] = "|",   -- U+FF5C FULLWIDTH VERTICAL LINE
	["\239\189\158"] = "~",   -- U+FF5E FULLWIDTH TILDE
	["\239\188\142"] = ".",   -- U+FF0E FULLWIDTH FULL STOP

	[" "] = ";",
	["\227\128\128"] = ";",   -- U+3000 IDEOGRAPHIC SPACE
}

-- Well-known sources that can be cited by short name.
-- Each value is { variety code implied by the source, wikitext citation line }.
local ref_list = {
	["Analects"] = { "CL", "The ''[[w:Analects|Analects]] of Confucius'', circa 475 – 221 BCE, translated based on [[w:James Legge|James Legge]]'s translation" },
	["Bencao Gangmu"] = { "CL", "The ''[[w:Compendium of Materia Medica|Compendium of Materia Medica]]'' [Bencao Gangmu], by [[w:Li Shizhen|Li Shizhen]], 1578 CE" },
	["Chuci"] = { "CL", "The ''[[w:Chu Ci|Verses of Chu]]'', 4th century BCE – 2nd century CE" },
	["Guanyinzi"] = { "CL", "''[[w:zh:關尹子|Guanyinzi]]'', time unknown" },
	["Guanzi"] = { "CL", "''[[w:Guanzi (text)|Guanzi]]'', 7th – 4th century BCE" },
	["Hanfeizi"] = { "CL", "''[[w:Han Feizi (book)|Han Feizi]]'', circa 2nd century BCE" },
	["Hanshu"] = { "CL", "The ''[[w:Book of Han|Book of Han]]'', circa 1st century CE" },
	["Houhanshu"] = { "CL", "The ''[[w:Book of the Later Han|Book of the Later Han]]'', circa 5th century CE" },
	["Hongloumeng"] = { "WVC", "[[w:Cao Xueqin|Cao Xueqin]], ''[[w:Dream of the Red Chamber|Dream of the Red Chamber]]'', mid-18th century CE" },
	["Lantingjixu"] = { "CL", "'''353''' CE, [[w:Wang Xizhi|Wang Xizhi]], ''[[:s:Preface to the Poems Composed at the Orchid Pavilion|Preface to the Poems Composed at the Orchid Pavilion]]''" },
	["Liji"] = { "CL", "The ''[[w:Book of Rites|Book of Rites]]'', circa 4th – 2nd century BCE" },
	["Mengzi"] = { "CL", "''[[w:Mencius (book)|Mengzi]] (Mencius)'', circa 4th century BCE" },
	["Mozi"] = { "CL", "''[[w:Mozi|Mozi]] ([[w:zh:墨子 (书)|book]])'', circa 4th century BCE" },
	["Qimin Yaoshu"] = { "CL", "'''544''' CE, Jia Sixie, ''[[w:Qimin Yaoshu|Qimin Yaoshu]]''" },
	["Sanguo Yanyi"] = { "CL", "''[[w:Romance of the Three Kingdoms|Romance of the Three Kingdoms]]'', circa 14th century CE" },
	["Sanzijing"] = { "CL", "''[[w:Three Character Classic|Three Character Classic]]'', circa 13th century CE" },
	["Shangjunshu"] = { "CL", "The ''[[w:The Book of Lord Shang|Book of Lord Shang]]'', circa 3rd century BCE" },
	["Shangshu"] = { "CL", "The ''[[w:Book of Documents|Book of Documents]]'', circa 4th – 3rd century BCE" },
	["Shanhaijing"] = { "CL", "The ''[[w:Classic of Mountains and Seas|Classic of Mountains and Seas]]''" },
	["Shiji"] = { "CL", "The ''[[w:Records of the Grand Historian|Records of the Grand Historian]]'', by [[w:Sima Qian|Sima Qian]], circa 91 BCE" },
	["Shijing"] = { "CL", "The ''[[w:Classic of Poetry|Classic of Poetry]]'', circa 11th – 7th centuries BCE, translated based on [[w:James Legge|James Legge]]'s translation" },
	["Shijing-Xu"] = { "CL", "''Preface to Mao's Odes'' (Commentary on the ''[[w:Classic of Poetry|Classic of Poetry]]''), mid 2nd century BCE" },
	["Shuihuzhuan"] = { "WVC", "[[w:Shi Nai'an|Shi Nai'an]], ''[[w:Water Margin|Water Margin]]'', circa 14th century CE" },
	["Shujing"] = { "CL", "The ''[[w:Book of Documents|Book of Documents]]'', circa 7th – 4th centuries BCE" },
	["Shuowen"] = { "CL", "''[[w:Shuowen Jiezi|Shuowen Jiezi]]'', circa 2nd century CE" },
	["Sunzi"] = { "CL", "''[[w:The Art of War|The Art of War]]'', circa 5th century BCE" },
	["Taiping Yulan"] = { "CL", "''[[w:Taiping Yulan|Taiping Yulan]]'' (''Readings of the Taiping Era''), 977 – 983 CE" },
	["Tongdian"] = { "CL", "[[w:Du You|Du You]], ''[[w:Tongdian|Tongdian]]'', 766 – 801 CE" },
	["UM"] = { "MSC", "[http://nlp2ct.cis.umac.mo/um-corpus/ UM-Corpus: A Large English-Chinese Parallel Corpus] by NLP2CT" },
	["Xishuangji"] = { "WVC", "[[w:Wang Shifu|Wang Shifu]], ''[[w:The Story of the Western Wing|The Story of the Western Wing]]'', 13th – 14th centuries CE" },
	["Xiyouji"] = { "WVC", "[[w:Wu Cheng'en|Wu Cheng'en]], ''[[w:Journey to the West|Journey to the West]]'', 16th century CE" },
	["Xunzi"] = { "CL", "''[[w:Xunzi (book)|Xunzi]]'', circa 3rd century BCE" },
	["Yijing"] = { "CL", "''[[w:I Ching|I Ching]]'', 3rd – 2nd millennia BCE" },
	["Yizhoushu"] = { "CL", "''[[w:Yi Zhou Shu|Lost Book of Zhou]]'', circa 4th – 1st centuries BCE" },
	["Zhanguoce"] = { "CL", "''[[w:Zhan Guo Ce|Zhan Guo Ce]]'', circa 5th – 3rd centuries BCE" },
	["Zhouli"] = { "CL", "''[[w:Rites of Zhou|Rites of Zhou]]'', circa 3rd century BCE" },
	["Zhuangzi"] = { "CL", "''[[w:Zhuangzi (book)|Zhuangzi]]'', circa 3rd – 2nd centuries BCE" },
	["Zuozhuan"] = { "CL", "''[[w:Zuo zhuan|Commentary of Zuo]]'', circa 4th century BCE" },
}

-- Per-character pronunciation overrides, keyed by language code and then by
-- character. export.show substitutes these into a word before the automatic
-- transcription runs, so the listed reading wins over the default one.
--
-- In the "hak" and "nan" tables every value is wrapped in hyphens
-- ("-syllable-"): the hyphens mark syllable boundaries, and export.show later
-- collapses repeated hyphens and strips leading/trailing ones. An entry
-- without the wrapping would fuse with a neighbouring syllable, so all entries
-- in those two tables must keep both hyphens.
local pron_correction = {
	["cmn"] = {
		["吧"] = "ba",
		["的"] = "de", ["都"] = "dōu", 
		["個"] = "ge", ["給"] = "gěi", ["更"] = "gèng",
		["還"] = "hái", 
		["幾"] = "jǐ", ["將"] = "jiāng",
		["了"] = "le", ["槤"] = "lián",
		["麼"] = "me", ["沒"] = "méi", 
		["妳"] = "nǐ",
		["漂"] = "piào", 
		["捨"] = "shě", ["什"] = "shén", 
		["為"] = "wèi",
		["要"] = "yào"
	},
	["yue"] = {
		["掂"] = "dim6",
		["若"] = "joek6",
		["令"] = "ling6", ["來"] = "loi4",
		["華"] = "waa4", ["玩"] = "waan2",
		["蛇"] = "se4",
		["𠻹"] = "tim1",
		["只"] = "zi2",
	},
	["hak"] = {
		["阿"] = "-â-",
		["竹"] = "-chuk-", ["獎"] = "-chióng-",
		["茶"] = "-chhà-", ["蚻"] = "-chha̍t-", ["曾"] = "-chhèn-", ["千"] = "-chhiên-", ["竄"] = "-chhon-", ["捽"] = "-chhu̍t-",
		["仔"] = "-é-", ["𫣆"] = "-ên-",
		["客"] = "-hak-",
		["𥘹"] = "-kì-", ["溝"] = "-kiêu-", ["稿"] = "-kó-",
		["罅"] = "-la-", ["壢"] = "-lak-", ["摎"] = "-lâu-", ["㧯"] = "-lâu-", ["恅"] = "-láu-", ["俚"] = "-lî-", ["擂"] = "-lùi-",
		["閩"] = "-mén-", ["忘"] = "-mong-", ["蚊"] = "-mûn-",
		["𠊎"] = "-ngài-", ["祢"] = "-ngì-",
		["孲"] = "-ò-", ["𡟓"] = "-ôi-",
		["輩"] = "-pi-", ["𡜵"] = "-pû-",
		["婆"] = "-phò-",
		["使"] = "-sṳ́-", ["史"] = "-sṳ́-", ["脣"] = "-sùn-",
		["點"] = "-tiám-",
		["𢯭"] = "-then-", ["唐"] = "-thòng-"
	},
	["nan"] = {
		["阿"] = "-a-", ["仔"] = "-á-", ["矣"] = "-ah-", ["啊"] = "-ah-",
		["䆀"] = "-bái-", ["袂"] = "-bē-", ["欲"] = "-beh-", ["覕"] = "-bih-", ["盟"] = "-bêng-", ["務"] = "-bū-",
		["欉"] = "-châng-", ["十"] = "-cha̍p-", ["誌"] = "-chì-", ["遮"] = "-chiah-", ["針"] = "-chiam-", ["窒"] = "-chit-", ["一"] = "-chi̍t-", ["睭"] = "-chiu-", ["𠞩"] = "-chûi-",
		["𨑨"] = "-chhit-", ["𤆬"] = "-chhōa-", ["攢"] = "-chhoân-",
		["的"] = "-ê-", ["憶"] = "-ek-",
		["𠢕"] = "-gâu-", ["偌"] = "-gōa-", ["囡"] = "-gín-",
		["耳"] = "-hīⁿ-", ["予"] = "-hō͘-",
		["也"] = "-iā-", ["𪜶"] = "-in-",
		["字"] = "-jī-", ["然"] = "-jiân-", ["日"] = "-ji̍t-",
		["共"] = "-kā-", ["佮"] = "-kah-", ["甲"] = "-kah-", ["矸"] = "-kan-", ["到"] = "-kàu-", ["竟"] = "-kèng-", ["行"] = "-kiâⁿ-", ["勼"] = "-kiu-", ["閣"] = "-koh-", ["擱"] = "-koh-", ["講"] = "-kóng-",
		["跤"] = "-kha-", ["較"] = "-khah-", ["徛"] = "-khiā-", ["課"] = "-khò-",
		["人"] = "-lâng-", ["汝"] = "-lí-", ["旅"] = "-lí-", ["啉"] = "-lim-", ["戀"] = "-loân-", ["攏"] = "-lóng-",
		["毋"] = "-m̄-", ["嬤"] = "-má-",
		["喔"] = "-o͘h-",
		["爸"] = "-pē-",
		["麭"] = "-pháng-",
		["三"] = "-saⁿ-", ["捨"] = "-siá-", ["閃"] = "-siám-", ["雙"] = "-siang-", ["啥"] = "-siáⁿ-", ["俗"] = "-sio̍k-", ["傷"] = "-siong-", ["商"] = "-siong-", ["受"] = "-siū-", ["煞"] = "-soah-", ["士"] = "-sū-", ["遂"] = "-sūi-",
		["塊"] = "-tè-", ["咧"] = "-teh-", ["豬"] = "-ti-", ["戴"] = "-tì-", ["佇"] = "-tī-", ["躊"] = "-tiû-", ["斷"] = "-tn̄g-", ["多"] = "-to-", ["倒"] = "-tó-", ["拄"] = "-tú-", ["盹"] = "-tuh-", ["脣"] = "-tûn-",
		["太"] = "-thài-", ["刣"] = "-thâi-", ["讀"] = "-tha̍k-", ["窗"] = "-thang-", ["迌"] = "-thô-",
		["揻"] = "-ui-",
	},
	["wuu"] = {},
}

-- Pronunciation overrides for whole multi-character words, keyed by language
-- code and then by the word. export.show applies these before the
-- per-character overrides in pron_correction.
local polysyllable_pron_correction = {
	["cmn"] = {
		["覺得"] = "juéde",
	},

	["yue"] = {},

	["hak"] = {
		["老鼠"] = "-lo-chhú-",
		["敗勢"] = "-phài-se-",
		["癩⿸疒哥"] = "-thái-kô-",
		["台灣"] = "-Thòi-vàn-",
		["臺灣"] = "-Thòi-vàn-",
	},

	["nan"] = {
		["愛人"] = "-ài-jîn-",
		["饅頭"] = "-bán-thô-",
		["門徒"] = "-bûn-tô͘-",
		["情批"] = "-chêng-phoe-",
		["遮爾"] = "-chiah-nī-",
		["這馬"] = "-chit-má-",
		["作用"] = "-chok-iōng-",
		["請假"] = "-chhéng-ká-",
		["親像"] = "-chhin-chhiūⁿ-",
		["偌爾"] = "-gōa-nī-",
		["一切"] = "-it-chhè-",
		["一般"] = "-it-poaⁿ-",
		["一直"] = "-it-ti̍t-",
		["人海"] = "-jîn-hái-",
		["人生"] = "-jîn-seng-",
		["卡拉OK"] = "-kha-lá-ó͘-khe-",
		["旅行"] = "-lí-hêng-",
		["歐巴桑"] = "-o͘-bá-sáng-",
		["歹勢"] = "-pháiⁿ-sè-",
		["山珍海味"] = "-san-tin-hái-bī-",
		["漩渦"] = "-soân-o-",
		["臺灣"] = "-Tâi-oân-",
		["的確"] = "-tek-khak-",
		["癩⿸疒哥"] = "-thái-ko-",
	},

	["wuu"] = {},
}

-- Opening and closing HTML wrapped around each line of Chinese text.
local zh_format_start = '<span lang="zh" class="Hani" style="font-size:110%;">'
local zh_format_end = '</span>'

--- Wraps a word in wikilink brackets, splitting it at hyphens where needed.
-- A word that contains a complete <b>...</b> pair is split at every hyphen and
-- each piece becomes a piped link: the target is the piece without bold tags,
-- the label is the piece with its bold tag re-balanced if the split separated
-- the opening tag from the closing one. Any other word becomes a single link,
-- which is split at hyphens only when a hyphen stands between two Han
-- characters. Finally a <br> that ended up right after "[[" is moved in front
-- of it.
-- @param word  text of one word, possibly containing <b> tags and hyphens
-- @return the wikitext for the linked word
local function make_link(word)
	local linked

	if not match(word, "<b>.+</b>") then
		linked = "[[" .. word .. "]]"
		if match(linked, "([一-鿌㐀-䶵𠀀-𬺯])%-([一-鿌㐀-䶵𠀀-𬺯])") then
			linked = gsub(linked, "%-", "]][[")
		end
	else
		local pieces = {}
		for index, piece in ipairs(mw.text.split(word, "-", true)) do
			local opens = match(piece, "<b>")
			local closes = match(piece, "</b>")
			local target = gsub(piece, "</?b>", "")
			-- re-balance a bold tag whose partner landed in another piece
			local prefix = (closes and not opens) and "<b>" or ""
			local suffix = (opens and not closes) and "</b>" or ""
			pieces[index] = "[[" .. target .. "|" .. prefix .. piece .. suffix .. "]]"
		end
		linked = table.concat(pieces)
	end

	linked = gsub(linked, "(%[%[)(<br/?>)", "%2%1")
	return linked
end

--- Renders one Chinese usage example; this is the entry point used by {{zh-x}}.
--
-- Arguments are read from the parent frame (the template call):
--   1                    example text (required; an error is raised without it)
--   2                    English translation; when empty a request notice and a
--                        tracking category are emitted instead
--   3                    variety code, a key of variety_list; defaults to the
--                        variety implied by `ref`, otherwise "MSC"
--   ref / r              key of ref_list, or free text naming the source
--   tr                   manual transcription, used instead of the automatic one
--   type / display_type  "ruby" selects the ruby layout; otherwise plain layout
--   inline               a value starting with "y" forces the inline layout,
--                        one starting with "n" forces the block layout
--   a / audio            audio file name
--   collapsed            when set, secondary lines of the block layout are
--                        wrapped in a collapsible span
--   link / l             any value containing "n" disables automatic linking
--
-- Markup understood inside the example text (as handled below):
--   '''x'''  bold;  @  suppress the link for that word;  a\b  link to a, and
--   transcribe b;  x{pron}  manual reading for the preceding character;
--   x[y]  explicit simplified form y for character x;  ^  capitalise the next
--   transcribed letter;  "."  joins in the display but becomes a space in the
--   transcription;  "-"  splits a word into several links.
--
-- @param frame  frame object of the #invoke call
-- @return wikitext/HTML for the formatted example
function export.show(frame)
	local args = frame:getParent().args
	local example = args[1] or error("Example unspecified.")
	local translation = args[2]
	local reference = args["ref"] or args["r"] or false
	local manual_tr = args["tr"] or false
	local display = args["type"] or args["display_type"] or "plain"
	local inline = args["inline"] or false
	local audio_file = args["a"] or args["audio"] or false
	local collapsed = args["collapsed"] or false
	local phonetic = ""
	-- number of Han characters in the raw example; drives the inline/block choice
	local original_length = mw.ustring.len(gsub(example, "[^一-龯㐀-䶵]", ""))
	local variety = args[3] or (ref_list[reference] and ref_list[reference][1] or false) or "MSC"
	local variety_info = variety_list[variety]
	if not variety_info then
		-- fail with a readable message instead of "attempt to index a nil value"
		error("Unrecognised variety code: " .. tostring(variety))
	end
	local variety_code = variety_info[2]

	-- Everything below used to leak into the global environment; declared here
	-- because the values are produced in one branch and consumed in another.
	local simp_exist
	local real_word = false
	local trad_text, simp_text
	local tr_tag, audio

	-- Full-width punctuation, spelled as UTF-8 byte escapes so that width-folding
	-- tools cannot silently turn them into their ASCII look-alikes.
	local fw_comma     = "\239\188\140" -- U+FF0C
	local fw_question  = "\239\188\159" -- U+FF1F
	local fw_exclaim   = "\239\188\129" -- U+FF01
	local fw_lparen    = "\239\188\136" -- U+FF08
	local fw_rparen    = "\239\188\137" -- U+FF09
	local fw_semicolon = "\239\188\155" -- U+FF1B
	local fw_colon     = "\239\188\154" -- U+FF1A
	local fw_bar       = "\239\189\156" -- U+FF5C
	local fw_tilde     = "\239\189\158" -- U+FF5E
	local fw_stop      = "\239\188\142" -- U+FF0E
	local ideo_space   = "\227\128\128" -- U+3000
	
	local link = args["link"] or args["l"] or "yes"
	link = match(link, "n") == nil-- and not (not match(example, " ") and match(example, "[,。?!﹑]"))

	if not translation or translation == '' then translation = '<small>(please add an English translation of this example)</small> [[Category:Chinese usage examples with the translation missing]]' end -- per standard [[Module:usex]]
	
	-- automatically boldify pagetitle if nothing is in bold
	if not match(example, "'''") and not punctuation[PAGENAME] then
		-- The title is literal text: escape pattern magic characters, and use %0
		-- in the replacement so a "%" in the title cannot be misread either.
		local pagename_pattern = gsub(PAGENAME, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
		example = gsub(example, pagename_pattern, "'''%0'''")
		example = gsub(example, "''''''", "")
	end
	
	-- tidying up the example, making it ready for transcription
	example = gsub(example, "——", "&mdash;") -- convert a double em-dash, as in xiehouyu, into one em-dash, instead of into two hyphens
	-- Pad punctuation with spaces so that each mark becomes its own token.
	-- The class holds the full-width forms used in Chinese text. ASCII ".", ";"
	-- and " " are deliberately absent: "." is in-word markup handled further
	-- down, ";" would split the &mdash; entity inserted on the previous line,
	-- and spaces already are the token separator.
	example = gsub(example, "([?!,:|()~。、“”…‘’「」『』—《》·" ..
		fw_comma .. fw_question .. fw_exclaim .. fw_lparen .. fw_rparen ..
		fw_semicolon .. fw_colon .. fw_bar .. fw_tilde .. fw_stop .. ideo_space .. "])", " %1 ")
	example = gsub(example, "^ *", "")
	example = gsub(example, " *$", "")
	example = gsub(example, " +", " ")
	example = gsub(example, "%'%'%'([^%']+)%'%'%'", "<b>%1</b>")
	-- keep a trailing [simplified] or {reading} annotation inside the bold span
	example = gsub(example, "(.)</b>%[([^%[%]]+)%]", function(first, second)
		return "<b>"..first.."</b>" ~= second and first.."["..second.."]</b>" or first.."["..first.."]</b>" end)
	example = gsub(example, "</b>({[^{}]+})", "%1</b>")
	
	local ruby_start, ruby_mid, ruby_end = "<big><ruby><span class=\"Hani\">", "</span><rp>&nbsp;(</rp><rt><big>", "</big></rt><rp>)</rp></ruby></big>"
	local ruby_words = {}
	local trad_words, simp_words, tr_words = {}, {}, {}
	
	-- a separate simplified line is produced when the text is traditional or
	-- carries explicit [simplified] annotations, except for Sino-Vietnamese
	simp_exist = (m_zh.ts_determ(gsub(example, "(.)%[%1%]", "")) == "trad" or (match(example, "%[[^%[%]]+%]") and not match(example, "(.)%[%1%]"))) and variety_code ~= "vi"
	for word in mw.text.gsplit(example, " ", true) do
		if gsub(word, "%{[^%}]+%}", "") == PAGENAME then
			word = "<b>" .. word .. "</b>"
		end
		local trad_word, simp_word, tr_word, ruby_word = word, false, false, ""
		
		-- various tricks for linking and display in trad. and simp.
		trad_word = gsub(trad_word, "(.)%[(.)%]", "%1")
		trad_word = gsub(trad_word, "{[^{}]*}", "")
		trad_word = gsub(trad_word, "[%^%.]", "")
		trad_word = gsub(trad_word, "\\", "|")
		-- a full-width full stop stands for a literal period in the display
		-- (a bare "." pattern here would replace every character)
		trad_word = gsub(trad_word, fw_stop, ".")
		
		if simp_exist then
			simp_word = gsub(m_zh.ts(word), ".%[(.)%]", "%1")
			simp_word = gsub(simp_word, "{[^{}]*}", "")
			simp_word = gsub(simp_word, "[%^%.]", "")
			simp_word = gsub(simp_word, "\\", "|")
			simp_word = gsub(simp_word, fw_stop, ".")
		end
		
		-- produce links
		local contain_pagename = (gsub(gsub(gsub(trad_word, "</?b>", ""), "%^", ""), "-", "") == PAGENAME) and not punctuation[PAGENAME]
		if match(trad_word, "|") or (link and not match(trad_word, "@") and not punctuation[word] and not contain_pagename) then
			trad_word = make_link(trad_word)
			if simp_exist then
				simp_word = make_link(simp_word)
			end
		end
		
		trad_word = gsub(trad_word, "@", "")
		simp_word = simp_exist and gsub(simp_word, "@", "")
		
		-- same tricks applied to transcription
		if not manual_tr and (variety_code == "cmn" or variety_code == "yue" or variety_code == "nan" or variety_code == "hak") then
			if punctuation[word] then
				tr_word = punctuation[word]
			else
				-- stays true for the rest of this call once a real word was seen
				real_word = true
				local hyphen = variety_code == "nan" or variety_code == "hak"
				tr_word = gsub(word, "@", "")
				tr_word = gsub(tr_word, "%.", " ")
				tr_word = gsub(tr_word, ".+\\", "")
				tr_word = gsub(tr_word, "%[[^%[%]]+%]", "")
				tr_word = gsub(tr_word, ".</b>(%{[^%}]+%})", "%1</b>")
				-- x{reading}: replace the character by its manual reading
				tr_word = gsub(tr_word, "(.){([^{}]*)}",function(a, b)
						if hyphen and not mw.ustring.find(a, "[a-zA-Z]") then
							return "-" .. b .. "-"
						else
							return b
						end
					end)
				for key,val in pairs(polysyllable_pron_correction[variety_code]) do
					tr_word = gsub(tr_word, key, val)
				end
				tr_word = gsub(tr_word, ".", pron_correction[variety_code])
				if variety_code == "cmn" then
					tr_word = gsub(tr_word, "%-", "")
					tr_word = m_zh.py(tr_word)
				elseif variety_code == "yue" then
					local m_yue_pron = mw.loadData("Module:zh/data/yue-pron")
					tr_word = gsub(tr_word, ".", m_yue_pron.jyutping)
					tr_word = gsub(tr_word, "([a-z])([1-9])(-?)([1-9]?)", "%1%2%3%4 ")
				elseif hyphen then
					-- Runs of Han characters and CJK symbols (U+3000..U+303F) that are
					-- still untranscribed; the range must start at the ideographic
					-- space, not at ASCII space, or it would swallow Latin text too.
					tr_word = gsub(tr_word, "[一-鿌㐀-䶵" .. ideo_space .. "-〿𠀀-𬺯]+", function(text) 
						if m_zh.check_pron(text, variety_code, 1) then
							return gsub(m_zh.check_pron(text, variety_code, 1), "/.+$", "")
						else
							text = gsub(text, ".", function(ch)
								if m_zh.check_pron(ch, variety_code, 1) then
									return gsub(m_zh.check_pron(ch, variety_code, 1), "/.+$", "") .. "-"
								else
									return ch
								end
							end)
							return gsub(text, "-$", "")
						end
					end)
					-- normalise the syllable-separating hyphens
					tr_word = gsub(tr_word, "%-+", "-")
					tr_word = gsub(tr_word, "%-([^ⁿa-záíúéóḿńàìùèòǹâîûêôāīūēōṳA-ZÁÍÚÉÓḾŃÀÌÙÈÒǸÂÎÛÊÔĀĪŪĒŌṲ])", "%1")
					tr_word = gsub(tr_word, "([^ⁿa-záíúéóḿńàìùèòǹâîûêôāīūēōoóòôōṳA-ZÁÍÚÉÓḾŃÀÌÙÈÒǸÂÎÛÊÔĀĪŪĒŌOÓÒÔŌṲ̄̀́̂̍͘])%-", "%1")
					tr_word = gsub(tr_word, "<b>", "-<b>")
					tr_word = gsub(tr_word, "</b>", "</b>-")
					tr_word = gsub(tr_word, "%^%-<b>", "<b>^")
					tr_word = gsub(tr_word, "^%-+", "")
					tr_word = gsub(tr_word, "%-+$", "")
					tr_word = gsub(tr_word, "%%%-?", "--")
				end
			end
		end
		
		if variety_code == "nan" then
			trad_word = gsub(trad_word, "%%", "")
			simp_word = simp_exist and gsub(simp_word, "%%", "")
		end
		
		if display == "ruby" then
			ruby_word = ruby_start .. trad_word .. (simp_exist and "<br>" .. simp_word or "") .. ruby_mid .. (real_word and tr_word or "") .. ruby_end
			table.insert(ruby_words, ruby_word)
		else
			table.insert(trad_words, trad_word)
			-- a false value becomes nil so that nothing is appended
			table.insert(simp_words, simp_word or nil)
			table.insert(tr_words, tr_word or nil)
		end
	end
	
	local tag_start = " <span style=\"color:darkgreen; font-size:x-small;\">&#91;" -- HTML entity since "[[[w:MSC|MSC]]" is interpreted poorly
	local tag_end = "&#93;</span>"
	
	if display == "ruby" then
		-- closing tags are emitted in reverse order of the opening ones
		local tag = " <ruby><rb><big>" ..
				tag_start .. variety_info[1] .. 
					(simp_exist
						and ", ''[[Traditional Chinese|trad.]]''↑ + ''[[Simplified Chinese|simp.]]''↓"
						or ", ''[[Traditional Chinese|trad.]]'' and ''[[Simplified Chinese|simp.]]''") .. tag_end .. 
				
				tag_start .. "''rom.'': " .. variety_info[3] .. tag_end ..
					"</big></rb></ruby>"
			
		return table.concat(ruby_words, "") .. tag .. "<dl><dd><i>" .. translation .. "</i></dd></dl>"
	else
		-- keep a space between adjacent links that both consist of Latin letters
		trad_text = gsub(table.concat(trad_words), "([a-zA-Z]%]%])(%[%[[a-zA-Z])", "%1 %2")
		simp_text = simp_exist and gsub(table.concat(simp_words), "([a-zA-Z]%]%])(%[%[[a-zA-Z])", "%1 %2") or false
		phonetic = manual_tr or (#tr_words > 0 and table.concat(tr_words, " ") or false)

		-- overall transcription formatting
		if phonetic then
			phonetic = gsub(phonetic, " </b>", "</b> ")
			phonetic = gsub(phonetic, "  ", " ")
			if variety_code == "yue" or variety_code == "yue-tai" or variety_code == "nan-teo" or variety_code == "cmn-sze" then
				phonetic = gsub(phonetic, "([a-zê]+)([1-9%-]+)", "%1<sup>%2</sup>") -- superscript tones
			end
			phonetic = gsub(phonetic, " ([,%.?!;:’”)])", "%1") -- remove excess spaces from punctiation
			phonetic = gsub(phonetic, "([‘“(]) ", "%1")
			if not manual_tr then
				phonetic = gsub(phonetic, "%'([^%'])", "%1") -- allow bolding for manual translit
				if variety_code == "nan" then
					phonetic = gsub(phonetic, " +%-%-", "--")
				end
			end
			
			-- capitalisation: when the example contains sentence-final punctuation
			-- (full-width or ASCII), capitalise the start of every sentence
			if match(example, "[。?!" .. fw_question .. fw_exclaim .. "]") then
				phonetic = "^" .. gsub(phonetic, "([%.?!]) ", "%1 ^")
			end
			phonetic = gsub(phonetic, "%^<b>", "<b>^")
			phonetic = gsub(phonetic, "%^+.", mw.ustring.upper)
			phonetic = gsub(phonetic, "%^", "")
				
			if variety_code == "wuu" then
				local wuu_pron = require("Module:wuu-pron") 
				phonetic = "<span class=\"IPA\">[" .. wuu_pron.ipa_conv(phonetic) .. "]</span>"
			
			elseif variety_code == "cmn-wuh" then
				phonetic = "<span class=\"IPA\">[" .. phonetic .. "]</span>"
			
			elseif variety_code == "cdo" then
				local cdo_pron = require("Module:cdo-pron")
				phonetic = "<i>" .. phonetic .. "</i>" .. 
					(not match(phonetic, "-[^ ]+-[^ ]+-[^ ]+-")
						and " / <span class=\"IPA\"><small>[" .. cdo_pron.sentence(phonetic) .. "]</small></span>"
						or "")
				
			else
				phonetic = "<i>" .. phonetic .. "</i>"
			end
			phonetic = "<span style=\"color:#404D52\">" .. phonetic .. "</span>"
		end
	end
	
	local collapse_start, collapse_end, collapse_tag = '', '', ''
	if collapsed then
		collapse_start = '<span class="mw-collapsible mw-collapsed" id="mw-customcollapsible-zhexample">'
		collapse_end = '</span>'
		collapse_tag = '<span class="mw-customtoggle-zhexample" style="color:darkgreen; font-size:x-small;padding-left:10px">[▼ expand/hide]</span>'
	end
		
	-- indentation, font and identity tags
	-- Block layout for long, referenced or punctuated examples; inline otherwise.
	if 
		((variety_code == "cmn" and original_length > 7) 
			or (variety_code ~= "cmn" and original_length > 5)
			or reference
			or (match(example, "[,。?!、:; " .. fw_comma .. fw_question .. fw_exclaim ..
					fw_colon .. fw_semicolon .. ideo_space .. "]") and variety_code == "wuu") 
			or (variety_code == "cdo" and original_length > 3)
			or match(inline or "", "^n"))
		
		and not match(inline or "", "^y") then
			
		trad_text = "<dd>" .. zh_format_start .. trad_text .. zh_format_end
		
		if not phonetic then
			translation = "<i>" .. translation .. "</i>"
		end
		
		if phonetic then
			phonetic = "<dl><dd>" .. collapse_start .. phonetic
			translation = "<dd>" .. translation .. "</dd></dl>"
			tr_tag = tag_start .. variety_info[3] .. tag_end .. collapse_end .. "</dd>"
		else
			translation = "<dl>" .. translation .. "</dl>"
		end
		
		if audio_file then
			audio = "<dd>[[File:" .. audio_file .. "]]</dd>"
		end
		
		local trad_tag = collapse_start .. tag_start .. variety_info[1] .. ", <i>[[w:Traditional Chinese|trad.]]" .. 
			((simp_exist or variety_code == "vi") and "" or " and [[w:Simplified Chinese|simp.]]") .. "</i>" .. tag_end .. collapse_end .. collapse_tag .. "</dd>"
		
		local simp_tag
		if simp_exist then
			simp_text = "<dd>" .. collapse_start .. zh_format_start .. simp_text .. zh_format_end
			simp_tag = tag_start .. variety_info[1] .. ", <i>[[w:Simplified Chinese|simp.]]</i>" .. tag_end .. collapse_end .. "</dd>"
		end
		
		if reference then
			-- <small> encloses the whole line, <i> only the "From:" label
			reference = "<dd>" .. collapse_start .. "<small><i>From:</i> " .. 
				(ref_list[reference] and ref_list[reference][2] or reference) .. "</small>" .. collapse_end .. "</dd>"
		end
		
		return trad_text .. trad_tag .. (simp_text or "") .. (simp_tag or "") .. (reference or "") .. 
			(phonetic and phonetic .. tr_tag or "") .. (audio or "") .. translation
		
	else
		trad_text = zh_format_start .. trad_text .. zh_format_end
		local divider = "&nbsp; ―&nbsp; "
		
		-- the variety and romanisation tags are omitted for the default variety
		local ts_tag
		if variety ~= "MSC" then
			ts_tag = tag_start .. variety_info[1] .. tag_end
			tr_tag = tag_start .. variety_info[3] .. tag_end
		end
		
		if not phonetic then
			translation = "<i>" .. translation .. "</i>"
		end
		
		if simp_exist then
			simp_text = " / " .. zh_format_start .. simp_text .. zh_format_end
		end
		
		if audio_file then
			audio = " [[File:" .. audio_file .. "]]"
		end
		
		return trad_text .. (simp_text or "") .. (ts_tag or "") .. divider .. 
			(phonetic and phonetic .. (tr_tag or "") .. (audio or "") .. divider or "") .. translation
	end
end

return export