Module:och-pron

Definition from Wiktionary, the free dictionary
Jump to: navigation, search
The following documentation is located at Module:och-pron/documentation. [edit]
Useful links: subpage list • transclusions • testcases

Old Chinese pronunciation module. See {{zh-pron}}. Data stored at Module:zh/data/och-pron-BS, Module:zh/data/och-pron-ZS and their subpages.


-- Table of public functions; it is returned at the end of the module.
local export = {}
-- Shared Chinese data table. Its `st` field is indexed by character in
-- retrieve_data (presumably a simplified -> traditional mapping; confirm
-- against Module:zh/data).
local m_zh_data = mw.loadData("Module:zh/data")
 
--- Look up one stored Old Chinese reading for a character.
-- The character is tried as given first; if that yields nothing, the lookup
-- is repeated with whatever `m_zh_data.st` maps the character to (presumably
-- its traditional form; confirm against Module:zh/data).
-- @param text   character to look up
-- @param index  suffix appended to the character to form the data key
--               ('' or a number)
-- @param system reconstruction system code ('BS' or 'ZS')
-- @return the stored reading entry, or a false value (false/nil) if none
local function retrieve_data(text, index, system)
	-- Declared local so the lookup result no longer leaks into the global
	-- environment.
	local result = get_subpage(text, index, system)
	if not result then
		result = get_subpage(m_zh_data.st[text], index, system)
	end
	return result
end
 
--- Fetch one reading entry from the numbered data subpage covering `text`.
-- The subpage number is derived from the first code point of `text`:
-- floor((codepoint - startpoint) / 1000), where the start point depends on
-- the system. For 'BS' every bucket above 27 is folded into subpage 28.
-- NOTE: intentionally left as a global function, because retrieve_data is
-- defined before this function and resolves the name at call time.
-- @param text   character to look up; nil or '' yields false
-- @param index  suffix appended to `text` to form the key in the subpage
-- @param system reconstruction system code ('BS' or anything else, treated
--               as the other system)
-- @return the entry stored under `text .. index` (nil if absent), or false
--         when `text` is empty or the subpage cannot be loaded
function get_subpage(text, index, system)
	if not text or text == "" then
		return false
	end

	-- All intermediates are local; the original assigned them as globals.
	local startpoint
	if system == 'BS' then
		startpoint = 13361
	else
		startpoint = 15035
	end
	local unit = 1000
	local codepoint = mw.ustring.codepoint(text)
	local page_index = math.floor((codepoint - startpoint) / unit)
	if system == 'BS' and page_index > 27 then
		page_index = 28
	end

	-- pcall: a subpage that does not exist must not abort the whole render.
	local success, data = pcall(mw.loadData,
		('Module:zh/data/och-pron-' .. system .. '/%02d'):format(page_index)
	)
	if success then
		return data[text .. index]
	else
		return false
	end
end
 
--- Build the wikitext table rows for one character in one system.
-- If `occur` contains a digit 1-9 it is taken as the number of the single
-- reading to show; otherwise every reading is shown, from 1 up to the count
-- stored in the system's `occur` table (default 1).
-- Each row's cells follow the column order of the system header built in
-- export.make_table: the character itself, then the entry's fields in order
-- (four for 'BS', seven for the other system).
-- @param titlechar character whose readings are rendered
-- @param occur     reading selector string (e.g. '2'), or a non-numeric
--                  string such as '' for "all readings"
-- @param system    reconstruction system code ('BS' or 'ZS')
-- @return concatenated row wikitext, or false as soon as any requested
--         reading is missing from the data
local function pron_table(titlechar, occur, system)
	local pron = {}
	local start_index, end_index
	if mw.ustring.match(occur, '[1-9]') then
		start_index = tonumber(occur)
		end_index = tonumber(occur)
	else
		start_index = 1
		local occur_data = mw.loadData("Module:zh/data/och-pron-" .. system)
		end_index = occur_data.occur[titlechar] or 1
	end
	for i = start_index, end_index do
		-- Data keys: the first reading has no suffix, reading N uses N - 1.
		-- Local here; the original leaked `j` as a global.
		local suffix
		if i == 1 then suffix = '' else suffix = i - 1 end
		local reading = retrieve_data(titlechar, suffix, system)
		if not reading then
			return false
		end
		-- Local here; the original leaked `pron_reading` as a global.
		local pron_reading
		if system == 'BS' then
			-- reading[2] has X/H raised to superscript before display.
			pron_reading = [=[
 
|-
|colspan=8|<b>]=] .. titlechar .. [=[</b>
|colspan=8|]=] .. reading[1] .. [=[
 
|colspan=8|‹ <i>]=] .. mw.ustring.gsub(reading[2], '([XH])', '<sup>%1</sup>') .. [=[</i> ›
|colspan=8|<span class="IPAchar" lang="">/]=] .. reading[3] .. [=[/</span>
|colspan=8|]=] .. reading[4]

		else
			pron_reading = [=[
 
|-
|colspan=5|<b>]=] .. titlechar .. [=[</b>
|colspan=5|]=] .. reading[1] .. [=[
 
|colspan=5|[[]=] .. reading[2] .. [=[]]
|colspan=5|[[]=] .. reading[3] .. [=[]]
|colspan=5|]=] .. reading[4] .. [=[
 
|colspan=5|[[]=] .. reading[5] .. [=[]]
|colspan=5|<span class="IPAchar" lang="">/*]=] .. reading[6] .. [=[/</span>
|colspan=5|<span class="Hani" lang="zh">]=] .. reading[7] .. [=[</span>]=]

		end
		table.insert(pron, pron_reading)
	end
	return table.concat(pron, "")
end
 
--- Render the collapsible Old Chinese pronunciation table for the current page.
-- The page title supplies the characters. `frame.args[1]` selects readings:
-- entries separated by ';' apply to successive characters (a single entry
-- applies to every character), and within an entry ',' separates the
-- selector for the 'BS' system from the one for the 'ZS' system. A selector
-- of 'n' suppresses that system entirely.
-- A system is only rendered if data exists for the first or second
-- character of the title.
-- @param frame Scribunto frame object; only `frame.args[1]` is read
-- @return wikitext of the complete table
function export.make_table(frame)
	local titlechar = mw.title.getCurrentTitle().text
	-- A missing argument is treated like an empty one (show all readings)
	-- instead of passing nil to mw.text.split.
	local reading_index = mw.text.split(frame.args[1] or '', ";")

	local header = [=[{| class="wikitable mw-collapsible mw-collapsed" style="width:50em;margin:0; position:left; text-align:center"
|- style="height:40px"
!colspan=40|[[w:Old Chinese|Old Chinese]] pronunciation (<span class="Hani">]=] .. titlechar .. [=[</span>, reconstructed)]=]

	local system_header = {
		['BS'] = [=[
 
|- style="height:50px"
!colspan=40 style="background-color:#FFF8DC"|[[w:William H. Baxter|Baxter]]-[[w:Laurent Sagart|Sagart]] system 1.1 ([http://ocbaxtersagart.lsait.lsa.umich.edu/ 2014])
|-
! style="background-color:#E0FFFF" colspan=8 | <small>Character</small>
! style="background-color:#E0FFFF" colspan=8 | <small>Modern Beijing<br/>(Pinyin)</small>
! style="background-color:#E0FFFF" colspan=8 | <small>Middle Chinese</small>
! style="background-color:#E0FFFF" colspan=8 | <small>Old Chinese</small>
! style="background-color:#E0FFFF" colspan=8 | <small>English</small>]=],

		['ZS'] = [=[
 
|- style="height:50px"
!colspan=40 style="background-color:#FFF8DC"|[[w:Zhengzhang Shangfang|Zhengzhang]] system (2003)
|-
! style="background-color:#E0FFFF" colspan=5 | <small>Character</small>
! style="background-color:#E0FFFF" colspan=5 | <small>No.</small>
! style="background-color:#E0FFFF" colspan=5 | <small>Phonetic<br>component</small>
! style="background-color:#E0FFFF" colspan=5 | <small>Rime<br>group</small>
! style="background-color:#E0FFFF" colspan=5 | <small>Rime<br>subdivision</small>
! style="background-color:#E0FFFF" colspan=5 | <small>Corresponding<br>MC rime</small>
! style="background-color:#E0FFFF" colspan=5 | <small>Old Chinese</small>
! style="background-color:#E0FFFF" colspan=5 | <small>Notes</small>]=]

	}

	local BS_note = [=[
 
|-
|colspan=40 style="text-align:left; font-size:90%"|<div class="toccolours mw-collapsible mw-collapsed">
'''''Notes''' for Old Chinese notations in the Baxter-Sagart system:''
<div class="mw-collapsible-content">
* Parentheses "()" indicate uncertain presence;
* Square brackets "[]" indicate uncertain identity, e.g. *[t] as coda may in fact be *-t or *-p;
* Angle brackets "<>" indicate infix;
* Hyphen "-" indicates morpheme boundary;
* Period "." indicates syllable boundary.</div>
</div>]=]

	-- Position of each system's selector inside a ','-separated entry.
	local system_seq = { ['BS'] = 1, ['ZS'] = 2 }
	local testchar_1, testchar_2 = mw.ustring.sub(titlechar, 1, 1), mw.ustring.sub(titlechar, 2, 2)
	local systems = { 'BS', 'ZS' }
	-- Local here; the original leaked `output_text` as a global.
	local output_text = header

	for _, system in ipairs(systems) do
		local char_pron = {}
		-- Per-system flag, local and reset for each system; the original
		-- kept it in a global.
		local skip_system = false
		if retrieve_data(testchar_1, '', system) or retrieve_data(testchar_2, '', system) then
			for i, cp in ipairs { mw.ustring.codepoint(titlechar, 1, -1) } do
				local ch = mw.ustring.char(cp)
				local reading_number
				if #reading_index > 1 then
					-- Fewer selectors than characters: fall back to '' (all
					-- readings) rather than handing nil to mw.text.split.
					reading_number = reading_index[i] or ''
				else
					reading_number = reading_index[1]
				end
				-- Pick this system's part of "BS,ZS"; an entry without a
				-- part for this system is used as a whole.
				reading_number = mw.text.split(reading_number, ',')[system_seq[system]] or reading_number
				skip_system = reading_number == 'n'
				if skip_system then
					break
				else
					table.insert(char_pron, (pron_table(ch, reading_number, system) or ''))
				end
			end
			if not skip_system then
				output_text = output_text .. system_header[system] .. table.concat(char_pron, "")
				if system == 'BS' then
					output_text = output_text .. BS_note
				end
			end
		end
	end
	-- Close the table and drop empty "[[]]" links left by blank data fields.
	-- Assigning first (not returning the call directly) discards gsub's
	-- second return value, the replacement count.
	output_text = mw.ustring.gsub(output_text .. '\n|}', '%[%[%]%]', '')

	return output_text
end
 
-- Hand the table of public functions back to whoever loads this module.
return export