Module:IPA

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

This module returns the X-SAMPA pronunciation when given IPA, or the IPA pronunciation when given X-SAMPA.

Data is in Module:IPA/data, template-callable functions in Module:IPA/templates.

Unit tests

See also: Module:IPA/testcases

IPA to X-SAMPA back to IPA

Term IPA Generated X-SAMPA Regenerated IPA Matched?
dictionary /ˈdɪkʃən(ə)ɹi/ /"dIkS@n(@)r\i/ /ˈdɪkʃən(ə)ɹi/ yes
/ˈdɪkʃənɛɹi/ /"dIkS@nEr\i/ /ˈdɪkʃənɛɹi/ yes
Україна /ukrɑˈjɪnɑ/ /ukrA"jInA/ /ukrɑˈjɪnɑ/ yes
نوروز [næu̯ˈɾoːz] [n{u_^"4o:z] [næu̯ˈɾoːz] yes
[nou̯ˈɾuːz] [nou_^"4u:z] [nou̯ˈɾuːz] yes
[noːˈɾuːz] [no:"4u:z] [noːˈɾuːz] yes
[næu̯ˈɾɵːz] [n{u_^"48:z] [næu̯ˈɾɵːz] yes
新年 [ɕɪn˥˥niɛn˧˥] [s\In_T_TniEn_M_T] [ɕɪn˥˥niɛn˧˥] yes
battleship [ˈbætl̩ʃɪp] ["b{tl=SIp] [ˈbætl̩ʃɪp] yes
báid [bˠɑːdʲ] [b_GA:d_j] [bˠɑːdʲ] yes
Deutsch [dɔʏ̯t͡ʃ] [dOY_^t__S] [dɔʏ̯t͡ʃ] yes
dóigh [d̪ˠoːɟ] [d_d_Go:J\] [d̪ˠoːɟ] yes
murder [ˈmɝdɚ] ["m3`d@`] [ˈmɝdɚ] yes

local export = {}
-- [[Module:IPA/data]]
local m_data = mw.loadData('Module:IPA/data') 

--- Builds a complete pronunciation line: the linked "IPA" label, a superscript
-- "key" link, the formatted transcriptions and, on pages in the main or
-- Reconstruction namespace, a tracking category.
-- @param lang   language object; getCode() and getCanonicalName() are called on it
-- @param items  list of pronunciation items, handed on to export.format_IPA_multiple
-- @return wikitext string
function export.format_IPA_full(lang, items)
	-- Languages listed in langs_with_infopages link to their appendix page,
	-- every other language links to a Wikipedia phonology article.
	local key_page
	if m_data.langs_with_infopages[lang:getCode()] then
		key_page = "Appendix:" .. lang:getCanonicalName() .. " pronunciation"
	else
		key_page = "wikipedia:" .. lang:getCanonicalName() .. " phonology"
	end

	local label = "[[Wiktionary:International Phonetic Alphabet|IPA]]<sup>([[" .. key_page .. "|key]])</sup>:&#32;"
	local output = label .. export.format_IPA_multiple(lang, items)

	local namespace = mw.title.getCurrentTitle().nsText
	local is_entry_page = namespace == "" or namespace == "Reconstruction"
	if not is_entry_page then
		return output
	end

	return output .. "[[Category:" .. lang:getCanonicalName() .. " terms with IPA pronunciation]]"
end

--- Formats a list of pronunciations as a comma-separated string.
-- Each item is a table with a `pron` field (the transcription, passed to
-- export.format_IPA) and an optional `note` field, which is rendered as a
-- <ref> tag after the transcription.
-- When the list is empty, a sample pronunciation is shown on pages in the
-- Template namespace; elsewhere a cleanup category is emitted instead.
-- @param lang   language object, passed through to export.format_IPA
-- @param items  sequence of {pron = ..., note = ...} tables (not modified)
-- @return wikitext string: formatted pronunciations followed by any categories
function export.format_IPA_multiple(lang, items)
	-- The previous `notes = notes or {}` statement was removed: it assigned an
	-- unused global variable.
	local categories = {}

	if #items == 0 then
		if mw.title.getCurrentTitle().nsText == "Template" then
			-- Use a fresh list for the placeholder rather than inserting into
			-- the caller's table.
			items = {{pron = "/aɪ piː ˈeɪ/"}}
		else
			table.insert(categories, "[[Category:Pronunciation templates without a pronunciation]]")
		end
	end

	local bits = {}

	for _, item in ipairs(items) do
		local bit = export.format_IPA(lang, item.pron)

		if item.note then
			bit = bit .. mw.getCurrentFrame():extensionTag("ref", item.note)
		end

		table.insert(bits, bit)
	end

	return table.concat(bits, ", ") .. table.concat(categories)
end

-- TODO: Move the character lists below (diacritics, tones, valid_symbols) into a data module


-- Combining diacritic characters. The string is appended to valid_symbols and
-- is also spliced into a negated character class by the repetition check in
-- export.format_IPA.
local diacritics = '̘̙̜̝̞̟̠̣̤̥̩̪̬̯̰̹̺̻̼͇͈͉͍͎͔͕̀́̂̃̄̆̈̋̌̏̽͆͊͋͌̊̌᷄᷅᷆᷇᷈᷉̚͢͡'




-- Tone letters and superscript tone digits. Appended to valid_symbols and
-- excluded from the repetition check in export.format_IPA.
local tones = '˥˦˧˨˩¹²³⁴⁵'
-- Every character accepted in a transcription. The string is placed inside
-- [...] in a pattern by export.format_IPA, which is why some punctuation
-- characters are %-escaped.
-- NOTE(review): ASCII "g" is not in the list while "ɡ" is - presumably deliberate; confirm.
local valid_symbols = ' %(%)%%{%|%}%-~.!abcdefhijklmnopqrstuvwxyz¡àáâãäæçèéêëìíîïðòóôõöøùúûüýÿāăēĕěħĩīĭŋōŏőœũūŭűŷǀǁǂǃǎǐǒǔǖǘǚǜǟǣǽǿȁȅȉȍȕȫȭȳɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɪɫɬɭɮɯɰɱɲɳɴɵɶɸɹɺɻɽɾʀʁʂʃʄʈʉʊʋṽʌʍʎʏʐʑʒʔʕʘʙʛʜʝʟʡʢʬʭ⁻¹²³⁴⁵ᵝʰʱʲʳʴʵʶʷʸʼˀˁˈˌːˑ˞ˠˡˢˣˬ˭β͜θχᴙᵊᵐᵑᶑᶣᶬᶮᶯᶰᶹ᷽ḁḛḭḯṍṏṳṵṹṻạẹẽịọụỳỵỹ‖․‥…‼‿ⁿ↑↓↗↘ⱱꜛꜜꟸꟹ𝆏𝆑' .. diacritics .. tones

-- Takes an IPA pronunciation and formats it and adds cleanup categories.
--- Formats a single IPA transcription and appends cleanup categories.
-- The transcription is expected to be wrapped in /.../ (phonemic) or [...]
-- (phonetic). Several checks add tracking categories; for phonemic
-- transcriptions in a language with an entry in m_data.phonemes, characters
-- that match no listed phoneme are wrapped in a red <span>.
-- @param lang  language object or nil; only consulted for phonemic transcriptions
-- @param pron  transcription string including its delimiters
-- @return wikitext: <span class="IPA"> element followed by category links
function export.format_IPA(lang, pron)
	local cats = {}

	-- Look at the first and last character to decide the transcription type
	local _, _, opening, closing = mw.ustring.find(pron, '^(.).-(.)$')
	local kind = nil
	if opening == '/' and closing == '/' then
		kind = "phonemic"
	elseif opening == '[' and closing == ']' then
		kind = "phonetic"
	end

	if kind then
		-- Strip the delimiters; they are put back at the end
		pron = mw.ustring.sub(pron, 2, -2)
	else
		table.insert(cats, "[[Category:IPA pronunciations with invalid representation marks]]")
	end

	-- Obsolete and nonstandard symbols: only the first hit is reported, and
	-- the position of the match is used as the category sort key
	for _, symbol in ipairs(m_data.nonstandard) do
		local position = mw.ustring.find(pron, symbol)
		if position then
			table.insert(cats, "[[Category:IPA pronunciations with obsolete or nonstandard characters|" .. position .. "]]")
			break
		end
	end

	-- Invalid symbols: whatever is left after deleting every valid character
	local leftover = mw.ustring.gsub(pron, '[' .. valid_symbols .. ']', '')
	if leftover ~= '' then
		mw.log(pron, leftover)
		table.insert(cats, "[[Category:IPA pronunciations with invalid IPA characters|" .. leftover .. "]]")
	end

	-- Doubled character (tone characters excepted) followed by something that is not a diacritic.
	-- NOTE(review): the pattern needs a character after the pair, so a doubled
	-- character at the very end of the string is not flagged - confirm this is intended.
	local repetition_pattern = '([^' .. tones .. '])%1[^' .. diacritics .. ']'
	if mw.ustring.match(pron, repetition_pattern) then
		table.insert(cats, "[[Category:IPA pronunciations with repetition]]")
	end

	-- Reference inside IPA template usage
	-- FIXME: Doesn't work; you can't put HTML in module output.
	--if mw.ustring.find(pron, '</ref>') then
	--	table.insert(cats, "[[Category:IPA pronunciations with reference]]")
	--end

	-- Phoneme validation applies only to phonemic transcriptions of languages with a phoneme list
	local valid_phonemes = nil
	if kind == "phonemic" and lang then
		valid_phonemes = m_data.phonemes[lang:getCode()]
	end

	if valid_phonemes then
		local rest = pron
		local segments = {}

		while mw.ustring.len(rest) > 0 do
			local head = mw.ustring.sub(rest, 1, 1)
			local best, best_len = "", 0

			if head == "(" or head == ")" then
				-- Parentheses are always accepted as-is
				best, best_len = head, mw.ustring.len(head)
			else
				-- Greedy segmentation: take the longest listed phoneme that prefixes the remainder
				for _, phoneme in ipairs(valid_phonemes) do
					local phoneme_len = mw.ustring.len(phoneme)
					if phoneme_len > best_len and mw.ustring.sub(rest, 1, phoneme_len) == phoneme then
						best, best_len = phoneme, phoneme_len
					end
				end
			end

			if best_len > 0 then
				table.insert(segments, best)
				rest = mw.ustring.sub(rest, best_len + 1)
			else
				-- Nothing matched: highlight one character and move on
				table.insert(segments, "<span style=\"color: red\">" .. head .. "</span>")
				rest = mw.ustring.sub(rest, 2)
				table.insert(cats, "[[Category:IPA pronunciations with invalid phonemes/" .. lang:getCode() .. "]]")
				require("Module:debug").track("IPA/invalid phonemes/" .. head)
			end
		end

		pron = table.concat(segments)
	end

	-- Restore the delimiters that were stripped above
	if kind == "phonemic" then
		pron = "/" .. pron .. "/"
	elseif kind == "phonetic" then
		pron = "[" .. pron .. "]"
	end

	return '<span class="IPA" lang="">' .. pron .. '</span>' .. table.concat(cats)
end

-- IPA <-> XSAMPA lookup tables, keyed by symbol string. They start empty and
-- are filled on first use by Populate_IPA_XSAMPA_LookupTables().
local i2x_lookup, x2i_lookup = {}, {}

--- Fills i2x_lookup (IPA symbol -> X-SAMPA) and x2i_lookup (X-SAMPA -> IPA
-- symbol) from the first table in Module:IPA/data/symbols. A symbol's XSAMPA
-- field may be a single string or a list of strings; with a list, the first
-- entry is used for IPA -> X-SAMPA and every entry maps back to the IPA symbol.
-- Calls after the first successful population return immediately.
function Populate_IPA_XSAMPA_LookupTables()
	-- Both tables use string keys, so the length operator always reports 0 for
	-- them; next() is what actually tells whether they have been filled.
	if next(i2x_lookup) ~= nil or next(x2i_lookup) ~= nil then return end

	local m = mw.loadData('Module:IPA/data/symbols') --[[Module:IPA/data/symbols]]

	for ipa_sym, data in pairs(m.symbols[1]) do
		local xsampa = data.XSAMPA
		if type(xsampa) == "table" then
			i2x_lookup[ipa_sym] = xsampa[1]
			for _, xsampa_sym in ipairs(xsampa) do
				x2i_lookup[xsampa_sym] = ipa_sym
			end
		elseif xsampa ~= nil then
			-- The nil check skips symbols without an X-SAMPA equivalent;
			-- using nil as a table key would raise an error.
			i2x_lookup[ipa_sym] = xsampa
			x2i_lookup[xsampa] = ipa_sym
		end
	end

	--exception cases where two IPA characters map to one XSAMPA character
	x2i_lookup["_T"]="˥"
	x2i_lookup["_H"]="˦"
	x2i_lookup["_M"]="˧"
	x2i_lookup["_L"]="˨"
	x2i_lookup["_B"]="˩"
end


--- Converts IPA text to X-SAMPA.
-- Accepts either a plain string or a frame object. With a frame, the first
-- argument is used after replacing {{=}} and {{!}} and decoding HTML
-- entities, and the result is nowiki-escaped before being returned.
-- @param text  IPA string, or a frame whose args[1] holds the IPA string
-- @return X-SAMPA string
function export.IPA_to_XSAMPA(text)
	Populate_IPA_XSAMPA_LookupTables()

	local from_frame = type(text) == 'table'
	if from_frame then
		-- a frame, extract args
		local arg = text.args[1]
		local unescaped = arg:gsub('{{=}}', '='):gsub('{{!}}', '|')
		text = mw.text.decode(unescaped) -- XXX
	end

	-- this basically sums up m_data.symbols[2].XSAMPA
	local collapsed = mw.ustring.gsub(text, 'ːː', ':')
	-- Each character with an entry in i2x_lookup is replaced; others are kept
	local converted = mw.ustring.gsub(collapsed, '.', i2x_lookup)

	if from_frame then
		converted = mw.text.nowiki(converted)
	end
	return converted
end

--- Converts X-SAMPA text to IPA.
-- Accepts either a plain string or a frame object; with a frame, the first
-- argument is used after decoding HTML entities. The text is consumed from
-- the left: at each step the longest prefix of up to four characters found
-- in x2i_lookup is replaced, and a character with no match is copied as-is.
-- The result is returned without nowiki escaping (that step is disabled).
-- @param text  X-SAMPA string, or a frame whose args[1] holds the string
-- @return IPA string
function export.XSAMPA_to_IPA(text)
	Populate_IPA_XSAMPA_LookupTables()

	if type(text) == 'table' then -- a frame, extract args
		text = mw.text.decode(text.args[1]) -- XXX
	end

	-- XXX: may not be the most efficient, but at least correct.
	local pieces = {}
	while #text > 0 do
		local replacement = nil
		local consumed = 1

		-- Try the longest candidate first
		for len = 4, 1, -1 do
			local ipa = x2i_lookup[mw.ustring.sub(text, 1, len)]
			if ipa then
				replacement, consumed = ipa, len
				break
			end
		end

		if replacement then
			pieces[#pieces + 1] = replacement
		else -- no match
			pieces[#pieces + 1] = mw.ustring.sub(text, 1, 1)
		end
		text = mw.ustring.sub(text, consumed + 1)
	end

	return table.concat(pieces)
end

return export