Module:cs-pronunciation: difference between revisions

From Wiktionary, the free dictionary
Jump to navigation Jump to search
Content deleted Content added
slightly neater
add assimilation of n before velar
Line 179: Line 179:
-- at the end of a word after an obstruent
-- at the end of a word after an obstruent
IPA = gsub(IPA, "(" .. obstruent .. sonorant .. ")$", "%1" .. syllabic)
IPA = gsub(IPA, "(" .. obstruent .. sonorant .. ")$", "%1" .. syllabic)
return IPA
end

local function assimilate_nasal(IPA)
local velar = "[ɡk]"
IPA = gsub(IPA, "n(" .. velar .. ")", "ŋ%1")
return IPA
return IPA
Line 194: Line 202:
IPA = devoice_fricative_r(IPA)
IPA = devoice_fricative_r(IPA)
IPA = syllabicize_sonorants(IPA)
IPA = syllabicize_sonorants(IPA)
IPA = assimilate_nasal(IPA)
-- Change double to single consonants.
-- Change double to single consonants.

Revision as of 23:33, 17 February 2017

This module generates Czech pronunciation transcriptions for {{cs-IPA}}.

Testcases

See Module:cs-pronunciation/testcases.


local export = {}

local m_params = require("Module:parameters")
local m_IPA = require("Module:IPA")
local m_links = require("Module:links")
local m_script_utils = require("Module:script utilities")

local lang = require("Module:languages").getByCode("cs")
local sc = require("Module:scripts").getByCode("Latn")

local U = mw.ustring.char
local sub = mw.ustring.sub
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch

-- IPA length mark and the combining diacritics used to build transcriptions.
local long = "ː"
local nonsyllabic = U(0x32F)	-- inverted breve below
local syllabic = U(0x0329)		-- U+0329, combining vertical line below
-- U+030D is the combining vertical line *above*.
-- NOTE(review): the name says "below" and nothing in this module references it — confirm intent.
local syllabic_below = U(0x030D)
local raised = U(0x31D)			-- uptack below
local caron = U(0x30C)			-- combining caron
local tie = U(0x361)			-- combining double inverted breve

-- Spelling rewrites, keyed by pattern, that export.toIPA applies to the
-- lowercased term before the letter-by-letter lookup. Each one inserts a
-- combining caron after the consonant; toIPA then NFC-normalizes the result
-- so the consonant + caron sequence is recomposed into a single letter.
local replacements = {
	-- ě, i, and í indicate that the preceding consonant t, d, or n is palatal, as if written ť, ď, or ň.
	["([tdn])ě"] = "%1" .. caron .. "e",
	["([tdn])([ií])"] = "%1" .. caron .. "%2",
	-- mě is rewritten as m + n + caron + e.
	["mě"] = "mn" .. caron .. "e",
}

-- Maps a Czech letter or two-letter sequence to its IPA output.
-- export.toIPA looks up two-character keys before single-character ones.
local data = {
	["á"] = "a" .. long,
	["c"] = "t" .. tie .. "s",
	["č"] = "t" .. tie .. "ʃ",
	["ď"] = "ɟ",
	["e"] = "ɛ",
	["é"] = "ɛ" .. long,
	["ě"] = "jɛ",
	["g"] = "ɡ",
	["h"] = "ɦ",
	["ch"] = "x",
	["i"] = "ɪ",
	["í"] = "i" .. long,
	["ň"] = "ɲ",
	["ó"] = "o" .. long,
	["q"] = "k",
	["ř"] = "r" .. raised,
	["š"] = "ʃ",
	-- Same value the identity loop below assigns to "t".
	["t"] = "t",
	["ť"] = "c",
	["ú"] = "u" .. long,
	["ů"] = "u" .. long,
	["x"] = "ks",
	["y"] = "ɪ",
	["ý"] = "i" .. long,
	["ž"] = "ʒ",
	-- Diphthongs: the second element carries the non-syllabic mark.
	["ou"] = "ou" .. nonsyllabic,
	["au"] = "au" .. nonsyllabic,
	-- NOTE(review): plain "e" here, whereas single "e" maps to "ɛ" — confirm this is intended.
	["eu"] = "eu" .. nonsyllabic,
	-- A double quote in the input becomes the primary stress mark.
	["\""] = "ˈ",
}

-- Add data["a"] = "a", data["b"] = "b", etc.
-- The trailing space in the list makes a space map to itself as well.
for character in gmatch("abdfjklmnoprstuvz ", ".") do
	data[character] = character
end

--[[	Sounds written with more than one character. While the rules run,
		each is swapped for a single digit (its position in this list,
		as a string) so that every sound is one character long.		]]

local multiple_char = {
	"t" .. tie .. "s",	"t" .. tie .. "ʃ",	"r̝̊",
	"d" .. tie .. "z",	"d" .. tie .. "ʒ",	"r̝",
}

-- Lookup from the multi-character sound to its digit placeholder.
local singlechar = {}
for position, sound in ipairs(multiple_char) do
	singlechar[sound] = tostring(position)
end

--[[	These are obstruents only;
		sonorants, /m, n, ɲ, r, l, j/, are not involved
		in voicing assimilation, according to Wikipedia.	]]
-- The two obstruent lists are parallel: the same position in each holds the
-- voiceless and voiced counterpart (the rules below swap one for the other by index).
-- The digits are the placeholders from multiple_char:
-- 1 = t͡s, 2 = t͡ʃ, 3 = r̝̊, 4 = d͡z, 5 = d͡ʒ, 6 = r̝.
local voiceless	= { "p", "t", "c", "k", "f", "s", "ʃ", "x", "1", "2", "3", }
local voiced	= { "b", "d", "ɟ", "ɡ", "v", "z", "ʒ", "ɦ", "4", "5", "6", }
local sonorants = { "m", "n", "ɲ", "r", "l", "j", }

-- features[c].voicing: whether obstruent c is voiced.
-- indices[c]: position of c in its own list (voiceless or voiced), used to
-- find its counterpart in the other list.
local features = {}
local indices = {}
for _, group in ipairs({ { voiceless, false }, { voiced, true } }) do
	local members, is_voiced = group[1], group[2]
	for position, obstruent in ipairs(members) do
		features[obstruent] = features[obstruent] or {}
		features[obstruent].voicing = is_voiced
		indices[obstruent] = position
	end
end

-- Character class of every obstruent; any of them can undergo assimilation.
local assimilable = "[" .. table.concat(voiceless) .. table.concat(voiced) .. "]"

-- Same class without v and 6 (r̝), which do not trigger assimilation.
-- The parentheses discard gsub's replacement count.
local causing_assimilation = (gsub(assimilable, "[v6]", ""))

--[[	Regressive voicing assimilation: every obstruent in a run takes the
		voicing of the obstruent that follows the run.
		
		IPA is the working transcription, with multi-character sounds
		already replaced by their digit placeholders. Returns the
		assimilated string.
		
		The whole run is handled in one match. Matching obstruents two at
		a time skips overlapping pairs, so in "vzt" the z was never checked
		against the t.													]]
local function regressively_assimilate(IPA)
	local cluster_pattern = "(" .. assimilable .. "+)(" .. causing_assimilation .. ")"
	
	IPA = gsub(IPA, cluster_pattern, function(cluster, trigger)
		-- Pick the list whose voicing matches the triggering obstruent.
		local target = features[trigger].voicing and voiced or voiceless
		
		-- Swap each obstruent for the member of the target list at the same
		-- index; an obstruent that already has that voicing maps to itself.
		local assimilated = gsub(cluster, ".", function(obstruent)
			return target[indices[obstruent]]
		end)
		
		return assimilated .. trigger
	end)
	
	return IPA
end

--[[	Word-final devoicing: a run of voiced obstruents at the end of the
		string, or directly before whitespace, is replaced by the voiceless
		counterparts.
		
		IPA is the working transcription, with multi-character sounds
		already replaced by their digit placeholders. Returns the
		devoiced string.
		
		Only the matched run is rewritten, in place. Earlier occurrences of
		the same consonants are left alone, the whitespace after a run is
		kept, and every word in a multi-word term is handled.			]]
local function devoice_finally(IPA)
	local voiced_obstruent = "[" .. table.concat(voiced) .. "]"
	
	-- Devoice a run of voiced obstruents and re-attach whatever followed it.
	local function devoice(cluster, boundary)
		local devoiced = gsub(cluster, ".", function(consonant)
			return voiceless[indices[consonant]]
		end)
		return devoiced .. boundary
	end
	
	-- before whitespace, i.e. at the end of a non-final word
	IPA = gsub(IPA, "(" .. voiced_obstruent .. "+)(%s)", devoice)
	-- at the very end of the term
	IPA = gsub(IPA, "(" .. voiced_obstruent .. "+)$", function(cluster)
		return devoice(cluster, "")
	end)
	
	return IPA
end

-- Turns 6 (r̝) into 3 (r̝̊) when it stands next to a voiceless obstruent,
-- on either side. Returns the updated string.
local function devoice_fricative_r(IPA)
	local voiced_r, voiceless_r = "6", "3"
	
	-- Class of voiceless obstruents without r̝̊ itself, which only this function produces.
	local neighbour = gsub("[" .. table.concat(voiceless) .. "]", voiceless_r, "")
	
	-- r̝ after a voiceless obstruent
	local after_pass = gsub(IPA, "(" .. neighbour .. ")" .. voiced_r, "%1" .. voiceless_r)
	-- r̝ before a voiceless obstruent
	local result = gsub(after_pass, voiced_r .. "(" .. neighbour .. ")", voiceless_r .. "%1")
	
	return result
end

-- Adds the syllabic mark to a sonorant (any of the sonorants except ɲ and j)
-- in three environments, applied in the order listed in `rules`.
-- Returns the updated string.
local function syllabicize_sonorants(IPA)
	local sonorant = gsub("[" .. table.concat(sonorants) .. "]", "[ɲj]", "")
	local obstruent = "[" .. table.concat(voiced) .. table.concat(voiceless) .. "]"
	
	-- Merge the two classes into one by stripping their brackets.
	local members = gsub(sonorant .. obstruent, "[%[%]]", "")
	local consonant = "[" .. members .. "]"
	
	local rules = {
		-- between a consonant and an obstruent
		{ "(" .. consonant .. sonorant .. ")(" .. obstruent .. ")", "%1" .. syllabic .. "%2" },
		-- at the beginning of the string before an obstruent
		{ "^(" .. sonorant .. ")(" .. obstruent .. ")", "%1" .. syllabic .. "%2" },
		-- at the end of the string after an obstruent
		{ "(" .. obstruent .. sonorant .. ")$", "%1" .. syllabic },
	}
	
	for _, rule in ipairs(rules) do
		IPA = gsub(IPA, rule[1], rule[2])
	end
	
	return IPA
end

-- Replaces n with ŋ when it directly precedes ɡ or k.
local function assimilate_nasal(IPA)
	-- The parentheses keep only the string and drop gsub's match count.
	return (gsub(IPA, "n([ɡk])", "ŋ%1"))
end

--[[	Runs the phonological rules over a raw transcription.
		
		Multi-character sounds are first swapped for their digit
		placeholders, the rules are applied in a fixed order, doubled
		consonants are reduced to one, and the placeholders are swapped
		back. Returns the finished transcription.
		
		The placeholder swap walks multiple_char in list order rather than
		using pairs() on a lookup table. pairs() has no defined order, and
		r̝ is a leading substring of r̝̊, so the longer sound (earlier in
		the list) has to be replaced first.								]]
local function apply_rules(IPA)
	for number, sound in ipairs(multiple_char) do
		IPA = gsub(IPA, sound, tostring(number))
	end
	
	local consonant = "[" .. table.concat(sonorants) .. table.concat(voiceless) .. table.concat(voiced) .. "]"
	
	IPA = regressively_assimilate(IPA)
	IPA = devoice_finally(IPA)
	IPA = devoice_fricative_r(IPA)
	IPA = syllabicize_sonorants(IPA)
	IPA = assimilate_nasal(IPA)
	
	-- Change double to single consonants.
	IPA = gsub(IPA, "(" .. consonant .. ")%1", "%1")
	
	-- Restore the multi-character sounds.
	for number, sound in ipairs(multiple_char) do
		IPA = gsub(IPA, tostring(number), sound)
	end
	
	return IPA
end

--[[	Transcribes a Czech term.
		
		Returns two values: the IPA string (without brackets) and the
		respelled, lowercased form the transcription was made from.
		Raises an error on any character that has no entry in `data`.	]]
function export.toIPA(term)
	-- Lowercase, apply the spelling rewrites, then recompose combining carons.
	local transcription = mw.ustring.lower(term)
	for pattern, rewrite in pairs(replacements) do
		transcription = gsub(transcription, pattern, rewrite)
	end
	transcription = mw.ustring.toNFC(transcription)
	
	local pieces = {}
	local total = mw.ustring.len(transcription)
	local cursor = 1
	
	-- Walk the string, preferring a two-character entry over a one-character one.
	while cursor <= total do
		local pair = sub(transcription, cursor, cursor + 1) or ""
		local sound = data[pair]
		
		if sound then
			cursor = cursor + 2
		else
			local letter = sub(transcription, cursor, cursor)
			sound = data[letter] or error('The letter "' .. tostring(letter) .. '" is not a member of the Czech alphabet.')
			cursor = cursor + 1
		end
		
		table.insert(pieces, sound)
	end
	
	local IPA = apply_rules(table.concat(pieces))
	
	return IPA, transcription
end

-- Template entry point: transcribes the first template argument, or the
-- current page title when it is absent, and returns the formatted IPA.
function export.show(frame)
	local args = m_params.process(frame:getParent().args, { [1] = {} })
	
	local term = args[1]
	if not term then
		term = mw.title.getCurrentTitle().text
	end
	
	-- Only the first return value (the IPA string) is needed here.
	local transcription = export.toIPA(term)
	local formatted = m_IPA.format_IPA_full(lang, { { pron = "[" .. transcription .. "]" } } )
	
	return formatted
end

--[[	Builds one documentation example: a link to the term, the respelling
		in parentheses when it differs from the term, a dash, and the
		formatted IPA.
		
		Arguments are read from frame.args: [1] (required) is the text to
		transcribe; "term" optionally overrides the term that is linked.	]]
function export.example(frame)
	local params = {
		[1] = { required = true },
		["term"] = {}
	}
	
	local args = m_params.process(frame.args, params)
	local term = args["term"] or args[1]
	
	local IPA, transcribable = export.toIPA(args[1])
	
	IPA = "[" .. IPA .. "]"
	IPA = m_IPA.format_IPA_full(lang, { { pron = IPA } } )
	
	-- Declared local so that it does not become a global.
	local link = m_links.full_link( { term = term, lang = lang, sc = sc }, "term" )
	
	local separator = " — "
	if term ~= transcribable then
		separator = " (" .. m_script_utils.tag_text(transcribable, lang, nil, "term") .. ") — "
	end
	
	return link .. separator .. IPA
end

return export