Module:la-pronunc

Definition from Wiktionary, the free dictionary
Jump to: navigation, search
This module is not meant to be used directly. It is used by {{la-IPA}}; see that template for usage.
local export = {}
 
-- Classical Latin spelling-to-phoneme table.
-- Keys are spelling units (single letters, digraphs, stress marks); values are
-- their IPA equivalents. letters_to_ipa picks the longest key that prefixes
-- the remaining text, so digraphs win over their first letter. Characters
-- with no entry (for example "h", "p", "t") are copied through unchanged.
local letters_ipa = {
	-- Short vowels
	a = "a",
	e = "e",
	i = "i",
	o = "o",
	u = "u",
	y = "y",

	-- Long vowels (macron spellings)
	["ā"] = "aː",
	["ē"] = "eː",
	["ī"] = "iː",
	["ō"] = "oː",
	["ū"] = "uː",
	["ȳ"] = "yː",

	-- Diphthongs
	ae = "ae̯",
	oe = "oe̯",
	ei = "ei̯",
	au = "au̯",
	eu = "eu̯",

	-- Consonants whose IPA value equals the letter
	b = "b",
	d = "d",
	f = "f",

	-- Consonants whose IPA value differs from the letter
	c = "k",
	g = "ɡ",
	v = "w",
	x = "ks",

	-- Aspirates and labialized stops.
	-- NOTE: "gh" yields an ASCII "g" followed by ʰ; the onsets list spells
	-- the same phoneme with an ASCII "g", so the two must stay in sync.
	ph = "pʰ",
	th = "tʰ",
	ch = "kʰ",
	gh = "gʰ",
	rh = "rʰ",
	qv = "kʷ",
	gv = "ɡʷ",

	-- Manual stress marks (ASCII apostrophe or the IPA stress sign)
	["'"] = "ˈ",
	["ˈ"] = "ˈ",
}
 
-- Ecclesiastical Latin spelling-to-phoneme table.
-- Same layout as the Classical table: keys are spelling units, values are
-- IPA. Context-dependent changes (soft c/g, ti + vowel, gn, loss of
-- aspiration) are applied afterwards in letters_to_ipa.
local letters_ipa_eccl = {
	-- Short vowels (y is pronounced like i)
	a = "a",
	e = "e",
	i = "i",
	o = "o",
	u = "u",
	y = "i",

	-- Long vowels (macron spellings)
	["ā"] = "aː",
	["ē"] = "eː",
	["ī"] = "iː",
	["ō"] = "oː",
	["ū"] = "uː",
	["ȳ"] = "iː",

	-- Digraphs: ae and oe are monophthongs here
	ae = "ɛ",
	oe = "ɛ",
	ei = "ei̯",
	au = "au̯",
	eu = "eu̯",

	-- Consonants whose IPA value equals the letter
	b = "b",
	d = "d",
	f = "f",

	-- Consonants whose IPA value differs from the letter
	c = "k",
	g = "ɡ",
	v = "v",
	x = "ks",

	-- Consonant digraphs
	ph = "f",
	th = "tʰ",
	ch = "kʰ",
	gh = "gʰ",
	rh = "rʰ",
	qv = "kw",
	gv = "ɡw",

	-- h is silent: it maps to the empty string
	h = "",

	-- Manual stress marks (ASCII apostrophe or the IPA stress sign)
	["'"] = "ˈ",
	["ˈ"] = "ˈ",
}
 
-- Phonetic realization of the short vowels. convert_word applies this to
-- each phoneme for the non-Ecclesiastical phonetic transcription; "a" and all
-- long vowels have no entry and are therefore left as they are.
local phonetic_vowels = { e = "ɛ", i = "ɪ", o = "ɔ", u = "ʊ", y = "ʏ" }
 
-- Allophonic rewrite rules for the non-Ecclesiastical phonetic transcription.
-- Each entry is {pattern, replacement}; convert_word applies them in order
-- with mw.ustring.gsub to the finished string, in which syllables are
-- separated by "." and the stressed syllable is introduced by "ˈ" instead.
-- Rules that look across a syllable boundary therefore have to accept either
-- mark, which is what the optional [.ˈ] class is for.
local phonetic_rules = {
	-- Velar nasal: g before n, and n before a velar stop
	{"ɡ([.ˈ]?)n", "ŋ%1n"},
	{"n([.ˈ]?)([kɡ])", "ŋ%1%2"},

	-- Labialization becomes labio-palatalization before a front vowel
	{"ʷ([eɛiɪyʏ])", "ᶣ%1"},

	-- Nasal vowels: word-final vowel + nasal
	{"a[nm]$", "ã"},
	{"[eɛ][nm]$", "ẽ"},
	{"[iɪ][nm]$", "ĩ"},
	{"[oɔ][nm]$", "õ"},
	{"[uʊ][nm]$", "ũ"},
	{"[yʏ][nm]$", "ỹ"},
	-- Nasal vowels: vowel + nasal before s or f becomes a long nasal vowel
	{"a[nm]([%.ˈ]?[sf])", "ãː%1"},
	{"[eɛ][nm]([%.ˈ]?[sf])", "ẽː%1"},
	{"[iɪ][nm]([%.ˈ]?[sf])", "ĩː%1"},
	{"[oɔ][nm]([%.ˈ]?[sf])", "õː%1"},
	{"[uʊ][nm]([%.ˈ]?[sf])", "ũː%1"},
	{"[yʏ][nm]([%.ˈ]?[sf])", "ỹː%1"},

	--L pinguis: every l is dark by default ...
	{"l", "ɫ"},
	-- ... except a geminate ll and an l before i or y. The boundary between
	-- the two consonants may be a plain "." or the stress mark "ˈ"; accepting
	-- only "." left "lˈl" (e.g. the ll of a stressed second syllable) dark.
	{"ɫ([.ˈ]?)ɫ", "l%1l"},
	{"ɫ([.ˈ]?[iɪyʏ])", "l%1"},
}
 
-- Allophonic rewrite rules for the Ecclesiastical phonetic transcription.
-- Each entry is {pattern, replacement}; convert_word applies them in order
-- with mw.ustring.gsub to the finished, syllabified string.
local phonetic_rules_eccl = {
	-- assimilation: n before a velar stop
	{
		"n([.ˈ]?)([kɡ])",
		"ŋ%1%2",
	},
	-- z is an affricate
	{
		"z",
		"d͡z",
	},
	-- gemination of ʃ between vowels (the length mark of the first vowel is dropped)
	{
		"([aɛeiɔou])ː?%.ʃ([aɛeiɔou])",
		"%1ʃ.ʃ%2",
	},
	-- gemination of ɲ between vowels (the length mark of the first vowel is dropped)
	{
		"([aɛeiɔou])ː?%.ɲ([aɛeiɔou])",
		"%1ɲ.ɲ%2",
	},
}
 
-- Maps a vowel to its long counterpart. Both the short and the already-long
-- form are keys, so looking up a long vowel returns it unchanged.
local lengthen_vowel = {}
for _, short in ipairs({ "a", "ɛ", "e", "i", "ɔ", "o", "u" }) do
	local long = short .. "ː"
	lengthen_vowel[short] = long
	lengthen_vowel[long] = long
end
 
-- Every phoneme that counts as a syllable nucleus. The list is turned into a
-- lookup set (vowels[phoneme] == true) by a loop further down in this file.
local vowels = {
	-- Short monophthongs
	"a", "ɛ", "e", "ɪ", "i",
	"ɔ", "o", "ʊ", "u", "y",

	-- Long monophthongs
	"aː", "ɛː", "eː", "iː",
	"ɔː", "oː", "uː", "yː",

	-- Diphthongs
	"ae̯", "oe̯", "ei̯", "au̯", "eu̯",
}
 
 
-- Consonant sequences that may begin a syllable. split_syllables moves
-- consonants back to the preceding syllable until what remains is in this
-- list. It is turned into a lookup set by a loop further down in this file.
local onsets = {
	-- Single stops (plain and aspirated)
	"b", "p", "pʰ",
	"d", "t", "tʰ",
	"ɡ", "gʰ", "k", "kʰ",
	-- Labialized stops and affricates
	"kʷ", "ɡʷ", "kw", "ɡw",
	"t͡s", "t͡ʃ", "d͡ʒ", "ʃ",
	-- Fricatives
	"f", "s", "h", "z",
	-- Sonorants and glides
	"l", "m", "n", "ɲ", "r", "rʰ", "j", "v", "w",

	-- Labial + consonant
	"bl", "pl", "pʰl", "br", "pr", "pʰr", "ps",
	-- Dental + r
	"dr", "tr", "tʰr",
	-- Velar + consonant
	"ɡl", "kl", "kʰl", "ɡr", "kr", "kʰr", "ɡn",
	-- f + liquid
	"fl", "fr",

	-- s + consonant
	"sp", "st", "sk", "skʷ", "sl", "sm", "sn", "sw",
	-- s + stop + liquid
	"spr", "str", "skr",
	"spl", "skl",
}
 
-- Consonant sequences that may end a syllable. split_syllables only uses
-- this list to report (track) syllables whose coda is not listed. It is
-- turned into a lookup set by a loop further down in this file.
local codas = {
	-- Single consonants
	"b", "p", "pʰ",
	"d", "t", "tʰ",
	"ɡ", "k", "kʰ",
	"f", "s",
	"l", "m", "n", "ɲ", "r", "j", "ʃ",

	-- s + stop
	"sp", "st", "sk", "spʰ", "stʰ", "skʰ",

	-- l + obstruent
	"lp", "lt", "lk", "lb", "ld", "lɡ", "lpʰ", "ltʰ", "lkʰ", "lf",

	-- r + obstruent
	"rp", "rt", "rk", "rb", "rd", "rɡ", "rpʰ", "rtʰ", "rkʰ", "rf",

	-- Nasal + stop
	"mp", "nt", "nk", "mb", "nd", "nɡ", "mpʰ", "ntʰ", "nkʰ",

	-- Sonorant + sonorant
	"lm", "rl", "rm", "rn",

	-- Clusters ending in s
	"ps", "ts", "ks", "ls", "ns", "rs",
	"lks", "nks", "rks",
	"lms", "rls", "rms", "rns",
}
 
-- Replacement table used as the gsub substitution in convert_words and
-- export.track: breve-marked vowels lose the breve, ligatures are expanded.
-- NOTE: there is no entry for "ŏ".
local breves = {
	-- Breve vowels
	["ă"] = "a", ["ĕ"] = "e", ["ĭ"] = "i", ["ŭ"] = "u",
	-- Ligatures
	["æ"] = "ae", ["œ"] = "oe",
}
 
-- Make each phoneme list double as a lookup set: besides its array entries,
-- the table gets list[phoneme] = true for every phoneme it contains.
for _, list in ipairs({ vowels, onsets, codas }) do
	for index = 1, #list do
		list[list[index]] = true
	end
end
 
 
--- Convert a spelled word into a list of IPA phonemes.
-- The word is consumed from left to right: at each position the longest key
-- of the active letter table that prefixes the remaining text is replaced by
-- its IPA value, and a character with no matching key is copied through
-- unchanged. For Ecclesiastical pronunciation a second pass then applies the
-- context-dependent consonant changes.
-- @param word      the spelling to convert
-- @param phonetic  not used by this function
-- @param eccl      truthy to use the Ecclesiastical letter table and rules
-- @return array of phoneme strings; entries may be "" where two letters were
--         merged into one phoneme
local function letters_to_ipa(word,phonetic,eccl)
	local phonemes = {}

	local dictionary = eccl and letters_ipa_eccl or letters_ipa

	while mw.ustring.len(word) > 0 do
		local longestmatch = ""
		local longestlen = 0

		-- pairs() order is unspecified, but only one key of any given length
		-- can prefix the word, so the strict ">" makes the result deterministic.
		for letter in pairs(dictionary) do
			local letterlen = mw.ustring.len(letter)
			if letterlen > longestlen and mw.ustring.sub(word, 1, letterlen) == letter then
				longestmatch = letter
				longestlen = letterlen
			end
		end

		if longestlen > 0 then
			table.insert(phonemes, dictionary[longestmatch])
			word = mw.ustring.sub(word, longestlen + 1)
		else
			-- Unknown character: keep it as its own phoneme
			table.insert(phonemes, mw.ustring.sub(word, 1, 1))
			word = mw.ustring.sub(word, 2)
		end
	end

	if eccl then for i=1,#phonemes do
		-- c and g are softened before e, ɛ and i
		if phonemes[i+1] and (phonemes[i] == "k" or phonemes[i] == "ɡ") and (phonemes[i+1] == "e" or phonemes[i+1] == "ɛ" or phonemes[i+1] == "eː" or phonemes[i+1] == "i" or phonemes[i+1] == "iː") then
			phonemes[i] = phonemes[i] == "k" and "t͡ʃ" or "d͡ʒ"
			-- sc + front vowel merges into ʃ, unless the s follows a consonant
			if phonemes[i] == "t͡ʃ" and phonemes[i-1] and phonemes[i-1] == "s" and not (phonemes[i-2] and not vowels[phonemes[i-2]]) and not (phonemes[i+1] and not vowels[phonemes[i+1]]) then
				phonemes[i-1] = ""
				phonemes[i] = "ʃ"
			end
			-- cc + front vowel: the first c becomes t
			if phonemes[i-1] and phonemes[i-1] == "k" and phonemes[i] == "t͡ʃ" then
				phonemes[i-1] = "t"
			end
			-- gg + front vowel: the first g becomes d. The letter table emits
			-- the IPA letter "ɡ" (U+0261), so that is what must be compared
			-- here; an ASCII "g" never occurs in the phoneme list.
			if phonemes[i-1] and phonemes[i-1] == "ɡ" and phonemes[i] == "d͡ʒ" then
				phonemes[i-1] = "d"
			end
		end
		-- ti + vowel becomes t͡si, except after s
		if phonemes[i+2] and phonemes[i] == "t" and phonemes[i+1] == "i" and vowels[phonemes[i+2]] and not (phonemes[i-1] and phonemes[i-1] == "s") then
			phonemes[i] = "t͡s"
		end
		-- Aspiration is not pronounced
		if phonemes[i] == "kʰ" then phonemes[i] = "k" end
		if phonemes[i] == "tʰ" then phonemes[i] = "t" end
		-- gn merges into ɲ
		if phonemes[i+1] and phonemes[i] == "ɡ" and phonemes[i+1] == "n" then
			phonemes[i] = ""
			phonemes[i+1] = "ɲ"
		end
	end end

	return phonemes
end
 
 
--- Return the consonants before the first vowel of a syllable, joined into
-- one string. A stress mark "ˈ" is skipped. If the syllable has no vowel,
-- every phoneme except the stress mark is returned.
-- @param syll  array of phoneme strings
-- @return string (possibly empty)
local function get_onset(syll)
	local parts = {}
	local pos = 1

	while pos <= #syll and not vowels[syll[pos]] do
		local phoneme = syll[pos]
		if phoneme ~= "ˈ" then
			parts[#parts + 1] = phoneme
		end
		pos = pos + 1
	end

	return table.concat(parts)
end
 
 
--- Return everything after the last vowel of a syllable, joined into one
-- string. If the syllable has no vowel, all of its phonemes are returned.
-- @param syll  array of phoneme strings
-- @return string (possibly empty)
local function get_coda(syll)
	-- Position of the last vowel, or 0 when there is none
	local last_vowel = 0

	for pos = #syll, 1, -1 do
		if vowels[syll[pos]] then
			last_vowel = pos
			break
		end
	end

	return table.concat(syll, "", last_vowel + 1)
end
 
 
--- Return the first vowel phoneme of a syllable, or nothing if it has none.
-- @param syll  array of phoneme strings
local function get_vowel(syll)
	for _, phoneme in ipairs(syll) do
		if vowels[phoneme] then
			return phoneme
		end
	end
end
 
 
-- Split the word into syllables of CV shape
--- Works in three passes:
--   1. Cut the phoneme list after every vowel; "." and "ˈ" force a cut.
--   2. Move consonants back to the preceding syllable until each syllable
--      starts with a permitted onset, and split s + consonant and gn after
--      an open syllable.
--   3. Report (via Module:debug tracking) any onset or coda that is not in
--      the onsets / codas lists.
-- @param remainder  array of phoneme strings; it is emptied by this function
-- @return array of syllables, each an array of phoneme strings
local function split_syllables(remainder)
	local syllables = {}
	local syll = {}

	while #remainder > 0 do
		local phoneme = table.remove(remainder, 1)

		if phoneme == "." then
			-- Explicit syllable break: close whatever has been collected
			if #syll > 0 then
				table.insert(syllables, syll)
				syll = {}
			end
		elseif phoneme == "ˈ" then
			-- Explicit stress: the mark opens the next syllable
			if #syll > 0 then
				table.insert(syllables,syll)
			end
			syll = {"ˈ"}
		elseif vowels[phoneme] then
			table.insert(syll, phoneme)
			table.insert(syllables, syll)
			syll = {}
		else
			table.insert(syll, phoneme)
		end
	end

	-- If there are phonemes left, then the word ends in a consonant
	-- Add them to the last syllable
	if #syll > 0 then
		if #syllables == 0 then
			-- The word has no vowel at all, so there is no syllable to append
			-- to; keep the phonemes as a syllable of their own.
			table.insert(syllables, syll)
		else
			local last = syllables[#syllables]
			for _, phoneme in ipairs(syll) do
				table.insert(last, phoneme)
			end
		end
	end

	-- Split consonant clusters between syllables.
	-- An explicit index is used because a syllable may be removed inside the
	-- loop; with ipairs the syllable that slides into the freed slot would be
	-- skipped.
	local i = 2
	while i <= #syllables do
		local current = syllables[i]
		local previous = syllables[i-1]
		local onset = get_onset(current)
		-- Shift over consonants until the syllable onset is valid
		while not (onset == "" or onsets[onset]) do
			table.insert(previous, table.remove(current, 1))
			onset = get_onset(current)
		end

		-- If the preceding syllable still ends with a vowel, and the current one begins with s + another consonant, or with gn, then shift it over.
		-- The g is the IPA letter "ɡ" (U+0261) produced by the letter tables, not an ASCII "g".
		if get_coda(previous) == "" and ((current[1] == "s" and not vowels[current[2]]) or (current[1] == "ɡ" and current[2] == "n")) then
			table.insert(previous, table.remove(current, 1))
		end

		if get_vowel(current) then
			i = i + 1
		else
			-- If there is no vowel at all in this syllable, merge it into the
			-- preceding one and look at the same index again.
			for _ = 1, #current do
				table.insert(previous, table.remove(current, 1))
			end
			table.remove(syllables, i)
		end
	end

	for _, syllable in ipairs(syllables) do
		local onset = get_onset(syllable)
		local coda = get_coda(syllable)

		if not (onset == "" or onsets[onset]) then
			require("Module:debug").track("la-pronunc/bad onset")
			--error("onset error:[" .. onset .. "]")
		end

		if not (coda == "" or codas[coda]) then
			require("Module:debug").track("la-pronunc/bad coda")
			--error("coda error:[" .. coda .. "]")
		end
	end

	return syllables
end
 
 
--- Work out which syllable carries the stress.
-- A stress mark "ˈ" anywhere in the syllables wins: it is removed from its
-- syllable and that syllable's index is returned. Otherwise a two-syllable
-- word is stressed on the first syllable, and a longer word on the
-- antepenult when the penult ends in a single short vowel letter, else on the
-- penult. Nothing is returned for words of fewer than two syllables.
-- @param syllables  array of syllables (arrays of phoneme strings); modified
--                   in place when a stress mark is found
-- @param eccl       not used by this function
-- @return index of the stressed syllable, or nothing
local function detect_accent(syllables,eccl)
	-- Manual override
	for index, syll in ipairs(syllables) do
		for pos, phoneme in ipairs(syll) do
			if phoneme == "ˈ" then
				table.remove(syll, pos)
				return index
			end
		end
	end

	-- Detect accent placement
	local count = #syllables

	if count == 2 then
		return 1
	end

	if count > 2 then
		local penult = syllables[count - 1]
		-- Does the penultimate syllable end in a single vowel?
		local open_and_short = mw.ustring.find(penult[#penult], "^[aeiouy]$")

		if open_and_short then
			return count - 2
		end
		return count - 1
	end
end
 
 
--- Transcribe a single word.
-- The spelling is normalized with an ordered list of substitutions, turned
-- into phonemes, split into syllables and given a stress mark; the result is
-- joined with "." between syllables and "ˈ" before the stressed one.
-- @param word      one lower-case word
-- @param phonetic  truthy for the phonetic (allophonic) transcription
-- @param eccl      truthy for Ecclesiastical pronunciation
-- @return the transcription as a string
local function convert_word(word, phonetic, eccl)
	-- Applied strictly in this order; later rules see the output of earlier ones
	local spelling_rules = {
		-- Normalize spelling
		{"w", "v"},
		{"([aeiouyāēīōūȳ])v([^aeiouyāēīōūȳ])", "%1u%2"},
		{"qu", "qv"},
		{"ngu([aeiouyāēīōūȳ])", "ngv%1"},

		{"^i([aeiouyāēīōūȳ])", "j%1"},
		{"^u([aeiouyāēīōūȳ])", "v%1"},
		{"([aeiouyāēīōūȳ])i([aeiouyāēīōūȳ])", "%1j%2"},
		{"([aeiouyāēīōūȳ])u([aeiouyāēīōūȳ])", "%1v%2"},

		-- Vowel length before nasal + fricative is allophonic
		{"ā([mn][fs])", "a%1"},
		{"ē([mn][fs])", "e%1"},
		{"ī([mn][fs])", "i%1"},
		{"ō([mn][fs])", "o%1"},
		{"ū([mn][fs])", "u%1"},
		{"ȳ([mn][fs])", "y%1"},

		-- Apply some basic phoneme-level assimilations
		{"xs?", "ks"},
		{"b([st])", "p%1"},
		{"d([st])", "t%1"},
		{"g([st])", "k%1"},
		{"n([bp])", "m%1"},
	}

	for _, rule in ipairs(spelling_rules) do
		word = mw.ustring.gsub(word, rule[1], rule[2])
	end

	-- Convert word to IPA, then split into syllables
	local syllables = split_syllables(letters_to_ipa(word,phonetic,eccl))

	-- Add accent (may be nil when no syllable is stressed)
	local accent = detect_accent(syllables,eccl)

	-- Per-phoneme adjustments: Ecclesiastical drops length marks, the
	-- Classical phonetic transcription swaps in the lax short vowels
	for _, syll in ipairs(syllables) do
		for j = 1, #syll do
			if eccl then
				syll[j] = (mw.ustring.gsub(syll[j],"ː",""))
			elseif phonetic then
				syll[j] = phonetic_vowels[syll[j]] or syll[j]
			end
		end
	end

	for i, syll in ipairs(syllables) do
		local stressed = (i == accent)
		local final = syll[#syll]

		-- Ecclesiastical phonetic: a stressed syllable ending in a vowel gets a long vowel
		if eccl and stressed and phonetic and vowels[final] then
			syll[#syll] = lengthen_vowel[final] or final..'ː'
		end

		-- Blank out the second of two identical neighbouring phonemes
		for j = 1, #syll - 1 do
			if syll[j] == syll[j+1] then
				syll[j+1] = ""
			end
		end

		-- Flatten the syllable, prefixing the stress mark where it belongs
		syllables[i] = (stressed and "ˈ" or "") .. table.concat(syll)
	end

	-- The stress mark replaces the syllable separator in front of it
	word = (mw.ustring.gsub(table.concat(syllables, "."), "%.ˈ", "ˈ"))

	if phonetic then
		local rules = eccl and phonetic_rules_eccl or phonetic_rules
		for _, rule in ipairs(rules) do
			word = mw.ustring.gsub(word,rule[1],rule[2])
		end
	elseif eccl then
		word = mw.ustring.gsub(word,"ɔ","o")
	end

	return word
end
 
 
--- Transcribe a phrase word by word.
-- The text is lower-cased, sentence punctuation and hyphens are stripped,
-- breve vowels and ligatures are replaced via the breves table, and the
-- remaining text is validated before each space-separated word is converted.
-- @param words     the text to transcribe
-- @param phonetic  passed through to convert_word
-- @param eccl      passed through to convert_word
-- @return the transcriptions joined by single spaces
-- Raises an error if a character outside the accepted set remains.
local function convert_words(words, phonetic, eccl)
	local text = mw.ustring.lower(words)
	text = mw.ustring.gsub(text,'[,?!:;()"%-]', '')
	text = mw.ustring.gsub(text,'[ăĕĭŭæœ]', breves)

	local offending = mw.ustring.find(text,'[^a-zA-Zāēīōūȳ,.?!:;()\'" ]')
	if offending then
		error('no extra punctuations allowed than sentence markers')
	end

	local pieces = {}
	for word in mw.text.gsplit(text, " ") do
		pieces[#pieces + 1] = convert_word(word, phonetic, eccl)
	end

	return table.concat(pieces, " ")
end
 
 
--- Main entry point: transcribe a word or phrase.
-- Can be called from Lua with a string, or via #invoke with a frame. In the
-- frame case the text comes from the first positional argument, falling back
-- to the title of the current page, and `eccl` is read from the frame's
-- "eccl" argument (overriding the parameter).
-- @param words     string to transcribe, or a frame object
-- @param phonetic  truthy for the phonetic transcription
-- @param eccl      truthy for Ecclesiastical pronunciation
-- @return the transcription as a string
function export.show(words, phonetic, eccl)
	if type(words) == "table" then -- assume a frame
		local frame = words
		eccl = frame.args["eccl"]
		-- Choose the source text first and lower-case it afterwards. Calling
		-- :lower() on a missing argument raised an error before the page
		-- title fallback could ever be reached.
		words = (frame.args[1] or mw.title.getCurrentTitle().text):lower()
	end

	return convert_words(words, phonetic, eccl)
end
 
 
--- Entry point for the phonetic transcription: the same as export.show with
-- the `phonetic` flag switched on.
-- @param word  string to transcribe, or a frame object
-- @return whatever export.show returns
function export.allophone(word)
	local phonetic = true
	return export.show(word, phonetic)
end
 
 
-- Prefixes that export.track looks for in front of "i" + vowel.
local unassimilated_prefixes = {
	'ab',
	'ad',
	'circum',
	'con',
	'contrā',
	'dis',
	'ex',
	'īn',
	'inter',
	'ob',
	'per',
	'sub',
	'subter',
}
 
 
--- Scan a word or phrase for a prefix from unassimilated_prefixes followed
-- by "i" + vowel.
-- The action for a match is currently commented out, so this function has no
-- effect and returns nothing; the scan is kept so that the category can be
-- re-enabled by uncommenting one line.
-- @param words  string to inspect, or a frame object (first positional
--               argument, falling back to the current page title)
function export.track(words)
	if type(words) == "table" then -- assume a frame
		-- Choose the source text first and lower-case it afterwards. Calling
		-- :lower() on a missing argument raised an error before the page
		-- title fallback could ever be reached.
		words = (words.args[1] or mw.title.getCurrentTitle().text):lower()
	end

	words = mw.ustring.lower(words)
	words = mw.ustring.gsub(words,'[,.?!:;()\'"%-]','')
	words = mw.ustring.gsub(words,'[ăĕĭŭæœ]', breves)

	for word in mw.text.gsplit(words, " ") do
		for _,prefix in ipairs(unassimilated_prefixes) do
			if mw.ustring.find(word,prefix..'i[aeiouāēīōū]') then
				--return '[[Category:Latin terms needing attention]]'
			end
		end
	end
end
 
return export