Module:grc-pronunciation/sandbox: difference between revisions

From Wiktionary, the free dictionary
Jump to navigation Jump to search
Content deleted Content added
No edit summary
No edit summary
Line 248: Line 248:
rawCondition = possP[1]
rawCondition = possP[1]
rawResult = possP[2]
rawResult = possP[2]
local condition = decode(rawCondition, x)
local condition = decode(rawCondition, x, term)
if condition then
if condition then
if type(rawResult) == 'string' then
if type(rawResult) == 'string' then

Revision as of 06:39, 29 January 2017


Data for the sandbox module: Module:grc-pronunciation/sandbox/data.

Testcases

Lua error in package.lua at line 80: module 'Module:a' not found


local export = {}

local m_accent = require('Module:grc-accent')
-- [[Module:grc-utilities]] converts sequences of diacritics to the order required by this module,
-- then replaces combining macrons and breves with spacing ones.
local rearrangeDiacritics = require("Module:grc-utilities").pronunciationOrder
local m_data = mw.loadData("Module:grc-pronunciation/data")
local m_IPA = require("Module:IPA")
local m_a = require("Module:a")
local lang = require("Module:languages").getByCode("grc")
local tag_text = require("Module:script utilities").tag_text

-- Codes of the historical periods for which a transcription is produced,
-- in the order in which they are processed and displayed.
local periods = {'cla', 'koi1', 'koi2', 'byz1', 'byz2'}
-- Subset of the periods shown in the collapsed, single-line view.
local inlinePeriods = {'cla', 'koi2', 'byz2'}
-- Per-period working storage; export.create re-initialises every entry
-- on each call before filling it.
local IPAs = {}

-- Short aliases for the Unicode-aware string functions of mw.ustring.
local rfind = mw.ustring.find
local usub = mw.ustring.sub
local rmatch = mw.ustring.match
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local ulower = mw.ustring.lower
local U = mw.ustring.char

-- Combining diacritics are tricky, so they are built from code points
-- instead of being typed literally.
local tie = U(0x35C)   -- U+035C combining double breve below, used as the tie bar
local high = U(0x341)  -- combining acute tone mark
local low = U(0x340)  -- combining grave tone mark
local midHigh = U(0x1DC4)  -- mid–high pitch
local midLow = U(0x1DC6)  -- mid–low pitch
local highMid = U(0x1DC7)  -- high–mid pitch
local short = U(0x306)  -- short vowel length: combining breve

-- Greek vowels and digraphs that preFront treats as "front"; keys are
-- compared after accents have been stripped.
local fronts = {['ι']=true, ['η']=true, ['ε']=true, ['υ']=true, ['αι']=true, ['οι']=true}

-- First letters that can form a diphthong with a following υ (uStart)
-- or a following ι (iStart).
local uStart = {['α'] = true, ['ε'] = true, ['ο'] = true}
local iStart = {['α'] = true, ['ε'] = true, ['ο'] = true, ['υ'] = true}

-- The sets below classify single characters of the generated IPA string
-- (not of the Greek input).

-- IPA consonant symbols.
local iCons = {['b']=true, ['ɡ']=true, ['ŋ']=true, ['d']=true, ['z']=true, 
	['k']=true, ['l']=true, ['m']=true, ['n']=true, ['p']=true, ['r']=true, 
	['s']=true, ['t']=true, ['β']=true, ['ð']=true, ['ɣ']=true, ['ɸ']=true, 
	['θ']=true, ['x']=true, ['f']=true, ['v']=true, ['ɟ']=true, ['ʝ']=true, 
	['c']=true, ['ç']=true}

-- IPA symbols that start or continue a syllable nucleus; the glides
-- w, j and ʍ are deliberately listed here.
local iVows = {['a']=true, ['e']=true, ['i']=true, ['o']=true, ['u']=true, 
	['y']=true, ['w']=true, ['j']=true, ['ɛ']=true, ['ɔ']=true, ['ʍ']=true}

-- Pitch, length, aspiration and voicelessness marks that attach to the
-- preceding symbol.
local iDias = {[high]=true, [low]=true, [midHigh]=true, [midLow]=true, [highMid]=true, [short]=true,
	['ʰ']=true, ['ː']=true, ['̥']=true}

-- Consonants that may be the second member of a syllable-initial
-- stop + consonant cluster.
local iLiqs = {['r']=true, ['l']=true, ['n']=true}

-- Consonants that may be the first member of such a cluster.
local iStops = {['b']=true, ['ɡ']=true, ['d']=true, ['k']=true, ['p']=true, 
	['t']=true, ['β']=true, ['ð']=true, ['ɣ']=true, ['ɸ']=true, ['θ']=true, 
	['x']=true, ['f']=true}

-- Tests whether the text that starts `which` characters after position x+1
-- of `term` is a front vowel according to the `fronts` set.
--   term  : the word being transcribed
--   x     : index of the letter currently being processed
--   which : extra offset (number or numeric string) added to x+1
-- Returns a truthy value when either the single letter at that position or
-- the two-letter sequence beginning there is listed in `fronts` once its
-- accents are stripped. A two-letter sequence is rejected when its second
-- letter is an iota carrying a diaeresis.
local function preFront(term, x, which)
	local pos = x + 1 + which
	local first = usub(term, pos, pos)

	-- A lone front vowel settles the question immediately.
	local single = fronts[m_accent.strip_accent(first)]
	if single then
		return single
	end

	-- Otherwise look at the digraph formed with the following letter.
	local second = usub(term, pos + 1, pos + 1)
	local digraph = fronts[m_accent.strip_accent(first .. second)]
	return digraph and not rmatch(second, '[ϊΐῒῗ]')
end

-- Inserts syllable boundaries into the transcription of every period.
--   IPAs : table keyed by period code; each entry has a string field `IPA`.
-- For each period the string is consumed from the left, one syllable at a
-- time, and `IPA` is replaced by the syllables joined with '.'. A stress
-- mark 'ˈ' found inside a syllable is moved to the front of that syllable
-- (where it also replaces the '.'), or dropped when the whole word is a
-- single syllable. An empty `IPA` is left unchanged.
-- Returns the same `IPAs` table, modified in place.
local function syllabify(IPAs)
	for _, period in ipairs(periods) do
		local word = IPAs[period].IPA
		local syllables = {}
		local cVowel, nVowel, sBreak, stress, wordEnd, searching
		while word ~= '' do
			cVowel, nVowel, sBreak, stress = false, false, false, false

			-- Step 1: find the index of the last character belonging to the
			-- first vowel, i.e. the vowel plus any marks attached to it.
			searching = 1
			local cVowelFound = false
			while not cVowel do
				local letter = usub(word, searching, searching)
				if cVowelFound then
					if iVows[letter] or iCons[letter] or letter == '' or letter == 'ˈ' then
						-- The vowel ended on the previous character.
						cVowel = searching - 1
					else
						-- A tie bar joins the vowel to the next symbol, so the
						-- search for the vowel's end starts over after it.
						-- Any other mark simply belongs to the current vowel.
						if letter == tie then
							cVowelFound = false
						end
						searching = searching + 1
					end
				else
					if iVows[letter] then
						cVowelFound = true
					elseif letter == 'ˈ' then
						stress = true
					elseif letter == '' then
						-- No vowel at all: treat the remainder as one syllable.
						-- cVowel must stay numeric because it is used in
						-- arithmetic right after this loop.
						cVowel = ulen(word)
						wordEnd = true
					end
					searching = searching + 1
				end
			end

			-- Step 2: find the next vowel (or stress mark), or the end.
			searching = cVowel + 1
			while (not nVowel) and (not wordEnd) do
				local letter = usub(word, searching, searching)
				if iVows[letter] or letter == 'ˈ' then
					nVowel = searching
				elseif letter == '' then
					wordEnd = true
				else
					searching = searching + 1
				end
			end

			-- Step 3: decide where the syllable ends, looking backwards from
			-- the next vowel so that permitted onsets stay together.
			if wordEnd then
				sBreak = ulen(word)
			elseif iLiqs[usub(word, nVowel-1, nVowel-1)] then
				if iStops[usub(word, nVowel-2, nVowel-2)] then
					-- stop + liquid
					sBreak = nVowel - 3
				elseif usub(word, nVowel-2, nVowel-2) == 'ʰ' and iStops[usub(word, nVowel-3, nVowel-3)] then
					-- aspirated stop + liquid
					sBreak = nVowel - 4
				else
					sBreak = nVowel - 2
				end
			elseif iCons[usub(word, nVowel-1, nVowel-1)] then
				sBreak = nVowel - 2
			elseif usub(word, nVowel-1, nVowel-1) == 'ʰ' and iStops[usub(word, nVowel-2, nVowel-2)] then
				-- aspirated stop
				sBreak = nVowel - 3
			elseif usub(word, nVowel-1, nVowel-1) == '̥' and usub(word, nVowel-2, nVowel-2) == 'r' then
				-- voiceless r (r + combining ring below)
				sBreak = nVowel - 3
			else
				sBreak = nVowel - 1
			end

			-- Pull everything up to and including the syllable break.
			local syllable = usub(word, 1, sBreak)

			-- A stress mark goes to the beginning of its syllable, unless
			-- the word is monosyllabic, in which case it is removed.
			-- The parentheses discard gsub's second return value.
			if stress then
				if next(syllables) or syllable ~= word then
					syllable = 'ˈ' .. (rsubn(syllable, 'ˈ', ''))
				else
					syllable = (rsubn(syllable, 'ˈ', ''))
				end
				stress = false
			end
			table.insert(syllables, syllable)
			word = usub(word, sBreak+1)
		end

		if #syllables > 0 then
			IPAs[period].IPA = table.concat(syllables, '.')
			-- The stress mark already marks a boundary; drop the dot before it.
			IPAs[period].IPA = rsubn(IPAs[period].IPA, '%.ˈ', 'ˈ')
		end
	end
	return IPAs
end

-- Builds the wikitext/HTML for the collapsible pronunciation box.
--   IPAs  : table keyed by period code; each entry has a string `IPA` and a
--           list `notes`.
--   ambig : list of already formatted ambiguous vowels found in the term.
-- The collapsed ("vsShow") view shows the periods of `inlinePeriods` on one
-- line, joined by arrows; the expanded ("vsHide") view lists every period
-- with its label and notes, followed by a preview-only request to mark
-- vowel length when `ambig` is not empty.
-- Returns the complete markup as one string.
local function make_table(IPAs, ambig)
	local inlineProns = {}
	local listOfProns = {}

	for _, period in ipairs(inlinePeriods) do
		local pron = '/' .. IPAs[period].IPA .. '/'
		table.insert(inlineProns, {pron = pron})
		table.insert(listOfProns, pron)
	end

	-- Character count of the collapsed line; used below to size the box.
	local inlineIPAlength = ulen("IPA(key): " .. table.concat(listOfProns, ' → '))

	local inline = '<div class="vsShow" style="display:none">\n* ' .. m_IPA.format_IPA_full(lang, inlineProns, nil, ' → ') .. '</div>'

	local fullProns = {}
	for _, period in ipairs(periods) do
		local notes = (#IPAs[period].notes > 0) and ('<span class="previewonly"><br>' .. table.concat(IPAs[period].notes, ', ') .. '</span>') or ''
		table.insert(fullProns, '* ' .. m_a.show({'grc-' .. period}) .. ' ' ..  m_IPA.format_IPA_full(lang, {{pron = '/' .. IPAs[period].IPA .. '/'}}) .. notes)
	end

	local ambignote = ''
	-- The table ambig is filled with all the ambiguous vowels that have been found in the term.
	if #ambig > 0 then
		-- Plural suffix, pronoun and verb phrase agreeing with the number
		-- of ambiguous vowels.
		local agreement
		if #ambig > 1 then
			agreement = { 's ', 'each of them', 'they are' }
		else
			agreement = { ' ', 'it', 'it is' }
		end

		-- The paragraph is closed with </p> only; the original also emitted
		-- a </span> that had no matching opening tag.
		ambignote = '\n<p class="previewonly">Mark the vowel length of the ambiguous vowel' .. agreement[1]
			.. mw.text.listToText(ambig) .. ' by adding a macron after ' .. agreement[2]
			.. ' if ' .. agreement[3]  .. ' long, or a breve if ' .. agreement[3] .. ' short. By default, [[Module:grc-pronunciation]] assumes ' .. agreement[3] .. ' short if unmarked.</p>\n'
	end

	local full = '<div class="vsHide">\n' .. table.concat(fullProns, '\n') .. ambignote .. '</div>'

	return '<div class="vsSwitcher vsToggleCategory-pronunciations" style="width: ' .. inlineIPAlength * 0.68 .. 'em;"><span class="vsToggleElement" style="float: right;">&nbsp;</span>' .. inline .. full .. '</div>'
end

-- Evaluates a condition string from the data module against the letters
-- around position `x` of `term`.
--   condition : the condition string (forms listed below)
--   x         : index of the letter currently being processed
--   term      : the word being transcribed
-- Recognised forms, tested in this order:
--   A/B, A+B      "or" / "and" of two sub-conditions. The LAST operator is
--                 split off first, so the first one ends up being decided
--                 first; the function recurses on both halves.
--   N=c           the letter at x+N equals c (a position before the start
--                 of the term counts as the empty string)
--   N.key         the data entry of the letter at x+N has a truthy `key`
--   N-preFront    preFront(term, x, N)
-- Returns a truthy value when the condition holds, a falsy one otherwise
-- (nil for an unrecognised condition).
local function decode(condition, x, term)
	if rfind(condition, '[+/]') then
		local sep = rfind(condition, "[/+][^/+]*$")
		local chunk1 = usub(condition, 1, sep - 1)
		local chunk2 = usub(condition, sep + 1)
		local operator = usub(condition, sep, sep)
		if operator == '/' then
			return decode(chunk1, x, term) or decode(chunk2, x, term)
		elseif operator == '+' then
			return decode(chunk1, x, term) and decode(chunk2, x, term)
		end
	elseif rfind(condition, '=') then
		local sep = rfind(condition, '=')
		local chunk1 = usub(condition, 1, sep - 1)
		local chunk2 = usub(condition, sep + 1)
		local pos = x + chunk1
		-- Indices below 1 would be counted from the end of the string.
		local checkLetter = (pos < 1) and '' or usub(term, pos, pos)
		return checkLetter == chunk2
	elseif rfind(condition, '%.') then
		local sep = rfind(condition, '%.')
		local chunk1 = usub(condition, 1, sep - 1)
		local chunk2 = usub(condition, sep + 1)
		local pos = x + chunk1
		-- Same bounds rule as the '=' form, so that a look-behind at the
		-- start of the term does not wrap around to its end.
		local checkLetter = (pos < 1) and '' or usub(term, pos, pos)
		-- Letters without a data entry (including the empty string past
		-- either end of the term) simply fail the test instead of raising
		-- "attempt to index a nil value".
		local entry = m_data[checkLetter]
		return entry and entry[chunk2]
	elseif rfind(condition, '%-') then
		local sep = rfind(condition, '%-')
		local chunk1 = usub(condition, 1, sep - 1)
		local chunk2 = usub(condition, sep + 1)
		if chunk2 == 'preFront' then
			return preFront(term, x, chunk1)
		end
	end
end

-- Resolves a pronunciation specification from the data module to a string.
--   p      : either the pronunciation itself (a string) or a sequential
--            table of alternatives
--   period : period code; only handed on to recursive calls
--   x      : index of the letter currently being processed
--   term   : the word being transcribed
-- Alternatives are tried in order. A string alternative is returned as is.
-- A table alternative is a pair {condition, result}: when decode() accepts
-- the condition, `result` (a string or a nested specification) is resolved
-- and returned, even if a nested specification yields nothing.
-- Returns nothing when no alternative applies.
local function check(p, period, x, term)
	if type(p) == 'string' then
		return p
	elseif type(p) == 'table' then
		for _, possP in ipairs(p) do
			if type(possP) == 'string' then
				return possP
			elseif type(possP) == 'table' then
				-- Kept local so that nothing leaks into the global table.
				local rawCondition, rawResult = possP[1], possP[2]
				if decode(rawCondition, x, term) then
					-- A string result comes straight back from the first
					-- branch of the recursive call.
					return check(rawResult, period, x, term)
				end
			end
		end
	end
end

-- Adds length and pitch-accent marks to the Classical pronunciation of one
-- vowel.
--   vIP     : the bare pronunciation returned by check()
--   accent  : 'acute', 'grave', 'circum' or nil
--   subi    : truthy when the vowel carries an iota subscript
--   vLength : 'long', 'short' or 'either' (unresolved, treated as short)
-- Returns the decorated pronunciation.
local function markClassicalVowel(vIP, accent, subi, vLength)
	if ulen(vIP) > 1 then
		-- Phonetic diphthong: the accent sits on the second element,
		-- except a circumflex, which is marked on the first.
		if accent == 'acute' then
			return vIP..high
		elseif accent == 'grave' then
			return vIP..low
		elseif accent == 'circum' then
			return usub(vIP, 1, 1)..high..usub(vIP, 2)
		end
		return vIP
	elseif subi then
		-- Iota subscript: long vowel tied to a following j.
		if accent == 'acute' then
			return vIP..tie..'ːj'..high
		elseif accent == 'grave' then
			return vIP..tie..'ːj'..low
		elseif accent == 'circum' then
			return vIP..high..tie..'ːj'
		end
		return vIP..tie..'ːj'
	elseif vLength == 'long' then
		-- Long monophthong: contour marks plus the length sign.
		if accent == 'acute' then
			return vIP..midHigh..'ː'
		elseif accent == 'grave' then
			return vIP..midLow..'ː'
		elseif accent == 'circum' then
			return vIP..highMid..'ː'
		end
		return vIP..'ː'
	end
	-- Short (or unresolved) monophthong.
	if accent == 'acute' then
		return vIP..high
	elseif accent == 'grave' then
		return vIP..low
	end
	return vIP
end

-- Template entry point: produces the pronunciation box for a term.
--   frame : the Scribunto frame; the arguments of its PARENT frame are
--           read. The term is taken from `w`, else from the first
--           positional argument, else from the current page title.
-- The term is lower-cased, final sigma and rho with smooth breathing are
-- normalised, and diacritics are reordered. It is then walked letter by
-- letter; for every period the pronunciation pieces are appended to
-- IPAs[period]. Letters without a data entry are skipped. Finally the
-- pieces are joined, syllabified and formatted.
-- Returns the markup produced by make_table.
function export.create(frame)
	local args = frame:getParent().args
	local term = args['w'] or args[1] or mw.title.getCurrentTitle().text
	-- One call per statement: gsub returns a second value that must not be
	-- handed on as an argument.
	term = ulower(term)
	term = rsubn(term, 'ς', 'σ')
	term = rsubn(term, 'ῤ', 'ρ')
	term = rearrangeDiacritics(term)
	for _, period in ipairs(periods) do
		IPAs[period] = {['notes']={}}
	end
	local length = ulen(term)
	local x = 1
	-- Set when a data entry is neither a consonant nor a vowel; nothing
	-- reports it yet.
	local alert = false
	local ambig = {}
	while x <= length do
		local letter = usub(term, x, x)
		local nextLetter = usub(term, x+1, x+1)
		local letterData = m_data[letter]
		if not letterData then
			-- Unknown character (for instance a length mark): skip it.
		elseif letterData.type == 'consonant' then
			for _, period in ipairs(periods) do
				-- The intermediate local turns "no return value" into nil,
				-- which table.insert accepts.
				local result = check(letterData.p[period], period, x, term)
				table.insert(IPAs[period], result)
			end

		elseif letterData.type == 'vowel' then

			--Start with a diphthong check
			local diphthong = false
			local vCurrent, vForm, vLength
			local nextData = m_data[nextLetter]
			local nextIsVowel = nextData and nextData.type == 'vowel'
			if iStart[letter] and nextIsVowel and (nextLetter == 'ι' or (m_accent.strip_accent(nextLetter) == 'ι' and not nextData.diar)) then
				diphthong = letter..'ι'
			elseif uStart[letter] and nextIsVowel and (nextLetter == 'υ' or (m_accent.strip_accent(nextLetter) == 'υ' and not nextData.diar)) then
				diphthong = letter..'υ'
			end

			if diphthong then
				-- The second letter carries the diacritics; move on to it so
				-- that both letters are consumed together.
				vCurrent = nextLetter
				vForm = diphthong
				vLength = 'long'
				x = x + 1
				nextLetter = usub(term, x+1, x+1)
			else
				vCurrent = letter
				vForm = m_accent.strip_accent(letter)
				vLength = m_data[vCurrent].length or m_data[vForm].length
			end

			local current = m_data[vCurrent]
			for _, period in ipairs(periods) do
				if period == 'cla' then
					if current.breath == 'rough' then
						table.insert(IPAs['cla'], 'h')
					end

					-- Classical vowels are hard.
					-- [[Module:grc-utilities]] converts combining macrons and breves to spacing ones.
					if vLength == 'either' then
						if current.accent == 'circum' then
							vLength = 'long'
						elseif current.subi then
							vLength = 'long'
						elseif nextLetter == '¯' then
							vLength = 'long'
						elseif nextLetter == '˘' then
							vLength = 'short'
						else
							local ambiguousVowel = tag_text(vCurrent, lang, nil, 'term')
							table.insert(IPAs.cla.notes, 'ambiguous vowel ' .. ambiguousVowel
								.. ' at ' .. x)
							table.insert(ambig, ambiguousVowel)
						end
					end

					--vIP stands for initial pronunciation
					local vIP = check(m_data[vForm].p.cla, period, x, term)
					local vFP = markClassicalVowel(vIP, current.accent, current.subi, vLength)
					table.insert(IPAs['cla'], vFP)
				else
					if period == 'koi1' and current.breath == 'rough' then
						table.insert(IPAs['koi1'], '(h)')
					end
					--All other periods have a stress accent, instead of Classical's tonal accent.
					--The stress diacritic is initially placed immediately preceding the vowel,
					--and is moved to the front of the syllable during syllabification.
					if current.accent then
						table.insert(IPAs[period], 'ˈ')
					end
					local vIP = check(m_data[vForm].p[period], period, x, term)
					table.insert(IPAs[period], vIP)
				end
			end
		else
			alert = true
		end
		x = x + 1
	end

	--Concatenate the IPAs
	for _, period in ipairs(periods) do
		IPAs[period] = {['IPA']=table.concat(IPAs[period], ''), ['notes']=IPAs[period]['notes']}
	end

	return make_table(syllabify(IPAs), ambig)
end

return export
--Things we still need:
--Voicing of sigma around (after?) voiced stops. 
--Proper alerts for editors, especially on ambiguous vowels.