Module:grc-pronunciation/sandbox: difference between revisions
Jump to navigation
Jump to search
Content deleted Content added
No edit summary |
No edit summary |
||
Line 248: | Line 248: | ||
rawCondition = possP[1] |
rawCondition = possP[1] |
||
rawResult = possP[2] |
rawResult = possP[2] |
||
local condition = decode(rawCondition, x) |
local condition = decode(rawCondition, x, term) |
||
if condition then |
if condition then |
||
if type(rawResult) == 'string' then |
if type(rawResult) == 'string' then |
Revision as of 06:39, 29 January 2017
- The following documentation is located at Module:grc-pronunciation/sandbox/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox of (diff)
Data for the sandbox module: Module:grc-pronunciation/sandbox/data.
Testcases
Lua error in package.lua at line 80: module 'Module:a' not found
local export = {}
local m_accent = require('Module:grc-accent')
-- [[Module:grc-utilities]] converts sequences of diacritics to the order required by this module,
-- then replaces combining macrons and breves with spacing ones.
local rearrangeDiacritics = require("Module:grc-utilities").pronunciationOrder
local m_data = mw.loadData("Module:grc-pronunciation/data")
local m_IPA = require("Module:IPA")
local m_a = require("Module:a")
local lang = require("Module:languages").getByCode("grc")
local tag_text = require("Module:script utilities").tag_text
local periods = {'cla', 'koi1', 'koi2', 'byz1', 'byz2'}
local inlinePeriods = {'cla', 'koi2', 'byz2'}
local IPAs = {}
local rfind = mw.ustring.find
local usub = mw.ustring.sub
local rmatch = mw.ustring.match
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local ulower = mw.ustring.lower
local U = mw.ustring.char
--Combining diacritics are tricky.
-- The values below are single code points appended to IPA vowels by export.create
-- and recognised again by syllabify.
local tie = U(0x35C) --Tie bar
local high = U(0x341) -- combining acute tone mark
local low = U(0x340) -- combining grave tone mark
local midHigh = U(0x1DC4) -- mid–high pitch
local midLow = U(0x1DC6) -- mid–low pitch
local highMid = U(0x1DC7) -- high–mid pitch
local short = U(0x306) -- short vowel length: combining breve
-- Greek letters and digraphs (accents stripped) that preFront accepts as a match.
local fronts = {['ι']=true, ['η']=true, ['ε']=true, ['υ']=true, ['αι']=true, ['οι']=true}
-- Greek letters that export.create allows as the first element of a diphthong ending in υ.
local uStart = {['α'] = true, ['ε'] = true, ['ο'] = true}
-- Greek letters that export.create allows as the first element of a diphthong ending in ι.
local iStart = {['α'] = true, ['ε'] = true, ['ο'] = true, ['υ'] = true}
-- IPA symbols that syllabify treats as consonants.
local iCons = {['b']=true, ['ɡ']=true, ['ŋ']=true, ['d']=true, ['z']=true,
['k']=true, ['l']=true, ['m']=true, ['n']=true, ['p']=true, ['r']=true,
['s']=true, ['t']=true, ['β']=true, ['ð']=true, ['ɣ']=true, ['ɸ']=true,
['θ']=true, ['x']=true, ['f']=true, ['v']=true, ['ɟ']=true, ['ʝ']=true,
['c']=true, ['ç']=true}
-- IPA symbols that syllabify treats as vowels when looking for a syllable nucleus
-- (note that the glides w, j and ʍ are included).
local iVows = {['a']=true, ['e']=true, ['i']=true, ['o']=true, ['u']=true,
['y']=true, ['w']=true, ['j']=true, ['ɛ']=true, ['ɔ']=true, ['ʍ']=true}
-- Diacritics and modifier symbols that syllabify skips over while scanning past a vowel.
local iDias = {[high]=true, [low]=true, [midHigh]=true, [midLow]=true, [highMid]=true, [short]=true,
['ʰ']=true, ['ː']=true, ['̥']=true}
-- Symbols that syllabify checks for immediately before the next vowel; when one is
-- preceded by a member of iStops, both are kept together in the following syllable.
local iLiqs = {['r']=true, ['l']=true, ['n']=true}
-- Symbols that syllabify keeps together with a following iLiqs member or 'ʰ'.
local iStops = {['b']=true, ['ɡ']=true, ['d']=true, ['k']=true, ['p']=true,
['t']=true, ['β']=true, ['ð']=true, ['ɣ']=true, ['ɸ']=true, ['θ']=true,
['x']=true, ['f']=true}
-- Reports whether the letter(s) following position x + which in term are listed
-- in `fronts` once their accents are stripped.
--   term:  the word being transcribed
--   x:     index of the current letter in term
--   which: extra offset added to x (a number, or a numeric string coerced by arithmetic)
-- Returns a truthy value when the next letter alone matches, or when the next two
-- letters together match and the second one is not one of ϊ ΐ ῒ ῗ; otherwise nil or false.
local function preFront(term, x, which)
	local firstPos = x + 1 + which
	local secondPos = x + 2 + which
	local let1 = usub(term, firstPos, firstPos)
	local let2 = usub(term, secondPos, secondPos)

	-- A single matching letter is enough.
	local single = fronts[m_accent.strip_accent(let1)]
	if single then
		return single
	end

	-- Otherwise try the two letters as a digraph.
	local pair = fronts[m_accent.strip_accent(let1 .. let2)]
	if not pair then
		return pair
	end
	return not rmatch(let2, '[ϊΐῒῗ]')
end
-- Splits the IPA string of every period into syllables.
--   IPAs: table keyed by period name; each entry has an `IPA` string field.
-- For each period, IPAs[period].IPA is rewritten with '.' between syllables and
-- with any stress mark 'ˈ' moved to the start of its syllable (the '.' before a
-- stress mark is dropped). A stress mark on a monosyllabic word is removed.
-- Returns the same IPAs table.
local function syllabify(IPAs)
	for _, period in ipairs(periods) do
		local word = IPAs[period].IPA
		local syllables = {}
		local cVowel, nVowel, sBreak, stress, wordEnd, searching
		-- Scratch variables of the scanning loops; kept local so they do not leak
		-- into the global environment.
		local cVowelFound, letter
		while word ~= '' do
			cVowel, nVowel, sBreak, stress, wordEnd = false, false, false, false, false
			--First thing is to find the first vowel.
			searching = 1
			cVowelFound = false
			while not cVowel do
				letter = usub(word, searching, searching)
				if cVowelFound then
					if iVows[letter] or iCons[letter] or letter == '' or letter == 'ˈ' then
						-- The vowel (plus any diacritics) ended on the previous character.
						cVowel = searching - 1
					elseif iDias[letter] then
						searching = searching + 1
					elseif letter == tie then
						-- A tie bar joins this vowel to the next one: keep looking.
						cVowelFound = false
						searching = searching + 1
					else
						searching = searching + 1
					end
				else
					if iVows[letter] then
						cVowelFound = true
					elseif letter == 'ˈ' then
						stress = true
					elseif letter == '' then
						-- No vowel at all in the remaining text (e.g. a lone consonant).
						-- cVowel must stay a number because it is used in arithmetic
						-- below; the whole remainder becomes one syllable.
						cVowel = ulen(word)
						wordEnd = true
					end
					searching = searching + 1
				end
			end
			--Next we try and find the next vowel or the end.
			searching = cVowel + 1
			while (not nVowel) and (not wordEnd) do
				letter = usub(word, searching, searching)
				if iVows[letter] or letter == 'ˈ' then
					nVowel = searching
				elseif letter == '' then
					wordEnd = true
				else
					searching = searching + 1
				end
			end
			--Finally we find the syllable break point.
			if wordEnd then
				sBreak = ulen(word)
			elseif iLiqs[usub(word, nVowel-1, nVowel-1)] then
				if iStops[usub(word, nVowel-2, nVowel-2)] then
					sBreak = nVowel - 3
				elseif usub(word, nVowel-2, nVowel-2) == 'ʰ' and iStops[usub(word, nVowel-3, nVowel-3)] then
					sBreak = nVowel - 4
				else
					sBreak = nVowel - 2
				end
			elseif iCons[usub(word, nVowel-1, nVowel-1)] then
				sBreak = nVowel - 2
			elseif usub(word, nVowel-1, nVowel-1) == 'ʰ' and iStops[usub(word, nVowel-2, nVowel-2)] then
				sBreak = nVowel - 3
			elseif usub(word, nVowel-1, nVowel-1) == '̥' and usub(word, nVowel-2, nVowel-2) == 'r' then
				sBreak = nVowel - 3
			else
				sBreak = nVowel - 1
			end
			--Pull everything up to and including the syllable Break.
			local syllable = usub(word, 1, sBreak)
			--If there is a stress accent, then we need to move it to the
			--beginning of the syllable, unless it is a monosyllabic word,
			--in which case we remove it altogether.
			if stress then
				if next(syllables) or syllable ~= word then
					syllable = 'ˈ' .. rsubn(syllable, 'ˈ', '')
				else
					syllable = rsubn(syllable, 'ˈ', '')
				end
				stress = false
			end
			table.insert(syllables, syllable)
			word = usub(word, sBreak+1)
		end
		if #syllables > 0 then
			IPAs[period].IPA = table.concat(syllables, '.')
			IPAs[period].IPA = rsubn(IPAs[period].IPA, '%.ˈ', 'ˈ')
		end
	end
	return IPAs
end
-- Builds the wikitext/HTML for the collapsible pronunciation box.
--   IPAs:  table keyed by period name; each entry has an `IPA` string and a
--          `notes` list of strings.
--   ambig: list of already formatted ambiguous vowels found in the term.
-- Returns a string containing a "vsSwitcher" div with a one-line summary (the
-- periods in inlinePeriods) and a full list (all periods), followed by a
-- preview-only note when ambig is not empty.
local function make_table(IPAs, ambig)
	--Final format
	local inlineProns = {}
	local listOfProns = {}
	for _, period in ipairs(inlinePeriods) do
		local pron = '/' .. IPAs[period].IPA .. '/'
		table.insert(inlineProns, {pron = pron})
		table.insert(listOfProns, pron)
	end
	-- Character count of the summary line, used below to size the box width.
	local inlineIPAlength = ulen("IPA(key): " .. table.concat(listOfProns, ' → '))
	local inline = '<div class="vsShow" style="display:none">\n* ' .. m_IPA.format_IPA_full(lang, inlineProns, nil, ' → ') .. '</div>'
	local fullProns = {}
	for _, period in ipairs(periods) do
		local notes = (#IPAs[period].notes > 0) and ('<span class="previewonly"><br>' .. table.concat(IPAs[period].notes, ', ') .. '</span>') or ''
		table.insert(fullProns, '* ' .. m_a.show({'grc-' .. period}) .. ' ' .. m_IPA.format_IPA_full(lang, {{pron = '/' .. IPAs[period].IPA .. '/'}}) .. notes)
	end
	local ambignote = ''
	-- The table ambig is filled with all the ambiguous vowels that have been found in the term.
	if #ambig > 0 then
		-- Number agreement: plural suffix, pronoun, and pronoun + verb.
		local agreement
		if #ambig > 1 then
			agreement = { 's ', 'each of them', 'they are' }
		else
			agreement = { ' ', 'it', 'it is' }
		end
		-- The note is a single <p> element; it must be closed by </p> only
		-- (no <span> is opened here).
		ambignote = '\n<p class="previewonly">Mark the vowel length of the ambiguous vowel' .. agreement[1]
			.. mw.text.listToText(ambig) .. ' by adding a macron after ' .. agreement[2]
			.. ' if ' .. agreement[3] .. ' long, or a breve if ' .. agreement[3] .. ' short. By default, [[Module:grc-pronunciation]] assumes ' .. agreement[3] .. ' short if unmarked.</p>\n'
	end
	local full = '<div class="vsHide">\n' .. table.concat(fullProns, '\n') .. ambignote .. '</div>'
	return '<div class="vsSwitcher vsToggleCategory-pronunciations" style="width: ' .. inlineIPAlength * 0.68 .. 'em;"><span class="vsToggleElement" style="float: right;"> </span>' .. inline .. full .. '</div>'
end
-- Evaluates a condition string from the data module against the term.
--   condition: condition string (see the forms below)
--   x:         index of the current letter in term
--   term:      the word being transcribed
-- Recognised forms, tested in this order:
--   A/B, A+B     "or" / "and" of two sub-conditions
--   N=c          the letter at offset N from x equals c ('' before the start)
--   N.field      the data entry of the letter at offset N has a truthy `field`
--   N-preFront   preFront(term, x, N) holds
-- Returns a truthy value when the condition holds, otherwise nil or false.
local function decode(condition, x, term)
	-- "If" and "and" statements.
	-- Note that we're finding the last operator first,
	-- which means that the first will ultimately get decided first.
	-- It's a recursive function.
	if rfind(condition, '[+/]') then
		local sep = rfind(condition, "[/+][^/+]*$")
		local chunk1 = usub(condition, 1, sep - 1)
		local chunk2 = usub(condition, sep + 1)
		local operator = usub(condition, sep, sep)
		if operator == '/' then
			return decode(chunk1, x, term) or decode(chunk2, x, term)
		elseif operator == '+' then
			return decode(chunk1, x, term) and decode(chunk2, x, term)
		end
	elseif rfind(condition, '=') then
		local sep = rfind(condition, '=')
		local chunk1 = usub(condition, 1, sep - 1)
		local chunk2 = usub(condition, sep + 1)
		-- chunk1 is a numeric string; the addition coerces it to a number.
		local position = x + chunk1
		local checkLetter
		if position < 1 then
			-- Before the start of the term there is no letter.
			checkLetter = ''
		else
			checkLetter = usub(term, position, position)
		end
		return checkLetter == chunk2
	elseif rfind(condition, '%.') then
		local sep = rfind(condition, '%.')
		local chunk1 = usub(condition, 1, sep - 1)
		local chunk2 = usub(condition, sep + 1)
		local checkLetter = usub(term, x + chunk1, x + chunk1)
		-- The offset may point past the end of the term, or at a character with
		-- no data entry; treat that as "condition not met" instead of raising
		-- an "attempt to index a nil value" error.
		local entry = m_data[checkLetter]
		return entry and entry[chunk2]
	elseif rfind(condition, '%-') then
		local sep = rfind(condition, '%-')
		local chunk1 = usub(condition, 1, sep - 1)
		local chunk2 = usub(condition, sep + 1)
		if chunk2 == 'preFront' then
			return preFront(term, x, chunk1)
		end
	end
end
-- Resolves a pronunciation specification from the data module to a string.
--   p:      either a string (returned as is) or a sequential table whose entries
--           are strings or {condition, result} pairs
--   period: name of the period being processed (passed through on recursion;
--           not otherwise used here)
--   x:      index of the current letter in term
--   term:   the word being transcribed
-- Entries are tried in order: a plain string is returned immediately; a pair is
-- used when decode() accepts its condition, in which case its result is returned
-- (recursing when the result is itself a table).
-- Returns the chosen string, or nil when nothing applies.
local function check(p, period, x, term)
	if type(p) == 'string' then
		return p
	elseif type(p) == 'table' then --This table is sequential, with a variable number of entries.
		for _, possP in ipairs(p) do
			if type(possP) == 'string' then
				return possP
			elseif type(possP) == 'table' then --This table is paired, with two values: a condition and a result.
				-- Declared local so they do not leak into the global environment.
				local rawCondition = possP[1]
				local rawResult = possP[2]
				if decode(rawCondition, x, term) then
					if type(rawResult) == 'string' then
						return rawResult
					else
						return check(rawResult, period, x, term)
					end
				end
			end
		end
	end
end
-- Template entry point: transcribes an Ancient Greek term into IPA for every
-- period in `periods` and returns the formatted pronunciation box.
--   frame: Scribunto frame object; the term is read from the parent frame's
--          `w` argument, then its first positional argument, then the title
--          of the current page.
-- Returns the string produced by make_table().
function export.create(frame)
	local args = frame:getParent().args
	local term = args['w'] or args[1] or mw.title.getCurrentTitle().text
	term = ulower(term)
	term = rsubn(term, 'ς', 'σ')
	term = rsubn(term, 'ῤ', 'ρ')
	term = rearrangeDiacritics(term)
	-- IPAs is shared module state, so start from a clean slate on every call.
	for _, period in ipairs(periods) do
		IPAs[period] = {['notes']={}}
	end
	local length = ulen(term)
	local x = 1
	local ambig = {}
	while x <= length do
		local letter = usub(term, x, x)
		local nextLetter = usub(term, x+1, x+1)
		if not m_data[letter] then
			-- Characters without a data entry are skipped silently.
		elseif m_data[letter].type == 'consonant' then
			for _, period in ipairs(periods) do
				local currentP = m_data[letter].p[period]
				local result = check(currentP, period, x, term)
				table.insert(IPAs[period], result)
			end
		elseif m_data[letter].type == 'vowel' then
			--Start with a diphthong check
			local diphthong = false
			local vCurrent, vForm, vLength
			if iStart[letter]
				and (m_data[nextLetter] and m_data[nextLetter].type == 'vowel'
					and (nextLetter == 'ι' or (m_accent.strip_accent(nextLetter) == 'ι' and not m_data[nextLetter].diar))) then
				diphthong = letter..'ι'
			elseif uStart[letter]
				and (m_data[nextLetter] and m_data[nextLetter].type == 'vowel'
					and (nextLetter == 'υ' or (m_accent.strip_accent(nextLetter) == 'υ' and not m_data[nextLetter].diar))) then
				diphthong = letter..'υ'
			end
			if diphthong then
				-- The second letter carries the diacritics; consume it as well.
				vCurrent = nextLetter
				vForm = diphthong
				vLength = 'long'
				x = x + 1
				nextLetter = usub(term, x+1, x+1)
			else
				vCurrent = letter
				vForm = m_accent.strip_accent(letter)
				vLength = m_data[vCurrent].length or m_data[vForm].length
			end
			for _, period in ipairs(periods) do
				if period == 'cla' then
					if m_data[vCurrent].breath == 'rough' then
						table.insert(IPAs['cla'], 'h')
					end
				elseif period == 'koi1' then
					if m_data[vCurrent].breath == 'rough' then
						table.insert(IPAs['koi1'], '(h)')
					end
				end
				if period ~= 'cla' then
					--All other periods have a stress accent, instead of Classical's tonal accent.
					--The stress diacritic is initially placed immediately preceding the vowel,
					--and is moved to the front of the syllable during syllabification.
					if m_data[vCurrent].accent then
						table.insert(IPAs[period], 'ˈ')
					end
					local vIP = check(m_data[vForm].p[period], period, x, term)
					table.insert(IPAs[period], vIP)
				else
					-- Classical vowels are hard.
					-- [[Module:grc-utilities]] converts combining macrons and breves to spacing ones.
					if vLength == 'either' then
						if m_data[vCurrent].accent == 'circum' then
							vLength = 'long'
						elseif m_data[vCurrent].subi then
							vLength = 'long'
						elseif nextLetter == '¯' then
							vLength = 'long'
						elseif nextLetter == '˘' then
							vLength = 'short'
						else
							-- Length left unmarked: record it so editors are told in preview.
							local ambiguousVowel = tag_text(vCurrent, lang, nil, 'term')
							table.insert(IPAs.cla.notes, 'ambiguous vowel ' .. ambiguousVowel
								.. ' at ' .. x)
							table.insert(ambig, ambiguousVowel)
						end
					end
					local vIP = check(m_data[vForm].p.cla, 'cla', x, term) --vIP stands for initial pronunciation
					-- vFP is the final pronunciation: vIP plus pitch and length marks.
					local vFP
					--There has to be a prettier way to do this, but I just can't think of it.
					if ulen(vIP) > 1 then --i.e. if it's a phonetic diphthong
						if m_data[vCurrent].accent == 'acute' then
							vFP = vIP..high
						elseif m_data[vCurrent].accent == 'grave' then
							vFP = vIP..low
						elseif m_data[vCurrent].accent == 'circum' then
							vFP = usub(vIP, 1, 1)..high..usub(vIP, 2)
						else
							vFP = vIP
						end
					elseif m_data[vCurrent].subi then
						if m_data[vCurrent].accent == 'acute' then
							vFP = vIP..tie..'ːj'..high
						elseif m_data[vCurrent].accent == 'grave' then
							vFP = vIP..tie..'ːj'..low
						elseif m_data[vCurrent].accent == 'circum' then
							vFP = vIP..high..tie..'ːj'
						else
							vFP = vIP..tie..'ːj'
						end
					elseif vLength == 'long' then
						if m_data[vCurrent].accent == 'acute' then
							vFP = vIP..midHigh..'ː'
						elseif m_data[vCurrent].accent == 'grave' then
							vFP = vIP..midLow..'ː'
						elseif m_data[vCurrent].accent == 'circum' then
							vFP = vIP..highMid..'ː'
						else
							vFP = vIP..'ː'
						end
					else
						if m_data[vCurrent].accent == 'acute' then
							vFP = vIP..high
						elseif m_data[vCurrent].accent == 'grave' then
							vFP = vIP..low
						else
							vFP = vIP
						end
					end
					table.insert(IPAs['cla'], vFP)
				end
			end
		end
		x = x + 1
	end
	--Concatenate the IPAs
	for _, period in ipairs(periods) do
		IPAs[period] = {['IPA']=table.concat(IPAs[period], ''), ['notes']=IPAs[period]['notes']}
	end
	return make_table(syllabify(IPAs), ambig)
end
return export
--Things we still need:
--Voicing of sigma around (after?) voiced stops.
--Proper alerts for editors, especially on ambiguous vowels.