Module:grc-pronunciation/sandbox: difference between revisions
Jump to navigation
Jump to search
Content deleted Content added
No edit summary |
transcribing long diphthongs with i and nonsyllabic diacritic |
||
Line 32: | Line 32: | ||
--Combining diacritics are tricky. |
--Combining diacritics are tricky. |
||
local tie = U(0x35C) -- tie bar |
local tie = U(0x35C) -- tie bar |
||
local nonsyllabic = U(0x32F) |
|||
local high = U(0x341) -- combining acute tone mark |
local high = U(0x341) -- combining acute tone mark |
||
local low = U(0x340) -- combining grave tone mark |
local low = U(0x340) -- combining grave tone mark |
||
Line 235: | Line 236: | ||
elseif m_data[vCurrent].subi then |
elseif m_data[vCurrent].subi then |
||
if m_data[vCurrent].accent == 'acute' then |
if m_data[vCurrent].accent == 'acute' then |
||
vFP = vIP |
vFP = vIP..'ːi'..nonsyllabic..high |
||
elseif m_data[vCurrent].accent == 'grave' then |
elseif m_data[vCurrent].accent == 'grave' then |
||
vFP = vIP |
vFP = vIP..'ːi'..nonsyllabic..low |
||
elseif m_data[vCurrent].accent == 'circum' then |
elseif m_data[vCurrent].accent == 'circum' then |
||
vFP = vIP..high |
vFP = vIP..high..'ːi'..nonsyllabic |
||
else |
else |
||
vFP = vIP |
vFP = vIP..'ːi'..nonsyllabic |
||
end |
end |
||
elseif vLength == 'long' then |
elseif vLength == 'long' then |
Revision as of 07:29, 30 January 2017
- The following documentation is located at Module:grc-pronunciation/sandbox/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox of (diff)
Data for the sandbox module: Module:grc-pronunciation/sandbox/data.
Testcases
Lua error in package.lua at line 80: module 'Module:a' not found
local export = {}
local strip_accent = require('Module:grc-accent').strip_accent
-- [[Module:grc-utilities]] converts sequences of diacritics to the order required by this module,
-- then replaces combining macrons and breves with spacing ones.
local rearrangeDiacritics = require("Module:grc-utilities").pronunciationOrder
local m_data = mw.loadData("Module:grc-pronunciation/sandbox/data")
local m_IPA = require("Module:IPA")
local m_a = require("Module:a")
local lang = require("Module:languages").getByCode("grc")
local m_links = require("Module:links")
local tag_text = require("Module:script utilities").tag_text
local periods = {'cla', 'koi1', 'koi2', 'byz1', 'byz2'}
local inlinePeriods = {'cla', 'koi2', 'byz2'}
local rfind = mw.ustring.find
local usub = mw.ustring.sub
local rmatch = mw.ustring.match
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local ulower = mw.ustring.lower
local U = mw.ustring.char
-- Return the single character of `s` at character position `i`.
-- Single-character lookups are done all over this module, hence the helper.
-- An out-of-bounds position gives ''.
local function fetch(s, i)
	local char = usub(s, i, i)
	return char
end
--Combining diacritics are tricky.
-- The values below are the marks this module writes into (or looks for in)
-- IPA strings, plus the two spacing length marks it looks for in the input term.
local tie = U(0x35C) -- tie bar
local nonsyllabic = U(0x32F) -- combining inverted breve below; appended after the 'i' of iota-subscript vowels
local high = U(0x341) -- combining acute tone mark
local low = U(0x340) -- combining grave tone mark
local midHigh = U(0x1DC4) -- mid–high pitch
local midLow = U(0x1DC6) -- mid–low pitch
local highMid = U(0x1DC7) -- high–mid pitch
local voiceless = U(0x325) -- combining ring below
local aspirated = 'ʰ' -- aspiration mark; syllabify keeps it with the preceding obstruent
local macron = '¯' -- spacing macron; directly after an ambiguous vowel it marks that vowel long
local breve = '˘' -- spacing breve; directly after an ambiguous vowel it marks that vowel short
-- Test whether `text` belongs to the character class named `X`.
--
-- The class is looked up in m_data.chars[X]. For every class except
-- "frontDiphth" the stored string is wrapped in a bracket set, so `text` must
-- be exactly one character from that set; for "frontDiphth" the stored string
-- is used as a whole anchored pattern.
--
-- Returns false when either argument is nil/false; otherwise returns the
-- result of mw.ustring.find (match indices, or nil when there is no match).
-- Raises an error, attributed to the caller, if no class named `X` exists.
local function is(text, X)
	if not text or not X then
		return false
	end
	-- `local` keeps the pattern from leaking into the global environment.
	local pattern = m_data.chars[X] or error("No data for \"" .. X .. "\".", 2)
	if X == "frontDiphth" then
		pattern = "^" .. pattern .. "$"
	else
		pattern = "^[" .. pattern .. "]$"
	end
	return rfind(text, pattern)
end
-- Data-table entry for the character immediately after position `index` in
-- `term`, or nil when there is none (for instance when the lookahead runs off
-- the end of the term and fetch gives '').
local function following_data(term, index)
	return m_data[fetch(term, index + 1)]
end

-- Named predicates that conditions in the data module can call with "~".
-- Each takes the term and a character position and inspects the character(s)
-- after that position. Predicates that consult the data table treat a
-- character with no data entry as "condition not met" instead of raising an
-- "attempt to index nil" error.
local env_functions = {
	-- Is the next letter a front vowel, or do the next two letters form a
	-- front diphthong whose second letter carries no diaeresis?
	preFront = function(term, index)
		local letter1, letter2 = fetch(term, index + 1), fetch(term, index + 2)
		return is(strip_accent(letter1), "frontVowel") or (is(strip_accent(letter1 .. letter2), "frontDiphth") and not is(letter2, "iDiaer"))
	end,
	-- Is the next letter an iota without diaeresis?
	isIDiphth = function(term, index)
		local letter = fetch(term, index + 1)
		local data = m_data[letter]
		return strip_accent(letter) == 'ι' and data ~= nil and not data.diar
	end,
	-- Is the next letter an upsilon without diaeresis?
	isUDiphth = function(term, index)
		local letter = fetch(term, index + 1)
		local data = m_data[letter]
		return strip_accent(letter) == 'υ' and data ~= nil and not data.diar
	end,
	-- Does the next letter carry a rough breathing?
	isAspDiphth = function(term, index)
		local data = following_data(term, index)
		return data ~= nil and data.breath == 'rough'
	end,
	-- Does the next letter carry an acute accent?
	isAcuteDiphth = function(term, index)
		local data = following_data(term, index)
		return data ~= nil and data.accent == 'acute'
	end,
	-- Does the next letter carry a grave accent?
	isGraveDiphth = function(term, index)
		local data = following_data(term, index)
		return data ~= nil and data.accent == 'grave'
	end,
	-- Does the next letter carry a circumflex?
	isCircumDiphth = function(term, index)
		local data = following_data(term, index)
		return data ~= nil and data.accent == 'circum'
	end,
	-- Does the next letter carry any accent? Returns the accent name
	-- (truthy) or nil.
	isAccentDiphth = function(term, index)
		local data = following_data(term, index)
		return data and data.accent
	end,
}
local function decode(condition, x, term)
	--[==[
		Evaluate a condition string from the data module against the
		character at position x of term. Returns a truthy value when the
		condition holds and a falsy one (false or nil) otherwise.

		"If" and "and" statements.
			The condition is split at its FIRST + ("and") or / ("or");
			the left part is decoded, and the whole remainder to the right
			of the operator is decoded recursively, until only simple
			if-statements are left. So "a+b/c" is read as a and (b or c).
		In if-statements:
		* A number represents the character under consideration:
			1 the next character, 0 the current, and -1 the previous.
		* Equals sign (=) checks to see if the character under consideration
			is equal to a character.
		* Period (.) plus a word sends the module to the corresponding entry
			in the letter's data table.
		* Tilde (~) calls a function on the character under consideration,
			if the function exists.
	]==]
	if rfind(condition, '[+/]') then
		-- Split into: everything before the first operator, the operator,
		-- and everything after it (which may contain further operators).
		-- The match must be assigned on its own: writing
		-- `rmatch(...) or error(...)` would truncate the three captures to
		-- one, leaving the operator nil and the condition never evaluated.
		local condition1, sep, condition2 = rmatch(condition, "^([^/+]*)([/+])(.*)$")
		if not sep then
			error('Condition "' .. tostring(condition) .. '" is improperly formed')
		end
		if sep == '/' then		-- logical operator: or
			return decode(condition1, x, term) or decode(condition2, x, term)
		else					-- sep == '+'; logical operator: and
			return decode(condition1, x, term) and decode(condition2, x, term)
		end
	elseif rfind(condition, '=') then				-- check character identity
		local offset, char = unpack(mw.text.split(condition, "="))
		return char == fetch(term, x + offset) -- out of bounds fetch gives ''
	elseif rfind(condition, '%.') then				-- check character quality
		local offset, quality = unpack(mw.text.split(condition, "%."))
		-- A character without a data entry (e.g. '' from an out-of-bounds
		-- fetch) has no qualities at all.
		local data = m_data[fetch(term, x + offset)]
		return data and data[quality]
	elseif rfind(condition, '~') then				-- check character(s) using function
		local offset, func = unpack(mw.text.split(condition, "~"))
		return env_functions[func] and env_functions[func](term, x + offset) or false
	end
end
-- Resolve a pronunciation specification `p` to a string.
--
-- `p` is either a plain string (returned as is) or a sequential table whose
-- entries are tried in order. A string entry is returned unconditionally. A
-- table entry is a {condition, result} pair: when the condition, decoded
-- against position `x` of `term`, holds, its result is returned, recursing
-- if that result is itself a specification table.
--
-- `period` is not consulted here; it is only handed on to recursive calls.
-- Returns nothing when no entry applies.
local function check(p, period, x, term)
	if type(p) == 'string' then
		return p
	elseif type(p) == 'table' then   --This table is sequential, with a variable number of entries.
		for _, possP in ipairs(p) do
			if type(possP) == 'string' then
				return possP
			elseif type(possP) == 'table' then   --This table is paired, with two values: a condition and a result.
				-- Declared local so they no longer leak into the global environment.
				local rawCondition, rawResult = possP[1], possP[2]
				if decode(rawCondition, x, term) then
					return (type(rawResult) == 'string') and rawResult or check(rawResult, period, x, term)
				end
			end
		end
	end
end
-- Attach Classical length and pitch marks to a vowel's initial pronunciation.
--   vIP     initial pronunciation of the vowel (string)
--   accent  'acute', 'grave', 'circum' or nil
--   subi    truthy when the vowel carries an iota subscript
--   vLength 'long', 'short' or 'either' (anything but 'long' is treated as short)
-- Returns the final pronunciation string.
local function classical_vowel(vIP, accent, subi, vLength)
	if ulen(vIP) > 1 then --i.e. if it's a phonetic diphthong
		if accent == 'acute' then
			return vIP .. high
		elseif accent == 'grave' then
			return vIP .. low
		elseif accent == 'circum' then
			-- High tone goes on the first element only.
			return fetch(vIP, 1) .. high .. usub(vIP, 2)
		end
		return vIP
	elseif subi then
		-- Long diphthong: long vowel followed by nonsyllabic i.
		if accent == 'acute' then
			return vIP .. 'ːi' .. nonsyllabic .. high
		elseif accent == 'grave' then
			return vIP .. 'ːi' .. nonsyllabic .. low
		elseif accent == 'circum' then
			return vIP .. high .. 'ːi' .. nonsyllabic
		end
		return vIP .. 'ːi' .. nonsyllabic
	elseif vLength == 'long' then
		if accent == 'acute' then
			return vIP .. midHigh .. 'ː'
		elseif accent == 'grave' then
			return vIP .. midLow .. 'ː'
		elseif accent == 'circum' then
			return vIP .. highMid .. 'ː'
		end
		return vIP .. 'ː'
	end
	if accent == 'acute' then
		return vIP .. high
	elseif accent == 'grave' then
		return vIP .. low
	end
	return vIP
end

-- Convert a prepared term into unsyllabified IPA for every period.
--
-- Walks the term one character at a time. Characters with no entry in the
-- data table are skipped. Consonants, and omega and eta, are transcribed
-- straight from their data entry; other vowels are first checked for forming
-- a diphthong with a following iota or upsilon, and in the Classical period
-- additionally receive length and pitch marks.
--
-- Returns two values:
--   IPAs   table keyed by period; each value is { IPA = string, notes = {...} }
--   ambig  list of tagged vowels whose length could not be determined
local function convert_term(term)
	local IPAs = {}
	for _, period in ipairs(periods) do
		IPAs[period] = { notes = {} }
	end
	local length = ulen(term)
	local x = 1
	local ambig = {}
	while x <= length do
		local letter = fetch(term, x)
		local nextLetter = fetch(term, x + 1)
		local data = m_data[letter]
		if not data then
			-- No data for this character: nothing to transcribe.
		elseif data.type == 'consonant' or strip_accent(letter) == 'ω' or strip_accent(letter) == 'η' then
			for _, period in ipairs(periods) do
				table.insert(IPAs[period], check(data.p[period], period, x, term))
			end
		elseif data.type == 'vowel' then
			--Start with a diphthong check
			local diphthong = false
			local vCurrent, vForm, vLength
			local nextData = m_data[nextLetter]
			if is(letter, 'iDiphth') and (nextData and nextData.type == 'vowel' and (strip_accent(nextLetter) == 'ι' and not nextData.diar)) then
				diphthong = letter .. 'ι'
			elseif is(letter, 'uDiphth') and (nextData and nextData.type == 'vowel' and (strip_accent(nextLetter) == 'υ' and not nextData.diar)) then
				diphthong = letter .. 'υ'
			end
			if diphthong then
				-- The second letter carries the diacritics; consume it too.
				vCurrent = nextLetter
				vForm = diphthong
				vLength = 'long'
				x = x + 1
				nextLetter = fetch(term, x + 1)
			else
				vCurrent = letter
				vForm = strip_accent(letter)
				vLength = m_data[vCurrent].length or m_data[vForm].length
			end
			local vData = m_data[vCurrent]
			for _, period in ipairs(periods) do
				if vData.breath == 'rough' then
					if period == 'cla' then
						table.insert(IPAs['cla'], 'h')
					elseif period == 'koi1' then
						table.insert(IPAs['koi1'], '(h)')
					end
				end
				if period ~= 'cla' then
					--All other periods have a stress accent, instead of Classical's tonal accent.
					--The stress diacritic is initially placed immediately preceding the vowel,
					--and is moved to the front of the syllable during syllabification.
					if vData.accent then
						table.insert(IPAs[period], 'ˈ')
					end
					local vIP = check(m_data[vForm].p[period], period, x, term)
					table.insert(IPAs[period], vIP)
				else
					-- Classical vowels are hard.
					-- [[Module:grc-utilities]] converts combining macrons and breves to spacing ones.
					if vLength == 'either' then
						if vData.accent == 'circum' or nextLetter == macron or vData.subi then
							vLength = 'long'
						elseif nextLetter == breve then
							vLength = 'short'
						else
							local ambiguousVowel = tag_text(vCurrent, lang, nil, 'term')
							table.insert(IPAs.cla.notes, 'ambiguous vowel ' .. ambiguousVowel
								.. ' at ' .. x)
							table.insert(ambig, ambiguousVowel)
						end
					end
					-- vIP stands for initial pronunciation. The period is passed as
					-- the string 'cla' (not an undefined variable of that name).
					local vIP = check(m_data[vForm].p.cla, 'cla', x, term)
					table.insert(IPAs['cla'], classical_vowel(vIP, vData.accent, vData.subi, vLength))
				end
			end
		end
		x = x + 1
	end
	--Concatenate the IPAs
	for _, period in ipairs(periods) do
		IPAs[period] = { IPA = table.concat(IPAs[period], ''), notes = IPAs[period]['notes'] }
	end
	return IPAs, ambig
end
-- Split each period's IPA string into syllables, in place.
--
-- For every period, syllables are peeled off the front of the word one at a
-- time: find the end of the first vowel (with its diacritics), find where the
-- next vowel or stress mark starts, then choose the break between them based
-- on the consonants directly before that next vowel. Syllables are joined
-- with '.', and a stress mark is moved to the front of its syllable (or
-- dropped when the word has a single syllable).
--
-- Takes and returns the IPAs table produced by convert_term.
local function syllabify(IPAs)
	--Syllabify
	for _, period in ipairs(periods) do
		local word = IPAs[period].IPA
		local syllables = {}
		local cVowel, nVowel, sBreak, stress, wordEnd, searching
		-- Scratch state for the scans below; local so it does not leak into globals.
		local cVowelFound, letter
		while word ~= '' do
			cVowel, nVowel, sBreak, stress = false, false, false, false
			--First thing is to find the first vowel.
			searching = 1
			cVowelFound = false
			while not cVowel do
				letter = fetch(word, searching)
				if cVowelFound then
					if is(letter, "vowel") or is(letter, "cons") or letter == '' or letter == 'ˈ' then
						cVowel = searching - 1
					elseif is(letter, "diacritic") then
						searching = searching + 1
					elseif letter == tie then
						-- A tie joins this vowel to a following one: keep looking.
						cVowelFound = false
						searching = searching + 1
					else
						searching = searching + 1
					end
				else
					if is(letter, "vowel") then
						cVowelFound = true
					elseif letter == 'ˈ' then
						stress = true
					elseif letter == '' then --This shouldn't happen.
						-- No vowel left in the word. Record a numeric position (the
						-- last character) rather than `true`, so the arithmetic on
						-- cVowel below cannot fail; wordEnd makes the rest of the
						-- word one final syllable.
						cVowel = searching - 1
						wordEnd = true
					end
					searching = searching + 1
				end
			end
			--Next we try and find the next vowel or the end.
			searching = cVowel + 1
			while (not nVowel) and (not wordEnd) do
				letter = fetch(word, searching)
				if is(letter, "vowel") or letter == 'ˈ' then
					nVowel = searching
				elseif letter == '' then
					wordEnd = true
				else
					searching = searching + 1
				end
			end
			--Finally we find the syllable break point.
			if wordEnd then
				sBreak = ulen(word)
			elseif is(fetch(word, nVowel - 1), "liquid") then
				if is(fetch(word, nVowel - 2), "obst") then
					sBreak = nVowel - 3
				elseif fetch(word, nVowel - 2) == aspirated and is(fetch(word, nVowel - 3), "obst") then
					sBreak = nVowel - 4
				else
					sBreak = nVowel - 2
				end
			elseif is(fetch(word, nVowel - 1), "cons") then
				sBreak = nVowel - 2
			elseif fetch(word, nVowel - 1) == aspirated and is(fetch(word, nVowel - 2), "obst") then
				sBreak = nVowel - 3
			elseif fetch(word, nVowel - 1) == voiceless and fetch(word, nVowel - 2) == 'r' then
				sBreak = nVowel - 3
			else
				sBreak = nVowel - 1
			end
			--Pull everything up to and including the syllable Break.
			local syllable = usub(word, 1, sBreak)
			--If there is a stress accent, then we need to move it to the
			--beginning of the syllable, unless it is a monosyllabic word,
			--in which case we remove it altogether.
			if stress then
				if next(syllables) or syllable ~= word then
					syllable = 'ˈ' .. rsubn(syllable, 'ˈ', '')
				else
					syllable = rsubn(syllable, 'ˈ', '')
				end
				stress = false
			end
			table.insert(syllables, syllable)
			word = usub(word, sBreak + 1)
		end
		if #syllables > 0 then
			IPAs[period].IPA = table.concat(syllables, '.')
			-- A stress mark already marks a syllable boundary; drop the dot before it.
			IPAs[period].IPA = rsubn(IPAs[period].IPA, '%.ˈ', 'ˈ')
		end
	end
	return IPAs
end
-- Build the final wikitext/HTML for the pronunciation box.
--
--   IPAs   table keyed by period, each value { IPA = string, notes = {...} }
--   ambig  list of tagged ambiguous vowels found in the term
--
-- The result is a "vsSwitcher" div holding a collapsed one-line view (the
-- periods in inlinePeriods joined by arrows) and an expanded view with one
-- bullet per period. Per-period notes and the ambiguous-vowel message are
-- wrapped in elements of class "previewonly".
local function make_table(IPAs, ambig)
	--Final format
	local inlineProns = {}
	local listOfProns = {}
	for _, period in ipairs(inlinePeriods) do
		local pron = '/' .. IPAs[period].IPA .. '/'
		table.insert(inlineProns, {pron = pron})
		table.insert(listOfProns, pron)
	end
	-- Character count of the collapsed line; used below to size the box.
	local inlineIPAlength = ulen("IPA(key): " .. table.concat(listOfProns, ' → '))
	local inline = '<div class="vsShow" style="display:none">\n* ' .. m_IPA.format_IPA_full(lang, inlineProns, nil, ' → ') .. '</div>'
	local fullProns = {}
	for _, period in ipairs(periods) do
		local notes = (#IPAs[period].notes > 0) and ('<span class="previewonly"><br>' .. table.concat(IPAs[period].notes, ', ') .. '</span>') or ''
		table.insert(fullProns, '* ' .. m_a.show({'grc-' .. period}) .. ' ' .. m_IPA.format_IPA_full(lang, {{pron = '/' .. IPAs[period].IPA .. '/'}}) .. notes)
	end
	local ambignote = ''
	-- The table ambig is filled with all the ambiguous vowels that have been found in the term.
	if #ambig > 0 then
		-- Number agreement: plural ending for "vowel", and the matching pronoun.
		local agr
		if #ambig > 1 then
			agr = { 's ', 'each one' }
		else
			agr = { ' ', 'it' }
		end
		-- The paragraph is closed with </p> only; there is no <span> open at
		-- this point, so no </span> may be emitted here.
		ambignote = '\n<p class="previewonly">Mark the vowel length of the ambiguous vowel' .. agr[1]
			.. mw.text.listToText(ambig) .. ' by adding a macron after ' .. agr[2]
			.. ' if it is long, or a breve if it is short. By default, [[Module:grc-pronunciation]] assumes it is short if unmarked.<br><small>[This message shows only in preview mode.]</small></p>\n'
	end
	local full = '<div class="vsHide">\n' .. table.concat(fullProns, '\n') .. ambignote .. '</div>'
	return '<div class="vsSwitcher vsToggleCategory-pronunciations" style="width: ' .. inlineIPAlength * 0.68 .. 'em;"><span class="vsToggleElement" style="float: right;">&nbsp;</span>' .. inline .. full .. '</div>'
end
-- Template entry point: produce the pronunciation box for a term.
--
-- Reads the arguments of the parent frame (the calling template). The term
-- is taken from parameter `w`, for which positional parameter 1 is an alias;
-- it defaults to the current page title. The term is lower-cased, final sigma
-- and rho with smooth breathing are normalised, and the diacritics are put in
-- the order this module expects before conversion.
--
-- Returns the wikitext/HTML string built by make_table.
function export.create(frame)
	local params = {
		[1] = {alias_of = 'w'},
		w = {default = mw.title.getCurrentTitle().text},
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	-- All working values are local so nothing leaks into the global environment.
	local term = ulower(args.w)
	term = rsubn(term, 'ς', 'σ')
	term = rsubn(term, 'ῤ', 'ρ')
	term = rearrangeDiacritics(term)
	local IPAs, ambig = convert_term(term)
	IPAs = syllabify(IPAs)
	return make_table(IPAs, ambig)
end
-- Template entry point for example tables: a link to the term and its
-- pronunciation box, separated by " || ". The term is the first positional
-- argument of the parent frame; export.create reads its own arguments from
-- the same frame.
function export.example(frame)
	local parent_args = frame:getParent().args
	local word = parent_args[1]
	local link = m_links.full_link({term = word, lang = lang})
	local pronunciation = export.create(frame)
	return link .. " || " .. pronunciation
end
return export
--Things we still need:
--Voicing of sigma around (after?) voiced stops.
--Proper alerts for editors, especially on ambiguous vowels.