Module:User:AmazingJus/af
Jump to navigation
Jump to search
- The following documentation is located at Module:User:AmazingJus/af/documentation. [edit] Categories were auto-generated by Module:documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
49 of 98 tests failed. (refresh)
Text | Expected | Actual | |
---|---|---|---|
Afrika | A‧fri‧ka | A‧fri‧ka | |
Afrikaans | A‧fri‧kaans | A‧fri‧kaans | |
Afrikaner | A‧fri‧ka‧ner | A‧fri‧ka‧ner | |
Amerikaner | A‧me‧ri‧ka‧ner | A‧me‧ri‧ka‧ner | |
André | An‧dré | An‧dré | |
asyn | a‧syn | a‧syn | |
belangrik | be‧lang‧rik | be‧lang‧rik | |
berg | berg | berg | |
berge | ber‧ge | ber‧ge | |
berg+reeks | berg‧reeks | berg‧reeks | |
bos+bedryf | bos‧be‧dryf | bos‧be‧dryf | |
beskou | be‧skou | be‧skou | |
beter | be‧ter | be‧ter | |
beton | be‧ton | be‧ton | |
betoon | be‧toon | be‧toon | |
Botha | Bo‧tha | Bo‧tha | |
braai | braai | braai | |
Coetzee | Coet‧zee | Coet‧zee | |
Coetzer | Coet‧zer | Coet‧zer | |
dokumentasie | do‧ku‧men‧ta‧sie | do‧ku‧men‧ta‧sie | |
du Plessis | du Ples‧sis | du Ples‧sis | |
eggo | eg‧go | eg‧go | |
feste | fes‧te | fes‧te | |
geëet | ge‧eet | ge‧eet | |
gegee | ge‧gee | ge‧gee | |
ghitaar | ghi‧taar | ghi‧taar | |
hondjie | hon‧djie | hon‧djie | |
Jean Pierre | Jean Pierre | Je‧an Pier‧re | |
Johannesburg | Jo‧han‧nes‧burg | Jo‧han‧nes‧burg | |
karretjie | kar‧re‧tjie | kar‧re‧tjie | |
klu[b] | klub | klub | |
le Gran.ge | le Gran‧ge | le Gran‧ge | |
Macedonië | Ma‧ce‧do‧ni‧e | Ma‧ce‧do‧ni‧e | |
Nortje | Nor‧tje | Nor‧tje | |
'n | 'n | 'n | |
onweer | on‧weer | on‧weer | |
omstandigheid | om‧stan‧dig‧heid | om‧stan‧di‧gheid | |
Paraguay | Pa‧ra‧guay | Pa‧ra‧gu‧a‧y | |
Pretoria | Pre‧to‧ri‧a | Pre‧to‧ri‧a | |
Schalk | Schalk | Schalk | |
sjokolade | sjo‧ko‧la‧de | sjo‧ko‧la‧de | |
s'n | s'n | s'n | |
spieël | spie‧el | spie‧el | |
Suid-Afrika | Suid-‧A‧fri‧ka | Suid-‧A‧fri‧ka | |
vanaand | va‧naand | va‧naand | |
Venesië | Ve‧ne‧si‧e | Ve‧ne‧si‧e | |
vinger | ving‧er | ving‧er | |
wîe | wî‧e | wî‧e | |
zero | ze‧ro | ze‧ro |
Text | Expected | Actual | |
---|---|---|---|
Afrika | ˈɑː.fri.ka | ɑː.fri.kɑː | |
Afrikaans | ˌa.friˈkɑ̃ːs, ˌa.friˈkɑːns | ɑː.fri.kɑːns | |
Afrikaner | ˌa.friˈkɑː.nər | ɑː.fri.kɑː.nɛr | |
Amerikaner | aˌmɪə̯.riˈkɑː.nər | ɑː.mɪə̯.ri.kɑː.nɛr | |
André | ˈan.drəɪ̯ | an.dré | |
asyn | aˈsəɪ̯n | ɑː.səɪ̯n | |
belangrik | bəˈlaŋ.rək | be>.laŋ.rək | |
berg | ˈbɛrχ | be>rχ | |
berge | ˈbɛr.ɡə | be>r.ɡɪə̯ | |
berg+reeks | ˈbɛrχ.rɪə̯ks | be>rχ.rɪə̯ks | |
bos+bedryf | ˈbɔs.bəˌdrəɪ̯f | bɔs.bɪə̯.drəɪ̯f | |
beskou | bəˈskœʊ̯ | be>.skœʊ̯ | |
beter | ˈbɪə̯.tər | be>.tɛr | |
beton | bəˈtɔn | be>.tɔn | |
betoon | bəˈtʊə̯n | be>.tʊə̯n | |
Botha | ˈbʊə̯.ta | bʊə̯.tɑː | |
braai | brɑːɪ̯ | brɑːi | |
Coetzee | kutˈseə̯ | kut.zɪə̯ | |
Coetzer | ˈkut.sər | kut.zɛr | |
dokumentasie | ˌdɔ.kju.mɛnˈtɑː.si, ˌdɔ.ky.mɛnˈtɑː.si | dʊə̯.ky.mɛn.tɑː.si | |
du Plessis | dy.pləˈsi | dy plɛ.səs | |
eggo | ˈɛ.χu | e.χu | |
feste | ˈfɛs.tə | fɛs.tɪə̯ | |
geëet | χəˈɪə̯t | χe>.ɪə̯t | |
gegee | χəˈχɪə̯ | χe>.χɪə̯ | |
ghitaar | ɡiˈtɑːr | ɡi.tɑːr | |
hondjie | ˈɦœi̯ɲ.ci | ɦoŋ.ki | |
Jean Pierre | anˈpiːr | jɪə̯.an pi.rɪə̯ | |
Johannesburg | jʊə̯ˈɦa.nəsˌbœrχ | jʊə̯.ɦa.nɛs.bœrχ | |
karretjie | ˈka.rəi̯.ci | ka.rɪə̯.ki | |
klu[b] | klab, klœb | klub | |
le Gran.ge | ləˈχran.si | lɪə̯ χran.χɪə̯ | |
Macedonië | ˌma.səˈdʊə̯.ni.ə | mɑː.sɪə̯.dʊə̯.ni.ɪə̯ | |
Nortje | nɔrˈkɪə̯ | nɔr.ʧɪə̯ | |
'n | ə(n) | ə(n) | |
onweer | ˈɔn.vɪə̯r | ɔn.vɪə̯r | |
omstandigheid | ɔmˈstan.dəχˌɦəɪ̯t | ɔm>.stan.di.ɡəɪ̯d | |
Paraguay | ˈpa.ra.ɡwaɪ̯ | pɑː.rɑː.χy.ɑː.əɪ̯ | |
Pretoria | prəˈtʊə̯.ri.a | prɪə̯.tʊə̯.ri.ɑː | |
Schalk | skalk | skalk | |
sjokolade | ˌʃɔ.kɔˈlɑː.də | sjʊə̯.kʊə̯.lɑː.dɪə̯ | |
s'n | sən | sən | |
spieël | spiːl | spi.ɛl | |
Suid-Afrika | səɪ̯tˈɑː.fri.ka | suɪ̯d-.ɑː.fri.kɑː | |
vanaand | fəˈnɑːnt | vɑː.nɑːnd | |
Venesië | vəˈniː.si.ə | vɪə̯.nɪə̯.si.ɪə̯ | |
vinger | ˈfəŋ.ər | viŋ.ɛr | |
wîe | ˈvəː.(ɦ)ə | vəː.ɪə̯ | |
zero | ˈzɪə̯.ru | zɪə̯.ru |
local export = {}
local lang = require("Module:languages").getByCode("af")
local sc = require("Module:scripts").getByCode("Latn")
local hyph = require("Module:hyphenation")
local str = require("Module:string")
local tbl = require("Module:table")
-- Tag `text` as Afrikaans in Latin script.
-- `face` is forwarded untouched to Module:script utilities; the module is
-- loaded on demand rather than at the top of the file.
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end
-- Build a link to `term` as an Afrikaans, Latin-script entry.
-- `face` is forwarded untouched to Module:links' full_link().
function export.link(term, face)
	local data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(data, face)
end
local u = require("Module:string/char")
local decomp = mw.ustring.toNFD
local recomp = mw.ustring.toNFC
local lower = mw.ustring.lower
local find = mw.ustring.find
local len = mw.ustring.len
local match = mw.ustring.match
local sub = mw.ustring.sub
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.gmatch
-- rsubn() wrapper returning only the substituted string; the replacement
-- count that rsubn() also returns is dropped by the extra parentheses
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end
-- keep applying rsub() with the same pattern and replacement until a pass
-- leaves the string unchanged, then return that string
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
-- list of constants
-- combining marks, as they appear after the term has been NFD-decomposed
local GR = u(0x0300) -- grave
local AC = u(0x0301) -- acute
local CR = u(0x0302) -- circumflex
local DR = u(0x0308) -- diaeresis
local accents = GR .. AC .. CR .. DR
-- letter classes used inside pattern character sets ("y" counts as a vowel)
local vowels = "aeiouyAEIOUY"
-- the uppercase half previously lacked "R", so a capital R was not treated as
-- a consonant by the syllabifier
local cons = "bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ"
-- characters that end a syllable: the syllable mark and the word border
local syll_boundary = "‧#"
-- trigraphs and digraphs (diphthongs and long vowels) with their phonetic value
-- "ie", "oe" and "uu" map to placeholder letters that pron() later turns
-- into /i/, /u/ and /y/
local graphemes = {
	-- trigraphs
	aai = "ɑːɪ̯",
	eeu = "iʊ̯",
	ieu = "iʊ̯",
	oei = "uɪ̯",
	ooi = "oːɪ̯",
	-- digraphs
	aa = "ɑː",
	ae = "ɑː",
	ai = "aɪ̯",
	au = "œʊ̯",
	ee = "ɪə̯",
	ei = "əɪ̯",
	eu = "iʊ̯",
	ie = "į", -- temporary value
	oe = "ů", -- temporary value
	oi = "ɔɪ̯",
	oo = "ʊə̯",
	ou = "œʊ̯",
	ui = "uɪ̯",
	uu = "ü",
}
-- grapheme spellings ordered longest first, so trigraphs come before digraphs
-- (entries of equal length keep whatever order pairs() and table.sort give them)
local graphemes_sorted = {}
for spelling in pairs(graphemes) do
	table.insert(graphemes_sorted, spelling)
end
table.sort(graphemes_sorted, function(a, b)
	return len(a) > len(b)
end)
-- grouped phoneme sets
local sets = {
	-- per vowel letter: { value in closed syllables, value in open syllables }
	vowel_length = {
		a = { "a", "ɑː" },
		e = { "ɛ", "ɪə̯" },
		i = { "ə", "i" },
		o = { "ɔ", "ʊə̯" },
		u = { "œ", "y" },
	},
	-- consonant pairs: { voiced, voiceless }
	cons_voice = {
		{ "b", "p" },
		{ "d", "t" },
		{ "ʤ", "ʧ" },
		{ "ɡ", "k" },
		{ "v", "f" },
		{ "z", "s" },
		{ "ʒ", "ʃ" },
	},
}
-- affixes the syllabifier looks for at word borders
local affixes = {
	-- searched for at the start of a word
	prefixes = {
		"aan", "agter", "be", "deur", "er", "ge",
		"her", "om", "ont", "onder", "ver", "voor",
	},
	-- searched for at the end of a word
	suffixes = {
		"agtig", "baar", "dom", "end", "heid",
		"lik", "loos", "nis", "sel", "skap",
	},
}
-- order both affix lists shortest first; the syllabifier stops at the first
-- affix that matches, so shorter affixes are tried before longer ones
local function shorter_first(a, b)
	return len(a) < len(b)
end
table.sort(affixes.prefixes, shorter_first)
table.sort(affixes.suffixes, shorter_first)
-- words listed as unstressed
local unstressed = { "die", "dit", "is", "nie", "'n" }
-- list of respelling substitutions
-- Each entry is {pattern, replacement, scope} and the list is applied top to
-- bottom by pron(), so the order of entries matters.
--   * scope "-" marks a default rule that is always eligible;
--   * any other scope ("fr", "en", "af") is applied only when pron() is called
--     with that same value as its `ety` argument.
-- Once a pattern has been applied, later entries with the identical pattern
-- string are skipped, which is how a scoped rule overrides the default rule
-- listed after it.
-- "#" is the word border and "‧" the syllable mark inserted before this list
-- is applied. Replacements written in capitals (OU, EI, A, wA) are lowercased
-- by pron() after this list has run and are then picked up by the later
-- grapheme and single-vowel steps.
local subs = {
-- 'N
{"#'n#", "#ə(n)#", "-"}, -- pronounced /ə(n)/ when 'n is a whole word (the article)
{"'n#", "ən#", "-"}, -- pronounced /ən/ at the end of any other word
-- CH
{"ch", "ʃ", "fr"}, -- pronounced /ʃ/ in French loans
{"ch([" .. cons .. "]?[ei])", "χ%1", "-"}, -- pronounced /χ/ before "e" or "i", with at most one consonant in between
{"ch", "k", "-"}, -- otherwise /k/
-- NG
{"ng", "ŋ", "-"}, -- pronounced /ŋ/
-- SH
{"sh", "ʃ", "-"}, -- pronounced /ʃ/
-- DJ/TJ
{"[dt]jie", "kį", "-"}, -- "djie"/"tjie" is pronounced /-ci/ (į is the placeholder for /i/)
{"dj", "ʤ", "-"}, -- "dj" is otherwise /d͡ʒ/
{"tj", "ʧ", "-"}, -- "tj" is otherwise /t͡ʃ/
-- GH
{"gh", "ɡ", "-"}, -- pronounced /ɡ/
-- C
{"c([ei])", "s%1", "-"}, -- pronounced /s/ before "e" or "i"
{"c", "k", "-"}, -- otherwise /k/
-- G
{"g", "ɡ", "en"}, -- pronounced /ɡ/ in English loans
{"r‧ge", "r‧ɡe", "-"}, -- pronounced /ɡ/ between "r" and "e" across a syllable mark
{"g", "χ", "-"}, -- otherwise /χ/
{"n(‧?[kɡ])", "ŋ%1", "-"}, -- /ŋ/ is an allophone of /n/ before /ɡ/ and /k/
-- V
{"v", "f", "af"}, -- pronounced /f/ in native words
-- W
{"w", "w", "en"}, -- pronounced /w/ in English loans
{"w", "v", "-"}, -- otherwise /v/
-- EAU
{"eaux?", "OU", "fr"}, -- pronounced /œʊ̯/ in French loans
-- OI
{"oi", "wA", "fr"}, -- pronounced /wa/ in French loans
-- X
{"#x", "#s", "-"}, -- pronounced /s/ word-initially
{"x", "ks", "-"}, -- otherwise /ks/
-- H
{"([" .. cons .. vowels .. "])h", "%1", "-"}, -- dropped when it directly follows a consonant or vowel letter
{"h", "ɦ", "-"}, -- otherwise /ɦ/
-- O
{"o([" .. syll_boundary .. "])", "OU%1", "en"}, -- pronounced /œʊ̯/ in open syllables in English loans
{"o#", "ů#", "-"}, -- otherwise /u/ in word-final position (ů is the placeholder for /u/)
-- U
{"u([" .. cons .. "])", "A%1", "en"}, -- pronounced /a/ before a consonant in English loans
{"u", "jů", "en"}, -- otherwise /ju/ in English loans
-- Y
{"y", "EI", "-"}, -- pronounced /əɪ̯/
-- circumflex accent
{CR, "ː", "-"} -- lengthens a vowel with its short quality
}
-- Wrap every trigraph/digraph found in `text` in curly braces.
-- Matches are first swapped for private-use placeholder characters and only
-- expanded at the end, so a digraph is never marked again inside a trigraph
-- that was already marked (which used to yield nested braces like "{{aa}i}").
local function mark_graphemes(text)
	local expansions = {}
	for i, graph in ipairs(graphemes_sorted) do
		local placeholder = u(0xE000 + i)
		expansions[placeholder] = "{" .. graph .. "}"
		text = rsub(text, graph, placeholder)
	end
	if next(expansions) == nil then
		return text
	end
	local placeholder_class = "[" .. u(0xE001) .. "-" .. u(0xE000 + #graphemes_sorted) .. "]"
	return rsub(text, placeholder_class, expansions)
end

-- syllabify words
-- Expects every word in `term` to be wrapped in "#" word borders.
-- Returns the NFD-decomposed term with "‧" between syllables; the ">" and "<"
-- affix markers and the "#" borders are left in for the caller to strip.
local function syllabify(term)
	-- decompose accents
	term = decomp(term)
	-- remove diaeresis and split syllable (note: diaeresis shouldn't be displayed in its hyphenation form)
	term = rsub(term, "([" .. vowels .. "])" .. DR, "‧%1")
	-- mark trigraphs and digraphs with curly braces
	term = mark_graphemes(term)
	-- add > and < for prefix and suffixes respectively; only the first
	-- matching affix of each kind is marked.
	-- The affix is passed through mark_graphemes() as well, because the term
	-- already contains braces at this point: "heid" appears as "h{ei}d", so a
	-- plain search for "heid" could never succeed.
	for _, prefix in ipairs(affixes.prefixes) do
		local marked = "#" .. mark_graphemes(prefix)
		if find(term, marked) then
			term = rsub(term, marked, marked .. ">")
			break
		end
	end
	for _, suffix in ipairs(affixes.suffixes) do
		local marked = mark_graphemes(suffix) .. "#"
		if find(term, marked) then
			term = rsub(term, marked, "<" .. marked)
			break
		end
	end
	-- add dot before consonant + vowel
	term = rsub(term, "([" .. cons .. "]?{?)([" .. vowels .. "][" .. accents .. "]?)", "‧%1%2")
	-- remove any dots inside brackets
	term = rsub(term, "{[^}]*}", function(a) return rsub(a, "‧", "") end)
	-- shift dot before certain consonant clusters and digraphs
	term = rsub(term, "([bcfgkpvw])‧l", "‧%1l") -- clusters with l
	term = rsub(term, "([bcdfgkptwv])‧r", "‧%1r") -- clusters with r
	term = rsub(term, "([dst])‧j", "‧%1j") -- digraphs with j
	term = rsub(term, "([ckgt])‧h", "‧%1h") -- digraphs with h
	term = rsub(term, "n‧g", "ng‧") -- ng is syllable-final
	term = rsub(term, ">s‧", ">‧s") -- s can form a cluster after a prefix
	-- remove leading dots and brackets
	term = rsub(term, "#([^" .. vowels .. "]*)‧", "#%1")
	-- a "." typed in the input is an explicit syllable break
	term = rsub(term, "%.", "‧")
	term = rsub(term, "[{}+]", "") -- comment out to debug
	term = rsub_repeatedly(term, "‧‧", "‧")
	return term
end
-- hyphenation function
-- Accepts either a plain string or a frame-like table whose args[1] holds the
-- term. Returns the term with "‧" between syllables.
-- NOTE: the result is the raw string; formatting it through Module:hyphenation
-- is not wired up here.
function export.hyphenation(term)
	-- unwrap the term when invoked with a frame
	if type(term) == "table" then
		term = term.args[1]
	end
	-- wrap each space-separated word in "#" so word borders can be matched
	local bordered = rsub(term, "([^ ]+)", "#%1#")
	-- syllabify, then recompose the accents that syllabify() decomposed
	local syllabified = recomp(syllabify(bordered))
	-- drop word borders, square brackets and affix markers
	return rsub(syllabified, "[#%[%]<>]", "")
end
-- pronunciation function
-- Converts a term into a rough IPA transcription.
--   term: string to transcribe; words are separated by spaces
--   ety:  optional scope code; rules in `subs` tagged with this code are
--         applied in addition to the default ("-") rules
-- Returns the transcription with "." between syllables.
local function pron(term, ety)
	-- make text lowercase
	term = lower(term)
	-- mark word borders with #
	term = rsub(term, "([^ ]+)", "#%1#")
	-- syllabify term
	term = syllabify(term)
	-- substitute phonemes; a pattern that has been applied once is not applied
	-- again, so a scoped rule takes precedence over the default rule that
	-- shares its pattern
	local subbed = {}
	for _, s in ipairs(subs) do
		local pattern, replacement, scope = s[1], s[2], s[3]
		local applies = scope == "-" or (ety ~= "-" and scope == ety)
		if applies and not subbed[pattern] then
			term = rsub(term, pattern, replacement)
			subbed[pattern] = true
		end
	end
	-- make text lowercase again (some replacements above are written in capitals)
	term = lower(term)
	-- substitute graphemes, longest spelling first: iterating the table with
	-- pairs() has no defined order, so a digraph such as "aa" could consume
	-- part of a trigraph such as "aai" before the trigraph was tried
	for _, graph in ipairs(graphemes_sorted) do
		term = rsub(term, graph, graphemes[graph])
	end
	-- substitute single-letter vowels
	term = rsub(term, "([aeiou])([‧#ː" .. cons .. "])", function(vowel, following)
		local values = sets.vowel_length[vowel]
		-- subject first, pattern second (the arguments used to be swapped)
		if match(following, "[‧#]") then
			return values[2] .. following -- for open syllables
		end
		return values[1] .. following -- for closed syllables, or before ː
	end)
	-- replace į, ů, ü with their actual phonetic values
	term = rsub(term, "[įůü]", {["į"] = "i", ["ů"] = "u", ["ü"] = "y"})
	-- remove double consonants: collapses any character repeated directly or
	-- across a syllable mark, keeping the mark in front
	term = rsub(term, "(.)(‧?)%1", "%2%1")
	-- final adjustments
	term = rsub(term, "‧", ".")
	-- strip word borders, square brackets and the internal ">" / "<" affix
	-- markers so that none of them leak into the transcription
	return rsub(term, "[#%[%]<>]", "")
end
-- main export function
-- `term` is either a string or a frame-like table whose args[1] holds the
-- term; `ety` is handed to pron() unchanged.
function export.toIPA(term, ety)
	local input = term
	-- unwrap the term when invoked with a frame
	if type(input) == "table" then
		input = input.args[1]
	end
	return pron(input, ety)
end
return export