Module:User:AmazingJus/af
Jump to navigation
Jump to search
- The following documentation is located at Module:User:AmazingJus/af/documentation. [edit] Categories were auto-generated by Module:documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
79 of 148 tests failed. (refresh)
Text | Expected | Actual | Comments | |
---|---|---|---|---|
Afrika | A‧fri‧ka | A‧fri‧ka | ||
Afrikaans | A‧fri‧kaans | A‧fri‧kaans | ||
Afrikaner | A‧fri‧ka‧ner | A‧fri‧ka‧ner | ||
Amerikaner | A‧me‧ri‧ka‧ner | A‧me‧ri‧ka‧ner | ||
asyn | a‧syn | a‧syn | ||
belangrik | be‧lang‧rik | be‧lang‧rik | ||
berg | berg | berg | ||
berge | ber‧ge | ber‧ge | ||
berg+reeks | berg‧reeks | berg‧reeks | ||
bos+bedryf | bos‧be‧dryf | bos‧be‧dryf | ||
beskou | be‧skou | be‧skou | ||
beter | be‧ter | be‧ter | ||
beton | be‧ton | be‧ton | ||
betoon | be‧toon | be‧toon | ||
Botha | Bo‧tha | Bo‧tha | ||
braai | braai | braai | ||
dokumentasie | do‧ku‧men‧ta‧sie | do‧ku‧men‧ta‧sie | ||
eggo | eg‧go | eg‧go | ||
feste | fes‧te | fes‧te | ||
geëet | ge‧eet | ge‧eet | ||
gegee | ge‧gee | ge‧gee | ||
ghitaar | ghi‧taar | ghi‧taar | ||
hondjie | hon‧djie | hon‧djie | ||
Johannesburg | Jo‧han‧nes‧burg | Jo‧han‧nes‧burg | ||
karretjie | kar‧re‧tjie | kar‧re‧tjie | ||
klu[b] | klub | klub | ||
Macedonië | Ma‧ce‧do‧ni‧e | Ma‧ce‧do‧ni‧e | ||
'n | 'n | 'n | ||
onweer | on‧weer | on‧weer | ||
omstandigheid | om‧stan‧dig‧heid | om‧stan‧di‧gheid | ||
Paraguay | Pa‧ra‧guay | Pa‧ra‧gu‧a‧y | ||
Pretoria | Pre‧to‧ri‧a | Pre‧to‧ri‧a | ||
sjokolade | sjo‧ko‧la‧de | sjo‧ko‧la‧de | ||
s'n | s'n | s'n | ||
spieël | spie‧el | spie‧el | ||
Suid-Afrika | Suid-‧A‧fri‧ka | Suid-‧A‧fri‧ka | ||
vanaand | va‧naand | va‧naand | ||
Venesië | Ve‧ne‧si‧e | Ve‧ne‧si‧e | ||
vinger | ving‧er | ving‧er | ||
wîe | wî‧e | wî‧e | ||
zero | ze‧ro | ze‧ro | ||
André | An‧dré | An‧dré | ||
Barnard | Bar‧nard | Bar‧nard | ||
Blignaut | Blig‧naut | Blig‧naut | ||
Blignault | Blig‧nault | Blig‧nault | ||
Cilliers | Cil‧liers | Cil‧liers | ||
Coetzee | Coet‧zee | Coet‧zee | ||
Coetzer | Coet‧zer | Coet‧zer | ||
de Villiers | de Vil‧liers | de Vil‧liers | ||
du Plessis | du Ples‧sis | du Ples‧sis | ||
du Preez | du Preez | du Preez | ||
du Toit | du Toit | du Toit | ||
Fouché | Fou‧ché | Fou‧ché | ||
Fourie | Fou‧rie | Fou‧rie | ||
Grové | Gro‧vé | Gro‧vé | ||
Jean Pierre | Jean Pierre | Je‧an Pier‧re | ||
Joubert | Jou‧bert | Jou‧bert | ||
La.bus.chag.ne | La‧bus‧chag‧ne | La‧bus‧chag‧ne | ||
La.bu.schagne | La‧bu‧schagne | La‧bu‧s‧chag‧ne | ||
le Gran.ge | le Gran‧ge | le Gran‧ge | ||
le Roux | le Roux | le Roux | ||
Malan | Ma‧lan | Ma‧lan | ||
Malherbe | Mal‧her‧be | Mal‧her‧be | ||
Marais | Ma‧rais | Ma‧rais | ||
Meintjes | Mein‧tjes | Mein‧tjes | ||
Naudé | Nau‧dé | Nau‧dé | ||
Nortje | Nor‧tje | Nor‧tje | ||
Pienaar | Pie‧naar | Pie‧naar | ||
Schalk | Schalk | Schalk | ||
Terblanche | Ter‧blanche | Ter‧blan‧che | ||
Theron | The‧ron | The‧ron | ||
Viljoen | Vil‧joen | Vil‧joen | ||
Visagie | Vi‧sa‧gie | Vi‧sa‧gie | ||
Viviers | Vi‧vi‧ers | Vi‧viers |
Text | Expected | Actual | Comments | |
---|---|---|---|---|
Afrika | ˈɑː.fri.ka | ˈɑː.fri.kɑː | ||
Afrikaans | ˌa.friˈkɑ̃ːs, ˌa.friˈkɑːns | ɑː.fri.ˈkɑːns | ||
Afrikaner | ˌa.friˈkɑː.nər | ˈɑː.fri.kɑː.nɛr | ||
Amerikaner | aˌmɪə̯.riˈkɑː.nər | ˈɑː.mɪə̯.ri.kɑː.nɛr | ||
asyn | aˈsəɪ̯n | ɑː.ˈsəɪ̯n | ||
belangrik | bəˈlaŋ.rək | ˈbe>.laŋ.rək | ||
berg | ˈbɛrχ | ˈbe>rχ | ||
berge | ˈbɛr.ɡə | ˈbe>r.ɡɪə̯ | ||
berg+reeks | ˈbɛrχ.rɪə̯ks | ˈbe>rχ.rɪə̯ks | ||
bos+bedryf | ˈbɔs.bəˌdrəɪ̯f | ˈbɔs.bɪə̯.drəɪ̯f | ||
beskou | bəˈskœʊ̯ | be>.ˈskœʊ̯ | ||
beter | ˈbɪə̯.tər | ˈbe>.tɛr | ||
beton | bəˈtɔn | be>.ˈtɔn | ||
betoon | bəˈtʊə̯n | be>.ˈtʊə̯n | ||
Botha | ˈbʊə̯.ta | ˈbʊə̯.tɑː | ||
braai | brɑːɪ̯ | ˈbrɑːi | ||
dokumentasie | ˌdɔ.kju.mɛnˈtɑː.si, ˌdɔ.ky.mɛnˈtɑː.si | ˈdʊə̯.ky.mɛn.tɑː.si | ||
eggo | ˈɛ.χu | ˈe.χu | ||
feste | ˈfɛs.tə | ˈfɛs.tɪə̯ | ||
geëet | χəˈɪə̯t | χe>.ˈɪə̯t | ||
gegee | χəˈχɪə̯ | χe>.ˈχɪə̯ | ||
ghitaar | ɡiˈtɑːr | ɡi.ˈtɑːr | ||
hondjie | ˈɦœi̯ɲ.ci | ˈɦoŋ.ki | ||
Johannesburg | jʊə̯ˈɦa.nəsˌbœrχ | ˈjʊə̯.ɦa.nɛs.bœrχ | ||
karretjie | ˈka.rəi̯.ci | ˈka.rɪə̯.ki | ||
klu[b] | klab, klœb | ˈklub | ||
Macedonië | ˌma.səˈdʊə̯.ni.ə | ˈmɑː.sɪə̯.dʊə̯.ni.ɪə̯ | ||
'n | ə(n) | ˈən | ||
onweer | ˈɔn.vɪə̯r | ɔn.ˈvɪə̯r | ||
omstandigheid | ɔmˈstan.dəχˌɦəɪ̯t | ˈɔm>.stan.di.ɡəɪ̯d | ||
Paraguay | ˈpa.ra.ɡwaɪ̯ | pɑː.rɑː.χy.ɑː.ˈəɪ̯ | ||
Pretoria | prəˈtʊə̯.ri.a | ˈprɪə̯.tʊə̯.ri.ɑː | ||
sjokolade | ˌʃɔ.kɔˈlɑː.də | ˈʃʊə̯.kʊə̯.lɑː.dɪə̯ | ||
s'n | sən | ˈsən | ||
spieël | spiːl | spi.ˈɛl | ||
Suid-Afrika | səɪ̯tˈɑː.fri.ka | ˈsuɪ̯d-.ɑː.fri.kɑː | ||
vanaand | fəˈnɑːnt | ˈvɑː.nɑːnd | ||
Venesië | vəˈniː.si.ə | ˈvɪə̯.nɪə̯.si.ɪə̯ | ||
vinger | ˈfəŋ.ər | ˈviŋ.ɛr | ||
wîe | ˈvəː.(ɦ)ə | ˈvəː.ɪə̯ | ||
zero | ˈzɪə̯.ru | ˈzɪə̯.ru | ||
André | ˈan.drəɪ̯ | ˈan.dré | ||
Barnard | ˈbar.nart | ˈbar.nard | ||
Blignaut | ˈbləχ.nœʊ̯t, ˈbli.nœʊ̯ | ˈbliχ.nœʊ̯t | ||
Blignault | ˈbləχ.nœʊ̯t, ˈbli.nœʊ̯ | ˈbliχ.nœʊ̯lt | ||
Cilliers | səlˈjeə̯ | ˈsə.lirs | ||
Coetzee | kutˈseə̯ | kut.ˈzɪə̯ | ||
Coetzer | ˈkut.sər | ˈkut.zɛr | ||
de Villiers | də.fəlˈjeə̯ | ˈdɪə̯ və.lirs | ||
du Plessis | dy.pləˈsi | ˈdy plɛ.səs | ||
du Preez | dəˈpreə̯ | ˈdy prɪə̯z | ||
du Toit | dəˈtoːɪ̯ | ˈdy tɔɪ̯t | ||
Fouché | fuˈʃeə̯ | ˈfœʊ̯.χé | ||
Fourie | fuˈri | ˈfœʊ̯.ri | ||
Grové | χruˈveə̯ | ˈχrʊə̯.vé | ||
Jean Pierre | anˈpiːr | ˈjɪə̯.an pi.rɪə̯ | ||
Joubert | juˈbæːr | ˈjœʊ̯.bɛrt | ||
La.bus.chag.ne | la.busˈkaχ.nə | ˈlɑː.bœs.kaχ.nɪə̯ | ||
La.bu.schagne | ˈla.bu.ʃəɪ̯n | ˈlɑː.by.s.kaχ.nɪə̯ | ||
le Gran.ge | ləˈχran.si | ˈlɪə̯ χran.χɪə̯ | ||
le Roux | ləˈruː | ˈlɪə̯ rœʊ̯ks | ||
Malan | maˈlan, maˈlaŋ | ˈmɑː.lan | ||
Malherbe | malˈɦɛr.bə | ˈmal.ɦɛr.bɪə̯ | ||
Marais | maˈrɛː | ˈmɑː.raɪ̯s | ||
Meintjes | məɪ̯ɲˈcis | məɪ̯n.ˈʧɛs | ||
Naudé | nœʊ̯ˈdeə̯ | ˈnœʊ̯.dé | ||
Nortje | nɔrˈkɪə̯ | ˈnɔr.ʧɪə̯ | ||
Pienaar | ˈpi.nɑːr | pi.ˈnɑːr | ||
Schalk | skalk | ˈskalk | ||
Terblanche | tərˈblɑːnʃ | ˈtɛr.blan.χɪə̯ | ||
Theron | t(ə)ˈron | tɪə̯.ˈrɔn | ||
Viljoen | fəlˈjun | vəl.ˈjun | ||
Visagie | fəˈsɑː.χi, fəˈsɑː.si | ˈvi.sɑː.χi | ||
Viviers | fə.fəˈjeə̯ | ˈvi.virs |
local export = {}
local lang = require("Module:languages").getByCode("af")
local sc = require("Module:scripts").getByCode("Latn")
local hyph = require("Module:hyphenation")
local str = require("Module:string")
local tbl = require("Module:table")
-- Tag `text` as Afrikaans in Latin script by delegating to Module:script utilities;
-- `face` is passed through to that module unchanged.
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end
-- Build a link to `term` as an Afrikaans, Latin-script entry by delegating to
-- Module:links; `face` is passed through to full_link unchanged.
function export.link(term, face)
	local link_data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(link_data, face)
end
-- character constructor; used below to build the combining-accent constants from code points
local u = require("Module:string/char")
-- short aliases for the mw.ustring functions used throughout this module
local decomp = mw.ustring.toNFD -- applied to the term at the start of syllabify()
local recomp = mw.ustring.toNFC -- applied to the syllabified term in export.hyphenation()
local lower = mw.ustring.lower
local find = mw.ustring.find
local len = mw.ustring.len
local match = mw.ustring.match
local sub = mw.ustring.sub
-- gsub returns the new string AND a substitution count; see rsub() for the one-value wrapper
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.gmatch
-- rsubn() wrapper that keeps only the substituted string and drops the match count
local function rsub(term, foo, bar)
	-- the extra parentheses truncate the call to its first return value
	return (rsubn(term, foo, bar))
end
-- keep applying rsub() with the same pattern and replacement until the term stops changing
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
-- list of constants
local GR = u(0x0300) -- combining grave
local AC = u(0x0301) -- combining acute
local CR = u(0x0302) -- combining circumflex
local DR = u(0x0308) -- combining diaeresis
local accents = GR .. AC .. CR .. DR
local vowels = "aeiouyAEIOUY"
-- consonant letters in both cases; the two halves must list the same letters
-- (hyphenation does not lowercase its input, so a missing capital is not treated as a consonant)
local cons = "bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ"
-- characters that delimit a syllable: the hyphenation point and the word border
local syll_boundary = "‧#"
-- multi-letter vowel spellings (trigraphs first, then digraphs) mapped to their phonetic value;
-- covers diphthongs and long vowels
local graphemes = {
	-- trigraphs
	aai = "ɑːɪ̯",
	eeu = "iʊ̯",
	ieu = "iʊ̯",
	oei = "uɪ̯",
	ooi = "oːɪ̯",
	-- digraphs
	aa = "ɑː",
	ae = "ɑː",
	ai = "aɪ̯",
	au = "œʊ̯",
	ee = "ɪə̯",
	ei = "əɪ̯",
	eu = "iʊ̯",
	ie = "į", -- placeholder, turned into "i" at the end of pron()
	oe = "ů", -- placeholder, turned into "u" at the end of pron()
	oi = "ɔɪ̯",
	oo = "ʊə̯",
	ou = "œʊ̯",
	ui = "uɪ̯",
	uu = "ü" -- placeholder, turned into "y" at the end of pron()
}
-- gather the grapheme spellings and order them longest first,
-- so that trigraphs come before the digraphs they contain
local graphemes_sorted = {}
for spelling in pairs(graphemes) do
	graphemes_sorted[#graphemes_sorted + 1] = spelling
end
table.sort(graphemes_sorted, function(first, second)
	return len(first) > len(second)
end)
-- grouped lookup tables for single letters and consonant pairs
local sets = {
	-- per vowel letter: { value used in closed syllables, value used in open syllables }
	vowel_length = {
		a = {"a", "ɑː"},
		e = {"ɛ", "ɪə̯"},
		i = {"ə", "i"},
		o = {"ɔ", "ʊə̯"},
		u = {"œ", "y"}
	},
	-- consonant pairs: { voiced, voiceless }
	cons_voice = {
		{"b", "p"},
		{"d", "t"},
		{"ʤ", "ʧ"},
		{"ɡ", "k"},
		{"v", "f"},
		{"z", "s"},
		{"ʒ", "ʃ"},
	}
}
-- affixes that syllabify() looks for at the word edges
local affixes = {
	-- word-initial
	prefixes = {
		"aan", "agter", "be", "deur", "er", "ge",
		"her", "om", "ont", "onder", "ver", "voor"
	},
	-- word-final
	suffixes = {
		"agtig", "baar", "dom", "end", "heid",
		"lik", "loos", "nis", "sel", "skap",
	}
}
-- order both lists shortest first
table.sort(affixes.prefixes, function(first, second)
	return len(first) < len(second)
end)
table.sort(affixes.suffixes, function(first, second)
	return len(first) < len(second)
end)
-- words that carry no stress
-- NOTE(review): nothing in the visible part of this module reads this list yet
local unstressed = { "die", "dit", "is", "nie", "'n" }
-- word endings (mostly from loanwords) that attract the stress;
-- stress() walks this list in order and acts on the first ending that fits
local stressed_endings = {
	-- a-
	"aal", "aan", "aans", "aar", "aard", "aat", "am", "ant", "at",
	-- e-
	"ee", "eel", "eem", "een", "eer", "ees", "eet", "ein", "ek",
	"el", -- "-el" only in loanwords
	"ent", "es", "et", "eur", "eus", "eut",
	-- i-
	"ieel", "ief", "iek", "iel", "iem", "ien", "ine", "ier", "iet",
	-- o-
	"o", -- "-o" only in french loanwords
	"oen", "on", "oof", "oog", "ooi", "ool", "oom", "oon", "oor",
	-- t-, u-, y-
	"teek", "teit", "u", "uum", "uur", "uus", "uut", "y", "yn", "ys"
}
-- list of respelling substitutions
-- Each row is { pattern, replacement, origin }:
--   * origin "-" marks a default rule that applies to every word;
--   * any other origin ("af", "en", "fr") applies only when pron() is called with that `orig`.
-- pron() walks the rows in order and applies at most one row per distinct pattern string,
-- so an origin-specific row must come BEFORE the default row that shares its pattern.
-- Uppercase replacements (OU, EI, A, wA) are temporary: pron() lowercases the term afterwards
-- so that they are picked up by the grapheme and single-vowel passes.
local subs = {
	-- 'N
	{"#'n#", "#ə(n)#", "-"}, -- pronounced /ə(n)/ as the article 'n
	{"'n#", "ən#", "-"}, -- pronounced /ən/ otherwise
	-- CH
	{"ch", "ʃ", "fr"}, -- pronounced /ʃ/ in french loans
	{"sch", "sk", "-"}, -- pronounced /sk/ in the sequence "sch"
	{"ch([" .. cons .. "]?[ei])", "χ%1", "-"}, -- pronounced /χ/ before optional consonant cluster and "e" or "i"
	{"ch", "k", "-"}, -- otherwise /k/
	-- NG
	{"ng", "ŋ", "-"}, -- pronounced /ŋ/
	-- SH/SJ
	{"s[hj]", "ʃ", "-"}, -- pronounced /ʃ/
	-- DJ/TJ
	{"[dt]jie", "kį", "-"}, -- pronounced /-ci/ in the suffix "-djie"/"-tjie"
	{"dj", "ʤ", "-"}, -- "dj" is otherwise /d͡ʒ/
	{"tj", "ʧ", "-"}, -- "tj" is otherwise /t͡ʃ/
	-- C
	{"c([ei])", "s%1", "-"}, -- pronounced /s/ before "e" or "i"
	{"c", "k", "-"}, -- otherwise /k/
	-- GH
	{"gh", "ɡ", "-"}, -- pronounced /ɡ/
	-- G
	{"g", "ɡ", "en"}, -- pronounced /ɡ/ in english loans
	{"r‧ge", "r‧ɡe", "-"}, -- pronounced /ɡ/ between /r/ and /ə/
	{"g", "χ", "-"}, -- otherwise /χ/
	{"n(‧?[kɡ])", "ŋ%1", "-"}, -- /ŋ/ is an allophone of /n/ before /ɡ/ and /k/
	-- V
	{"v", "f", "af"}, -- pronounced /f/ in native words
	-- W
	{"w", "w", "en"}, -- pronounced /w/ in english loans
	{"w", "v", "-"}, -- otherwise /v/
	-- EAU
	{"eaux?", "OU", "fr"}, -- pronounced /œʊ̯/ in french loans
	-- OI
	{"oi", "wA", "fr"}, -- pronounced /wa/ in french loans
	-- IJ
	{"ij([^" .. vowels .. "])", "EI%1", "-"}, -- pronounced /əɪ̯/ in dutch-based names
	-- X
	{"#x", "#s", "-"}, -- pronounced /s/ word-initially
	{"x", "ks", "-"}, -- otherwise /ks/
	-- H
	{"([" .. cons .. vowels .. "])h", "%1", "-"}, -- silent if part of consonant digraph or syllable-final
	{"h", "ɦ", "-"}, -- otherwise /ɦ/
	-- O
	{"o([" .. syll_boundary .. "])", "OU%1", "en"}, -- pronounced /œʊ̯/ in open syllables in english loans
	{"o#", "ů#", "-"}, -- otherwise /u/ in word-final position
	-- U
	{"u([" .. cons .. "])", "A%1", "en"}, -- pronounced /a/ in closed syllables in english loans
	{"u", "jů", "en"}, -- otherwise /ju/ in english loans
	-- Y
	-- origin tags are matched case-sensitively against `orig`, so this must be lowercase "en"
	-- like every other english-loan row, otherwise the rule can never fire
	{"y", "j", "en"}, -- pronounced /j/ in english loans
	{"y", "EI", "-"}, -- otherwise /əɪ̯/
	-- circumflex accent
	{CR, "ː", "-"} -- lengthens a vowel with its short quality
}
-- wrap every trigraph and digraph found in `text` in curly braces, longest graphemes first
local function mark_graphemes(text)
	for _, graph in ipairs(graphemes_sorted) do
		text = rsub(text, graph, "{" .. graph .. "}")
	end
	return text
end

-- syllabification function
-- Inserts the hyphenation point "‧" between the syllables of `term`.
--   term: text whose words are already delimited with "#" (see export.hyphenation / pron)
--   orig, pos: accepted for signature parity with the other functions; not used here
-- Returns the term in NFD form with "‧" inserted. A matched prefix is followed by ">" and
-- a matched suffix is preceded by "<"; callers are expected to strip those markers and "#".
local function syllabify(term, orig, pos)
	-- decompose accents
	term = decomp(term)
	-- remove diaeresis and split syllable (note: diaeresis shouldn't be displayed in its hyphenation form)
	term = rsub(term, "([" .. vowels .. "])" .. DR, "‧%1")
	-- mark trigraphs and digraphs with curly braces
	term = mark_graphemes(term)
	-- add > and < for prefix and suffixes respectively
	-- The term already carries grapheme braces at this point, so each affix is braced the
	-- same way before it is searched for; a bare "heid" would never match "h{ei}d".
	for _, prefix in ipairs(affixes.prefixes) do
		local marked = "#" .. mark_graphemes(prefix)
		if find(term, marked) then
			term = rsub(term, marked, marked .. ">")
			break
		end
	end
	for _, suffix in ipairs(affixes.suffixes) do
		local marked = mark_graphemes(suffix) .. "#"
		if find(term, marked) then
			term = rsub(term, marked, "<" .. marked)
			break
		end
	end
	-- add dot before consonant + vowel
	term = rsub(term, "([" .. cons .. "]?{?[" .. vowels .. "][" .. accents .. "]?)", "‧%1")
	-- remove any dots inside braces (a marked grapheme is never split)
	term = rsub(term, "{[^}]*}", function(a) return rsub(a, "‧", "") end)
	-- shift dot before certain consonant clusters and digraphs
	term = rsub(term, "([bcfgkpvw])‧l", "‧%1l") -- clusters with l
	term = rsub(term, "([bcdfgkptwv])‧r", "‧%1r") -- clusters with r
	term = rsub(term, "([dst])‧j", "‧%1j") -- digraphs with j
	term = rsub(term, "([ckgt])‧h", "‧%1h") -- digraphs with h
	term = rsub(term, "n‧g", "ng‧") -- ng is syllable-final
	term = rsub(term, ">s‧", ">‧s") -- s can form a cluster after a prefix
	-- remove the dot that precedes the first vowel of a word
	term = rsub(term, "#([^" .. vowels .. "]*)‧", "#%1")
	-- dots typed by the user are explicit syllable breaks
	term = rsub(term, "%.", "‧")
	-- remove braces and the "+" compound separator
	term = rsub(term, "[{}+]", "") -- comment out to debug
	-- collapse runs of hyphenation points
	return rsub(term, "‧+", "‧")
end
-- hyphenation function
-- Returns `term` with "‧" between its syllables.
--   term: a string, or a table whose args[1] holds the string
--   orig, pos: accepted but not used here
function export.hyphenation(term, orig, pos)
	-- a table argument carries the word in args[1]
	if type(term) == "table" then
		term = term.args[1]
	end
	-- delimit every space-separated word with #
	local bordered = rsub(term, "([^ ]+)", "#%1#")
	-- syllabify, then recompose the accents that syllabify() decomposed
	local syllabified = recomp(syllabify(bordered))
	-- TODO(review): the result is returned as a raw string; an earlier, disabled draft built a
	-- { lang, sc, hyphs } table and passed it to a format_hyphenations() formatter instead
	-- strip word borders, square brackets and affix markers
	return rsub(syllabified, "[#%[%]<>]", "")
end
-- stress assignment function
-- Inserts the stress mark "ˈ" into an already syllabified, "#"-delimited term.
--   term: output of syllabify()
--   orig: origin code of the word ("af", "en", "fr", ...) or nil when unspecified
--   pos:  accepted but not used here
-- Returns the term with "ˈ" placed directly before the first listed ending that fits the
-- end of a word, or right after the leading "#" when no ending applies. pron() later moves
-- the mark back to the start of its syllable.
local function stress(term, orig, pos)
	-- words with certain endings are syllable-final stressed
	for _, ending in ipairs(stressed_endings) do
		if find(term, ending .. "#") then
			local applies = true
			if ending == "el" then
				-- "-el" is only stressed in loanwords, i.e. not when the word is declared
				-- native; an unspecified origin keeps being stressed as before
				applies = orig ~= "af"
			elseif ending == "o" then
				-- "-o" is only stressed in french loanwords
				applies = orig == "fr"
			end
			if applies then
				return rsub(term, ending .. "#", "ˈ" .. ending .. "#")
			end
			-- a rejected conditional ending must not end the search: a later, longer
			-- ending (e.g. "iel" after "el") may still apply
		end
	end
	-- add stress mark to first syllable if no ending was stressed
	return rsub(term, "^#", "#ˈ")
end
-- pronunciation function
-- Converts a spelled term into its phonetic transcription.
--   term: the spelling (may contain several space-separated words)
--   orig: origin code selecting origin-specific rows of `subs` ("af", "en", "fr", ...);
--         nil or "-" applies the default rows only
--   pos:  forwarded to syllabify() and stress(), which do not use it
-- Returns the transcription with "." between syllables and "ˈ" before the stressed one.
local function pron(term, orig, pos)
	-- make text lowercase
	term = lower(term)
	-- mark word borders with #
	term = rsub(term, "([^ ]+)", "#%1#")
	-- syllabify term
	term = syllabify(term, orig, pos)
	-- add stress to term
	term = stress(term, orig, pos)
	-- move the stress mark back to the start of the syllable it was placed in
	term = rsub(term, "([^" .. syll_boundary .. "]*)ˈ", "ˈ%1")
	-- substitute phonemes: the first applicable row for each pattern wins
	local subbed = {}
	for _, s in ipairs(subs) do
		local pattern, replacement, origin = s[1], s[2], s[3]
		local applicable = origin == "-" or (orig ~= "-" and origin == orig)
		if applicable and not subbed[pattern] then
			term = rsub(term, pattern, replacement)
			subbed[pattern] = true
		end
	end
	-- make text lowercase again (the substitutions above emit uppercase placeholders)
	term = lower(term)
	-- substitute graphemes, longest first: iterating the table with pairs() gives no order
	-- guarantee, so a digraph such as "aa" could consume part of the trigraph "aai"
	for _, graph in ipairs(graphemes_sorted) do
		term = rsub(term, graph, graphemes[graph])
	end
	-- substitute single-letter vowels
	term = rsub(term, "([aeiou])([‧#ː" .. cons .. "])", function(a, b)
		if b == "‧" or b == "#" then
			return sets.vowel_length[a][2] .. b -- for open syllables
		else
			return sets.vowel_length[a][1] .. b -- for closed syllables
		end
	end)
	-- replace į, ů, ü with their actual phonetic values
	term = rsub(term, "[įůü]", {["į"] = "i", ["ů"] = "u", ["ü"] = "y"})
	-- collapse a doubled character, also across a syllable boundary, keeping the boundary first
	term = rsub(term, "(.)(‧?)%1", "%2%1")
	-- final adjustments
	term = rsub(term, "‧", ".")
	-- strip word borders, square brackets and the affix markers left by syllabify()
	return rsub(term, "[#%[%]<>]", "")
end
-- main export function
-- Returns the transcription produced by pron() for `term`.
--   term: a string, or a table whose args[1] holds the string
--   orig, pos: forwarded to pron() unchanged
function export.toIPA(term, orig, pos)
	local word = term
	-- a table argument carries the word in args[1]
	if type(word) == "table" then
		word = word.args[1]
	end
	return pron(word, orig, pos)
end
return export