Module:User:AmazingJus/af

From Wiktionary, the free dictionary
Jump to navigation Jump to search

49 of 98 tests failed. (refresh)

TextExpectedActual
test_hyphen:
PassedAfrikaA‧fri‧kaA‧fri‧ka
PassedAfrikaansA‧fri‧kaansA‧fri‧kaans
PassedAfrikanerA‧fri‧ka‧nerA‧fri‧ka‧ner
PassedAmerikanerA‧me‧ri‧ka‧nerA‧me‧ri‧ka‧ner
PassedAndréAn‧dréAn‧dré
Passedasyna‧syna‧syn
Passedbelangrikbe‧lang‧rikbe‧lang‧rik
Passedbergbergberg
Passedbergeber‧geber‧ge
Passedberg+reeksberg‧reeksberg‧reeks
Passedbos+bedryfbos‧be‧dryfbos‧be‧dryf
Passedbeskoube‧skoube‧skou
Passedbeterbe‧terbe‧ter
Passedbetonbe‧tonbe‧ton
Passedbetoonbe‧toonbe‧toon
PassedBothaBo‧thaBo‧tha
Passedbraaibraaibraai
PassedCoetzeeCoet‧zeeCoet‧zee
PassedCoetzerCoet‧zerCoet‧zer
Passeddokumentasiedo‧ku‧men‧ta‧siedo‧ku‧men‧ta‧sie
Passeddu Plessisdu Ples‧sisdu Ples‧sis
Passedeggoeg‧goeg‧go
Passedfestefes‧tefes‧te
Passedgeëetge‧eetge‧eet
Passedgegeege‧geege‧gee
Passedghitaarghi‧taarghi‧taar
Passedhondjiehon‧djiehon‧djie
FailedJean PierreJean PierreJe‧an Pier‧re
PassedJohannesburgJo‧han‧nes‧burgJo‧han‧nes‧burg
Passedkarretjiekar‧re‧tjiekar‧re‧tjie
Passedklu[b]klubklub
Passedle Gran.gele Gran‧gele Gran‧ge
PassedMacedoniëMa‧ce‧do‧ni‧eMa‧ce‧do‧ni‧e
PassedNortjeNor‧tjeNor‧tje
Passed'n'n'n
Passedonweeron‧weeron‧weer
Failedomstandigheidom‧stan‧dig‧heidom‧stan‧di‧gheid
FailedParaguayPa‧ra‧guayPa‧ra‧gu‧a‧y
PassedPretoriaPre‧to‧ri‧aPre‧to‧ri‧a
PassedSchalkSchalkSchalk
Passedsjokoladesjo‧ko‧la‧desjo‧ko‧la‧de
Passeds'ns'ns'n
Passedspieëlspie‧elspie‧el
PassedSuid-AfrikaSuid-‧A‧fri‧kaSuid-‧A‧fri‧ka
Passedvanaandva‧naandva‧naand
PassedVenesiëVe‧ne‧si‧eVe‧ne‧si‧e
Passedvingerving‧erving‧er
Passedwîewî‧ewî‧e
Passedzeroze‧roze‧ro
TextExpectedActual
test_pron:
FailedAfrikaˈɑː.fri.kaɑː.fri.kɑː
FailedAfrikaansˌa.friˈkɑ̃ːs, ˌa.friˈkɑːnsɑː.fri.kɑːns
FailedAfrikanerˌa.friˈkɑː.nərɑː.fri.kɑː.nɛr
FailedAmerikaneraˌmɪə̯.riˈkɑː.nərɑː.mɪə̯.ri.kɑː.nɛr
FailedAndréˈan.drəɪ̯an.dré
Failedasynaˈsəɪ̯nɑː.səɪ̯n
Failedbelangrikbəˈlaŋ.rəkbe>.laŋ.rək
Failedbergˈbɛrχbe>rχ
Failedbergeˈbɛr.ɡəbe>r.ɡɪə̯
Failedberg+reeksˈbɛrχ.rɪə̯ksbe>rχ.rɪə̯ks
Failedbos+bedryfˈbɔs.bəˌdrəɪ̯fbɔs.bɪə̯.drəɪ̯f
Failedbeskoubəˈskœʊ̯be>.skœʊ̯
Failedbeterˈbɪə̯.tərbe>.tɛr
Failedbetonbəˈtɔnbe>.tɔn
Failedbetoonbəˈtʊə̯nbe>.tʊə̯n
FailedBothaˈbʊə̯.tabʊə̯.tɑː
Failedbraaibrɑːɪ̯brɑːi
FailedCoetzeekutˈseə̯kut.zɪə̯
FailedCoetzerˈkut.sərkut.zɛr
Faileddokumentasieˌdɔ.kju.mɛnˈtɑː.si, ˌdɔ.ky.mɛnˈtɑː.sidʊə̯.ky.mɛn.tɑː.si
Faileddu Plessisdy.pləˈsidy plɛ.səs
Failedeggoˈɛ.χue.χu
Failedfesteˈfɛs.təfɛs.tɪə̯
Failedgeëetχəˈɪə̯tχe>.ɪə̯t
Failedgegeeχəˈχɪə̯χe>.χɪə̯
Failedghitaarɡiˈtɑːrɡi.tɑːr
Failedhondjieˈɦœi̯ɲ.ciɦoŋ.ki
FailedJean Pierreanˈpiːrjɪə̯.an pi.rɪə̯
FailedJohannesburgjʊə̯ˈɦa.nəsˌbœrχjʊə̯.ɦa.nɛs.bœrχ
Failedkarretjieˈka.rəi̯.cika.rɪə̯.ki
Failedklu[b]klab, klœbklub
Failedle Gran.geləˈχran.silɪə̯ χran.χɪə̯
FailedMacedoniëˌma.səˈdʊə̯.ni.əmɑː.sɪə̯.dʊə̯.ni.ɪə̯
FailedNortjenɔrˈkɪə̯nɔr.ʧɪə̯
Passed'nə(n)ə(n)
Failedonweerˈɔn.vɪə̯rɔn.vɪə̯r
Failedomstandigheidɔmˈstan.dəχˌɦəɪ̯tɔm>.stan.di.ɡəɪ̯d
FailedParaguayˈpa.ra.ɡwaɪ̯pɑː.rɑː.χy.ɑː.əɪ̯
FailedPretoriaprəˈtʊə̯.ri.aprɪə̯.tʊə̯.ri.ɑː
PassedSchalkskalkskalk
Failedsjokoladeˌʃɔ.kɔˈlɑː.dəsjʊə̯.kʊə̯.lɑː.dɪə̯
Passeds'nsənsən
Failedspieëlspiːlspi.ɛl
FailedSuid-Afrikasəɪ̯tˈɑː.fri.kasuɪ̯d-.ɑː.fri.kɑː
Failedvanaandfəˈnɑːntvɑː.nɑːnd
FailedVenesiëvəˈniː.si.əvɪə̯.nɪə̯.si.ɪə̯
Failedvingerˈfəŋ.ərviŋ.ɛr
Failedwîeˈvəː.(ɦ)əvəː.ɪə̯
Failedzeroˈzɪə̯.ruzɪə̯.ru

local export = {}

local lang = require("Module:languages").getByCode("af")
local sc = require("Module:scripts").getByCode("Latn")
local hyph = require("Module:hyphenation")
local str = require("Module:string")
local tbl = require("Module:table")

function export.tag_text(text, face)
	return require("Module:script utilities").tag_text(text, lang, sc, face)
end

function export.link(term, face)
	return require("Module:links").full_link( { term = term, lang = lang, sc = sc }, face )
end

local u = require("Module:string/char")
local decomp = mw.ustring.toNFD
local recomp = mw.ustring.toNFC
local lower = mw.ustring.lower

local find = mw.ustring.find
local len = mw.ustring.len
local match = mw.ustring.match
local sub = mw.ustring.sub

local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.gmatch

-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
	local retval = rsubn(term, foo, bar)
	return retval
end

-- apply rsub() repeatedly until no change
local function rsub_repeatedly(term, foo, bar)
	while true do
		local new_term = rsub(term, foo, bar)
		if new_term == term then
			return term
		end
		term = new_term
	end
end

-- list of constants
local GR = u(0x0300) -- grave
local AC = u(0x0301) -- acute
local CR = u(0x0302) -- circumflex
local DR = u(0x0308) -- diaresis
local accents = GR .. AC .. CR .. DR
local vowels = "aeiouyAEIOUY"
local cons = "bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQSTVWXZ"
local syll_boundary = "‧#"

-- list of valid trigraphs and digraphs, including diphthongs and long vowels
local graphemes = {
	["aai"] = "ɑːɪ̯",
	["eeu"] = "iʊ̯",
	["ieu"] = "iʊ̯",
	["oei"] = "uɪ̯",
	["ooi"] = "oːɪ̯",
	["aa"] = "ɑː",
	["ae"] = "ɑː",
	["ai"] = "aɪ̯",
	["au"] = "œʊ̯",
	["ee"] = "ɪə̯",
	["ei"] = "əɪ̯",
	["eu"] = "iʊ̯",
	["ie"] = "į", -- temporary value
	["oe"] = "ů", -- temporary value
	["oi"] = "ɔɪ̯",
	["oo"] = "ʊə̯",
	["ou"] = "œʊ̯",
	["ui"] = "uɪ̯",
	["uu"] = "ü"
}
-- sort trigraphs and digraphs in descending order
local graphemes_sorted = {}
for k, _ in pairs(graphemes) do
	table.insert(graphemes_sorted, k)
end
table.sort(graphemes_sorted, function(a, b) return len(a) > len(b) end)

-- list of various grapheme sets
local sets = {
	["vowel_length"] = { -- long-short vowels
		["a"] = {"a", "ɑː"},
		["e"] = {"ɛ", "ɪə̯"},
		["i"] = {"ə", "i"},
		["o"] = {"ɔ", "ʊə̯"},
		["u"] = {"œ", "y"}
	},
	["cons_voice"] = { -- voiced/voiceless consonants
		{"b", "p"},
		{"d", "t"},
		{"ʤ", "ʧ"},
		{"ɡ", "k"},
		{"v", "f"},
		{"z", "s"},
		{"ʒ", "ʃ"},
	}
}

-- list of defined affixes
local affixes = {
	["prefixes"] = { -- prefixes
		"aan",
		"agter",
		"be",
		"deur",
		"er",
		"ge",
		"her",
		"om",
		"ont",
		"onder",
		"ver",
		"voor"
	},
	["suffixes"] = { -- suffixes
		"agtig",
		"baar",
		"dom",
		"end",
		"heid",
		"lik",
		"loos",
		"nis",
		"sel",
		"skap",
	}
}
-- sort prefixes and suffixes in ascending order
table.sort(affixes.prefixes, function(a, b) return len(a) < len(b) end)
table.sort(affixes.suffixes, function(a, b) return len(a) < len(b) end)

-- list of unstressed words
local unstressed = {
	"die",
	"dit",
	"is",
	"nie",
	"'n"
}

-- list of respelling substitutions
local subs = {
	-- 'N
	{"#'n#", "#ə(n)#", "-"}, -- pronounced /ə(n)/ as the article 'n
	{"'n#", "ən#", "-"}, -- pronounced /ən/ otherwise

	-- CH
	{"ch", "ʃ", "fr"}, -- pronounced /ʃ/ in french loans
	{"ch([" .. cons .. "]?[ei])", "χ%1", "-"}, -- pronounced /χ/ before optional consonant cluster and "e" or "i"
	{"ch", "k", "-"}, -- otherwise /k/

	-- NG
	{"ng", "ŋ", "-"}, -- pronounced /ŋ/

	-- SH
	{"sh", "ʃ", "-"}, -- pronounced /ʃ/

	-- DJ/TJ
	{"[dt]jie", "kį", "-"}, -- suffix "djie"/"tjie" is pronounced /-ci/
	{"dj", "ʤ", "-"}, -- "dj" is otherwise /d͡ʒ/
	{"tj", "ʧ", "-"}, -- "tj" is otherwise /t͡ʃ/

	-- GH
	{"gh", "ɡ", "-"}, -- pronounced /ɡ/

	-- C
	{"c([ei])", "s%1", "-"}, -- pronounced /s/ before "e" or "i"
	{"c", "k", "-"}, -- otherwise /k/

	-- G
	{"g", "ɡ", "en"}, -- pronounced /ɡ/ in english loans
	{"r‧ge", "r‧ɡe", "-"}, -- pronounced /ɡ/ between /r/ and /ə/
	{"g", "χ", "-"}, -- otherwise /χ/
	{"n(‧?[kɡ])", "ŋ%1", "-"}, -- /ŋ/ is an allophone of /n/ before /ɡ/ and /k/

	-- V
	{"v", "f", "af"}, -- pronounced /f/ in native words

	-- W
	{"w", "w", "en"}, -- pronounced /w/ in english loans
	{"w", "v", "-"}, -- otherwise /v/

	-- EAU
	{"eaux?", "OU", "fr"}, -- pronounced /œʊ̯/ in French loans

	-- OI
	{"oi", "wA", "fr"}, -- pronounced /wa/ in French loans

	-- X
	{"#x", "#s", "-"}, -- pronounced /s/ word-initially
	{"x", "ks", "-"}, -- otherwise /ks/

	-- H
	{"([" .. cons .. vowels .. "])h", "%1", "-"}, -- silent if part of consonant digraph or syllable-final
	{"h", "ɦ", "-"}, -- otherwise /ɦ/

	-- O
	{"o([" .. syll_boundary .. "])", "OU%1", "en"}, -- pronounced /œʊ̯/ in open syllables in english loans
	{"o#", "ů#", "-"}, -- otherwise /u/ in word-final position

	-- U
	{"u([" .. cons .. "])", "A%1", "en"}, -- pronounced /a/ in closed syllables in english loans
	{"u", "jů", "en"}, -- otherwise /ju/ in english loans

	-- Y
	{"y", "EI", "-"}, -- otherwise /əɪ̯/

	-- circumflex accent
	{CR, "ː", "-"} -- lengthens a vowel with its short quality
}

-- syllabify words
local function syllabify(term)
	-- decompose accents
	term = decomp(term)

	-- remove diaresis and split syllable (note: diaresis shouldn't be displayed in its hyphenation form)
	term = rsub(term, "([" .. vowels .. "])" .. DR, "‧%1")

	-- mark trigraphs and digraphs with angle brackets
	for _, graph in ipairs(graphemes_sorted) do
		term = rsub(term, graph, "{" .. graph .. "}")
	end

	-- add > and < for prefix and suffixes respectively
	for _, prefix in ipairs(affixes.prefixes) do
		if find(term, "#" .. prefix) then
			term = rsub(term, "#" .. prefix, "#" .. prefix .. ">")
			break
		end
	end
	for _, suffix in ipairs(affixes.suffixes) do
		if find(term, suffix .. "#") then
			term = rsub(term, suffix .. "#", "<" .. suffix .. "#")
			break
		end
	end

	-- add dot before consonant + vowel
	term = rsub(term, "([" .. cons .. "]?{?)([" .. vowels .. "][" .. accents .. "]?)", "‧%1%2")

	-- remove any dots inside brackets
	term = rsub(term, "{[^}]*}", function(a) return rsub(a, "‧", "") end)

	-- shift dot before certain consonant clusters and digraphs
	term = rsub(term, "([bcfgkpvw])‧l", "‧%1l") -- clusters with l
	term = rsub(term, "([bcdfgkptwv])‧r", "‧%1r") -- clusters with r
	term = rsub(term, "([dst])‧j", "‧%1j") -- digraphs with j
	term = rsub(term, "([ckgt])‧h", "‧%1h") -- digraphs with h
	term = rsub(term, "n‧g", "ng‧") -- ng is syllable-final
	term = rsub(term, ">s‧", ">‧s") -- s can form a cluster after a prefix

	-- remove leading dots and brackets
	term = rsub(term, "#([^" .. vowels .. "]*)‧", "#%1")
	term = rsub(term, "%.", "‧")
	term = rsub(term, "[{}+]", "") -- comment out to debug
	term = rsub_repeatedly(term, "‧‧", "‧")

	return term
end

-- hyphenation function
function export.hyphenation(term)
	-- get user input as table
	if type(term) == "table" then
		term = term.args[1]
	end

	-- mark all word borders
	term = rsub(term, "([^ ]+)", "#%1#")

	-- format hyphenation
	-- local data = { lang = lang, sc = sc, hyphs = {{hyph = rsub(syllabify(term), "[#%[%]<>]", ""), "%.")}} }

	-- return hyphen.format_hyphenations(data)
	return rsub(recomp(syllabify(term)), "[#%[%]<>]", "")
end

-- pronunciation function
local function pron(term, ety)
	-- make text lowercase
	term = lower(term)

	-- mark word borders with #
	term = rsub(term, "([^ ]+)", "#%1#")

	-- syllabify term
	term = syllabify(term)

	-- substitute phonemes
	local subbed = {}
	for _, s in ipairs(subs) do
		if not subbed[s[1]] then
			if ety ~= "-" and s[3] == ety then
				term = rsub(term, s[1], s[2])
				subbed[s[1]] = true
			elseif s[3] == "-" then
				term = rsub(term, s[1], s[2])
				subbed[s[1]] = true
			end
		end
	end

	-- make text lowercase again
	term = lower(term)

	-- substitute graphemes
	for graph, phoneme in pairs(graphemes) do
		term = rsub(term, graph, phoneme)
	end

	-- substitute single-letter vowels
	term = rsub(term, "([aeiou])([‧#ː" .. cons .. "])", function(a, b)
		if match("[‧#]", b) then
			return sets.vowel_length[a][2] .. b -- for open syllables
		else
			return sets.vowel_length[a][1] .. b -- for closed syllables
		end
	end)

	-- replace į, ů, ü with their actual phonetic values
	term = rsub(term, "[įůü]", {["į"] = "i", ["ů"] = "u", ["ü"] = "y"})

	-- remove double consonants
	term = rsub(term, "(.)(‧?)%1", "%2%1")

	-- final adjustments
	term = rsub(term, "‧", ".")
	return rsub(term, "[#%[%]]", "")
end

-- main export function
function export.toIPA(term, ety)
	-- get user input as table
	if type(term) == "table" then
		term = term.args[1]
	end

	return pron(term, ety)
end

return export