Module:bg-pronunciation: difference between revisions

From Wiktionary, the free dictionary
Jump to navigation Jump to search
Content deleted Content added
m Changed protection settings for "Module:bg-pronunciation" ([Edit=Allow only autopatrollers] (indefinite) [Move=Allow only autopatrollers] (indefinite))
changes from User:Kiril kovachev (lots of fixes, see Module talk:bg-pronunciation)
Line 1: Line 1:
local export = {}
local export = {}


local substring = mw.ustring.sub
local rsubn = mw.ustring.gsub
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local U = mw.ustring.char
local U = mw.ustring.char
local lang = require("Module:languages").getByCode("bg")
local script = require("Module:scripts").getByCode("Cyrl")


local GRAVE = U(0x300)
local GRAVE = U(0x300)
Line 12: Line 16:
local FRONTED = U(0x31F)
local FRONTED = U(0x31F)
local DOTUNDER = U(0x323)
local DOTUNDER = U(0x323)
local vowels = "aæɐəɤeɛioɔuʊʉ"
local HYPH = U(0x2027)
local vowels = "aɤɔuɛiɐo"
local vowels_c = "[" .. vowels .. "]"
local vowels_c = "[" .. vowels .. "]"
local non_vowels_c = "[^" .. vowels .. "]"
local non_vowels_c = "[^" .. vowels .. "]"
local cons = "bvɡdʒzjklmnprstfxʃɣʲ" .. TIE
local cons = "bvɡdʒzjklmnprstfxʃɣʲ" .. TIE
local cons_c = "[" .. cons .. "]"
local cons_c = "[" .. cons .. "]"
local hcons_c = "[бвгджзйклмнпрстфхшщьчц#БВГДЖЗЙКЛМНПРСТФХШЩЬЧЦ=]"
local hvowels_c = "[аъоуеияѝюАЪОУЕИЯЍЮ]"
local accents = PRIMARY .. SECONDARY
local accents = PRIMARY .. SECONDARY
local accents_c = "[" .. accents .. "]"
local accents_c = "[" .. accents .. "]"
Line 180: Line 187:
-------------------- Vowel reduction (in unstressed syllables) ---------------
-------------------- Vowel reduction (in unstressed syllables) ---------------
local function reduce_vowel(vowel)
local function reduce_vowel(vowel)
return rsub(vowel, "[aɔɤu]", { ["a"] = "ə", ["ɔ"] = "o", ["ɤ"] = "ə", ["u"] = "ʊ" })
return rsub(vowel, "[aɔɤu]", { ["a"] = "ɐ", ["ɔ"] = "o", ["ɤ"] = "ɐ", ["u"] = "o" })
end
end


-- FIXME: This needs to be rewritten entirely and moved above stress movement.
-- FIXME: This needs to be rewritten entirely and moved above stress movement.
-- NOTE: This rule's removal may be a solution for the wrongly-reduced vowels seen in e.g. жар-птица, /ʒɐr-.../
-- /a/ directly before the stress is [ɐ].
term = rsub(term, "a(" .. non_vowels_c .. "*" .. accents_c .. ")", "ɐ%1")
term = rsub(term, "a(" .. non_vowels_c .. "*" .. accents_c .. ")", "ɐ%1")
-- Reduce all vowels before the stress, except if the word has no accent at all. (FIXME: This is presumably
-- Reduce all vowels before the stress, except if the word has no accent at all. (FIXME: This is presumably
-- intended for single-syllable words without accents, but if the word is multisyllabic without accents,
-- intended for single-syllable words without accents, but if the word is multisyllabic without accents,
-- presumably all vowels should be reduced.)
-- presumably all vowels should be reduced.)

term = rsub(term, "(#[^#" .. accents .. "]*)(.)", function(a, b)
term = rsub(term, "(#[^#" .. accents .. "]*)(.)", function(a, b)
if b == "#" then
if b == "#" then
Line 200: Line 208:
return a .. reduce_vowel(b)
return a .. reduce_vowel(b)
end)
end)
-- /u/ directly before the stress is [u] not [ʊ]. (FIXME: Correct?)
term = rsub(term, "ʊ(" .. non_vowels_c .. "*" .. accents_c .. ")", "u%1")


-------------------- Vowel assimilation to adjacent consonants (fronting/raising) ---------------
-------------------- Vowel assimilation to adjacent consonants (fronting/raising) ---------------
term = rsub_repeatedly(term, "([ʲj])[aɐə](" .. non_vowels_c .. "-[ʲj])", "%1æ%2")
term = rsub_repeatedly(term, "([ʲj])u(" .. non_vowels_c .. "-[ʲj])", "%1ʉ%2")
term = rsub(term, "([ʃʒʲj])([aouɤ])", "%1%2" .. FRONTED)
term = rsub(term, "([ʃʒʲj])([aouɤ])", "%1%2" .. FRONTED)
term = rsub(term, "([ʃʒ])ɛ", "%1e")

-- Palatalisation
term = rsub(term, "([kɡxl])([ieɛ])", "%1ʲ%2")


-- Hard l
-- Hard l
term = rsub_repeatedly(term, "l([^ʲ])", "ɫ%1")
term = rsub_repeatedly(term, "l([^ʲɛi])", "ɫ%1")


-- Voicing assimilation
-- Voicing assimilation
Line 228: Line 228:
-- Reduce consonant clusters
-- Reduce consonant clusters
term = rsub(term, "([szʃʒ])[td](" .. accents_c .. "?)([tdknml])", "%2%1%3")
term = rsub(term, "([szʃʒ])[td](" .. accents_c .. "?)([tdknml])", "%2%1%3")
term = rsub(term, "([sʃ])t#", "%1(t)#")

-- ijC -> iːC, ij# -> iː#
term = rsub(term, "ij(" .. non_vowels_c .. ")", "iː%1")


-- Strip hashes
-- Strip hashes
term = rsub(term, "#", "")
term = rsub(term, "#", "")

return term
return term
end

function export.hyphenate(word)
-- Source: http://logic.fmi.uni-sofia.bg/hyphenation/hyph-bg.html#hyphenation-rules-between-1983-and-2012
-- Also note: the rules from 2012 onward, which encode the modern standard, are entirely
-- backwards-compatible with the previous standard. Thus our code can generate valid 2012
-- hyphenations despite following the older rules.
---Pre-processing----
word = rsub(word, "[" .. GRAVE .. ACUTE .. "]", "") -- Remove accent marks

-- Treat дж as one single unit; this is bypassed by re-writing it as д.ж
-- I.e. we write суджук, but над.живея
word = rsub(word, "дж", "#")
word = rsub(word, "ДЖ", "=")
word = rsub(word, "[.]", "")

----Hyphenation----
word = rsub_repeatedly(word, "(" .. hcons_c .. hvowels_c .. ")(" .. hcons_c .. ")(" .. hvowels_c .. ")", "%1" .. HYPH .. "%2%3") -- Single consonants separated by single vowels are hyphenated
word = rsub_repeatedly(word, "(" .. hvowels_c .. ")([йЙ])(" .. hcons_c .. hcons_c .. hcons_c .. "-)", function(a, b, c)
return a .. b .. substring(c, 1, 1) .. HYPH .. substring(c, 2, -1)
end) -- A й followed by two or more consonsants keeps one consonant to the left of the hyphen
word = rsub_repeatedly(word, "(" .. hvowels_c .. ")([йЙ])(" .. hcons_c .. ")([^" .. HYPH .. "])", "%1%2" .. HYPH .. "%3%4") -- A й preceded by a vowel and followed by one consonant is kept with its vowel
word = rsub_repeatedly(word, "(" .. hvowels_c .. ")(" .. hcons_c .. hcons_c .. hcons_c .. "-)(" .. hvowels_c .. ")", function(a, b, c)
return a .. substring(b, 1, 1) .. HYPH .. substring(b, 2, -1) .. c
end) -- When multiple consonants intervene between a vowel, at least one stays on either side of the vowel
word = rsub_repeatedly(word, "(" .. hcons_c .. ")%1", function(a)
return a .. HYPH .. a end) -- Two of the same consonant are hyphenated
word = rsub_repeatedly(word, "(" .. hvowels_c .. hvowels_c .. hvowels_c .. "-)(" .. hcons_c .. ")", function(a, b)
return substring(a, 1, -2) .. HYPH .. substring(a, -1, -1) .. b end) -- For sequences of two or more vowels, the final vowel goes after the hyphen and the rest before

word = rsub(word, "(.)[" .. HYPH .. "]([ьЬ])", HYPH .. "%1%2") -- ь cannot be directly after a hyphen
word = rsub(word, "([ьЬ])[" .. HYPH .. "](.)", "%2%1" .. HYPH) -- ь cannot be directly before a hyphen
word = rsub(word, "(.)" .. HYPH .. "(.)$", HYPH .. "%1%2") -- At the beginning of words, merge isolated letters with their following letters
word = rsub(word, "^(.)" .. HYPH .."(.)", "%1%2" .. HYPH) -- At the end of words, merge isolated letters with their preceding letters
-- Note: the above is flawed in that it cannot detect isolated letters within the word.
-- We hope that this is sufficient, and there are no rogue cases in between words.

----Post-processing----
word = rsub(word, "#", "дж") -- Decode back to дж
word = rsub(word, "=", "ДЖ")

return word
end

local function get_anntext(term, ann)
if ann == "1" or ann == "y" then
-- remove secondary stress annotations
anntext = "'''" .. export.remove_pron_notations(term, true) .. "''': "
elseif ann then
anntext = "'''" .. ann .. "''': "
else
anntext = ""
end
return anntext
end

local function format_hyphenation(hyphenation)
local syllables = rsplit(hyphenation, HYPH)

return require("Module:hyphenation").format_hyphenations( {
lang = lang,
hyphs = { { hyph = syllables } },
sc = script,
caption = "Hyphenation",
} )
end

function export.show_hyphenation(frame)
local params = {
[1] = {},
}

local title = mw.title.getCurrentTitle()

local args = require("Module:parameters").process(frame:getParent().args, params)
local term = args[1] or title.nsText == "Template" and "при́мер" or title.text

local hyphenation = export.hyphenate(term)
return format_hyphenation(hyphenation)
end
end


Line 245: Line 322:
["ann"] = {},
["ann"] = {},
}
}

local title = mw.title.getCurrentTitle()
local title = mw.title.getCurrentTitle()
Line 252: Line 329:


local ipa = export.toIPA(term, args.endschwa)
local ipa = export.toIPA(term, args.endschwa)
ipa = "[" .. ipa .. "]"
ipa = "[" .. ipa .. "]"
ipa = require("Module:IPA").format_IPA_full(require("Module:languages").getByCode("bg"), { { pron = ipa } } )


local ipa_text = require("Module:IPA").format_IPA_full(lang, { { pron = ipa } } )
local anntext
if args.ann == "1" or args.ann == "y" then
local anntext = get_anntext(term, args.ann)
-- remove secondary stress annotations
anntext = "'''" .. export.remove_pron_notations(term, true) .. "''': "
elseif args.ann then
anntext = "'''" .. args.ann .. "''': "
else
anntext = ""
end


return anntext .. ipa
return anntext .. ipa_text
end
end



Revision as of 21:50, 1 August 2023


This module automatically converts Bulgarian orthography to a phonetic transcription in the International Phonetic Alphabet. It also generates hyphenations and syllabifications.

Testcases

16 of 49 tests failed. (refresh)

TextExpectedActual
test_hyphenation:
Script error during testing: Module:bg-pronunciation/testcases:38: attempt to call field 'hyphenate_total' (a nil value)
stack traceback:
	[C]: in function 'hyphenate_total'
	Module:bg-pronunciation/testcases:38: in function 'func'
	Module:UnitTests:295: in function 'iterate'
	Module:bg-pronunciation/testcases:182: in function <Module:bg-pronunciation/testcases:115>
	(tail call): ?
	[C]: in function 'xpcall'
	Module:UnitTests:369: in function <Module:UnitTests:328>
	(tail call): ?
	mw.lua:527: in function <mw.lua:507>
	[C]: ?
	[C]: in function 'expandTemplate'
	mw.lua:333: in function 'expandTemplate'
	Module:documentation:894: in function 'chunk'
	mw.lua:527: in function <mw.lua:507>
	[C]: ?
TextExpectedActual
test_ipa:
Passedкъ́ща (kǎ́šta)ˈkɤʃtɐˈkɤʃtɐ
Passedсгъстя́ се (sgǎstjá se), endschwa=truezɡɐˈstʲɤ̟ sɛzɡɐˈstʲɤ̟ sɛ
Passedсгъстя́ се (sgǎstjá se) (respelled сгъстя̣́ се)zɡɐˈstʲɤ̟ sɛzɡɐˈstʲɤ̟ sɛ
Passedа̀бдики́ращ (àbdikírašt)ˌabdiˈkirɐʃtˌabdiˈkirɐʃt
Passedбезшу́мен (bezšúmen)bɛʃˈʃu̟mɛnbɛʃˈʃu̟mɛn
Passedщастли́в (štastlív)ʃtɐˈslifʃtɐˈslif
Passedнародността́ (narodnosttá)nɐrodnoˈstanɐrodnoˈsta
Passedя (ja)ja̟ja̟
Passedюг (jug)ju̟kju̟k
Passedяйце́ (jajcé)jɐjˈt͡sɛjɐjˈt͡sɛ
Passedучи́лище (učílište)oˈt͡ʃiliʃtɛoˈt͡ʃiliʃtɛ
Passedчорбаджи́я (čorbadžíja)t͡ʃo̟rbɐˈdʒijɐt͡ʃo̟rbɐˈdʒijɐ
Passedуби́йца (ubíjca)oˈbijt͡sɐoˈbijt͡sɐ
Passedбезбра́чие (bezbráčie)bɛzˈbrat͡ʃiɛbɛzˈbrat͡ʃiɛ
Passedизмра́ (izmrá) (respelled из.мра́)izˈmraizˈmra
Passedсала́та (saláta)sɐˈɫatɐsɐˈɫatɐ
Passedшега́ (šegá)ʃɛˈɡaʃɛˈɡa
Passedжена́ (žená)ʒɛˈnaʒɛˈna
Passedинти́мен (intímen)inˈtimɛninˈtimɛn
Passedпосо́лство (posólstvo)poˈsɔɫstvopoˈsɔɫstvo
Passedъ́гъл (ǎ́gǎl)ˈɤɡɐɫˈɤɡɐɫ
Passedусу́квам (usúkvam)oˈsukvɐmoˈsukvɐm
Passedле́ща (léšta)ˈlɛʃtɐˈlɛʃtɐ
Passedлипа́ (lipá)liˈpaliˈpa
Passedокеа́н (okeán)okɛˈanokɛˈan
Passedмеки́ца (mekíca)mɛˈkit͡sɐmɛˈkit͡sɐ
Passedла́гер (láger)ˈɫaɡɛrˈɫaɡɛr
Passedмаги́я (magíja)mɐˈɡijɐmɐˈɡijɐ
Passedхем (hem)xɛmxɛm
Passedхимн (himn)ximnximn
Passedтулу́п (tulúp)toˈɫuptoˈɫup
Failedжа̀р-пти́ца (žàr-ptíca)ˌʒa̟r-pˈtit͡sɐˌʒɐr-pˈtit͡sɐ
Passedв о́фис (v ófis)f ˈɔfisf ˈɔfis
Failedвъв Фра́нция (vǎv Fráncija)vɐf ˈfrant͡sijɐvɤf ˈfrant͡sijɐ
Passedня́колко (njákolko)ˈnʲa̟koɫkoˈnʲa̟koɫko
Passedв Япо́ния (v Japónija)f jɐˈpɔnijɐf jɐˈpɔnijɐ
Passedавтоплу́г (avtoplúg)ɐftoˈpɫukɐftoˈpɫuk
Failedўе́бса́йтˈwɛpˈsajtŏˈɛpˈsajt
Failedўе́лскиˈwɛɫskiŏˈɛɫski
Failedўе́стърнˈwɛstɐrnŏˈɛstɐrn
FailedО́ўенˈɔwɛnˈɔŏɛn
Failedно́ўхаўˈnɔwxɐwˈnɔŏxɐŏ
FailedДжо́ўзефˈdʒɔwzɛfˈdʒɔŏzɛf
Failedбо́ўлингˈbɔwliŋkˈbɔŏliŋk
Failedдаўнло́ўдdɐwnˈɫɔwtdɐŏnˈɫɔŏt
Failedўи́скиˈwiskiŏˈiski
Failedўи́кендˈwikɛntŏˈikɛnt
FailedЎо́рўикˈwɔrwikŏˈɔrŏik
FailedХе́лоўинˈxɛɫowinˈxɛɫoŏin
TextExpectedActual
test_syllabification:
Script error during testing: Module:bg-pronunciation/testcases:47: attempt to call field 'syllabify' (a nil value)
stack traceback:
	[C]: in function 'syllabify'
	Module:bg-pronunciation/testcases:47: in function 'func'
	Module:UnitTests:295: in function '?'
	Module:bg-pronunciation/testcases:545: in function <Module:bg-pronunciation/testcases:185>
	(tail call): ?
	[C]: in function 'xpcall'
	Module:UnitTests:369: in function <Module:UnitTests:328>
	(tail call): ?
	mw.lua:527: in function <mw.lua:507>
	[C]: ?
	[C]: in function 'expandTemplate'
	mw.lua:333: in function 'expandTemplate'
	Module:documentation:894: in function 'chunk'
	mw.lua:527: in function <mw.lua:507>
	[C]: ?

References

  • Тилков, Димитър, Бояджиев, Тодор, Георгиева, Елена, Пенчев, Йордан, Станков, Валентин (1998) Граматика на съвременния български книжовен език (in Bulgarian), 3rd edition, volume 1, Sofia: ABAGAR

local export = {}

local substring = mw.ustring.sub
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local U = mw.ustring.char
local lang = require("Module:languages").getByCode("bg")
local script = require("Module:scripts").getByCode("Cyrl")

-- Combining marks that may appear in the Cyrillic input.
local GRAVE = U(0x300) -- combining grave accent; input notation for secondary stress
local ACUTE = U(0x301) -- combining acute accent; input notation for primary stress
-- Symbols used in the IPA output.
local PRIMARY = U(0x2C8) -- IPA primary stress mark
local SECONDARY = U(0x2CC) -- IPA secondary stress mark
local TIE = U(0x361) -- combining tie bar joining the two halves of an affricate
local FRONTED = U(0x31F) -- combining plus sign below, appended to fronted vowels
local DOTUNDER = U(0x323) -- combining dot below; input notation handled like endschwa in toIPA
local HYPH = U(0x2027) -- hyphenation point inserted by export.hyphenate
-- IPA vowels, including the reduced variants ɐ and o produced by vowel reduction.
local vowels = "aɤɔuɛiɐo"
local vowels_c = "[" .. vowels .. "]"
local non_vowels_c = "[^" .. vowels .. "]"
-- IPA consonants; the palatalization mark and the tie bar are included so that
-- palatalized consonants and affricates are matched as consonant material.
local cons = "bvɡdʒzjklmnprstfxʃɣʲ" .. TIE
local cons_c = "[" .. cons .. "]"
-- Cyrillic consonant and vowel letter classes used by export.hyphenate. "#" and "="
-- are the placeholders export.hyphenate substitutes for дж and ДЖ.
local hcons_c = "[бвгджзйклмнпрстфхшщьчц#БВГДЖЗЙКЛМНПРСТФХШЩЬЧЦ=]"
local hvowels_c = "[аъоуеияѝюАЪОУЕИЯЍЮ]"
-- IPA stress marks as a string and as a character class.
local accents = PRIMARY .. SECONDARY
local accents_c = "[" .. accents .. "]"

-- single characters that map to IPA sounds
-- export.toIPA applies this table character by character to the lowercased,
-- NFD-normalized term; characters with no entry here are left unchanged.
local phonetic_chars_map = {
	["а"] = "a",
	["б"] = "b",
	["в"] = "v",
	["г"] = "ɡ",
	["д"] = "d",
	["е"] = "ɛ",
	["ж"] = "ʒ",
	["з"] = "z",
	["и"] = "i",
	["й"] = "j",
	["к"] = "k",
	["л"] = "l",
	["м"] = "m",
	["н"] = "n",
	["о"] = "ɔ",
	["п"] = "p",
	["р"] = "r",
	["с"] = "s",
	["т"] = "t",
	["у"] = "u",
	["ф"] = "f",
	["х"] = "x",
	-- Affricates are written as two symbols joined by the tie bar.
	["ц"] = "t" .. TIE .. "s",
	["ч"] = "t" .. TIE .. "ʃ",
	["ш"] = "ʃ",
	["щ"] = "ʃt",
	["ъ"] = "ɤ",
	-- The soft sign and the iotated vowels yield the palatalization mark; export.toIPA
	-- later rewrites it as "j" after a vowel or at the start of a word.
	["ь"] = "ʲ",
	["ю"] = "ʲu",
	["я"] = "ʲa",

	-- Input accent marks become IPA stress marks (still positioned after the vowel).
	[GRAVE] = SECONDARY,
	[ACUTE] = PRIMARY
}

-- Voiced obstruent -> voiceless counterpart. Used by the voicing-assimilation step
-- of export.toIPA for obstruent runs followed by a voiceless consonant or a word
-- boundary.
local devoicing = {
	["b"] = "p", ["d"] = "t", ["ɡ"] = "k",
	["z"] = "s", ["ʒ"] = "ʃ",
	["v"] = "f"
}

-- Voiceless obstruent -> voiced counterpart. Used by the voicing-assimilation step
-- of export.toIPA for obstruent runs followed by one of b, d, ɡ, z, ʒ.
local voicing = {
	["p"] = "b", ["t"] = "d", ["k"] = "ɡ",
	["s"] = "z", ["ʃ"] = "ʒ", ["x"] = "ɣ",
	["f"] = "v"
}


-- Prefixes where, if they occur at the beginning of the word and the stress is on the next syllable, we place the
-- syllable division directly after the prefix. For example, the default syllable-breaking algorithm would convert
-- безбра́чие to беˈзбрачие; but because it begins with без-, we convert it to безˈбрачие. Note that we don't (yet?)
-- convert измра́ to изˈмра instead of default измˈра, although we probably should.
--
-- Think twice before putting prefixes like на-, пре- and от- here, because of the existence of над-, пред-, and о-,
-- which are also prefixes.
--
-- The prefixes are matched against the term before vowel reduction is applied, so they must be spelled with the
-- unreduced vowels produced by phonetic_chars_map: Cyrillic о is "ɔ" at that point, never "o".
local prefixes = {"bɛz", "vɤz", "vɤzprɔiz", "iz", "naiz", "pɔiz", "prɛvɤz", "prɔiz", "raz"}


-- Wrapper around rsubn() that returns only the substituted string, dropping the
-- replacement count. The parentheses truncate the call to its first result.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end


-- Keep applying the same rsub() substitution until a pass leaves the string
-- unchanged, then return that fixed point.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end


-- Strip pronunciation-only notation from `text`: full stops (explicit syllable
-- boundaries) and combining dots below are always removed. When `remove_grave` is
-- truthy, grave accents are removed as well (wanted for annotations, but possibly
-- not for phonetic respellings); the text is decomposed for that step and then
-- recomposed.
function export.remove_pron_notations(text, remove_grave)
	local stripped = rsub(text, "[." .. DOTUNDER .. "]", "")
	if not remove_grave then
		return stripped
	end
	local decomposed = mw.ustring.toNFD(stripped)
	return mw.ustring.toNFC(rsub(decomposed, GRAVE, ""))
end

	
-- Convert Bulgarian Cyrillic text to a phonetic IPA transcription (returned without
-- surrounding brackets).
--
-- Parameters:
--   term:     the text to transcribe, with an acute accent marking primary stress and
--             optionally grave accents marking secondary stress. A full stop marks an
--             explicit syllable boundary, and a dot below after а/я requests the same
--             vowel change as `endschwa`. Alternatively a frame-like table, in which
--             case the term and `endschwa` are read from `term.args`.
--   endschwa: if truthy, a stressed word-final "a" (optionally followed by "t") is
--             transcribed as ɤ.
--
-- Raises an error if the term contains a grave accent but no acute accent.
function export.toIPA(term, endschwa)
	if type(term) == "table" then -- called from a template or a bot
		endschwa = term.args.endschwa
		term = term.args[1]
	end

	local origterm = term

	-- Lowercase first so that capital Й is caught as well, and replace й before
	-- decomposing: NFD would otherwise split it into и plus a combining breve.
	term = mw.ustring.lower(mw.ustring.toNFC(term))
	term = rsub(term, "й", "j")
	term = mw.ustring.toNFD(term)

	if term:find(GRAVE) and not term:find(ACUTE) then
		error("Use acute accent, not grave accent, for primary stress: " .. origterm)
	end

	-- allow DOTUNDER to signal same as endschwa=1
	term = rsub(term, "а(" .. accents_c .. "?)" .. DOTUNDER, "ъ%1")
	term = rsub(term, "я(" .. accents_c .. "?)" .. DOTUNDER, "ʲɤ%1")
	-- Map every character through the table; characters without an entry are kept.
	term = rsub(term, ".", phonetic_chars_map)

	-- Mark word boundaries
	term = rsub(term, "(%s+)", "#%1#")
	term = "#" .. term .. "#"

	-- Convert verbal and definite endings
	if endschwa then
		term = rsub(term, "a(" .. PRIMARY .. "t?#)", "ɤ%1")
	end

	-- Change ʲ to j after vowels or word-initially
	term = rsub(term, "([" .. vowels .. "#]" .. accents_c .. "?)ʲ", "%1j")

	-------------------- Move stress ---------------

	-- First, move leftwards over the vowel.
	term = rsub(term, "(" .. vowels_c .. ")(" .. accents_c .. ")", "%2%1")
	-- Then, move leftwards over j or soft sign.
	term = rsub(term, "([jʲ])(" .. accents_c .. ")", "%2%1")
	-- Then, move leftwards over a single consonant.
	term = rsub(term, "(" .. cons_c .. ")(" .. accents_c .. ")", "%2%1")
	-- Then, move leftwards over Cl/Cr combinations where C is an obstruent (NOTE: IPA ɡ).
	term = rsub(term, "([bdɡptkxfv]" .. ")(" .. accents_c .. ")([rl])", "%2%1%3")
	-- Then, move leftwards over kv/gv (NOTE: IPA ɡ).
	term = rsub(term, "([kɡ]" .. ")(" .. accents_c .. ")(v)", "%2%1%3")
	-- Then, move leftwards over sC combinations, where C is a stop or resonant (NOTE: IPA ɡ).
	term = rsub(term, "([sz]" .. ")(" .. accents_c .. ")([bdɡptkvlrmn])", "%2%1%3")
	-- Then, move leftwards over affricates not followed by a consonant.
	term = rsub(term, "([td]" .. TIE .. "?)(" .. accents_c .. ")([szʃʒ][" .. vowels .. "ʲ])", "%2%1%3")
	-- If we ended up in the middle of a tied affricate, move to its right.
	term = rsub(term, "(" .. TIE .. ")(" .. accents_c .. ")(" .. cons_c .. ")", "%1%3%2")
	-- Then, move leftwards over any remaining consonants at the beginning of a word.
	term = rsub(term, "#(" .. cons_c .. "*)(" .. accents_c .. ")", "#%2%1")
	-- Then correct for known prefixes.
	for _, prefix in ipairs(prefixes) do
		-- Split the prefix into its final consonant run and everything before it.
		-- These are declared local so they do not leak into the global environment.
		local prefix_prefix, prefix_final_cons = rmatch(prefix, "^(.-)(" .. cons_c .. "*)$")
		if prefix_final_cons then
			-- Check for accent moved too far to the left into a prefix, e.g. безбрачие accented as беˈзбрачие instead
			-- of безˈбрачие
			term = rsub(term, "#(" .. prefix_prefix .. ")(" .. accents_c .. ")(" .. prefix_final_cons .. ")", "#%1%3%2")
		end
	end
	-- Finally, if there is an explicit syllable boundary in the cluster of consonants where the stress is, put it there.
	-- First check for accent to the right of the explicit syllable boundary.
	term = rsub(term, "(" .. cons_c .. "*)%.(" .. cons_c .. "*)(" .. accents_c .. ")(" .. cons_c .. "*)", "%1%3%2%4")
	-- Then check for accent to the left of the explicit syllable boundary.
	term = rsub(term, "(" .. cons_c .. "*)(" .. accents_c .. ")(" .. cons_c .. "*)%.(" .. cons_c .. "*)", "%1%3%2%4")
	-- Finally, remove any remaining syllable boundaries.
	term = rsub(term, "%.", "")

	-------------------- Vowel reduction (in unstressed syllables) ---------------
	-- Replace each unreduced a/ɔ/ɤ/u in `vowel` with its reduced counterpart.
	local function reduce_vowel(vowel)
		return rsub(vowel, "[aɔɤu]", { ["a"] = "ɐ", ["ɔ"] = "o", ["ɤ"] = "ɐ", ["u"] = "o" })
	end

	-- FIXME: This needs to be rewritten entirely and moved above stress movement.
	-- NOTE: This rule's removal may be a solution for the wrongly-reduced vowels seen in e.g. жар-птица, /ʒɐr-.../
	term = rsub(term, "a(" .. non_vowels_c .. "*" .. accents_c .. ")", "ɐ%1")
	-- Reduce all vowels before the stress, except if the word has no accent at all. (FIXME: This is presumably
	-- intended for single-syllable words without accents, but if the word is multisyllabic without accents,
	-- presumably all vowels should be reduced.)

	term = rsub(term, "(#[^#" .. accents .. "]*)(.)", function(a, b)
		-- `a` is the stretch from a word boundary up to the next boundary or accent;
		-- if it ended at a boundary the word has no accent and is left alone.
		if b == "#" then
			return a .. b
		else
			return reduce_vowel(a) .. b
		end
	end)
	-- Reduce all vowels after the accent except the first vowel after the accent mark (which is stressed).
	term = rsub(term, "(" .. accents_c .. "[^aɛiɔuɤ#]*[aɛiɔuɤ])([^#" .. accents .. "]*)", function(a, b)
		return a .. reduce_vowel(b)
	end)

	-------------------- Vowel assimilation to adjacent consonants (fronting/raising) ---------------
	term = rsub(term, "([ʃʒʲj])([aouɤ])", "%1%2" .. FRONTED)

	-- Hard l
	term = rsub_repeatedly(term, "l([^ʲɛi])", "ɫ%1")

	-- Voicing assimilation
	term = rsub(term, "([bdɡzʒv" .. TIE .. "]*)(" .. accents_c .. "?[ptksʃfx#])", function(a, b)
		return rsub(a, ".", devoicing) .. b end)
	term = rsub(term, "([ptksʃfx" .. TIE .. "]*)(" .. accents_c .. "?[bdɡzʒ])", function(a, b)
		return rsub(a, ".", voicing) .. b end)
	term = rsub(term, "n(" .. accents_c .. "?[ɡk]+)", "ŋ%1")
	term = rsub(term, "m(" .. accents_c .. "?[fv]+)", "ɱ%1")

	-- Sibilant assimilation
	term = rsub(term, "[sz](" .. accents_c .. "?[td]?" .. TIE .. "?)([ʃʒ])", "%2%1%2")

	-- Reduce consonant clusters
	term = rsub(term, "([szʃʒ])[td](" .. accents_c .. "?)([tdknml])", "%2%1%3")

	-- Strip hashes
	term = rsub(term, "#", "")

	return term
end

-- Insert hyphenation points (HYPH) into a Bulgarian word written in Cyrillic and
-- return the result.
--
-- Source: http://logic.fmi.uni-sofia.bg/hyphenation/hyph-bg.html#hyphenation-rules-between-1983-and-2012
-- Also note: the rules from 2012 onward, which encode the modern standard, are entirely
-- backwards-compatible with the previous standard. Thus our code can generate valid 2012
-- hyphenations despite following the older rules.
function export.hyphenate(word)
	local C, V = hcons_c, hvowels_c

	-- Put a hyphenation point after the first character of `cluster`.
	local function split_after_first(cluster)
		return substring(cluster, 1, 1) .. HYPH .. substring(cluster, 2, -1)
	end

	---- Pre-processing ----
	-- Remove accent marks.
	word = rsub(word, "[" .. GRAVE .. ACUTE .. "]", "")
	-- Treat дж as one single unit by encoding it as a placeholder. Writing it as д.ж
	-- bypasses this, because the full stop is only stripped after the encoding step;
	-- i.e. we write суджук, but над.живея.
	word = rsub(word, "дж", "#")
	word = rsub(word, "ДЖ", "=")
	word = rsub(word, "[.]", "")

	---- Hyphenation ----
	-- A single consonant between two vowels starts the next syllable.
	word = rsub_repeatedly(word, "(" .. C .. V .. ")(" .. C .. ")(" .. V .. ")", "%1" .. HYPH .. "%2%3")
	-- A й followed by two or more consonants keeps one consonant to the left of the hyphen.
	word = rsub_repeatedly(word, "(" .. V .. ")([йЙ])(" .. C .. C .. C .. "-)", function(vowel, yot, cluster)
		return vowel .. yot .. split_after_first(cluster)
	end)
	-- A й preceded by a vowel and followed by one consonant is kept with its vowel.
	word = rsub_repeatedly(word, "(" .. V .. ")([йЙ])(" .. C .. ")([^" .. HYPH .. "])", "%1%2" .. HYPH .. "%3%4")
	-- When several consonants stand between two vowels, the first consonant stays with
	-- the preceding vowel and the rest go with the following one.
	word = rsub_repeatedly(word, "(" .. V .. ")(" .. C .. C .. C .. "-)(" .. V .. ")", function(before, cluster, after)
		return before .. split_after_first(cluster) .. after
	end)
	-- Two of the same consonant are split by a hyphen.
	word = rsub_repeatedly(word, "(" .. C .. ")%1", function(doubled)
		return doubled .. HYPH .. doubled
	end)
	-- For sequences of two or more vowels before a consonant, the final vowel goes
	-- after the hyphen and the rest before it.
	word = rsub_repeatedly(word, "(" .. V .. V .. V .. "-)(" .. C .. ")", function(vowel_run, following)
		return substring(vowel_run, 1, -2) .. HYPH .. substring(vowel_run, -1, -1) .. following
	end)

	-- ь cannot be directly after a hyphen: move the hyphen one character to the left.
	word = rsub(word, "(.)[" .. HYPH .. "]([ьЬ])", HYPH .. "%1%2")
	-- ь cannot be directly before a hyphen: "ь‧X" is rewritten as "Xь‧".
	-- NOTE(review): this reorders the letters rather than only moving the hyphen; confirm intent.
	word = rsub(word, "([ьЬ])[" .. HYPH .. "](.)", "%2%1" .. HYPH)
	-- At the end of the word, merge an isolated final letter with the preceding letter.
	word = rsub(word, "(.)" .. HYPH .. "(.)$", HYPH .. "%1%2")
	-- At the beginning of the word, merge an isolated initial letter with the following letter.
	word = rsub(word, "^(.)" .. HYPH .."(.)", "%1%2" .. HYPH)
	-- Note: the above is flawed in that it cannot detect isolated letters within the word.
	-- We hope that this is sufficient, and there are no rogue cases in between words.

	---- Post-processing ----
	-- Decode the placeholders back to дж / ДЖ.
	word = rsub(word, "#", "дж")
	word = rsub(word, "=", "ДЖ")

	return word
end

-- Build the bold annotation prefix shown before the pronunciation.
--
-- Parameters:
--   term: the term being transcribed; used when `ann` is "1" or "y".
--   ann:  the annotation parameter. "1" or "y" means "annotate with the term itself",
--         any other non-nil value is used verbatim, and nil means no annotation.
--
-- Returns the wikitext prefix (bold text, colon and an encoded space), or the empty
-- string when there is no annotation. The result is returned directly instead of
-- being stored in a variable, so nothing is written to the global environment.
local function get_anntext(term, ann)
	if ann == "1" or ann == "y" then
		-- remove secondary stress annotations
		return "'''" .. export.remove_pron_notations(term, true) .. "''':&#32;"
	elseif ann then
		return "'''" .. ann .. "''':&#32;"
	end
	return ""
end

-- Render a hyphenated word (syllables separated by HYPH) through
-- Module:hyphenation, captioned "Hyphenation".
local function format_hyphenation(hyphenation)
	local data = {
		lang = lang,
		hyphs = { { hyph = rsplit(hyphenation, HYPH) } },
		sc = script,
		caption = "Hyphenation",
	}
	return require("Module:hyphenation").format_hyphenations(data)
end

-- Template entry point: hyphenate the term given as the first template argument and
-- return the formatted hyphenation. Without an argument, the page title is used,
-- or a fixed example word when the page is in the Template namespace.
function export.show_hyphenation(frame)
	local title = mw.title.getCurrentTitle()
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {},
	})

	local term = args[1]
	if not term then
		if title.nsText == "Template" then
			term = "при́мер"
		else
			term = title.text
		end
	end

	return format_hyphenation(export.hyphenate(term))
end

-- Template entry point: transcribe the term given as the first template argument to
-- IPA and return it formatted by Module:IPA, preceded by the optional annotation.
-- Without an argument, the page title is used, or a fixed example word when the page
-- is in the Template namespace.
function export.show(frame)
	local title = mw.title.getCurrentTitle()
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {},
		["endschwa"] = { type = "boolean" },
		["ann"] = {},
	})

	local term = args[1]
	if not term then
		if title.nsText == "Template" then
			term = "при́мер"
		else
			term = title.text
		end
	end

	-- The transcription is phonetic, hence the square brackets.
	local bracketed = "[" .. export.toIPA(term, args.endschwa) .. "]"
	local ipa_text = require("Module:IPA").format_IPA_full(lang, { { pron = bracketed } } )
	local prefix = get_anntext(term, args.ann)

	return prefix .. ipa_text
end

return export