Module:mh-pronunc/sandbox: difference between revisions

From Wiktionary, the free dictionary
Jump to navigation Jump to search
Content deleted Content added
No edit summary
m function supplied to gsub need only return if it needs to make a replacement
Line 575: Line 575:
{ "^(ɦ)([ʲʷ])("..V..")", function(a, b, c)
{ "^(ɦ)([ʲʷ])("..V..")", function(a, b, c)
if F2[b] ~= F2[c] then
if F2[b] ~= F2[c] then
a = "h"
return a.."h"..c
end
end
return a..b..c
end },
end },
{ "("..V..")(ɦ)([ʲʷ])$", function(a, b, c)
{ "("..V..")(ɦ)([ʲʷ])$", function(a, b, c)
if F2[a] ~= F2[c] then
if F2[a] ~= F2[c] then
b = "h"
return a.."h"..c
end
end
return a..b..c
end },
end },
{ "([ɑʌɤɯɒɔou])ɦʲ([ɑʌɤɯɒɔou])", "%1hʲ%2" },
{ "([ɑʌɤɯɒɔou])ɦʲ([ɑʌɤɯɒɔou])", "%1hʲ%2" },
Line 593: Line 591:
(F2[d] == F2[c] and F1[a] > F1[d])
(F2[d] == F2[c] and F1[a] > F1[d])
) then
) then
b = "h"
return a.."h"..c..d
end
end
return a..b..c..d
end },
end },
{ "("..V..")"..V, "%1" }
{ "("..V..")"..V, "%1" }

Revision as of 19:56, 14 November 2019


-- This is still a work in progress.

local export = {}

local ceil = math.ceil
local concat = table.concat
local find = mw.ustring.find
local floor = math.floor
local gsplit = mw.text.gsplit
local gsub = mw.ustring.gsub
local insert = table.insert
local lower = mw.ustring.lower
local max = math.max
local split = mw.text.split
local trim = mw.text.trim

-- Pattern fragments matching a literal "(" and a literal ")".
local LP = "%("
local RP = "%)"

-- The same parentheses, made optional.
local LQ = LP.."?"
local RQ = RP.."?"

-- Combining diacritics and tie bars used when building phonetic output.
-- ASYLL is appended to vowels that the code below calls "asyllabic" or
-- semivowels; TIE/TIE2 join the two halves of diphthongs and affricates.
-- NOTE(review): the remaining names presumably abbreviate the IPA diacritic
-- they hold (dental, devoiced, lowered, retracted, unreleased, raised) --
-- confirm against the code points.
local ASYLL = "̯"
local ASYLLTIE = "᷼"
local DENT = "̪"
local DEVO = "̥"
local DEVO2 = "̊"
local DOWN = "̞"
local RETR = "̠"
local TIE = "͡"
local TIE2 = "͜"
local UNREL = "̚"
local UP = "̝"

-- Pattern building blocks for the internal format, in which every consonant
-- is one base character followed by one secondary-articulation mark.
local C2 = "[ʲˠʷ]"                -- a secondary-articulation mark
local G1_ = "ɦh"                  -- base characters of glides
local G1 = "["..G1_.."]"          -- a glide base
local G = G1.."."                 -- a glide base plus any one character
local NG1 = "[^"..G1_.."_]"       -- a character that is neither glide base nor "_"
local NG = NG1..C2                -- a non-glide, non-pseudo-glide consonant
local C = "."..C2                 -- any consonant (including glides and "_")
local V_ = "æɛeiɑʌɤɯɒɔouï"        -- vowel characters ("ï" is the protected /i/)
local V = "["..V_.."]"            -- a vowel
local NV = "[^"..V_.."]"          -- anything but a vowel
local S = "[%s%-]*"               -- optional run of whitespace and hyphens

-- Vowel grid: the row index is the F1 value, the column index is the F2 value.
local VOWELS = { -- VOWELS[f1][f2]
	{ "æ", "ɑ", "ɒ" },
	{ "ɛ", "ʌ", "ɔ" },
	{ "e", "ɤ", "o" },
	{ "i", "ɯ", "u" }
}

-- Reverse lookups: F1[vowel] gives the row and F2[vowel] gives the column.
-- F2 is pre-seeded so secondary-articulation marks map onto the same columns.
local F1 = {}
local F2 = { ["ʲ"] = 1, ["ˠ"] = 2, ["ʷ"] = 3 }

for rowIndex = 1, #VOWELS do
	local row = VOWELS[rowIndex]
	for colIndex = 1, #row do
		local vowel = row[colIndex]
		F1[vowel], F2[vowel] = rowIndex, colIndex
	end
end

-- Run the same substitution over the text twice in a row, so that matches
-- which overlapped a first-pass match get a second chance.
-- Returns only the resulting text (no replacement count).
local function gsub2(text, patt, subst)
	for _ = 1, 2 do
		text = gsub(text, patt, subst)
	end
	return text
end

-- Apply a substitution repeatedly until a pass leaves the text unchanged.
-- Always performs at least one pass; returns only the resulting text.
local function gsubx(text, patt, subst)
	while true do
		local before = text
		text = gsub(text, patt, subst)
		if text == before then
			return text
		end
	end
end

-- Apply a list of substitution rules to the text, one after another.
--
-- text      the string to transform
-- pattArgs  sequence of rules; each rule is either a pattern string, or a
--           table { pattern, subst, fn } whose second and third slots are
--           optional per-rule overrides
-- substArg  default replacement (string, table or function) for rules that
--           do not carry their own
-- fnArg     default substitution function, called as fn(text, patt, subst);
--           defaults to gsub
--
-- Returns the transformed text.
local function gsubBatch(text, pattArgs, substArg, fnArg)
	fnArg = fnArg or gsub
	-- Later rules operate on the output of earlier ones, so the rules must
	-- run in list order; ipairs guarantees that, pairs does not.
	for _, pattArg in ipairs(pattArgs) do
		local patt = pattArg
		local subst = substArg
		local fn = fnArg
		if type(pattArg) == "table" then
			patt = pattArg[1]
			subst = pattArg[2] or substArg
			fn = pattArg[3] or fnArg
		end
		text = fn(text, patt, subst)
	end
	return text
end

-- Append value to seq unless an equal value is already stored in it.
local function insertUnique(seq, value)
	local alreadyPresent = false
	for _, existing in pairs(seq) do
		if existing == value then
			alreadyPresent = true
			break
		end
	end
	if not alreadyPresent then
		insert(seq, value)
	end
end

-- Interpret a template argument as a boolean.
-- A missing argument, and (after trimming) "", "0" or any casing of "false",
-- yield false; every other text yields true.
local function parseBoolean(text)
	if not text then
		return false
	end
	local value = trim(text)
	return not (value == "" or value == "0" or lower(value) == "false")
end

-- Convert a template argument into the module's internal phonemic format.
--
-- code  comma-separated list of spellings; only the letters
--       a b d e g h i j k l m n p r t w y, "_", "&", "'", "-" and whitespace
--       are accepted. A nil argument is treated as an empty string.
--
-- Returns a sequence of unique internal-format strings, in which every
-- consonant is a base character followed by a secondary-articulation mark.
-- Raises an error naming the offending input when the spelling contains
-- unsupported characters or violates the phonotactic checks below.
local function parse(code)
	
	local seq, temp
	
	-- pseudo-glide letters; "0" is a temporary stand-in for "_" so that
	-- any underscore still present afterwards can be reported as misplaced
	local pseudoGlides = { ["h"] = "0ˠ", ["w"] = "0ʷ", ["y"] = "0ʲ" }
	
	code = trim(code or "")
	seq = {}
	
	for text in gsplit(code, "%s*,[%s,]*") do
		
		text = trim(text)
		
		if text ~= "" then
			
			text = " "..lower(text).." "
			
			temp = gsub(text, "[abdeghijklmnprtwy_&'%-%s]", "")
			if temp ~= "" then
				error("'"..code.."' contains unsupported characters: "..temp)
			end
			
			-- recognize "y_", "h_", "w_", "_y", "_h", "_w" as pseudo-glides
			text = gsub(text, "_*([hwy])_+", pseudoGlides)
			text = gsub(text, "_+([hwy])", pseudoGlides)
			if find(text, "_") then
				error("contains misplaced underscores: "..code)
			end
			text = gsub(text, "0", "_")
			
			-- recognize "ng", but not plain "g"
			-- "ngw" is a special sequence
			text = gsub(text, "ngw?", {
				["ng"] = "ŋˠ",
				["ngw"] = "ŋʷ"
			})
			if find(text, "g") then
				error("contains g that is not part of ng: "..code)
			end
			
			-- "kw", "lh", "lw", "mh", "nh", "nw", "rw" are special sequences
			-- recognize both these and plain "k", "l", "m", "n", "r"
			-- but "kh", "mw", "rh" are not special sequences
			text = gsub(text, "[klmnr][hw]?", {
				["k"] = "kˠ",
				["kh"] = "kˠh", -- N\A
				["kw"] = "kʷ",
				["l"] = "lʲ",
				["lh"] = "lˠ",
				["lw"] = "lʷ",
				["m"] = "mʲ",
				["mh"] = "mˠ",
				["mw"] = "mʲw", -- N\A
				["n"] = "nʲ",
				["nh"] = "nˠ",
				["nw"] = "nʷ",
				["r"] = "rˠ",
				["rh"] = "rˠh", -- N\A
				["rw"] = "rʷ"
			})
			
			-- "passing over lightly"
			text = gsub(text, "yi'+y", "ĭʲ")
			-- "dwelling upon"
			text = gsub(text, "'+yiy", "īʲ")
			-- a plain /i/ protected from dialect-specific reflexes
			text = gsub(text, "'+i", "ï")
			
			-- convert remaining sequences to internal format
			text = gsub(text, "[abdehijptwy&']", {
				["a"] = "æ",
				["b"] = "pˠ",
				["d"] = "rʲ",
				["e"] = "ɛ",
				["&"] = "e",
				["h"] = "ɦˠ",
				["i"] = "i",
				["j"] = "tʲ",
				["p"] = "pʲ",
				["t"] = "tˠ",
				["w"] = "ɦʷ",
				["y"] = "ɦʲ",
				["'"] = ""
			})
			
			-- treat initial /ɦˠɦˠ/ as a special consonant
			text = gsub(text, "("..NV..")ɦˠɦˠ("..V..")", "%1ɣˠ%2")
			
			-- enforce /CVC/, /CVCVC/, /CVCCVC/ phonotactics
			-- but allow /(_)VC/, /CV(_)/ at affix boundaries
			-- where a vowel may link to another morpheme's consonant
			-- (the checks run on a copy with spaces and hyphens removed)
			temp = gsub(text, S, "")
			if find(temp, "_."..C) or find(temp, C.."_") then
				error("pseudo-glides may not neighbor a consonant: "..code)
			end
			if find(temp, V.."_."..V) then
				error(
					"pseudo-glides may only be at the beginning or end: "
					..code
				)
			end
			if find(temp, V..V) then
				error("vowels must be separated by a consonant: "..code)
			end
			if find(temp, C..C..C) then
				error("consonant clusters are limited to two: "..code)
			end
			if find(temp, C..C.."$") then
				error("may not end with a consonant cluster: "..code)
			end
			-- gsub is used here only to run the callback on an initial
			-- consonant pair; its result is discarded
			gsub(temp, "^("..C..")("..C..")", function(a, b)
				if a ~= b then
					error(
						"may only begin with single or geminated consonant: "
						..code
					)
				end
				return ""
			end)
			
			text = gsub(text, "%s+", " ")
			text = trim(text)
			if text ~= "" then
				insertUnique(seq, text)
			end
			
		end
		
	end
	
	return seq
	
end

-- Render internal-format strings in Bender orthography.
--
-- inSeq  sequence of internal-format strings (as produced by parse)
-- args   optional table; args.version selects the orthography variant.
--        A missing args table, a missing version or an empty version all
--        select "default".
--
-- Returns a sequence of unique orthographic strings, in input order.
local function toBender(inSeq, args)
	-- "1969" is from "Spoken Marshallese" (1969 by Byron W. Bender)
	-- "med" is from the Marshallese-English Dictionary (1976)
	-- "mod" is from the Marshallese-English Online Dictionary
	-- "default" is the same as "mod" but with cedillas
	local version = args and args.version
	if version == nil or version == "" then
		-- guard before lower(): a nil version must not reach it
		version = "default"
	else
		version = lower(version)
	end
	local consSubst = {
		["pʲ"] = "p",
		["pˠ"] = "b",
		["tʲ"] = "j",
		["tˠ"] = "t",
		["kˠ"] = "k",
		["kʷ"] = ({ ["1969"] = "q", ["med"] = "q" })[version] or "kʷ",
		["mʲ"] = "m",
		["mˠ"] = ({ ["1969"] = "ṁ", ["mod"] = "ṃ" })[version] or "m̧",
		["nʲ"] = "n",
		["nˠ"] = ({ ["1969"] = "ṅ", ["mod"] = "ṇ" })[version] or "ņ",
		["nʷ"] = ({
			["1969"] = "n̈", ["med"] = "ņ°", ["mod"] = "ṇʷ"
		})[version] or "ņʷ",
		["ŋˠ"] = "g",
		["ŋʷ"] = ({ ["1969"] = "g̈", ["med"] = "g°" })[version] or "gʷ",
		["rʲ"] = "d",
		["rˠ"] = "r",
		["rʷ"] = ({ ["1969"] = "r̈", ["med"] = "r°" })[version] or "rʷ",
		["lʲ"] = "l",
		["lˠ"] = ({ ["1969"] = "ƚ", ["mod"] = "ḷ" })[version] or "ļ",
		["lʷ"] = ({
			["1969"] = "l̈", ["med"] = "ļ°", ["mod"] = "ḷʷ"
		})[version] or "ļʷ",
		["ĭʲ"] = "yi'y",
		["īʲ"] = "'yiy",
		["ɣˠ"] = "hh",
		["ɦʲ"] = "y",
		["ɦˠ"] = "h",
		["ɦʷ"] = "w",
		["_ʲ"] = "",
		["_ˠ"] = "",
		["_ʷ"] = ""
	}
	local vowelSubst = {
		["æ"] = "a",
		["ɛ"] = "e",
		["e"] = ({ ["1969"] = "&", ["mod"] = "ẹ" })[version] or "ȩ",
		["i"] = "i",
		["ï"] = "i"
	}
	local outSeq = {}
	-- ipairs keeps the output in the same order as the input
	for _, text in ipairs(inSeq) do
		text = gsub(text, C, consSubst)
		text = gsub(text, V, vowelSubst)
		insertUnique(outSeq, text)
	end
	return outSeq
end

-- Render internal-format strings as a phonemic transcription.
--
-- inSeq  sequence of internal-format strings (as produced by parse)
--
-- Returns a sequence of unique phonemic strings, in input order.
-- Consonants missing from the substitution table are kept unchanged.
local function toPhonemic(inSeq)
	-- built once: the table does not depend on the item being converted
	local consSubst = {
		["kˠ"] = "k",
		["ŋˠ"] = "ŋ",
		["ĭʲ"] = "ji̯j",
		["īʲ"] = "jijj",
		["ɣˠ"] = "ɰɰ",
		["ɦʲ"] = "j",
		["ɦˠ"] = "ɰ",
		["ɦʷ"] = "w",
		["_ʲ"] = "",
		["_ˠ"] = "",
		["_ʷ"] = ""
	}
	local outSeq = {}
	-- ipairs keeps the output in the same order as the input
	for _, text in ipairs(inSeq) do
		text = gsub(text, C, consSubst)
		-- the protected /i/ is displayed as a plain /i/
		text = gsub(text, "ï", "i")
		insertUnique(outSeq, text)
	end
	return outSeq
end

-- Render internal-format strings as phonetic transcriptions.
--
-- inSeq  sequence of internal-format strings (as produced by parse)
-- args   optional table of template arguments; the fields read are
--        alvpal, dialect, diphthongs, J, nohints and voice (see below)
--
-- Returns a sequence of unique phonetic strings. One input may produce
-- several outputs: one per dialect reflex when they differ, and one per
-- pseudo-glide when the phrase begins or ends with a bare vowel.
local function toPhonetic(inSeq, args)
	
	-- if enabled, display any palatalized coronal sibilant allophones
	-- as alveolopalatal sibilants
	local alvPal = args and parseBoolean(args.alvpal)
	
	-- recognize "ralik" for Rālik Chain (western dialect)
	-- recognize "ratak" for Ratak Chain (eastern dialect)
	-- for other values, list both possible dialect reflexes where applicable
	local dialect = args and args.dialect and lower(args.dialect) or ""
	if dialect == "rālik" then
		dialect = "ralik"
	end
	
	-- if enabled, display full diphthong allophones for short vowels
	local diphthongs = args and parseBoolean(args.diphthongs)
	-- NOTE(review): the argument is overridden here, so every
	-- "not diphthongs" branch below is currently unreachable
	diphthongs = true
	
	-- argument "J" has format like "tstt"
	-- recognized letters are "t" = plosive, "c" = affricate, "s" = fricative
	-- letters for initial, medial, final and geminate respectively
	-- real-world pronunciation said to vary by sociological factors
	-- but all realizations may occur in free variation
	local modeJ = split(args and args.J and lower(args.J) or "tstt", "")
	local voicelessJ = { ["t"] = "t", ["c"] = "ʦ", ["s"] = "s" }
	local voicedJ    = { ["t"] = "d", ["c"] = "ʣ", ["s"] = "z" }
	local initialJ = voicelessJ[modeJ[1] or "t"] or voicelessJ["t"]
	local medialJ = voicedJ[modeJ[2] or "s"] or voicedJ["s"]
	local finalJ = voicelessJ[modeJ[3] or ""] or initialJ
	local geminateJ = voicelessJ[modeJ[4] or ""] or initialJ
	
	-- if enabled, do not display pseudo-glide hints at all
	local noHints = args and parseBoolean(args.nohints)
	
	-- false will display all obstruent allophones as voiceless
	-- true will display all obstruent allophones as voiced
	-- empty string or absent by default will display
	-- only medial obstruent allophones as semi-voiced
	local voice = args and args.voice and trim(args.voice) or ""
	
	local outSeq = {}
	
	-- process one internal-format string and append its phonetic
	-- rendering(s) to outSeq
	local function forText(text)
		
		text = gsub(text, S, "")
		
		-- apply the dialect-specific reflexes and return the adjusted text
		local function forDialect(text, dialect)
			
			-- morphemes can begin with geminated consonants,
			-- but spoken words cannot
			text = gsub(text, "^("..C..")%1("..V..")", function(a, b)
				-- the prosthetic vowel is never more open than /ɛ/
				local c = VOWELS[max(F1[b], 2)][1]
				if dialect == "ralik" then
					-- Rālik /CCV-/ becomes /jVCCV-/
					return "ɦʲ"..c..a..a..b
				else
					-- Ratak /CCV-/ becomes /CVCV-/
					return a..c..a..b
				end
			end)
			
			-- initial /jijV-, jiwV-, wiwV-/ sequences have special behavior
			-- to block this in the template argument, use "'i" instead of "i"
			text = " "..text
			text = gsub(
				text, "([ʲˠʷ ])(ɦ[ʲʷ])i(ɦ[ʲʷ])("..V..")",
				function(a, b, c, d)
					local result
					if c == "ɦʷ" then
						-- /jiwV-, wiwV-/ sequences
						if dialect == "ralik" then
							-- Rālik /wiwV-/ becomes /jiwV-/
							b = "ɦʲ"
						end
						-- /[jw]iwV-/ becomes /[jw]iwwV-/ in both dialects
						result = b.."ïɦʷɦʷ"
					elseif b == "ɦʲ" then
						-- /jijV-/ sequences
						if dialect == "ralik" then
							-- "dwelling upon"
							result = "īʲ"
						else
							-- "passing over lightly"
							result = "ĭʲ"
						end
					else
						-- no change for /wijV-/ sequences
						result = b.."ï"..c
					end
					return a..result..d
				end
			)
			text = trim(text)
			
			-- Rālik /ɰɰV-/ becomes /ɰVɰV-/
			if dialect == "ralik" then
				text = gsub(text, "(ɣˠ"..V..")", "%1%1")
			end
			-- Ratak /ɰɰV-/ becomes /ɰV-/
			text = gsub(text, "ɣ", "ɦ")
			
			return text
			
		end
		
		-- run every dialect-independent step on the text and append the
		-- result to outSeq; returns nothing
		local function forRemainder(text)
			
			-- if the phrase begins or ends with a bare vowel
			-- and no pseudo-glide, display phrase three times
			-- with each of the different pseudo-glides
			if find(text, "^"..V) then
				forRemainder("_ʲ"..text)
				forRemainder("_ˠ"..text)
				forRemainder("_ʷ"..text)
				return
			end
			if find(text, V.."$") then
				forRemainder(text.."_ʲ")
				forRemainder(text.."_ˠ")
				forRemainder(text.."_ʷ")
				return
			end
			
			-- restore protected /i/, we won't be hiding it anymore
			text = gsub(text, "ï", "i")
			
			-- expand "dwelling upon" i, we won't be checking for it anymore
			text = gsub(text, "īʲ", "ɦʲiɦʲɦʲ")
			
			-- forward assimilation of rounded consonant clusters
			-- (the second consonant's own mark is replaced with "ʷ")
			text = gsubBatch(text, {
				"([kŋ])ʷ([kŋ]).",
				"([nrl])ʷ([nrl]).",
				{ "nʷt.", "nʷtˠ" }
			}, "%1ʷ%2ʷ")
			
			-- backward assimilation of remaining secondary articulations
			text = gsubBatch(text, {
				"([pm]).([pm])(.)",
				"([tn]).(t)(.)",
				"([kŋ]).([kŋ])(.)",
				"([nrl]).([nrl])(.)"
			}, "%1%3%2%3")
			
			-- backward nasal assimilation of certain consonant clusters
			text = gsubBatch(text, {
				"p(.)(m)",
				"[rl](.)(n)",
				"k(.)(ŋ)"
			}, "%2%1%2")
			
			-- insert epenthetic vowels within unstable consonant clusters
			-- ("V" is a literal placeholder, resolved in the next step)
			text = gsubBatch(text, {
				"(r.)(t)",
				"(l.)(tʲ)",
				"([ptkmŋ].)([nrl])",
				"([pm].)([tkŋ])",
				"([tnrl].)([pkmŋ])",
				"([kŋ].)([ptm])"
			}, "%1V%2")
			
			-- give those epenthetic vowels a transitional height
			local function oldTransHeight(a, b, c, d)
				-- not currently used
				-- was based on vague references
				local f1a = F1[a]
				local f1d = F1[d]
				local f1 = (f1a + f1d) * 0.5
				if f1a < f1d then
					f1 = floor(f1)
				else
					f1 = ceil(f1)
				end
				return a..b.."("..VOWELS[f1][1]..")"..c..d
			end
			text = gsub2(text, "(.)(..)V(..)(.)", function(a, b, c, d)
				-- based on patterns observed in Bender (1968)
				return a..b.."("..VOWELS[max(F1[a], F1[d], 2)][1]..")"..c..d
			end)
			
			-- clusters with glides have epenthetic vowels, too
			-- but their height is that of the glide's other vowel
			-- forming epenthetic long vowels
			text = gsubBatch(text, {
				{ "("..V..")ĭʲ("..NG..")", "%1ɦʲ(i)%2" },
				{ "("..NG..")ĭʲ("..V..")", "%1(i)ɦʲ%2" },
				{ "("..V..")(ɦˠ)("..C..")", "%1%2(%1)%3" },
				{ "("..C..")(ɦˠ)("..V..")", "%1(%3)%2%3" },
				{ "("..V..")(ɦ.)("..C..")", "%1%2(%1)%3" },
				{ "("..C..")(ɦ.)("..V..")", "%1(%3)%2%3" }
			})
			
			-- temporarily convert vowels to diphthongs
			text = gsub(text, "("..V..")", "%1%1")
			
			-- convert vowel phonemes to allophones
			-- according to secondary articulations of neighboring consonants
			text = gsubBatch(text, {
				{ "("..C2..")("..LQ..")("..V..")", function(a, b, c)
					return a..b..VOWELS[F1[c]][F2[a]]
				end },
				{ "("..V..")("..RQ..".)("..C2..")", function(a, b, c)
					return VOWELS[F1[a]][F2[c]]..b..c
				end }
			})
			
			-- convert double monophthongs back to single monophthongs
			text = gsub(text, "("..V..")%1", "%1")
			
			if not diphthongs then
				
				-- deparenthesize epenthetic vowels neighboring glides
				text = gsubBatch(text, {
					"(ɦ.)"..LP.."("..V.."+)"..RP,
					LP.."("..V.."+)"..RP.."(ɦ.)"
				}, "%1%2")
				
				-- monophthongize vowels neighboring glide consonants
				text = gsubBatch(text, {
					"(ɦˠ"..V..")"..V.."("..C..")",
					"("..V.."ɦ.)"..V.."("..V.."ɦ.)",
					"("..V.."ɦ."..V..")"..V.."("..NG..")",
					{ "^(ɦʷ)"..V.."("..V..")("..NG1.."ʲ)", function(a, b, c)
						return a..VOWELS[F1[b]][F2["ˠ"]]..c
					end },
					"^(ɦʲ)"..V.."("..V..".ʷ)",
					"^(ɦʷ)ɒ(ɑ.ˠ)",
					"^(ɦʷ)"..V.."([ʌɤ]tˠ)",
					"^(ɦʲ)æ(ɑ[pkmnŋlɦ]ˠ)",
					{ "^(ɦʲ)ɑ(k+"..LQ..V..")", "%1æ%2" },
					"^(ɦ."..V..")"..V.."("..NG..")",
					"("..C..")"..V.."("..V.."ɦ.)"
				}, "%1%2")
				text = gsubBatch(text, {
					"([ɑʌɤɯ]ɦʷ)ɒ("..NG1.."ʲ)",
					"([ɛei]ɦʲ)æ("..NG1.."ˠ)",
					"([ɔou]ɦʷ)ɒ("..NG1.."[ʲʷ])",
					{ "([ɑʌɤɯ])(ɦˠ%1ɦʷ)([ɔou])("..NG1.."ʲ)",
						function(a, b, c, d)
							return a..b..VOWELS[F1[c]][F2["ˠ"]]..d
						end },
					"(ɒɦʷɒɦʷ)ɒ("..NG1.."[ʲˠ])",
					{ "ɦʲ([ɒɔou])(.ʷ[æɛeiɑʌɤɯ])", function(a, b)
						return "ɦʲ"..VOWELS[F1[a]][F2["ʲ"]]..b
					end }
				}, "%1ɑ%2")
				text = gsubBatch(text, {
					{ "^ɦ([ʲˠʷ])$", "h%1" },
					-- a callback that returns nothing keeps the match as is
					{ "^(ɦ)([ʲʷ])("..V..")", function(a, b, c)
						if F2[b] ~= F2[c] then
							return a.."h"..c
						end
					end },
					{ "("..V..")(ɦ)([ʲʷ])$", function(a, b, c)
						if F2[a] ~= F2[c] then
							return a.."h"..c
						end
					end },
					{ "([ɑʌɤɯɒɔou])ɦʲ([ɑʌɤɯɒɔou])", "%1hʲ%2" },
					{ "ɦʷ([æɛeiɑʌɤɯ])", "hʷ%1" },
					{ "("..V..")", "%1%1" },
					{ "("..V..")(ɦ)([ʲʷ])("..V..")", function(a, b, c, d)
						if F2[a] ~= F2[d] and (
							(F2[a] == F2[c] and F1[a] < F1[d]) or
							(F2[d] == F2[c] and F1[a] > F1[d])
						) then
							return a.."h"..c..d
						end
					end },
					{ "("..V..")"..V, "%1" }
				})
				
				-- monophthongize vowels between non-glide consonants
				text = gsubBatch(text, {
					"("..NG..LQ..")"..V.."([ɒɔou]"..RQ..NG1.."ʷ)",
					"("..NG1.."ʷ"..LQ.."u)"..V.."("..RQ..NG..")",
					"("..NG1.."ʷ"..LQ.."[ɔo])"..V.."("..RQ..NG1.."ˠ)",
					{ "("..NG1.."ʷ"..LQ..")"..V.."([æɛe])("..RQ..NG1.."ʲ)",
						function(a, b, c)
							return a..VOWELS[F1[b]][F2["ˠ"]]..c
						end },
					"("..NG1.."[ʲʷ]"..LQ..")"..V.."(ɑ"..RQ..NG1.."ˠ)",
					"("..NG1.."ʲ"..LQ.."i)"..V.."("..RQ..NG1.."ˠ)",
					"("..NG1.."ˠ"..LQ..")"..V.."(i"..RQ..NG1.."ʲ)",
					"([ptmr]ʲ"..LQ.."[ɛe])"..V.."("..RQ..NG1.."ˠ)",
					"("..NG1.."ʲ"..LQ..")"..V.."([ʌɤ]"..RQ..NG1.."ˠ)",
					"("..NG1.."ˠ"..LQ.."[ʌɤ])"..V.."("..RQ..NG1.."ʲ)",
					"("..NG1.."ˠ"..LQ.."ɑ)"..V.."("..RQ..NG1.."ʲ)"
				}, "%1%2")
				
				-- diphthongs neighboring affix pseudo-glides remain
				
			end
			
			-- tie short diphthongs
			text = gsub(text, "("..V..")("..V..")", "%1"..TIE.."%2")
			
			-- delete glide phonemes, now that we're done coloring vowels
			text = gsub(text, "ɦ.", "")
			
			-- experimental
			if true then
				-- simplify secondary articulation of consonant clusters
				text = gsubBatch(text, {
					"([mnŋ]).([ptkrl].)",
					"([rl]).([rl].)"
				}, "%1%2")
			end
			
			-- convert double consonants to geminates
			text = gsub(text, "("..C..")%1", "%1ː")
			
			-- convert certain medial obstruent consonants to voiced allophones
			local map = {
				["pʲ"] = "bʲ",
				["pˠ"] = "bˠ",
				["tʲ"] = medialJ.."ʲ",
				["tˠ"] = "dˠ",
				["kˠ"] = "ɡˠ",
				["kʷ"] = "ɡʷ"
			}
			text = gsubBatch(text, {
				{ "("..V..RQ..")([ptk].)("..LQ..V..")", nil, gsub2 },
				"([mnŋl].?)([ptk].)("..V..")"
			}, function(a, b, c)
				return a..map[b]..c
			end)
			
			-- convert /tʲ/ to preferred allophones per J argument
			text = gsub(text, "tʲː?", {
				["tʲ"] = initialJ.."ʲ",
				["tʲː"] = geminateJ.."ʲː"
			})
			local patt = "[tʦs]ʲ"
			text = gsub(text, patt.."$", finalJ.."ʲ")
			
			-- display full voicing or full devoicing per voice argument
			if voice ~= "" then
				if parseBoolean(voice) then
					-- display all consonants as voiced in this mode
					text = gsub(text, "[ptʦsk]", {
						["p"] = "b",
						["t"] = "d",
						["ʦ"] = "ʣ",
						["s"] = "z",
						["k"] = "ɡ"
					})
				else
					-- display all consonants as voiceless in this mode
					text = gsub(text, "[bdʣzɡ]", {
						["b"] = "p",
						["d"] = "t",
						["ʣ"] = "ʦ",
						["z"] = "s",
						["ɡ"] = "k"
					})
				end
			end
			
			if not diphthongs then
				
				-- convert /j/ hard glides to semivowels
				text = gsubBatch(text, {
					{ "^hʲ("..V..")", function(v)
						return VOWELS[max(F1[v], 2)][F2["ʲ"]]..ASYLL..v
					end },
					{ "("..V..")hʲ$", function(v)
						return v..VOWELS[max(F1[v], 2)][F2["ʲ"]]..ASYLL
					end },
					{ "("..V..")", "%1%1" },
					{ "("..V..")(hʲ)("..V..")", function(a, b, c)
						return a..VOWELS[
							max(F1[a], F1[c], 2)
						][F2["ʲ"]]..ASYLL..c
					end },
					{ "("..V..")"..V, "%1" }
				})
				
				-- convert repeated vowels to geminates
				-- allow vowels of arbitrarily long lengths
				text = gsubx(text, "("..V..")(ː*)%1", "%1%2ː")
				if true then
					-- recognize only one long vowel length
					text = gsub(text, "ː+", "ː")
				end
				
			end
			
			-- display affricates, if any, as tied consonants
			text = gsub(text, "[ʦʣ]", {
				["ʦ"] = "t"..TIE.."s",
				["ʣ"] = "d"..TIE.."z"
			})
			
			if alvPal then
				text = gsub(text, "[sz]ʲ", { ["sʲ"] = "ɕ", ["zʲ"] = "ʑ" })
			end
			
			-- (mostly) final consonant presentation forms
			text = gsub(text, C, {
				-- dorsal obstruents and nasals are postvelar
				["kˠ"] = "k"..RETR,
				["kʷ"] = "k"..RETR.."ʷ",
				["ɡˠ"] = "ɡ"..RETR,
				["ɡʷ"] = "ɡ"..RETR.."ʷ",
				["ŋˠ"] = "ŋ"..RETR,
				["ŋʷ"] = "ŋ"..RETR.."ʷ",
				-- "passing over lightly"
				["ĭʲ"] = "i"..ASYLL,
				-- surfaced glide consonants
				["hʲ"] = "j",
				["hˠ"] = "ʔ",
				["hʷ"] = "w"
			})
			
			-- experimental
			if true then
				if voice == "" then
					-- voiced allophones are actually semi-voiced
					text = gsub(text, "[bdzʑɡ]", {
						["b"] = "b"..DEVO,
						["d"] = "d"..DEVO,
						["z"] = "z"..DEVO,
						["ʑ"] = "ʑ"..DEVO2,
						["ɡ"] = "ɡ"..DEVO2
					})
				end
			end
			
			-- experimental
			if true then
				-- deparenthesize epenthetic vowels, but make them asyllabic
				text = gsub(text, LP.."("..V..")"..RP, "%1"..ASYLL.."")
				text = gsub(
					text, LP.."("..V..")"..TIE.."("..V..")"..RP,
					"%1"..ASYLL..TIE.."%2"..ASYLL..""
				)
			end
			
			-- experimental
			if true and not diphthongs then
				-- alternate between above and below ties
				-- to improve presentation
				text = gsub(
					text, TIE.."([^"..TIE.."]*)"..TIE.."([^"..TIE.."]*)",
					TIE.."%1"..TIE2.."%2"
				)
			end
			
			-- experimental
			if false then
				-- tying vowels from below
				-- is more elegant when there are ascenders
				text = gsub(
					text, "("..V..")"..TIE.."("..V..")", "%1"..TIE2.."%2"
				)
			end
			
			-- experimental
			if true then
				-- Wiktionary tends to prefer more diacritics
				-- Wikipedia tends to prefer fewer diacritics
				-- strip some diacritics that don't affect meaning
				text = gsub(
					text, "["..DENT..DEVO..DEVO2..DOWN..RETR..UNREL..UP.."]", ""
				)
			end
			
			if noHints then
				text = gsub(text, "_.", "")
			else
				-- convert pseudo-glides to hints for attached affixes
				text = gsub(text, "^_(.)", "%1‿")
				text = gsub(text, "_(.)$", "‿%1")
			end
			
			insertUnique(outSeq, text)
			
		end
		
		if dialect == "ralik" or dialect == "ratak" then
			forRemainder(forDialect(text, dialect))
		else
			local ralik = forDialect(text, "ralik")
			local ratak = forDialect(text, "ratak")
			-- if both dialect reflexes are the same, display only one of them
			if ralik == ratak then
				forRemainder(ralik)
			else
				forRemainder(ralik)
				forRemainder(ratak)
			end
		end
		
	end
	
	-- ipairs keeps the output in the same order as the input
	for _, text in ipairs(inSeq) do
		forText(text)
	end
	
	return outSeq
	
end

-- Expose the table-based implementations directly (underscore-prefixed),
-- alongside the frame-based entry points defined below.
export._parse = parse
export._toBender = toBender
export._toPhonemic = toPhonemic
export._toPhonetic = toPhonetic

-- Frame entry point: render the first argument in Bender orthography.
-- The optional "version" argument selects the orthography variant.
-- Returns the resulting forms joined with ", ".
function export.bender(frame)
	local args = frame.args
	-- The options belong to toBender, not to parse (which takes only the
	-- code). A missing version is passed as "" so that toBender selects
	-- its default variant.
	local options = { version = args.version or "" }
	return concat(toBender(parse(args[1]), options), ", ")
end

-- Frame entry point: show the internal format of the first argument,
-- with the resulting forms joined with ", ".
function export.parse(frame)
	local forms = parse(frame.args[1])
	return concat(forms, ", ")
end

-- Frame entry point: phonemic transcription of the first argument,
-- with the resulting forms joined with ", ".
function export.phonemic(frame)
	local parsed = parse(frame.args[1])
	local forms = toPhonemic(parsed)
	return concat(forms, ", ")
end

-- Frame entry point: phonetic transcription of the first argument.
-- The frame's arguments are passed on to toPhonetic as its options.
-- Returns the resulting forms joined with ", ".
function export.phonetic(frame)
	local args = frame.args
	local forms = toPhonetic(parse(args[1]), args)
	return concat(forms, ", ")
end

-- Retired entry points: each one ignores its frame and returns the
-- fixed marker text.
for _, name in ipairs({ "phoneticMED", "phoneticChoi", "phoneticWillson" }) do
	export[name] = function(frame)
		return "DEPRECATED"
	end
end

return export