Module:fa-IPA/harakat-ira

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module can create phonetic, fully vocalized Persian text from an Iranian romanization. It is meant to be used by {{fa-IPA}} to create phonetic spellings for Iranian Persian using modern Iranian vocalization, but if needed it can also be invoked elsewhere with {{xlit}}.

For the equivalent module for Classical Persian and Dari, see {{Module:fa-IPA/harakat}}.


-- Short aliases for the Unicode-aware string functions used by this module.
local rsubn = mw.ustring.gsub
local U = mw.ustring.char

-- Table returned by the module; public functions are attached to it.
local export = {}

-- Combining marks, listed in code-point order.
local zabar   = U(0x064E) -- ARABIC FATHA
local pesh    = U(0x064F) -- ARABIC DAMMA
local zer     = U(0x0650) -- ARABIC KASRA
local tashdid = U(0x0651) -- ARABIC SHADDA (also called shadda)
local jazm    = U(0x0652) -- ARABIC SUKUN (sukoon)
local highhmz = U(0x0654) -- ARABIC HAMZA ABOVE

-- Other characters that are referenced by name.
local zwnj = U(0x200C) -- ZERO WIDTH NON-JOINER
local he = "ه"


-- Lookup table: one romanized consonant character -> one Persian letter.
-- Entries are ordered by the Persian letter they produce.
--
-- STOP! fa-IPA should remove incorrect characters;
-- if an incorrect character is appearing, check fa_IPA, not here.
local convert_consonants = {
	["'"] = "ئ",
	b = "ب",
	p = "پ",
	t = "ت",
	j = "ج",
	["č"] = "چ",
	x = "خ",
	d = "د",
	r = "ر",
	z = "ز",
	["ž"] = "ژ",
	s = "س",
	["š"] = "ش",
	["ğ"] = "غ",
	f = "ف",
	q = "ق",
	k = "ک",
	g = "گ",
	l = "ل",
	m = "م",
	n = "ن",
	w = "و",
	v = "و",
	h = he,
	y = "ی",
	-- only for Hazaragi
	["ɖ"] = "د",
	["ʈ"] = "ت",
}

-- Lookup table: one romanized vowel character -> its written form.
-- a/e/o become the diacritics zabar/zer/pesh; â/u/i become full letters.
local convert_vowels = {
	a = zabar,
	e = zer,
	o = pesh,
	["â"] = "ا",
	u = "و",
	i = "ی",
}

-- Character sets that get spliced into "[...]" classes in the patterns of
-- export.tr. They are plain strings, not patterns on their own.

-- including â causes issues
local vowels = "aeoiu"

-- NOTE: "w" is listed twice; this is harmless inside a character class.
local consonants = "bptjčxdrzžsš'ğfqkglmnwvwhy"

-- Characters after which a hyphen is replaced by jazm rather than by zwnj
-- (see the hyphen rules in export.tr). The jazm mark itself is a member.
local dc_consonants = "âdrwvuzž" .. jazm

-- Reusable capture groups for the rule patterns below. Each one is a single
-- capture, so the numbering of %1/%2/%3 in the replacements depends on how
-- many of these are chained together in a pattern.
local HYPHEN = "([-])"
local CONS = "([" .. consonants .. "])"
local VOWEL = "([" .. vowels .. "])"
local DC = "([" .. dc_consonants .. "])"
local NOT_DC = "([^" .. dc_consonants .. "])"

-- Rules that run on the raw input, before word boundaries are marked.
local pre_rules = {
	{ "([%(%)])", "" }, -- drop parentheses
	{ " | ", "# | #" },
}

-- Rules that run after every word has been delimited with "#".
-- ORDER MATTERS: later rules rely on the output of earlier ones.
local main_rules = {
	{ "`", "" },
	-- NOTE(review): every space is already preceded by "#" at this point, so
	-- this pattern looks unable to match; the final "[ " rule covers for it.
	{ ", ", "," },
	{ ",", "] ,[" },
	{ "%]", "#]#" },
	{ "%[", "#[#" },

	-- remove unpronounced or incorrect letters
	{ "[.]", "" },
	-- word-final "h" after a/e/o gets a jazm
	{ "([aeo]h)#", "%1" .. jazm .. "#" },

	-- prevent ezafe from being processed: the hyphenated ezafe is rewritten
	-- with "_" placeholders, which are stripped further down
	{ CONS .. HYPHEN .. "e#", "%1_e_" },
	{ "([âu])" .. HYPHEN .. "ye#", "%1_ye_#" },
	{ "([i])" .. HYPHEN .. "ye#", "%1yye_#" },
	{ "([y])" .. HYPHEN .. "ye#", "%1ye_#" },
	{ "iy", "ey" },

	-- a doubled consonant becomes consonant + tashdid
	{ CONS .. "%1", "%1" .. tashdid },
	-- two adjacent consonants get a jazm between them;
	-- needs to be repeated for overlapping patterns
	{ CONS .. CONS, "%1" .. jazm .. "%2" },
	{ CONS .. CONS, "%1" .. jazm .. "%2" },

	-- word-initial â, and glottal stop next to a vowel
	{ "#â", "#آ" },
	{ "o'", "oؤ" },
	{ "e'", "eئ" },
	{ "'â", "آ" },

	-- a/e/o directly before a hyphen gets an "h" appended
	{ "([aeo])" .. HYPHEN, "%1h-" },

	-- hyphen handling: after a dc_consonants character the hyphen becomes
	-- jazm, after anything else it becomes zwnj; a following vowel from
	-- `vowels` additionally gets "â" inserted before it
	{ DC .. HYPHEN .. "â", "%1" .. jazm .. "آ" },
	{ NOT_DC .. HYPHEN .. "â", "%1" .. zwnj .. "آ" },
	{ DC .. HYPHEN .. VOWEL, "%1" .. jazm .. "â%3" },
	{ NOT_DC .. HYPHEN .. VOWEL, "%1" .. zwnj .. "â%3" },
	{ DC .. HYPHEN .. CONS, "%1" .. jazm .. "%3" },
	{ NOT_DC .. HYPHEN .. CONS, "%1" .. zwnj .. "%3" },

	-- word-initial vowel gets "â" in front; word-final a/e/o gets "h" after
	{ "#" .. VOWEL, "#â%1" },
	{ "([aeo])#", "%1h#" },

	-- try to find ezafe markings
	{ "([aeo]h)(" .. zwnj .. "yeh)#", "%1" .. highhmz },
	{ "([aeo]h)(" .. zwnj .. "âeh)#", "%1" .. highhmz },
	{ "([âu])_ye_#", "%1ye#" },
	{ "%_", "" }, -- strip the remaining ezafe placeholders
	{ "(['])#", "ء#" },

	-- map every remaining romanized character; characters that are missing
	-- from a lookup table are left as they are
	{ ".", convert_consonants },
	{ ".", convert_vowels },

	-- clean up the helper characters
	{ "[-]", "" },
	{ "#", "" },
	{ "%[ ", "[" }, -- this prevents weird spacing
}

-- Runs `text` through every {pattern, replacement} pair of `rules`, in order.
local function apply_rules(text, rules)
	for _, rule in ipairs(rules) do
		-- plain assignment keeps only the string result of gsub, not the count
		text = rsubn(text, rule[1], rule[2])
	end
	return text
end

-- Converts a romanized string into vocalized Persian-script text.
--   text: the romanization to convert
--   lang, sc: accepted but not used by this function
-- Returns the converted string.
function export.tr(text, lang, sc)
	text = apply_rules(text, pre_rules)
	-- mark word boundaries: "#" on both sides of every space, "##" at the ends
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	return apply_rules(text, main_rules)
end

return export