Module:User:Sameerhameedy/fa-IPA/romanize

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Examples

[edit]
{{Template:User:Sameerhameedy/fa-IPA|آقَا}}
 

Readings
Classical reading? āqā
Dari reading? āqā
Iranian reading? âğâ
Tajik reading? oqo
{{Template:User:Sameerhameedy/fa-IPA|āqā}}
 

Readings
Classical reading? āqā
Dari reading? āqā
Iranian reading? âğâ
Tajik reading? oqo
{{Template:User:Sameerhameedy/fa-IPA|قَهْوَه‌اِی}}
 

Readings
Classical reading? qahwa-ī
Dari reading? qahwa-ī
Iranian reading? ğahve-i
Tajik reading? qahvayi
{{Template:User:Sameerhameedy/fa-IPA|کِتَابِ لَطِیفَه}}
 

Readings
Classical reading? kitāḇ-i latīfa
Dari reading? kitāb-i latīfa
Iranian reading? ketâb-e latife
Tajik reading? kitobi latifa
{{Template:User:Sameerhameedy/fa-IPA|میوَهٔ جَادُویِی}}
 

Readings
Classical reading? mēwa-yi jāḏūyī
Dari reading? mēwe jādūyī
Iranian reading? mive-ye jâduyi
Tajik reading? meva-yi joduyi
{{Template:User:Sameerhameedy/fa-IPA|می‌بَخْشین}}
 

Readings
Classical reading? mē-ḇaxšēn
Dari reading? mē-baxšēn
Iranian reading? mi-baxšin
Tajik reading? me-baxšen
{{Template:User:Sameerhameedy/fa-IPA|اَبَر-نَو-اَخْتَر}}
 

Readings
Classical reading? aḇar-naw-axtar
Dari reading? abar-naw-axtar
Iranian reading? abar-now-axtar
Tajik reading? abar-nav-axtar
{{Template:User:Sameerhameedy/fa-IPA|مُرْچ|ir=مَرِچ,مُرْچ}}
 

Readings
Classical reading? murč
Dari reading? murč
Iranian reading? Lua error: bad argument #1 to 'gsub' (string expected, got nil)
Tajik reading? murč
{{Template:User:Sameerhameedy/fa-IPA|هِنْدُوسْتَا`نِی}}
 

Readings
Classical reading? hindūstānī́
Dari reading? hindūstānī́
Iranian reading? hendustâní
Tajik reading? hindustoní
{{Template:User:Sameerhameedy/fa-IPA|خُ`دَا}}
 

Readings
Classical reading? xuḏā́
Dari reading? xudā́
Iranian reading? xodấ
Tajik reading? xudó
{{Template:User:Sameerhameedy/fa-IPA|مِهْمَانِی}}
 

Readings
Classical reading? mihmānī
Dari reading? mehmānī
Iranian reading? mehmâni
Tajik reading? mehmoni

-- Table of functions this module returns to its callers.
local export = {}

-- NOTE(review): `lang` is not referenced anywhere else in the visible code.
local lang = require("Module:languages").getByCode("fa-cls") --transliteration must be classical

-- Shorthand for building a string from Unicode code points.
local U = mw.ustring.char
-- Letters treated as consonants when building character classes.
local consonants = "bptTṭjčhxdDðḍrzžsšʔʾğGfqkglmnŋhɦwvy'" 
-- Vowel letters of the intermediate romanization.
local vowels = "aiuāīūüēō"
-- Pattern for one character that is NOT a vowel, ".", space or "-"; note that
-- it therefore also matches the "#" word-boundary marker.
local consonant = "[^" .. vowels .. ". -]"
-- Pattern for exactly one vowel letter.
local vowel = "[" .. vowels .. "]"
local pitchaccent = U(0x301) -- U+0301 combining acute accent
local rsplit = mw.text.split -- NOTE(review): not used in the visible code
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len -- NOTE(review): not used in the visible code
local hyphen = U(0x02D) -- U+002D hyphen-minus
local devoice = U(0x325) -- U+0325 combining ring below
local dtack = U(0x31E) -- U+031E combining down tack below
local gstop = U(0x027) -- U+0027 apostrophe
local dental = U(0x32A) -- U+032A combining bridge below

-- Transliterate `text` with the `tr` function of Module:fa-cls-translit.
-- `sc` is accepted but not forwarded to that module.
local function transliterate(text, sc)
	local translit_module = require("Module:fa-cls-translit")
	return translit_module.tr(text)
end

--- Normalize input into the intermediate romanization shared by every variety.
-- Transliterates Persian-script input, wraps each word in "#" boundary
-- markers, turns stress marks into a combining acute accent on the stressed
-- vowel, and folds a handful of phonetic symbols and emphatic letters into
-- plain counterparts.
-- @param text    string to romanize, or a frame-like table whose `args[1]` is
--                the text and `args[2]` an optional script code
-- @param sc      optional script code; detected from `text` when omitted
-- @param options currently unused
-- @return the normalized string, still carrying the "#" boundary markers
local function fix_romanization(text, sc, options) -- common fixes
	if type(text) == "table" then
		-- Store the script code in `sc`: assigning it to an undeclared name
		-- would create a global and leave `sc` unset.
		text, sc = text.args[1], text.args[2]
		if sc == "" then
			-- an empty argument means "not given", so fall back to detection
			sc = nil
		end
	end
	if text == nil then
		-- Fail with a readable message instead of an opaque gsub type error.
		error("fix_romanization: text to romanize is missing (got nil)", 2)
	end
	-- xwV clusters: shield any "xw" already present in the input so it can be
	-- told apart from "xw" produced by transliteration below
	text = rsubn(text, "xw", "xW")
	if not sc then
		sc = require("Module:languages").getByCode("fa"):findBestScript(text):getCode()
	end
	if sc == "fa-Arab" then
		text = transliterate(text, sc)
	end
	-- "xw" coming out of transliteration becomes "xʷ"; shielded input is restored
	text = rsubn(text, "xw", "xʷ")
	text = rsubn(text, "W", "w")
	-- word boundaries: "#" is placed around separators and at both ends
	text = rsubn(text, " | ", "# | #")
	text = rsubn(text, "[,]".. " ", ",")
	text = rsubn(text, "[,]", "#,#")
	text = rsubn(text, " ", "# #")
	text = rsubn(text, "^", "#")
	text = rsubn(text, "$", "#")
	-- stress: "ˈ" is an alias for "`"; the mark moves onto the next vowel as a
	-- combining acute accent
	text = rsubn(text, "ˈ", "`")
	text = rsubn(text, "`([" .. consonants .. "])([ʷ]?)([" .. vowels .. "])", "%1%2%3" .. pitchaccent .. "")
	text = rsubn(text, "`([" .. vowels .. "])", "%1" .. pitchaccent .. "")
	-- drop phonetic diacritics and aspiration
	text = rsubn(text, "([" .. dental .. devoice .. dtack .. "ʰ])", "")
	text = rsubn(text, "([ɴŋ])", "n")
	-- plain e/o are written as the long vowels ē/ō
	text = rsubn(text, "e", "ē")
	text = rsubn(text, "o", "ō")
	text = rsubn(text, "G", "ğ")
	-- fold dotted/underlined letters into their plain counterparts
	text = rsubn(text, "ḍ", "z")
	text = rsubn(text, "ṭ", "t")
	text = rsubn(text, "ṯ", "s")
	text = rsubn(text, "ṣ", "s")
	text = rsubn(text, "ḥ", "h")
	text = rsubn(text, "v", "w")
	return text
end


--- Romanize `text` for the Classical reading.
-- Runs the shared normalization, then applies the ordered substitutions
-- below. `sc` and `options` are accepted but not used here.
-- @return the romanized string with the "#" boundary markers removed
function export.romanize_fa_cls(text, sc, options)
	-- Order matters: every rule sees the output of the one before it.
	local rules = {
		-- ensure vowels are paired to a consonant: a vowel right after "."
		-- gets a "'" onset, then the dots themselves are removed
		{ "([.])(" .. vowel .. ")", "%1'%2" },
		{ "([.])", "" },
		-- d (or ḍ) after a vowel becomes ḏ; b after a vowel (optionally across
		-- a hyphen) becomes ḇ
		{ "(" .. vowel .. ")([dḍ])", "%1ḏ" },
		{ "(" .. vowel .. "(%-?))b", "%1ḇ" },
		-- NOTE(review): as written this replaces ḏ with itself; possibly meant
		-- to normalize a decomposed form — confirm
		{ "ḏ", "ḏ" },
		-- a geminate keeps the same letter on both halves
		{ "ḏd", "ḏḏ" },
		{ "ḇb", "ḇḇ" },
		-- remove Hazaragi retroflexes
		{ "D", "d" },
		{ "T", "t" },
		{ "ɖ", "d" },
		{ "ʈ", "t" },
		-- drop a word-initial "'" together with its marker, then all markers
		{ "#(['])", "" },
		{ "#", "" },
	}

	local result = fix_romanization(text)
	for _, rule in ipairs(rules) do
		result = rsubn(result, rule[1], rule[2])
	end
	return result
end

--- Romanize `text` for the Dari reading.
-- Runs the shared normalization, then applies the ordered substitutions
-- below. `sc` and `options` are accepted but not used here.
-- @return the romanized string with the "#" boundary markers removed
function export.romanize_prs(text, sc, options)
	local accent_then_glottal = "((" .. pitchaccent .. "?)['h])"
	-- Order matters: every rule sees the output of the one before it.
	local rules = {
		-- i/u directly before "'" or "h" (optionally accented) lower to e/o
		{ "i" .. accent_then_glottal, "e%1" },
		{ "u" .. accent_then_glottal, "o%1" },
		-- Replace xw clusters
		{ "xw([āē])", "x%1" },
		{ "xwa", "xu" },
		-- "a" plus a hyphenated "-yi" / "-i" contracts to "e"
		{ "a%-yi", "e" },
		{ "a%-i", "e" },
		-- for rare exceptions
		{ "ʷ", "w" },
		-- ensure vowels are paired to a consonant
		{ "([.])([" .. vowels .. "])", "%1'%2" },
		{ "([.])", "" },
		-- THIS SHOULD ONLY BE DONE FOR HAZARAGI RETROFLEX ENTRIES
		-- THEY SHOULD NEVER APPEAR IN A MAIN ENTRY
		{ "D", "d" },
		{ "T", "t" },
		{ "ɖ", "d" },
		{ "ʈ", "t" },
		{ "ḏ", "z" },
		-- remove unnecessary marks
		{ "#", "" },
	}

	local result = fix_romanization(text)
	for index = 1, #rules do
		local rule = rules[index]
		result = rsubn(result, rule[1], rule[2])
	end
	return result
end

--- Romanize `text` for the Iranian reading.
-- Runs the shared normalization, then applies the ordered substitutions
-- below: w/v distribution, the aw/ay diphthongs, the vowel shifts, and loss
-- of vowel length. `sc` and `options` are accepted but not used here.
-- @return the romanized string with the "#" boundary markers removed
function export.romanize_ira(text, sc, options)
	text = fix_romanization(text)
	-- Replace xw clusters
	text = rsubn(text, "ʷ", "w")
	text = rsubn(text, "xw([āē])", "x%1")
	text = rsubn(text, "xwa", "xu")
	-- w becomes v before a vowel, after a non-"a" vowel, and word-finally
	-- after a consonant
	text = rsubn(text, "w(" .. vowel .. ")", "v%1")
	-- NOTE(review): this replaces the match with itself, so it has no effect
	text = rsubn(text, "w(" .. consonant .. ")", "w%1")
	text = rsubn(text, "([iuāīūüēō])w", "%1v")
	text = rsubn(text, "v%(w", "v(v")
	text = rsubn(text, "(" .. consonant .. ")w#", "%1v#")
	text = rsubn(text, "wv", "vv")
	text = rsubn(text, "wæ", "væ")
	--ensure vowels are paired to a consonant
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1'%2")
	text = rsubn(text, "([.])", "")
	text = rsubn(text, "iy", "īy")
	-- shield "ayy" from the diphthong rule; the "A" is turned back into "a" below
	text = rsubn(text, "ayy", "Ayy")
	-- Replace diphthong: "aw"/"ay" become "uw"/"iy" unless a vowel follows.
	-- The next character must be tested against a vowel class; comparing it
	-- with itself is always true and would also rewrite "ay" before a vowel.
	-- "æ" and "A" are included because both end up as "a" further down.
	local vowel_follows = "^[" .. vowels .. "æA]"
	text =
		rsubn(
		text,
		"a([wy])()",
		function(semivowel, position)
			-- `text` is still the pre-substitution string here, so `position`
			-- indexes the character right after the semivowel
			local next_char = mw.ustring.sub(text, position, position)
			if next_char == "" or not mw.ustring.find(next_char, vowel_follows) then
				if semivowel == "w" then
					return "uw"
				else
					return "iy"
				end
			end
			-- returning nothing keeps the original match unchanged
		end
	)
	text = rsubn(text, "A", "a")
	text = rsubn(text, "q", "ğ")
	text = rsubn(text, "ā", "â")
	text = rsubn(text, "u", "o")
	text = rsubn(text, "i", "e")
	-- remove Hazaragi retroflexes
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	text = rsubn(text, "ḏ", "z")
	-- IP does not have vowel length
	text = rsubn(text, "([ēī])", "i")
	text = rsubn(text, "([ūō])", "u")
	-- terminal w is only possible in a diphthong
	-- NOTE(review): this pattern requires a literal "0" after "o", so it looks
	-- like it can never match romanized text — confirm the intended pattern
	text = rsubn(text, "([o]0)w#", "v#")
	-- "a" at the end of a word or before a hyphen becomes "e"
	text = rsubn(text, "a#", "e#")
	text = rsubn(text, "a%-", "e-")
	-- "æ" is written as plain "a" only after the a-rules above have run
	text = rsubn(text, "æ", "a")
	text = rsubn(text, "#(['])", "")
	text = rsubn(text, "#", "")
	return text
end

--- Romanize `text` for the Tajik reading.
-- Runs the shared normalization, then applies the ordered substitutions
-- below. `sc` and `options` are accepted but not used here.
-- @return the romanized string with the "#" boundary markers removed
function export.romanize_tg(text, sc, options)
	local vowel_class = "([" .. vowels .. "])"
	-- Order matters: every rule sees the output of the one before it.
	local rules = {
		-- i/u directly before "'" or "h" become ē/ō
		{ "i(['h])", "ē%1" },
		{ "u(['h])", "ō%1" },
		{ "w", "v" },
		{ "ʷ", "v" },
		-- Replace xw clusters (already spelled with v at this point)
		{ "xv([āē])", "x%1" },
		{ "xva", "xu" },
		-- ensure vowels are paired to a consonant
		{ "([.])" .. vowel_class, "%1'%2" },
		{ "([.])", "" },
		{ "(['])", "ʾ" },
		{ "ğ", "ġ" },
		{ "ē", "e" },
		{ "ō", "ü" },
		{ "ā", "o" },
		-- remove Hazaragi retroflexes
		{ "D", "d" },
		{ "T", "t" },
		{ "ɖ", "d" },
		{ "ʈ", "t" },
		{ "ḏ", "z" },
		-- Tajik does not have vowel length
		{ "([iī])", "i" },
		{ "`([" .. consonants .. "])i#", "%1ī#" },
		{ "([ūu])", "u" },
		-- drop leftover stress marks
		{ "`", "" },
		{ "ˈ", "" },
		-- drop a word-initial "ʾ" together with its marker, then all markers
		{ "#([ʾ])", "" },
		{ "#", "" },
		-- a hyphenated "-i" is attached: "yi" after a vowel, otherwise "i"
		{ vowel_class .. "%-i", "%1yi" },
		{ "%-i", "i" },
	}

	local result = fix_romanization(text)
	for _, rule in ipairs(rules) do
		result = rsubn(result, rule[1], rule[2])
	end
	return result
end


--- regional/colloquial varieties
-- Remove the glottal consonants ("h", "ɦ", "'"; "GC" below) from `text`,
-- replacing them with a glide, a doubled consonant or a long vowel depending
-- on their surroundings. The rules are order-dependent: each one operates on
-- the output of the previous one. Returns the rewritten string.
local function remove_glottal_c(text) --only for regional dialects
	-- completely delete GC if both vowels are either the same or similar:
	-- consonant + vowel + GC + similar vowel collapses to consonant + long vowel.
	-- Note that the replacement emits a leading "`" and drops any captured accent.
	text = rsubn(text, "([" .. consonants .. "])([uū])(" .. pitchaccent .. "?)([h'])([uū])", "`%1ū")
	text = rsubn(text, "([" .. consonants .. "])([iī])(" .. pitchaccent .. "?)([h'])([iī])", "`%1ī")
	text = rsubn(text, "([" .. consonants .. "])([aā])(" .. pitchaccent .. "?)([h'])([aā])", "`%1ā")
	-- a word-final vowel after "vowel-" gets a "y" glide after the hyphen
	-- (both the front-vowel and the back-vowel rule insert "y")
	text = rsubn(text, "([" .. vowels .. "](" .. pitchaccent .. "?))%-([īēi])#", "%1-y%3")
	text = rsubn(text, "([" .. vowels .. "](" .. pitchaccent .. "?))%-([uūō])#", "%1-y%3")
	-- a hyphen between a consonant and a vowel is turned into "'"
	text = rsubn(text, "(" .. consonant .. ")%-(" .. vowel .. ")", "%1'%2")
	-- replace GC between two vowels with the appropriate glide (w or y);
	-- the i/u rules also rewrite the first vowel as plain i/u
	text = rsubn(text, "([aā](" .. pitchaccent .. "?))([hɦ'])([uū])", "%1w%4")
	text = rsubn(text, "([iī])(" .. pitchaccent .. "?)([hɦ'])([auāēōū])", "i%2y%4")
	text = rsubn(text, "([auāēōū](" .. pitchaccent .. "?))([hɦ'])([iīē])", "%1y%4")
	text = rsubn(text, "([ē](" .. pitchaccent .. "?))([hɦ'])([auāēōū])", "%1y%4")
	text = rsubn(text, "([uū])(" .. pitchaccent .. "?)([hɦ'])([aāiīēō])", "u%2w%4")
	text = rsubn(text, "([ō](" .. pitchaccent .. "?))([hɦ'])([aāiīēō])", "%1w%4")
	-- GC directly after a consonant is replaced by a copy of that consonant
	text = rsubn(text, "([" .. consonants .. "])([h\'])", "%1%1")
	-- Else, turn vowel + GC into long vowels (ā; majhul ē / ō)
	text = rsubn(text, "([aā])((%.?)[h'])", "ā")
	text = rsubn(text, "([iī])((%.?)['])", "ē")
	text = rsubn(text, "([uū])((%.?)['])", "ō")

	-- lastly, remove all remaining GC
	text = rsubn(text, "(['h])", "")
	return text
end

--- Romanize `text` for the Hazaragi reading.
-- Runs the shared normalization, a first batch of ordered substitutions
-- (including vowel harmony), the glottal-consonant removal, and a final
-- cleanup batch. `sc` and `options` are accepted but not used here.
-- @return the romanized string with the "#" boundary markers removed
function export.romanize_haz(text, sc, options)
	local accent = "(" .. pitchaccent .. "?)"
	local cons = "(" .. consonant .. ")"

	-- Applied before glottal consonants are removed; order matters.
	local before_glottal = {
		-- Replace xw clusters
		{ "xw([āē])", "x%1" },
		{ "xwa", "xu" },
		-- "y" glide between a vowel and a hyphenated ī/ē
		{ "(" .. vowel .. ")%-([īē])", "%1-y%2" },
		-- "I" is a placeholder turned back into "i" at the very end, which
		-- keeps it out of the i -> ī rule below
		{ "a%-yi", "I" },
		{ "a%-i", "I" },
		{ "%-i#", "-I#" },
		{ "%-i%-", "I-" },
		-- for rare exceptions
		{ "ʷ", "w" },
		-- ensure vowels are paired to a consonant
		{ "([.])([" .. vowels .. "])", "%1'%2" },
		{ "([.])", "" },
		-- THIS SHOULD ONLY BE DONE FOR HAZARAGI RETROFLEX ENTRIES
		-- THEY SHOULD NEVER APPEAR IN A MAIN ENTRY
		{ "D", "ḍ" },
		{ "T", "ṭ" },
		{ "ɖ", "ḍ" },
		{ "ʈ", "ṭ" },
		{ "ḏ", "z" },
		-- Vowel Harmony: a vowel takes the quality of the vowel that follows
		-- the next consonant(s)
		{ "ē" .. accent .. cons .. "([ūiī])", "%3%1%2%3" },
		{ "ē" .. accent .. cons .. cons .. "([ī])", "%4%1%2%3%4" },
		{ "i" .. accent .. cons .. "([ouū])", "%3%1%2%3" },
		{ "ī" .. accent .. cons .. "([ēōuūiī])", "%3%1%2%3" },
		{ "ō" .. accent .. cons .. "([uū])", "%3%1%2%3" },
		{ "ō" .. accent .. cons .. "([i])", "u%1%2%3" },
		{ "ō" .. accent .. cons .. "([ī])", "ū%1%2%3" },
	}

	-- Applied after glottal consonants are removed; order matters.
	local after_glottal = {
		{ "i", "ī" },
		{ "u", "ū" },
		{ "I", "i" },
		-- remove unnecessary marks
		{ "#(['])", "" },
		{ "#", "" },
	}

	local result = fix_romanization(text)
	for _, rule in ipairs(before_glottal) do
		result = rsubn(result, rule[1], rule[2])
	end
	result = remove_glottal_c(result)
	for _, rule in ipairs(after_glottal) do
		result = rsubn(result, rule[1], rule[2])
	end
	return result
end

--- Romanize `text` for the Kabuli reading.
-- Runs the shared normalization, the ordered substitutions below and the
-- glottal-consonant removal. `sc` and `options` are accepted but not used
-- here.
-- @return the romanized string with the "#" boundary markers removed
function export.romanize_kbl(text, sc, options)
	text = fix_romanization(text)
	-- Replace xw clusters
	text = rsubn(text, "xw([āē])", "x%1")
	text = rsubn(text, "xwa", "xu")
	-- "y" glide between a vowel and a hyphenated ī
	text = rsubn(text, "(" .. vowel .. ")%-ī", "%1-yī")
	text = rsubn(text, "a%-yi", "i")
	text = rsubn(text, "a%-i", "i")
	-- "I" is a placeholder for word-final i; it is turned back into "i" after
	-- the glottal rules have run. The captured accent is not re-emitted.
	text = rsubn(text, "i(" .. pitchaccent .. "?)#", "I#")
	text = rsubn(text, "i(" .. pitchaccent .. "?)%-#", "I-#")
	-- for rare exceptions
	text = rsubn(text, "ʷ", "w")
	--ensure vowels are paired to a consonant
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1'%2")
	text = rsubn(text, "([.])", "")
	-- THIS SHOULD ONLY BE DONE FOR HAZARAGI RETROFLEX ENTRIES
	-- THEY SHOULD NEVER APPEAR IN A MAIN ENTRY
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	text = rsubn(text, "ḏ", "z")
	text = remove_glottal_c(text)
	text = rsubn(text, "I", "i")
	-- remove unnecessary marks: the "#" word-boundary markers are internal
	-- bookkeeping and must not reach the output (every other romanizer in
	-- this module strips them as its last step)
	text = rsubn(text, "#", "")
	return text
end

return export