Module:User:Awesomemeeos/farsiexperimental2

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Awesomemeeos, for their own experimentation. Items in this module may be added and removed at Awesomemeeos's discretion; do not rely on this module's stability.


-- Module table: functions are attached to it below and it is the value the
-- module returns.
local export = {}

-- Disabled multi-letter substitution table. It pairs letter sequences with a
-- single ligature character; it is commented out and nothing reads it.
-- local symbols= {
	-- used to map more than one letter
	-- ["لا"]="ﻻ",
	-- ["الله"]="ﷲ",
-- }
 
-- Per-character transliteration table. Each key is a single input character
-- and each value is the Latin text that replaces it; export.track passes this
-- table as the replacement argument of a one-character-at-a-time gsub.
-- Entries are kept one per line so that right-to-left keys do not visually
-- reorder neighbouring entries in an editor.
local mapping = {
	-- letters
	["ا"] = "â",
	["ب"] = "b",
	["پ"] = "p",
	["ت"] = "t",
	["ث"] = "s",
	["ج"] = "j",
	["چ"] = "č",
	["ح"] = "h",
	["خ"] = "x",
	["د"] = "d",
	["ذ"] = "z",
	["ر"] = "r",
	["ز"] = "z",
	["ژ"] = "ž",
	["س"] = "s",
	["ش"] = "š",
	["ص"] = "s",
	["ض"] = "z",
	["ط"] = "t",
	["ظ"] = "z",
	["غ"] = "ğ",
	["ف"] = "f",
	["ق"] = "q",
	["ک"] = "k",
	["گ"] = "g",
	["ل"] = "l",
	["م"] = "m",
	["ن"] = "n",
	["و"] = "o",
	["ه"] = "h",
	["ی"] = "e",
	["آ"] = "â",

	-- letters that are all written as an apostrophe
	["ع"] = "’",
	["ء"] = "’",
	["ئ"] = "’",
	["ؤ"] = "’",
	["أ"] = "’",

	-- diacritics (keys given as UTF-8 byte escapes)
	["\217\142"] = "a", -- fathe, zabar
	["\217\144"] = "i", -- kasre, zir
	["\217\143"] = "u", -- zamme, piš
	["\217\146"] = "", -- jazm, sokun - no vowel
	["\226\128\140"] = "-", -- ZWNJ (zero-width non-joiner)

	-- ligatures
	["ﻻ"] = "lâ",
	["ﷲ"] = "llâh",

	-- kashida, no sound
	["ـ"] = "",

	-- numerals
	["۱"] = "1",
	["۲"] = "2",
	["۳"] = "3",
	["۴"] = "4",
	["۵"] = "5",
	["۶"] = "6",
	["۷"] = "7",
	["۸"] = "8",
	["۹"] = "9",
	["۰"] = "0",

	-- normal arabic variants of the numerals
	["١"] = "1",
	["٢"] = "2",
	["٣"] = "3",
	["٤"] = "4",
	["٥"] = "5",
	["٦"] = "6",
	["٧"] = "7",
	["٨"] = "8",
	["٩"] = "9",
	["٠"] = "0",

	-- punctuation
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = "“", -- quotation mark
	["»"] = "”", -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimal point
	["٬"] = ",", -- thousands separator
	["ۀ"] = "-ye", -- he ye (in ezâfe)
}
 
--- Transliterate Arabic-script text into Latin letters using `mapping`.
--
-- The function runs a fixed, order-dependent chain of substitutions: a few
-- rewrites on the original script, then the per-character `mapping` lookup,
-- then clean-up rules on the resulting Latin text.
--
-- @param text  the string to transliterate, or a table with an `args` field
--              (presumably a Scribunto frame - confirm against callers), in
--              which case text, lang and sc are read from args[1], args[2]
--              and args[3]
-- @param lang  accepted but not used by this function
-- @param sc    accepted but not used by this function
-- @return the transliterated string
function export.track(text, lang, sc)
	if type(text) == "table" then
		text, lang, sc = text.args[1], text.args[2], text.args[3]
	end

	local gsub = mw.ustring.gsub
	local kasre = "\217\144" -- `mapping` later renders this as "i"

	-- A he at the very end of the text is replaced by kasre.
	text = gsub(text, 'ه$', kasre)
	-- Same replacement for a he followed by a character outside the listed
	-- set. The following character is captured and must be written back;
	-- without "%1" it was silently deleted from the output.
	-- NOTE(review): the excluded set is Latin, but `mapping` has not run yet at
	-- this point, so every Arabic-script letter is accepted as a follower too.
	-- Confirm whether this rule was meant to run after the mapping step.
	text = gsub(text, 'ه([^’bdfghjklmnpqrstvyxzčğšž])', kasre .. "%1")
	text = gsub(text, 'ىٰ', "â")

	-- Per-character lookup; characters without an entry are left unchanged.
	text = gsub(text, '.', mapping)

	-- Vowel clean-up on the Latin output.
	text = gsub(text, 'âً', "an")
	text = gsub(text, 'aâ', "â")
	text = gsub(text, 'âa', "a")
	text = gsub(text, 'ie', "î")
	text = gsub(text, 'ae', "ay")
	text = gsub(text, 'uo', "û")
	text = gsub(text, 'ao', "au")
	-- Disabled rules, kept for reference:
	-- text = mw.ustring.gsub(text, 'o([aâeiou])', "v%1")
	-- text = mw.ustring.gsub(text, 'e([aâeiou])', "y%1")

	text = gsub(text, "([aâeiîouû])(\217\145)", "%2%1") -- swapping tašdid with vowels
	text = gsub(text, "(.)\217\145", "%1%1") -- implementing tašdid

	-- "ih" at the end of the text or before a non-letter loses its "h". The
	-- end-of-text case used to delete the "i" as well, so the same word came
	-- out differently depending on whether anything followed it.
	text = gsub(text, 'ih$', "i")
	text = gsub(text, 'ih([^aâeiîouûy’bdfghjklmnpqrstvyxzčğšž])', "i%1")

	--[[ Not implemented: conversion to West Persian. It should convert:

	i to e
	î to i
	e to i
	ay to ey
	u to o
	û to u
	o to u
	au to ou

	]]

	return text
end
 
-- Hand the module table (with `track` attached) back to the loader.
return export