Module:fa-translit

Definition from Wiktionary, the free dictionary
Jump to: navigation, search
This module is in beta stage.
Do not use this module yet. It's used for testing only. It is impossible to transliterate unvocalised Persian with vowels and vocalised Persian may not always be accurate.
This module will transliterate Persian language text per WT:FA TR.

The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:fa-translit/testcases.

Functions[edit]

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang. When the transliteration fails, returns nil.

Test cases[edit]

7 tests failed. (refresh)

test_translit_persian:
Text Expected Actual Differs at
Failed سَرانجام saranjâm sarânjâm 4
Failed سَرانْجام saranjâm sarânjâm 4
Failed سَرَانْجَام saranjâm sarânjâm 4
Passed کُروز koruz koruz
Failed کُرُوز korouz koruz 4
Passed طَنین tanin tanin
Failed طَنِین taneyn tanin 4
Passed عَصاً ’asan ’asan
Failed خانه xâne xânh 4
Passed خانِه xâne xâne
Passed کُرِۀ شُمالی kore-ye šomâli kore-ye šomâli
Failed ضَمّه zamme zammh 5
Passed ضَمِّه zamme zamme
Passed وُدکا vodkâ vodkâ
Passed اَرمَنِستان armanestân armanestân
Passed باکو bâku bâku



local export = {}

local U = mw.ustring.char

local fatHatan = U(0x64B) -- What is the Persian term for this?
local fathe = U(0x64E) -- also zabar
local zamme = U(0x64F) -- also zir
local kasre = U(0x650) -- also piš
local tashdid = U(0x651) -- also called shadda
local jazm = U(0x652)

 
local mapping = {
	["ا"] = 'â', ["ب"] = 'b', ["پ"] = 'p', ["ت"] = 't', ["ث"] = 's', ["ج"] = 'j', ["چ"] = 'č', ["ح"] = 'h', ["خ"] = 'x', 
	["د"] = 'd', ["ذ"] = 'z', ["ر"] = 'r', ["ز"] = 'z', ["ژ"] = 'ž', ["س"] = 's', ["ش"] = 'š', ["ص"] = 's', ["ض"] = 'z', 
	["ط"] = 't', ["ظ"] = 'z', ["غ"] = 'ğ', ["ف"] = 'f', ["ق"] = 'q', ["ک"] = 'k', ["گ"] = 'g', ["ل"] = 'l', 
	["م"] = 'm', ["ن"] = 'n', ["و"] = 'u', ["ه"] = 'h', ["ی"] = 'i', ["آ"] = 'â',

	-- displaying on separate lines as the viewing becomes distorted on these combinations
	["ع"] = "’",
	["ء"] = "’",
	["ئ"] = "’", 
	["ؤ"] = "’",
	["أ"] = "’",
	
	-- diacritics
	[fathe] = "a",
	[kasre] = "e",
	[zamme] = "o",
	[jazm] = "", -- also sokun - no vowel
	[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)
	[fatHatan] = "n",
	-- ligatures
	["ﻻ"] = "lâ",
	["ﷲ"] = "llâh",
	-- kashida
	["ـ"] = "", -- kashida, no sound
	-- numerals
	["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4", ["۵"] = "5",
	["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9", ["۰"] = "0",
	-- normal arabic variants to numerals
	["١"] = "1", ["٢"] = "2", ["٣"] = "3", ["٤"] = "4", ["٥"] = "5",
	["٦"] = "6", ["٧"] = "7", ["٨"] = "8", ["٩"] = "9", ["٠"] = "0",
	-- punctuation (leave on separate lines)
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = '“', -- quotation mark
	["»"] = '”', -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
	["ۀ"] = "-ye" -- he ye (in ezâfe)
}
 
function export.tr(text, lang, sc)

	text = mw.ustring.gsub(text, 'ىٰ', "â")
	text = mw.ustring.gsub(text, 'ا' .. fatHatan, "an")
	-- text = mw.ustring.gsub(text, 'الله', "ﷲ")
	-- text = mw.ustring.gsub(text, 'لا', "ﻻ")
	text = mw.ustring.gsub(text, '.', mapping)
	text = mw.ustring.gsub(text, 'ou', "u")
	text = mw.ustring.gsub(text, 'aâ', "â")
	text = mw.ustring.gsub(text, 'âa', "a")
	text = mw.ustring.gsub(text, 'ei', "i")
	text = mw.ustring.gsub(text, 'ai', "ey")
	text = mw.ustring.gsub(text, 'au', "ou")
	text = mw.ustring.gsub(text, 'u([aâeiou])', "v%1")
	text = mw.ustring.gsub(text, 'i([aâeiou])', "y%1")
	text = mw.ustring.gsub(text, "([aâeiou])(" .. tashdid .. ")", "%2%1") -- swapping tašdid with vowels
	text = mw.ustring.gsub(text, "(.)" .. tashdid, "%1%1") -- implementing tašdid
	text = mw.ustring.gsub(text, 'eh$', "e")
	text = mw.ustring.gsub(text, 'eh([^aâeiouy’bdfghjklmnpqrstvyxzčğšž])', "e%1")

	return text
end
 
return export