Module:fa-translit

Definition from Wiktionary, the free dictionary
Jump to navigation Jump to search
This module is in beta stage.
Do not use this module yet; it is used for testing only. Unvocalised Persian cannot be transliterated with its vowels, and the transliteration of vocalised Persian may not always be accurate.
This module will transliterate Persian language text per WT:FA TR.

The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:fa-translit/testcases.

Functions[edit]

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang. When the transliteration fails, returns nil.

Test cases[edit]

4 tests failed. (refresh)

Text Expected Actual Differs at
test_translit_persian:
Failed سَرانجام saranjâm sarânjâm 4
Failed سَرانْجام saranjâm sarânjâm 4
Failed سَرَانْجَام saranjâm sarânjâm 4
Passed کُروز koruz koruz
Failed کُرُوز korouz koruz 4
Passed طَنین tanin tanin
Passed طَنِین taneyn taneyn
Passed عَصاً ’asan ’asan
Passed خانه xâne xâne
Passed خانِه xâne xâne
Passed کُرِۀ شُمالی kore-ye šomâli kore-ye šomâli
Passed ضَمّه zamme zamme
Passed ضَمِّه zamme zamme
Passed وُدکا vodkâ vodkâ
Passed اَرمَنِستان armanestân armanestân
Passed باکو bâku bâku



-- Module table; functions attached to it are what callers of this module see.
local export = {}

-- Shorthand: builds a string from Unicode code points.
local U = mw.ustring.char

-- Letters that take part in the context-sensitive rules of export.tr.
local waw = U(0x0648) -- ARABIC LETTER WAW
local ye = U(0x06CC) -- ARABIC LETTER FARSI YEH

-- Short-vowel marks.
local fathe = U(0x064E) -- also zabar
local kasre = U(0x0650) -- also zir
local zamme = U(0x064F) -- also piš

-- Other diacritics.
local fatHatan = U(0x064B) -- Arabic fathatan; in Persian: tanvin-e nasb
local tashdid = U(0x0651) -- also called shadda
local jazm = U(0x0652) -- also sokun

-- Letters spliced into the character classes that export.tr builds for its
-- contextual rules. Alef, ʿeyn and the hamze forms are not part of this list.
local group = "بپتثجچحخدذرزژسشصضطظغفقکگلمنوهی"

 
-- Character-by-character transliteration table. export.tr feeds it to a
-- single-character gsub, so a character without an entry is left as it is.
-- Entries are kept one per line because right-to-left keys render confusingly
-- when several share a line.
local mapping = {
	-- letters
	["ا"] = "â",
	["آ"] = "â",
	["ب"] = "b",
	["پ"] = "p",
	["ت"] = "t",
	["ث"] = "s",
	["ج"] = "j",
	["چ"] = "č",
	["ح"] = "h",
	["خ"] = "x",
	["د"] = "d",
	["ذ"] = "z",
	["ر"] = "r",
	["ز"] = "z",
	["ژ"] = "ž",
	["س"] = "s",
	["ش"] = "š",
	["ص"] = "s",
	["ض"] = "z",
	["ط"] = "t",
	["ظ"] = "z",
	["غ"] = "ğ",
	["ف"] = "f",
	["ق"] = "q",
	["ک"] = "k",
	["گ"] = "g",
	["ل"] = "l",
	["م"] = "m",
	["ن"] = "n",
	["و"] = "u",
	["ه"] = "h",
	["ی"] = "i",

	-- ʿeyn and the hamze forms all become the same apostrophe
	["ع"] = "’",
	["ء"] = "’",
	["ئ"] = "’",
	["ؤ"] = "’",
	["أ"] = "’",

	-- he ye (in ezâfe)
	["ۀ"] = "-ye",

	-- diacritics
	[fathe] = "a",
	[kasre] = "e",
	[zamme] = "o",
	[jazm] = "", -- also sokun - no vowel
	[fatHatan] = "n",

	-- ZWNJ (zero-width non-joiner)
	[U(0x200C)] = "-",

	-- ligatures
	["ﻻ"] = "lâ",
	["ﷲ"] = "llâh",

	-- kashida, no sound
	["ـ"] = "",

	-- numerals
	["۰"] = "0",
	["۱"] = "1",
	["۲"] = "2",
	["۳"] = "3",
	["۴"] = "4",
	["۵"] = "5",
	["۶"] = "6",
	["۷"] = "7",
	["۸"] = "8",
	["۹"] = "9",

	-- normal arabic variants to numerals
	["٠"] = "0",
	["١"] = "1",
	["٢"] = "2",
	["٣"] = "3",
	["٤"] = "4",
	["٥"] = "5",
	["٦"] = "6",
	["٧"] = "7",
	["٨"] = "8",
	["٩"] = "9",

	-- punctuation
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = "“", -- quotation mark
	["»"] = "”", -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
}
 
--- Transliterates (vocalised) Persian text into Latin script.
-- The rules run in a fixed order: context-sensitive rewrites on the Persian
-- text, then the per-character `mapping` lookup, then clean-up rewrites on the
-- resulting Latin text. Reordering them changes the output.
-- @param text the string to transliterate
-- @param lang language code; not used by this function
-- @param sc script code; not used by this function
-- @return the transliterated string
function export.tr(text, lang, sc)
	local gsub = mw.ustring.gsub

	-- Private-use stand-in for the diphthong "ou". The diphthong cannot be
	-- written out as "ou" before the mapping step, because the later
	-- 'ou' -> 'u' collapse (meant for zamme + vâv read as a plain long vowel)
	-- would erase it again. Assumes U+E000 does not occur in the input.
	local ou_marker = mw.ustring.char(0xE000)

	-- text-final he after a consonant (optionally carrying tašdid) is read "e"
	text = gsub(text, '([' .. group .. ']' .. tashdid .. '?)ه$', '%1e')

	-- ou: zamme + vâv followed by a letter of `group`
	text = gsub(text, zamme .. waw .. '([' .. group .. '])', ou_marker .. "%1")

	-- ey: kasre + ye followed by a letter of `group`
	text = gsub(text, kasre .. ye .. '([' .. group .. '])', "ey%1")

	text = gsub(text, 'ىٰ', "â")
	text = gsub(text, 'ا' .. fatHatan, "an")
	-- text = mw.ustring.gsub(text, 'الله', "ﷲ")
	-- text = mw.ustring.gsub(text, 'لا', "ﻻ")

	-- per-character lookup; characters without an entry stay unchanged
	text = gsub(text, '.', mapping)

	-- zamme + vâv that was not marked as a diphthong above is a plain "u"
	text = gsub(text, 'ou', "u")
	-- only now is it safe to spell the diphthong out
	text = gsub(text, ou_marker, "ou")

	text = gsub(text, 'aâ', "â")
	text = gsub(text, 'âa', "a")
	text = gsub(text, 'ei', "i")
	text = gsub(text, 'ai', "ay")
	text = gsub(text, 'au', "aw")
	-- vâv / ye directly before a vowel are consonants
	text = gsub(text, 'u([aâeiou])', "v%1")
	text = gsub(text, 'i([aâeiou])', "y%1")
	text = gsub(text, "([aâeiou])(" .. tashdid .. ")", "%2%1") -- swapping tašdid with vowels
	text = gsub(text, "(.)" .. tashdid, "%1%1") -- implementing tašdid
	-- silent he after "e": at the end of the text, or before any character
	-- that is not one of the listed Latin letters
	text = gsub(text, 'eh$', "e")
	text = gsub(text, 'eh([^aâeiouy’bdfghjklmnpqrstvyxzčğšž])', "e%1")

	return text
end
 
-- Hand the module table (carrying the `tr` function) back to whatever loads this module.
return export