Module:User:Kushalpok01/convert3

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Tests[edit]

سٲری اِنسان چھِ آزاد زامٕتؠ۔ وؠقار تہٕ حۆقوٗق چھِ ہِوی۔ تِمَن چھُ سوچ سَمَج عَطا کَرنہٕ آمُت تہٕ تِمَن پَزِ بٲے بَرادٔری ہٕنٛدِس جَذباتَس تَحَت اکھ أکِس اکار بَکار یُن ۔


local U = mw.ustring.char
local gsub = mw.ustring.gsub
local export = {}

local hri = U(0x93F)
local hri2 = U(0x907)
local diri = U(0x940)
local diri2 = U(0x908)
local hru = U(0x941)
local hru2 = U(0x909)
local diru = U(0x942)
local diru2 = U(0x90A)
local E = U(0x947)
local E2 = U(0x90F)
local AI = U(0x948)
local AI2 = U(0x910)
local O = U(0x94B)
local O2 = U(0x913)
local AU = U(0x94C)
local AU2 = U(0x914)
local A = U(0x905)
local LA = U(0x93E)
local ret = U(0x615)
local halant = U(0x94D)
local zabar = U(0x64E)
local zer = U(0x650)
local pesh = U(0x64F)
local upesh = U(0x657)
local hama = U(0x654)
local hamb = U(0x655)
local hambw = U(0x673)
local kyeh = U(0x620)
local yetail = U(0x6CD)
local smallv = U(0x65A)
local inv = U(0x65B)
local tashdid = U(0x651) -- also called shadda
local jazm = "ْ"
local he = "ہ"

local consonants = "ببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨوہھٹڈںڑشؕ"
local consonantS = "ببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨہھٹڈںڑشؕ"
local consonantS2 = "یببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨںوہھٹڈڑشؕ" 
local consonantS3 = "ببپتثیجچحخدذؠرزژسشصضطظعغفقکگلࣇمنݨوہھٹڈںڑشؕ"
local sun = "تثصشسزرذدنلطظض"
local vowels = "اۂۆیؠئےۓوؤٲێ"
local hes = "ہح"
local diacritics = "َُِّْٰ"
local ZZP = "َُِ"

local mapping = {
	["آ"] = 'आ', ["ب"] = 'ब', ["پ"] = 'प', 
    ["ت"] = 'त', ["ٹ"] = 'ट', ["ث"] = 'स',
    ["ج"] = 'ज', ["چ"] = 'च', ["ح"] = 'ह',
    ["خ"] = 'ख़', ["د"] = 'द', ["ڈ"] = 'ड', 
    ["ذ"] = 'ज़', ["ر"] = 'र', ["ڑ"] = "ड़",
    ["ز"] = 'ज़', ["ژ"] = 'च़', ["س"] = 'स',
    ["ش"] = 'श', ["ص"] = 'स', ["ض"] = 'ज़',
    ["ط"] = 'त', ["ظ"] = 'ज़', ["غ"] = 'ग़',
    ["ف"] = 'फ़', ["ق"] = 'क़', ["ک"] = 'क', 
    ["گ"] = 'ग', ["ل"] = 'ल', ["م"] = 'म', 
    ["ن"] = 'न', ["و"] = 'व', ["ہ"] = 'ह', 
    ["ی"] = 'य',
    ["ں"] = 'ं', 
    
	["ݨ"] = 'ण', ["ࣇ"] = 'ळ', ["ك"] = 'क',

	["ع"] = 'अ',
	["ء"] = '',
	["ئ"] = '', 
	["ؤ"] = 'व',
    ["ێ"] = 'ऎ',
	
	-- diacritics
	[zabar] = "॑",
	[zer] = "" .. hri .. "",
	[pesh] = "" .. hru .. "",
	[jazm] = "" .. halant .. "",
    [hama] = "ऺ", 
    [hamb] = "ॖ", 
    [hambw] = "ॗ", 
    [kyeh] = "" .. halant .. "य",
    [yetail] = "" .. halant .. "य",
	[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)
	
	-- ligatures
	["ﻻ"] = "ला",
	["ﷲ"] = "अल्लाह",
	
	-- kashida
	["ـ"] = "-", -- kashida, no sound
	

-- numerals
	["١"] = "१", ["٢"] = "२", ["٣"] = "३", ["٤"] = "४", ["٥"] = "५", 
	["٦"] = "६", ["٧"] = "७", ["٨"] = "८", ["٩"] = "९", ["٠"] = "०",
	["۱"] = "१", ["۲"] = "२", ["۳"] = "३", ["۴"] = "४", ["۵"] = "५",
	["۶"] = "६", ["۷"] = "७", ["۸"] = "८", ["۹"] = "९", ["۰"] = "०",
	
	-- punctuation (leave on separate lines)
    ["۔"] = "।", 
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = '“', -- quotation mark
	["»"] = '”', -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
    ["ۓ"] = "-ये", 
	["ۂ"] = "ॖ" 
}

local ain = 'ع'
local kzabar = 'ٰ'
local alif = 'ا'
local halif = 'أ'
local hwalif = 'ٲ'
local madda = 'آ'
local ye = 'ی'
local yev = 'ێ'
local ye2 = 'ئ'
local sualif = 'ٖ'
local ye3 = "ے"
local vao = "و"
local vaov = "ۆ"
local vaor = "ۄ"
local ye4 = "ۓ"
local he2 = "ۂ"
local aspirate = 'ھ'

function export.tr(text, lang, sc)
text = gsub(text, alif .. hambw .. '([' .. consonants .. '])', "ॷ%1")
	text = gsub(text, alif .. hamb .. '([' .. consonants .. '])', "ॶ%1")
text = gsub(text, '([ن])' .. inv, "%1" .. halant .. "")
text = gsub(text, '([ببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمݨوہھٹڈڑشؕ])' .. '([' .. zabar .. zer .. pesh .. hama .. hamb .. hambw .. ']?)' .. inv, "" .. halant .. "%1%2")

	-- EXCEPTIONS - leave as they are, unless they have been sorted out elsewhere
	text = gsub(text, '([' .. consonants .. '])' .. ye .. "ں", "%1ें")
	text = gsub(text, "ؤ" .. pesh, "ऊ")
	text = gsub(text, "وہ", "वह")
	text = gsub(text, alif .. ye2 .. '([' .. zabar .. ']?)' .. '([' .. consonants .. '])', "ाय%2")
	text = gsub(text, madda .. ye2 .. '([' .. zabar .. ']?)' .. '([' .. consonants .. '])', "आय%2")

-- SSH
text = gsub(text, "ش"  .. jazm .. ret, "ष्")
text = gsub(text, "(ش)" .. "([" .. ZZP  .. "])" .. ret, "ष%2")

	-- Tashdeed
text = gsub(text, '([' .. consonantS2 .. '])' .. tashdid, "%1" .. halant .. "%1")
	text = gsub(text, '([' .. consonantS2 .. '])' .. tashdid .. '([' .. ZZP .. '])', "%1" .. halant .. "%1%2")
	text = gsub(text, '([' .. ZZP .. '])' .. ye .. '([' .. ZZP .. '])' .. tashdid, "%1य्य%2")
	text = gsub(text, '([' .. ZZP .. '])' .. vao .. '([' .. ZZP .. '])' .. tashdid, "%1व्व%2")
	-- For some reason the tashdeed gets pushed after the other diacritics, so this line is necessary for tashdeed to work with other diacritics
	text = gsub(text, '([' .. consonants .. '])' .. '([' .. ZZP .. '])' .. tashdid, "%1" .. halant .. "%1%2")	
	
	-- tanween diacritic
	text = gsub(text, '([' .. consonants .. '])' .. 'ً' .. alif, "%1न")
	text = gsub(text, alif .. 'ً', "न")
	text = gsub(text, '([' .. consonants .. '])' .. 'ً', "%1न")

	-- khari zabar --
text = gsub(text, '([' .. consonants .. '])' .. kzabar, "%1" .. LA .. "")
text = gsub(text, '([' .. vowels .. '])' .. kzabar, "" .. LA .. "")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. alif, "%1" .. halant .. "%1" .. LA .. "")
	
	-- ‘ain
		text = gsub(text, '([' .. consonants .. '])' .. ain .. zabar .. he, "%1" .. LA .. "")
text = gsub(text, ain .. alif  .. ain, "आ")
	text = gsub(text, alif .. ain  .. '([' .. consonants .. '])', "" .. E2 .. "%1")
	text = gsub(text, '([' .. consonants .. '])' .. ain .. he, "%1अ")
	text = gsub(text, '([' .. consonants .. '])' .. '([' .. zer .. pesh .. ']?)' .. ain, "%1%2")
	text = gsub(text, ain .. zabar .. vao .. '([' .. consonants .. '])', "औ%1")
	text = gsub(text, ain .. zabar .. ye .. '([' .. consonants .. '])', "ऐ%1")
	text = gsub(text, ain .. zer  .. '([' .. consonants .. '])', "इ%1")
	text = gsub(text, ain .. pesh  .. '([' .. consonants .. '])', "उ%1")
	text = gsub(text, ain .. zer .. ye .. '([' .. consonants .. '])', "ई%1")
    text = gsub(text, ain .. vao .. upesh .. '([' .. consonantS .. '])', "ऊ%1")
text = gsub(text, ain .. jazm, "" .. LA .. "")
  
	-- Vao
	text = gsub(text, vao .. '([' .. ZZP .. '])', "व%1")
	text = gsub(text, '([' .. consonants .. '])' .. zabar .. vao .. alif, "%1वा")
text = gsub(text, '([' .. consonants .. '])' .. zabar .. vao .. zabar .. alif, "%1वा")
text = gsub(text, vao .. vao , "वो")
text = gsub(text, vao .. alif , "वा")
--VAO alone
text = gsub(text, " و ", " ओ ")
	
	-- Fatha Majhool --
text = gsub(text, "([" .. consonants  .. "])" .. zabar .. he .. jazm .. "([" .. ZZP  .. "])" , "%1ह%2")

text = gsub(text, '([' .. consonantS3 .. '])' .. ye3 .. smallv, "%1ॆ")
	-- medial/final consonants.
text = gsub(text, he .. '([' .. zer .. pesh .. hama .. hamb .. hambw .. '])([،]?)$', "%1%2")
text = gsub(text, he .. '([' .. zer .. pesh .. hama .. hamb .. hambw .. ']) ', "%1 ")
text = gsub(text, he .. '([' .. zabar .. ']) ', "ा ")
text = gsub(text, he .. '([' .. zabar .. '])([،]?)$', "ा%2")
    text = gsub(text, zer .. he .. alif , "िहा")
	text = gsub(text, zabar .. he .. alif, "हा")
	text = gsub(text, zabar .. he .. '([' .. consonants .. vowels .. '])', "ह%1")
text = gsub(text, zer .. he .. '([' .. consonants .. vowels .. '])', "िह%1")
	text = gsub(text, '([' .. consonants .. '])' .. alif, "%1ा")
text = gsub(text, '([' .. consonants .. '])' .. halif, "%1ऺ")
text = gsub(text, '([' .. consonants .. '])' .. hwalif, "%1ऻ")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. alif, "%1%1ा")

	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. vao, "%1%1ो")
	
	text = gsub(text, zer .. ye .. alif, "िया")
	text = gsub(text, '([' .. consonants .. '])' .. ye .. '([' .. consonants .. '])', "%1े%2")
text = gsub(text, '([' .. consonantS3 .. '])' .. yev .. '([' .. consonants .. '])', "%1ॆ%2")
text = gsub(text, '([' .. consonantS3 .. '])' .. ye .. smallv .. '([' .. consonants .. '])', "%1ॆ%2")
	text = gsub(text, ye2 .. ye, "ई")
	text = gsub(text, ye2 .. 'ے', "ए")
	text = gsub(text,'([' .. consonantS3 .. '])' .. ye .. ye3, "%1" .. diri .. "ए")
	text = gsub(text, alif .. zabar .. ye3, "" .. AI2 .. "")
	text = gsub(text, '([' .. consonants .. alif .. '])' .. ye2 .. ye, "%1ई")
	text = gsub(text, '([' .. consonantS3 .. '])' .. ye2 .. ye3, "%1ए")
	text = gsub(text, zabar .. ye3, "ै")
text = gsub(text, '([' .. consonantS3 .. '])' .. ye3, "%1" .. E .. "")
text = gsub(text, '([' .. consonantS3 .. '])' .. vao .. "(ٗ)", "%1ू")
text = gsub(text, '([' .. consonants .. '])' .. vao, "%1" .. O .. "")
text = gsub(text, '([' .. consonantS3 .. '])' .. vaov, "%1ॊ")
text = gsub(text, '([' .. consonantS3 .. '])' .. vao .. smallv, "%1ॊ")
text = gsub(text, '([' .. consonants .. '])' .. vaor .. alif, "%1ौ")
text = gsub(text, '([' .. consonantS3 .. '])' .. vaor, "%1ॏ")
text = gsub(text, "([" .. consonants  .. "])" .. ye .. sualif .. '([' .. ZZP .. ']?)', "%1" .. diri .. "%2")
text = gsub(text, alif  .. ye .. '([' .. consonants .. '])', "" .. E2 .. "%1")
text = gsub(text, alif  .. yev .. '([' .. consonants .. '])', "ऎ%1")
text = gsub(text, alif  .. ye .. smallv .. '([' .. consonants .. '])', "ऎ%1")
	text = gsub(text, alif  .. vao .. '([' .. consonants .. '])', "" .. O2 .. "%1")


text = gsub(text, pesh .. vao, "ू")
text = gsub(text, "([" .. consonants  .. "])" .. zer .. ye, "%1" .. diri .. "")

text = gsub(text, alif  .. vaov .. '([' .. consonants .. '])', "ऒ%1")
text = gsub(text, alif  .. vao .. smallv .. '([' .. consonants .. '])', "ऒ%1")
	
	-- Initial alif
	text = gsub(text, "" .. alif .. '([' .. consonantS .. '])', "अ%1")
text = gsub(text, "" .. halif .. '([' .. consonantS .. '])', "ॳ%1")
text = gsub(text, "" .. hwalif .. '([' .. consonantS .. '])', "ॴ%1")
	text = gsub(text, alif .. '([' .. consonantS .. '])', "अ%1")
	text = gsub(text, alif .. zabar .. '([' .. consonantS .. '])', "अ%1")
	text = gsub(text, alif .. vaor .. alif .. '([' .. consonants .. '])', "औ%1")
	text = gsub(text, alif .. vaor .. '([' .. consonants .. '])', "ॵ%1")
	text = gsub(text, alif  .. vao .. '([' .. consonants .. '])', "ओ%1")
	text = gsub(text, alif  .. ye .. '([' .. consonants .. '])', "ए%1")
	text = gsub(text, alif .. zabar .. ye .. '([' .. consonants .. '])', "ऐ%1")
	text = gsub(text, alif .. pesh .. '([' .. consonantS .. '])', "उ%1")
text = gsub(text, alif .. vao .. upesh .. '([' .. consonantS3 .. '])', "" .. diru2 .. "%1")
	text = gsub(text, alif .. zer .. '([' .. consonants .. '])', "इ%1")
	text = gsub(text, alif .. ye .. sualif .. '([' .. consonants .. '])', "ई%1")

text = gsub(text, alif  .. ye3, "" .. E2 .. "")

  --- aspirate
	text = gsub(text, "(ک)" .. "([" .. ZZP  .. "])" .. aspirate, "ख%2")
	    text = gsub(text, "(گ)" .. "([" .. ZZP  .. "])" .. aspirate, "घ%2")
	    text = gsub(text, "(چ)" .. "([" .. ZZP  .. "])" .. aspirate, "छ%2")
	    text = gsub(text, "(ج)" .. "([" .. ZZP  .. "])" .. aspirate, "झ%2")
	    text = gsub(text, "(ٹ)" .. "([" .. ZZP  .. "])" .. aspirate, "ठ%2")
	    text = gsub(text, "(ڈ)" .. "([" .. ZZP  .. "])" .. aspirate, "ढ%2")
	    text = gsub(text, "(ت)" .. "([" .. ZZP  .. "])" .. aspirate, "थ%2")
	    text = gsub(text, "(د)" .. "([" .. ZZP  .. "])" .. aspirate, "ध%2")
	    text = gsub(text, "(پ)" .. "([" .. ZZP  .. "])" .. aspirate, "फ%2")
	    text = gsub(text, "(ب)" .. "([" .. ZZP  .. "])" .. aspirate, "भ%2")
	    text = gsub(text, "(ڑ)" .. "([" .. ZZP  .. "])" .. aspirate, "ढ़%2")
	    text = gsub(text, "(م)" .. "([" .. ZZP  .. "])" .. aspirate, "म्ह%2")
	    text = gsub(text, "(ن)" .. "([" .. ZZP  .. "])" .. aspirate, "न्ह%2")
	    text = gsub(text, "(ل)" .. "([" .. ZZP  .. "])" .. aspirate, "ल्ह%2")
text = gsub(text, "(ژ)" .. "([" .. ZZP  .. "])" .. aspirate, "छ़%2")

	-- diacritics
	text = gsub(text, "([" .. consonants  .. "])" .. zabar .. vao, "%1ौ")
	text = gsub(text, "([" .. consonants  .. "])" .. zabar .. ye, "%1ै")
	text = gsub(text, "([" .. consonants  .. "])" .. zabar .. ye3, "%1" .. AI .. "")
	text = gsub(text, "([" .. consonants .. "])" .. ye, "%1ी")
	
	
	-- final he + short vowel disregards the he and transliterates the vowel
	text = gsub(text, ye .. he , "यह")
	text = gsub(text, "([" .. consonants .. "])" .. he , "%1ह")
	
----
text = gsub(text, "نْ([کگجچٹڈتدن])" , "ं%1")
text = gsub(text, "ن٘([کگجچٹڈتدن])" , "ं%1")
text = gsub(text, "مْ([بپمو])" , "ं%1")
----

	text = gsub(text, zabar .. he .. "([" .. ZZP  .. "])" , "ह%1")

text = gsub(text, '([' .. zer .. '])' .. he, "ि")
text = gsub(text, zabar .. he .. alif , "हा")
text = gsub(text, '([' .. zer .. '])' .. he .. alif .. '([' .. consonants .. '])' , "%1हा%2")
text = gsub(text, he .. alif , "हा")

	--
   
    text = gsub(text, "ࣇ", "ळ")
    text = gsub(text, "شؕ", "ष")
    text = gsub(text, "ن٘", "ङ")
    text = gsub(text, "ڃ", "ञ")
	text = gsub(text, "کھ", "ख")
	text = gsub(text, "گھ", "घ")
	text = gsub(text, "چھ", "छ")
	text = gsub(text, "جھ", "झ")
	text = gsub(text, "ٹھ", "ठ")
	text = gsub(text, "ڈھ", "ढ")
	text = gsub(text, "تھ", "थ")
	text = gsub(text, 'دھ', "ध")
	text = gsub(text, "پھ", "फ")
	text = gsub(text, "بھ", "भ")
    text = gsub(text, "ڑھ", "ढ़")
    text = gsub(text, "مھ", "म्ह")
	text = gsub(text, "نھ", "न्ह")
    text = gsub(text, "لھ", "ल्ह")
    text = gsub(text, "ژھ", "छ़")
	--text = gsub(text, "ۂ", "ॖ")
    --text = gsub(text, "ۂ", "ॖ")
    text = gsub(text, "ے", "य्")
    
	text = mw.ustring.gsub(text, '.', mapping)
text = mw.ustring.gsub(text, "ललह", "ल्लाह")

	text = mw.ustring.gsub(text, 'ोा', "वा")
	text = mw.ustring.gsub(text, 'ौा', "वा")
	text = mw.ustring.gsub(text, 'ोا', "वा")
	text = mw.ustring.gsub(text, 'व॑ا', "वा")
	text = mw.ustring.gsub(text, 'ɔ̄ا', "वा")
	
	-- Changed these to 'iy(*)', because they will be used for with ی, which are normally written as 'iy'
	text = mw.ustring.gsub(text, 'ी॑ा', "िया")
	text = mw.ustring.gsub(text, 'ी॑', "िय")
	--
	
	text = mw.ustring.gsub(text, 'اे', "ए")
	text = mw.ustring.gsub(text, 'ीا', "िया")
	text = mw.ustring.gsub(text, 'यا', "या")
	
	-- vao as a medial consonant
	text = mw.ustring.gsub(text, "ूू॑", "ुुव")
    text = mw.ustring.gsub(text, "ौ([॑िु])", "व%1")

    -- Final corrections
    text = mw.ustring.gsub(text, "अ॑ا", "आ")
    text = mw.ustring.gsub(text, "ا", "अ") -- to avoid error
--text = mw.ustring.gsub(text, "हॖ$", "ॖ")
text = mw.ustring.gsub(text, "अٟ", "ॷ")
text = mw.ustring.gsub(text, "ोٗ", "ू")
        text = mw.ustring.gsub(text, '॑ि', "इ")
        text = mw.ustring.gsub(text, '॑े', "ै")
        text = mw.ustring.gsub(text, '॑ो', "ौ")
        text = mw.ustring.gsub(text, '्यअ', "्या")
	text = mw.ustring.gsub(text, "आ॑", "आ")
    text = mw.ustring.gsub(text, "॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑॑", "आ")
	text = mw.ustring.gsub(text, '॑', "")
text = mw.ustring.gsub(text, "ीु", "यु")
text = mw.ustring.gsub(text, "ीा", "िया")
text = mw.ustring.gsub(text, "ाि", "ाइ")
text = mw.ustring.gsub(text, "आि", "आइ")
text = mw.ustring.gsub(text, 'ुो', "ू")
text = mw.ustring.gsub(text, 'िे', "ी")
text = mw.ustring.gsub(text, 'ोا', "वा")
text = mw.ustring.gsub(text, 'ौا', "वा")
text = mw.ustring.gsub(text, 'ौा', "वा")
text = mw.ustring.gsub(text, 'ोा', "वा")
text = mw.ustring.gsub(text, 'अै', "ऐ")
text = mw.ustring.gsub(text, 'ीआ', "िया")
text = mw.ustring.gsub(text, 'ीअ', "िय")
text = mw.ustring.gsub(text, '+', "")
text = mw.ustring.gsub(text, 'اिय', "ई")
text = mw.ustring.gsub(text, 'अै', "ऐ")
text = mw.ustring.gsub(text, 'अा', "आ")
text = mw.ustring.gsub(text, 'अौ', "औ")
	return text
end
return export