-- Module:mai-IPA-Deva
--
-- From Wiktionary, the free dictionary

-- Module table; it is returned to the caller at the end of the file.
local export = {}
-- Local aliases for the mw.ustring functions used below.
-- NOTE(review): `sub` is not referenced anywhere in the visible code.
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local sub = mw.ustring.sub
local u = mw.ustring.char

-- IPA -> independent letter lookup, keyed first by script code and then by
-- IPA string. export.tr uses it for syllable onsets (whole onset first, then
-- piecewise), for vowels that have no onset, and for the optional nasal coda.
--
-- Every key appears exactly once. The table previously also contained
-- ['ɔ']='ऑ' next to ['ɛ'], but that entry was always overwritten by the later
-- ['ɔ']='अ꣱', so only the effective entry is kept.
-- NOTE(review): the vowel-sign table `v` spells ɔ with the matra "ॉ", which
-- pairs with 'ऑ' rather than with 'अ꣱' -- confirm which spelling is intended.
local c = {
	["Deva"] = {
		-- consonants
		['k']='क', ['ɡ']='ग', ['kʰ']='ख', ['ɡʱ']='घ',  ['ŋ']='ङ',
		['ɲ']='ञ', ['ç']='च', ['ʐ']='ज', ['çʰ']='छ', ['ʐʱ']='झ',
		['ʦ']='च', ['ʣ']='ज', ['ʦʰ']='छ', ['ʣʱ']='झ',
		['ʈ']='ट', ['ɖ']='ड', ['ʈʰ']='ठ', ['ɖʱ']='ढ', ['ɳ']='ण',
		['t̪']='त', ['d̪']='द', ['t̪ʰ']='थ', ['d̪ʱ']='ध', ['n']='न', ['n̪']='न',
		['l̪']='ल', ['t']='त', ['d']='द', ['tʰ']='थ', ['dʱ']='ध',
		['p']='प', ['b']='ब', ['pʰ']='फ', ['bʱ']='भ', ['m']='म',
		['j']='य', ['r']='र', ['l']='ल', ['w']='व', ['v']='व',  ['ʃ']='श',
		['ʂ']='ष', ['s']='स', ['z']='ज़', ['ɦ']='ह', ['ʒ']='ॹ',
		['ɽ']='ड़', ['ɽʱ']='ढ़', ['ɸ']='फ', ['f']='फ़', ['x']='ख़',
		['β']='व', ['ɣ']='ग़', ['ɾ']='र', ['ʱ']='ह',
		-- syllable dot, dental and tie-bar diacritics are dropped; geminate affricates
		['.']='',  ['̪']='', ['͡']='', ['t̚t͡ɕ']='च्च', ['d̚d͡ʑ']='ज्ज',
		['t̚t͡ɕʰ']='च्छ', ['d̚d͡ʑʱ']='ज्झ', ['ɭ']='ळ',
		-- independent vowels
		['ā']='आ', ['ɪ']='इ', ['ī']='ई',  ['ʊ']='उ', ['ū']='ऊ',
		['e']='ऎ', ['o']='ऒ', ['ĕ']='ऎ', ['ŏ']='ऒ', ['ē']='ए', ['ō']='ओ', ['ɛ']='ऍ',
		['ɑ']='ऑ', ['æ']='ऍ',  ['əʊ̯']='औ', ['əɪ̯']='ऐ', ['əʊ']='औ', ['əɪ']='ऐ',['ǒ']='ॵ', ['ě']='ꣾ',['əo']='ॵ', ['əe']='ꣾ',

		['ə']='अ', ['a']='ॴ', ['ᵊ']='', ['ɔ']='अ꣱',

		-- nasalised vowels
		['ā̃']='आँ', ['ɪ̃']='इँ', ['ī̃']='ईं',  ['ʊ̃']='उँ', ['ū̃']='ऊँ',
		['ẽ']='ऎं', ['õ']='ऒं', ['ē̃']='एँ', ['ō̃']='ओं',  ['ə̃ʊ̯̃']='औं', ['ə̃ɪ̯̃']='ऐं',
		['ə̃']='अँ‌',
		-- chandrabindu
		['̃']='ँ', ['ṃ']='ँ',
		-- the length mark becomes an avagraha (post-processed in export.tr);
		-- the two remaining diacritics are dropped
		['ː']='ऽ', ['̤']='', ['̚']='',

		-- the empty key lets an absent coda be looked up without a nil check
		[""] = "",
	},

}

-- IPA vowel -> dependent vowel sign (matra) lookup, keyed first by script
-- code and then by IPA string. export.tr appends v[script][vowel] after the
-- converted onset; "ə" and "ᵊ" map to the empty string.
--
-- Every key appears exactly once. The table previously also contained
-- ["ɔ"] = "꣱" next to ["ī"], but that entry was always overwritten by the
-- later ["ɔ"] = "ॉ", so only the effective entry is kept.
-- NOTE(review): the independent-letter table `c` spells ɔ as 'अ꣱', which
-- pairs with "꣱" rather than with "ॉ" -- confirm which spelling is intended.
local v = {
	["Deva"] = {
		["ə"] = "", ["ᵊ"] = "", ["a"] = "ऻ", ["ā"] = "ा",
		["ɪ"] = "ि", ["ī"] = "ी",
		["ʊ"] = "ु", ["ū"] = "ू",
		["e"] = "ॆ", ["ē"] = "े",
		["o"] = "ॊ", ["ō"] = "ो", ["ǒ"] = "ॏ", ["ě"] = "ꣿ", ["əo"]= "ॏ", ["əe"] = "ꣿ",
		-- nasalised vowels
		["ə̃"] = "ँ", ["ā̃"] = "ाँ",
		["ɪ̃"] = "िं", ["ī̃"] = "ीं",
		["ʊ̃"] = "ुँ", ["ū̃"] = "ूँ",

		["ǒ̃"] = "ॏं", ["ě̃"] = "ꣿं",["ẽ"] = "ें",["ē̃"] = "ें", ["õ"] = "ों", ["ɛ"] = "ॅ", ["æ"] = "ॅ", ["ɛ̃"] = "ॅं", ["æ̃"] = "ॅं",
		["ō̃"] = "ों", ["ɔ"] = "ॉ", ["ɔ̃"] = "ॉं", ["ɑ"] = "ॉ",
		-- the empty key is used when a syllable consists of a vowel only
		[""] = "", ["əʊ̯"] = "ौ", ["əɪ̯"] = "ै", ["əʊ"] = "ौ", ["əɪ"] = "ै",  ["ə̃ʊ̯̃"] = "ौं", ["ə̃ɪ̯̃"] = "ैं",
	},

}

-- Digit and punctuation replacements, keyed first by script code and then by
-- the ASCII character. export.tr applies this table to every character that
-- matches "[0-9%.,%-]" before any other processing.
local s = { Deva = {} }
do
	local target = s.Deva
	-- Glyph at position i stands for the digit i - 1.
	local digit_glyphs = { "०", "१", "२", "३", "४", "५", "६", "७", "८", "९" }
	for position, glyph in ipairs(digit_glyphs) do
		target[tostring(position - 1)] = glyph
	end
	-- Full stops and hyphens are removed; a comma becomes a danda.
	target["."] = ""
	target[","] = "।"
	target["-"] = ""
end

-- Sign placed between adjacent consonant letters when export.tr builds an
-- onset cluster, keyed by script code.
local join = { Deva = "्" }

-- Sign appended (after an implicit "ə") to a syllable that has no vowel of
-- its own, and optionally matched at the end of a syllable, keyed by script code.
local kill = { Deva = "्" }

-- Devanagari sign nukta (U+093C). The value was previously U+09BC, which is
-- the Bengali nukta: with that value the `letter` pattern built in export.tr
-- treated the nukta of letters such as ज़ or ड़ as a letter of its own, so a
-- joiner was inserted between the base consonant and its nukta in clusters.
local nukta = u(0x093C) -- Just list all those used here.

-- Abort with an error naming the piece of input that has no mapping.
-- Never returns normally; used in `x or return_error(...)` expressions.
local function return_error(text)
	local template = 'Unrecognised part: "%s"'
	local message = template:format(text)
	return error(message)
end

--- Convert an IPA transcription into the script named by `script`.
--
-- Can be called directly as `export.tr(text, script)`, or with a single
-- table whose `args[1]` and `args[2]` hold the text and the script code.
--
-- Changes from the earlier version of this function:
--   * a nil or empty `script` now defaults to "Deva" instead of failing on a
--     nil table lookup;
--   * each converted word is substituted exactly where it was matched (one
--     gsub with a callback) rather than by searching the partly converted
--     text again for the first occurrence of the original word.
--
-- @param text    IPA string, or a table with an `args` field.
-- @param script  Key into the lookup tables c, v, s, join and kill.
-- @param options Never read by this function (reset to {} in the table form).
-- @return The text with digits/punctuation replaced through `s` and every run
--         of recognised IPA characters replaced by its conversion.
-- Raises (via return_error) when a syllable's vowel has no entry in v[script].
function export.tr(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end
	-- "Deva" is the only script key the lookup tables above define.
	if script == nil or script == "" then
		script = "Deva"
	end

	local easy_syllable_pattern =
		"^([khɡgʰʱɽɦṅcjñːṭḍṇtdçʦʣɭʐɸnɾt̚d̚t̪d̪m͡məu̯əi̯ŋɲɳɖʈzqxcɣʒβðθʃpbwmyrlḷvs]*)([aaəäāə̃ä̃ĩũẽõiěǒīuɪʊiuūēōᵊeṃoəu̯əi̯ɛɔæɑä̤̃ɑ̤])(̃?)("..kill[script].."?)$"

	text = gsub(text, "[0-9%.,%-]", s[script])
-- Compose patterns for processing onsets. 
	local letter = "[^"..join[script]..nukta.."]["..nukta.."]?"
	local letter_pair = "("..letter..")("..letter..")"

	-- Maximal runs of these characters are converted as one word.
	local word_pattern = "[ĕŏəäaiīuɪʊɪʊiuɛæəī̃ū̃i̯əəu̯ɔə̃ä̃ēōěǒĩũẽõɡāɭɑä̤̃ɑ̤ᵊūeoːt̚d̚t̪d̪m͡ṃɦʰʱkhʐçʣʦgṅcjñṭḍṇtɾŋɽɲɳɖʈzqxcɣʒβɸðθʃdnpbmwyrlḷvs]+"

	-- Converts one syllable's captures: onset letters, vowel sign, nasal coda
	-- and the optional trailing kill sign.
	local function convert_syllable(onset, vowel, coda, optJoin)
		if onset == "" then
			-- A bare vowel is written with its independent letter from `c`.
			onset = vowel
			vowel = ""
		end
		if not c[script][onset] then
			-- Replace aspirated / tie-bar sequences before single characters;
			-- unmatched sequences are left unchanged by the table lookup.
			onset = gsub(onset, ".ʰ", c[script])
			onset = gsub(onset, ".ʱ", c[script])
			onset = gsub(onset, ".͡", c[script])
			onset = gsub(onset, "..ʱ", c[script])
			onset = gsub(onset, "..ʰ", c[script])

			onset = gsub(onset, ".", c[script])
-- Join pairs of consonants
			onset = gsub(onset, letter_pair, "%1"..join[script].."%2")
-- Join adjacent consonants that were in different pairs.
			onset = gsub(onset, letter_pair, "%1"..join[script].."%2")
		else
			onset = c[script][onset]
		end

		return onset .. (v[script][vowel] or return_error(vowel)) .. c[script][coda] .. optJoin
	end

	-- Converts one run of IPA characters and returns its replacement.
	local function convert_word(word)
		local word_conv = {}
		-- Put a space after every vowel so the word can be split into syllables.
		word = gsub(word, "([aāaäəiīuūeoɛə̃ä̃ĩī̃ū̃əɪɪʊiuēōʊěǒĕŏũẽõæɔəi̯ᵊəu̯ɑä̤̃ɑ̤ṃ]ṃ?)", "%1 ")
		word = gsub(word, " $", "")

		for syllable in mw.text.gsplit(word, " ") do
			-- A piece without a final vowel gets an implicit "ə" plus the kill sign.
			if not match(syllable, "[aāäaəiīuūeoɪʊiuī̃ū̃əɪʊṃɛĕŏěǒæə̃ä̃ĩũẽõəi̯əṃu̯ɔēōɑä̤̃ɑ̤ᵊ]$") then
				syllable = syllable .. "ə" .. kill[script]
			end
			syllable = gsub(syllable, easy_syllable_pattern, convert_syllable)

			table.insert(word_conv, syllable)
		end
		word = table.concat(word_conv, "")

		-- Post-processing. The rules below are order-dependent.
		word = gsub(word, "इऽ्", "ई")
		word = gsub(word, "उऽ्", "ऊ")
		word = gsub(word, "ऽ्", "ऽ")
		word = gsub(word, "ऽ", "ː")
		-- NOTE(review): no "ऽ" is left after the previous line, so the next
		-- two rules can never match -- confirm the intended order.
		word = gsub(word, "िऽ्", "ी")
		word = gsub(word, "ुऽ्", "ू")
		word = gsub(word, "अउ̯", "औ")
		word = gsub(word, "अइ̯", "ऐ")
		word = gsub(word, "उ̯", "ौ")
		word = gsub(word, "इ̯", "ै")
		word = gsub(word, "अँइँ̯", "ऐं")
		word = gsub(word, "अँउँ̯", "औं")
		word = gsub(word, "ँइँ̯", "ैं")
		word = gsub(word, "ँउँ̯", "ौं")
		word = gsub(word, "([ेोैौꣿॆॊ])ँ", "%1ं")
		word = gsub(word, "ॺ", "ढ़")
		word = gsub(word, "([अआकखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसहङञॾॺफ़ज़ॹ])ऎ", "%1ꣿ")
		word = gsub(word, "([अआकखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसहङञॾॺफ़ज़ॹ])ऒ", "%1ॏ")
		-- NOTE(review): the variants below that end in a space cannot match,
		-- because a word never contains a space at this point.
		word = gsub(word, "([अआकखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसहङञॾॺफ़ज़ॹ])([aृेोाॉॅॆॊऻऻॊॆॏ꣱ᵊə]?)([कखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसङञॾॺफ़ज़ॹ])(ि)$", "%1%2इ%3")
		word = gsub(word, "([अआकखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसहङञॾॺफ़ज़ॹ])([aृेोाॉॅॆॊऻऻॊॆॏ꣱ᵊə]?)([कखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसङञॾॺफ़ज़ॹ])(ि) ", "%1%2इ%3 ")
		word = gsub(word, "([अआकखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसहङञॾॺफ़ज़ॹ])([aृेोाॉॅॆॊऻऻॊॆॏ꣱ᵊə]?)([कखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसङञॾॺफ़ज़ॹ])(ु)$", "%1%2उ%3")
		word = gsub(word, "([अआकखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसहङञॾॺफ़ज़ॹ])([aृेोाॉॅॆॊऻऻॊॆॏ꣱ᵊə]?)([कखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसङञॾॺफ़ज़ॹ])(ु) ", "%1%2उ%3 ")
		word = gsub(word, "([कखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसङञॾॺफ़ज़ॹ])्हि$", "इ%1्ह")
		word = gsub(word, "([कखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसङञॾॺफ़ज़ॹ])्हि ", "इ%1्ह ")
		word = gsub(word, "([कखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसङञॾॺफ़ज़ॹ])्हु", "उ%1्ह")
		word = gsub(word, "([कखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसङञॾॺफ़ज़ॹ])्हु ", "उ%1्ह ")
		-- NOTE(review): this rule needs a "ऽ", which no longer exists here.
		word = gsub(word, "([कखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसहङञॾॺफ़ज़ॹ])्ऽ", "%1्%1")
		-- NOTE(review): the next two replacements look identical to the text
		-- they match -- confirm whether a different form of "य" was intended.
		word = gsub(word, "([कखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसहङञॾॺफ़ज़ॹ])(्)य([कखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसहङञॾॺफ़ज़ॹ])", "%1%2य%3")
		word = gsub(word, "([कखगघचछजझटठडढढ़ख़ड़णतथदधनपफबभमयरलवशषसहङञॾॺफ़ज़ॹिीुूेैोौा])य$", "%1य")
		word = gsub(word, "ड़््", "ड़्")
		-- Drop a word-final kill sign, then fold "sign + independent vowel"
		-- into the corresponding vowel sign.
		word = gsub(word, "्$", "")
		word = gsub(word, "्इ", "ि")
		word = gsub(word, "्उ", "ु")
		word = gsub(word, "्ई", "ी")
		word = gsub(word, "्ऊ", "ू")
		word = gsub(word, "्ए", "े")
		word = gsub(word, "्ओ", "ो")

		return word
	end

	-- The parentheses drop gsub's second result (the match count), so that
	-- exactly one value is returned.
	return (gsub(text, word_pattern, convert_word))
end

return export