Module:mai-IPA2

From Wiktionary, the free dictionary
Jump to navigation Jump to search

-- Module table; the public entry point `export.tr` is attached to it further
-- down and the table is returned at the end of the file.
local export = {}

-- Unicode-aware pattern functions from Scribunto's `mw.ustring` library.
-- Every pattern in this module contains multi-byte characters, so the
-- byte-oriented `string.gsub` / `string.match` must not be used in their place.
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Per-character conversion table: Devanagari character (optionally followed
-- by a nukta) -> IPA string. `export.tr` feeds it to `gsub(text, '.़?', conv)`,
-- so a key is only reachable when it is one character, or one character plus
-- a nukta.
local conv = {
	-- plain consonant letters
	["क"] = "k", ["ख"] = "kʰ", ["ग"] = "ɡ", ["घ"] = "ɡʱ", ["ङ"] = "ŋ",
	["च"] = "ʦ", ["छ"] = "ʦʰ", ["ज"] = "ʣ", ["झ"] = "ʣʱ", ["ञ"] = "ɲ",
	["ट"] = "ʈ", ["ठ"] = "ʈʰ", ["ड"] = "ɖ", ["ढ"] = "ɖʱ", ["ण"] = "ɳ",
	["त"] = "t", ["थ"] = "tʰ", ["द"] = "d", ["ध"] = "dʱ", ["न"] = "n",
	["प"] = "p", ["फ"] = "pʰ", ["ब"] = "b", ["भ"] = "bʱ", ["म"] = "m",
	["य"] = "j", ["र"] = "ɾ", ["ल"] = "l", ["व"] = "ʋ",
	["श"] = "ɕ", ["ष"] = "ʂ", ["स"] = "s", ["ह"] = "ɦ",

	-- nukta consonants and other additional consonant letters
	["क़"] = "q", ["ख़"] = "x", ["ग़"] = "ɣ", ["ऴ"] = "ɭ",
	["ळ"] = "ɭ", ["ज़"] = "z", ["श़"] = "ʒ", ["झ़"] = "ʒ",
	["ड़"] = "ɽ", ["ढ़"] = "ɽʱ", ["फ़"] = "f", ["थ़"] = "θ",
	["द़"] = "ð", ["ऩ"] = "n̪", ["ऱ"] = "ɹ", ["ॽ"] = "ʔ", ["ॹ"] = "ʒ",

	-- dependent vowel signs (matras)
	["ि"] = "ɪ", ["ु"] = "ʊ", ["े"] = "eː", ["ॆ"] = "e", ["ॊ"] = "o",
	["ो"] = "oː", ["ा"] = "aː", ["ी"] = "iː", ["ू"] = "uː",
	["ृ"] = "ri", ["ॄ"] = "ri", ["ॢ"] = "liɾi", ["ॣ"] = "liɾi",
	["ै"] = "əɪ", ["ौ"] = "əʊ", ["ॉ"] = "ɔ", ["ॅ"] = "æ",
	["ꣿ"] = "əɪ", ["ॏ"] = "əʊ", ["ऺ"] = "ᵊ", ["ऻ"] = "a",

	-- independent vowel letters (plus व़ / य़, which are mapped to short glide vowels)
	["अ"] = "ə", ["इ"] = "ɪ", ["उ"] = "ʊ", ["ए"] = "eː", ["ओ"] = "oː",
	["आ"] = "aː", ["ई"] = "iː", ["ऊ"] = "uː",
	["ऋ"] = "ri", ["ॠ"] = "ri", ["ऌ"] = "liɾi", ["ॡ"] = "liɾi",
	["ऐ"] = "əɪ", ["औ"] = "əʊ", ["ऑ"] = "ɔ", ["ॲ"] = "æ", ["ऍ"] = "æ",
	["ऎ"] = "e", ["ऒ"] = "o", ["ꣾ"] = "əĕ", ["ॵ"] = "əŏ",
	["व़"] = "ŏ", ["य़"] = "ĕ", ["ॴ"] = "a",

	-- vowel sign + anusvara / candrabindu combinations.
	-- NOTE(review): these keys are two characters long and do not end in a
	-- nukta, so the `.़?` lookup in export.tr does not appear able to reach
	-- them; confirm whether they are still needed.
	["िं"] = "ɪ̃", ["ुँ"] = "ʊ̃", ["ें"] = "ẽː", ["ॆं"] = "ẽ", ["ॊं"] = "õ",
	["ों"] = "õː", ["ाँ"] = "ãː", ["ीं"] = "ĩː", ["ूँ"] = "ũː",
	["ैं"] = "ə̃ɪ̃", ["ौं"] = "ə̃ʊ̃", ["ॉं"] = "ɔ̃", ["ॅं"] = "æ̃",
	["ꣿं"] = "ə̃ɪ̃", ["ॏं"] = "ə̃ʊ̃",

	-- U+A8F1 (combining Devanagari sign avagraha)
	["꣱"] = "ɔ",

	-- candrabindu
	["ँ"] = "̃",
	-- anusvara
	["ं"] = "ṃ",
	-- visarga
	["ः"] = "ʰ",
	-- virama
	["्"] = "",
	-- om
	["ॐ"] = "oːm",
	-- avagraha
	["ऽ"] = "ə",
	-- zero-width non-joiner (U+200C), written as UTF-8 byte escapes so the
	-- invisible character cannot be lost when the file is edited
	["\226\128\140"] = "",
	-- zero-width joiner (U+200D), same encoding rationale
	["\226\128\141"] = "ə",
	-- diphthong marker
	["ॱ"] = "̯",

	-- digits
	["०"] = "0", ["१"] = "1", ["२"] = "2", ["३"] = "3", ["४"] = "4",
	["५"] = "5", ["६"] = "6", ["७"] = "7", ["८"] = "8", ["९"] = "9",

	-- punctuation
	["।"] = ".", -- danda
	["॥"] = ".", -- double danda
	["+"] = "", -- compound separator
	["॰"] = ".", -- abbreviation sign
}

-- nasal_assim[consonant] is the nasal letter that export.tr substitutes for an
-- anusvara standing next to that consonant. The table is filled from one row
-- per nasal: { nasal, consonants that select it }.
local nasal_assim = {}
do
	local rows = {
		{ "ङ", { "क", "ख", "ग", "घ" } },
		{ "ञ", { "च", "छ", "ज", "झ" } },
		{ "ण", { "ट", "ठ", "ड", "ढ" } },
		{ "म", { "प", "फ", "ब", "भ", "म" } },
		{ "न", { "त", "थ", "द", "ध", "न" } },
	}
	for _, row in ipairs(rows) do
		local nasal, triggers = row[1], row[2]
		for _, consonant in ipairs(triggers) do
			nasal_assim[consonant] = nasal
		end
	end
end

-- Set of three-character sequences that export.tr checks (as
-- first .. second .. third of the reversed word) before keeping a word-final
-- schwa; a sequence listed here suppresses that schwa.
local perm_cl = {}
for _, cluster in ipairs({ "म्ल", "व्ल", "न्ल" }) do
	perm_cl[cluster] = true
end

-- Character inventories spliced into the `[...]` sets of the patterns used by
-- export.tr. Only set membership matters, so repeated characters are harmless.
-- Base consonant letters:
local all_cons = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह"
-- Consonants tested by the word-final schwa rule in export.tr:
local special_cons = "छकखगतसहयथडढठपदणधरषटलवब भडचनशम"
-- NOTE: despite the names, `vowel` is Latin "a" plus (mostly) dependent vowel
-- signs, while `vowel_sign` is (mostly) independent vowel letters.
local vowel = "aिुृेोाीूैौॉॅॆॊऻऻॊॆॏ꣱꣱’"
local vowel_sign = "अइउएओआईऊऋॠऎऒव़य़ॵॳॴऐऔऑऍ"

-- Pattern with four captures: vowel, (nukta?)consonant, a literal schwa,
-- (nukta?)consonant, (nasal sign?)vowel. export.tr applies it to the
-- *reversed* word and turns the schwa in the middle into "ᵊ".
local syncope_pattern
do
	local any_vowel = '[' .. vowel .. vowel_sign .. ']'
	local consonant = '(़?[' .. all_cons .. '])'
	syncope_pattern = '(' .. any_vowel .. ')' .. consonant .. 'ə' .. consonant .. '([ंँ]?' .. any_vowel .. ')'
end

--- Reverse a string one character at a time.
-- Uses mw.ustring.len / mw.ustring.sub, so the unit of reversal is whatever
-- those functions count as one character, not a byte.
-- @param text string to reverse
-- @return the reversed string
local function rev_string(text)
	local ustring = mw.ustring
	local pieces = {}
	for position = ustring.len(text), 1, -1 do
		pieces[#pieces + 1] = ustring.sub(text, position, position)
	end
	return table.concat(pieces)
end
--- Apply the word-level rules (final schwa, syncope, anusvara) to one word.
-- `word` is one maximal run matched by "[ऀ-ॿə]+" after inherent schwas have
-- been inserted. The rules run on the reversed word, which lets the
-- word-final rule be anchored with `^`.
-- @param word the word, in normal (unreversed) order
-- @return the rewritten word, in normal order
local function process_word(word)
	local reversed = rev_string(word)

	-- Word-final schwa: it is dropped unless (a) the last consonant is in
	-- special_cons, is preceded by a virama and the three-character sequence
	-- is not listed in perm_cl, or (b) the word ends in ी + य.
	reversed = gsub(reversed, '^ə(़?)([' .. all_cons .. '])(.)(.?)', function(opt, first, second, third)
		local keep_schwa = (match(first, '[' .. special_cons .. ']') and match(second, '्') and not perm_cl[first .. second .. third])
			or match(first .. second, 'य[ी]')
		return (keep_schwa and 'ə' or "") .. opt .. first .. second .. third
	end)

	-- Syncope: reduce a schwa between two consonants that are flanked by
	-- vowels to "ᵊ". A replacement can expose a new (overlapping) match, so
	-- repeat until gsub reports that nothing was replaced.
	local replaced
	repeat
		reversed, replaced = gsub(reversed, syncope_pattern, '%1%2ᵊ%3%4')
	until replaced == 0

	-- Anusvara. In the reversed word `succ` is the character that follows
	-- the anusvara in reading order and `prev` the one that precedes it.
	reversed = gsub(reversed, "(.?)ं(.)", function(succ, prev)
		local nasal
		if succ .. prev == "ə" then
			-- anusvara at the very start of the reversed word, right after a schwa
			nasal = "्म"
		elseif succ == "" and match(prev, "[" .. vowel .. "]") then
			nasal = "̃"
		else
			-- nasal letter chosen by the neighbouring consonant, else nasalisation
			nasal = nasal_assim[succ] or "̃"
		end
		return succ .. nasal .. prev
	end)

	return rev_string(reversed)
end

--- Convert Devanagari text to an IPA-style transcription.
-- Steps: insert the inherent schwa after bare consonants, apply the word-level
-- rules to every Devanagari word, map characters through `conv`, then run a
-- fixed sequence of clean-up substitutions on the result.
-- @param text input string
-- @param lang not used by this function (presumably kept for the conventional
--   transliteration-module signature; verify against callers)
-- @param sc not used by this function (same remark as `lang`)
-- @return the transcription, normalised with mw.ustring.toNFC
function export.tr(text, lang, sc)
	-- Give every consonant (plus optional nukta) that is not followed by a
	-- vowel sign or virama an explicit inherent schwa.
	text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)", function(c, d)
		return c .. (d == "" and "ə" or d)
	end)

	-- Rewrite each word exactly where it stands. Using a gsub callback (rather
	-- than searching the text again for the word's spelling) guarantees that a
	-- word's result never overwrites the same letters inside a longer word and
	-- that every word is processed regardless of what precedes it.
	text = gsub(text, "[ऀ-ॿə]+", process_word)

	-- Text-level digraph substitutions; they do not depend on the word being
	-- processed, so they run once after all words are done.
	text = gsub(text, "ज्ञ", "gj")
	text = gsub(text, "इऺ", "ɪ̆")
	text = gsub(text, "उऺ", "ʊ̆")
	text = gsub(text, "ॳ", "ᵊ")
	text = gsub(text, "अ꣱", "ɔ")

	-- Disabled rules, kept for reference:
	--text = gsub(text, "([ं]?[आईऊएओाीूेो])([कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह]?)([ं]?[िुृेोाीूॉॅॆॊऻऻॊॆॏ꣱ᵊəअआइईउऊएओ])([कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह]?)([ं]?[िुृेोाीूॉॅॆॊऻऻॊॆॏ꣱ᵊəअआइईउऊएओ])([कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह]?)([ं]?[ुिɪᵊʊ]?)$", "%1ˑ%2%3%4%5%6%7")
	--text = gsub(text, "([ं]?[आईऊएओाीूेो])([कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह]?)([ं]?[िुृेोाीूॉॅॆॊऻऻॊॆॏ꣱ᵊəअआइईउऊएओ])([कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह]?)([ं]?[िुृेोाीूॉॅॆॊऻऻॊॆॏ꣱ᵊəअआइईउऊएओ])([कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह]?)([ं]?[ुिɪᵊʊ]?) ", "%1ˑ%2%3%4%5%6%7 ")
	--text = gsub(text, "([आईऊएओाीूेो])([कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह]?)([ैौऐऔ])([कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह]?)([ुिɪᵊʊ]?)$", "%1ˑ%2%3%4%5")
	--text = gsub(text, "([आईऊएओाीूेो])([कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह]?)([ैौऐऔ])([कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह]?)([ुिɪᵊʊ]?) ", "%1ˑ%2%3%4%5 ")

	-- Character-by-character conversion. Each character (with an optional
	-- following nukta) is looked up in conv; anything without an entry is
	-- left as it is.
	text = gsub(text, '.़?', conv)
	text = gsub(text, "[<>]", "")
	text = gsub(text, "ॱ", "")

	-- Replace the length mark of a vowel with "ˑ" when it is followed by two
	-- further vowels before the end of the text (first rule) or before a
	-- space (second rule). The nested (ː?) captures make the replacement
	-- emit some length marks twice; the 'ːː' rule further down collapses them.
	-- NOTE(review): the consonant sets below contain many symbols that conv
	-- never emits (e.g. ṭ, ḍ, ś); confirm that every consonant conv can
	-- produce is covered.
	text = gsub(text, "([aāäeâôoʌiuɪʊe̯eëəᵊ])ː([kɦgɕʑṅcjñṭḍṇɽtʈɖdnʦʣpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱ]?)([aāäeâôoʌiuɪʊɨʉe̯eëəᵊ](ː?))([kɦgṅcjñṭḍṇɽtdnʈɖpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?)([aāäeâôoʌiuɪʊɨʉe̯eëəᵊ](ː?))([kɦgṅcʈɖjñṭḍṇɽtdnpbmɽ̃yrlɳwvɾjwśṣsʰʱhɕʑqxʦʣġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪᵊʊ]?(̃?))$", "%1ˑ%2%3%4%5%6%7%8%9")
	text = gsub(text, "([aāäeâôoʌiuɪʊe̯eëəᵊ])ː([kɦgɕʑṅcjñṭḍʈɖṇɽtdnʦʣpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱ]?)([aāäeâôoʌiuɪʊɨʉe̯eëəᵊ](ː?))([kɦgṅcjñṭḍṇɽtdnʈɖpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱ]?)([aāäeâôoʌiuɪʊɨʉe̯eëəᵊ](ː?))([kɦgṅcʈɖjñṭḍṇɽtdnpbmɽ̃yrlɳwvɾjwśṣshɕʑqxʦʣġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪᵊʊ]?(̃?)) ", "%1ˑ%2%3%4%5%6%7%8%9 ")

	-- Move a final ɪ / ʊ (at the end of the text, or before a space) in front
	-- of the consonant (+ optional aspiration, + optional vowel) before it.
	text = gsub(text, "([hkcɕʑʃʓʒɲŋʋɡcjɽtɖʈɳʦʣdnpbmɽ̃yrlɳwvɾjsqxzfθð]?)([æɔaəeo]?)([ɪʊ])$", "%3%1%2")
	text = gsub(text, "([hkcɕʑʃʓʒɲŋʋɡcjɽtɖʈɳdʦʣnpbmɽ̃yrlɳwvɾjsqxzfθð])([ʰɦʱ]?)([æɔaəeo]?)([ɪʊ])$", "%4%1%2%3")
	text = gsub(text, "([hkcɕʑʃʓʒɲŋʋɡcjɽtɖʈɳdʦʣnpbmɽ̃yrlɳwvɾjsqxzfθð]?)([æɔaəeo]?)([ɪʊ]) ", "%3%1%2 ")
	text = gsub(text, "([hkcɕʑʃʓʒɲŋʋɡcjɽtɖʈɳdnʦʣpbmɽ̃yrlɳwvɾjsqxzfθð])([ʰɦʱ]?)([æɔaəeo]?)([ɪʊ]) ", "%4%1%2%3 ")

	-- a/ä/ə followed by (j +) ĕ/e becomes æ; followed by (v/w/ʋ +) ŏ/o becomes ɔ.
	text = gsub(text, "([aäə])(ː?)([̤]?)(̃?)([j]?)([ĕe])(̃?)", "æ%3%4%7")
	text = gsub(text, "([aäə])(ː?)([̤]?)(̃?)([vwʋ]?)([ŏo])(̃?)", "ɔ%3%4%7")
	-- ɽ directly after a vowel becomes ɾ.
	text = gsub(text, "([iɪʊæɔauəeo])(ː?)ɽ([ʱʰ]?)([iɪʊæɔauəeo]?)(̃?)", "%1%2ɾ%3%4%5")
	text = gsub(text, 'ɦri', 'ri')

	-- Put nasalisation before the length mark and tidy up doubled marks.
	text = gsub(text, 'ː̃', '̃ː')
	text = gsub(text, 'ː̃ː', '̃ː')
	text = gsub(text, 'ː̤ː', 'ː')
	-- NOTE(review): in this replacement "%" is followed by a combining tilde,
	-- not by a digit, so "%1" is not a capture reference here. The intent was
	-- presumably "ã%1" or "a%1̃"; left unchanged until that is confirmed.
	text = gsub(text, "a([ɪʊ])̃", "a%̃1")

	-- Collapse duplicated or adjacent reduced vowels.
	text = gsub(text, "ʊʊ", "ʊ")
	text = gsub(text, "([iuɪʊïüaôeo])(ː?)ᵊ", "%1%2")
	text = gsub(text, "əᵊ", "ə")
	text = gsub(text, "ᵊə", "ə")
	text = gsub(text, "əə", "ə")
	text = gsub(text, "ᵊ([ɪʊ])", "ə%1")
	-- NOTE(review): pattern and replacement look identical here and in the
	-- ĕ / ŏ rules below; they may differ only in Unicode composition. Verify
	-- before removing any of them.
	text = gsub(text, "ə([ɪʊ])", "ə%1")
	text = gsub(text, "([ɪʊ])̯̯", "%1")
	text = gsub(text, "ɪɪ", "ɪ")

	-- Expand the affricate ligatures to tie-bar notation.
	text = gsub(text, "ʦ", "t͡ɕ")
	text = gsub(text, "ʣ", "d͡ʑ")
	text = gsub(text, "ĕ", "ĕ")
	text = gsub(text, "ŏ", "ŏ")
	text = gsub(text, 'ːː', 'ː')

	-- Resolve the "ˑ" marker inserted above: the marked vowel is written
	-- short (i -> ɪ, u -> ʊ; a, e, o keep their quality).
	text = gsub(text, "aːˑ", "a")
	text = gsub(text, "iːˑ", "ɪ")
	text = gsub(text, "uːˑ", "ʊ")
	text = gsub(text, "eːˑ", "e")
	text = gsub(text, "oːˑ", "o")
	text = gsub(text, "aˑ", "a")
	text = gsub(text, "iˑ", "ɪ")
	text = gsub(text, "uˑ", "ʊ")
	text = gsub(text, "eˑ", "e")
	text = gsub(text, "oˑ", "o")
	--text = gsub(text, "əɦʊ(̃?)$", "ɔ%1")
	--text = gsub(text, "əɦʊ(̃?) ", "ɔ%1 ")

	return mw.ustring.toNFC(text)
end
-- Return the module table so that code loading this module can call export.tr.
return export