-- Module:mai-IPA3

-- From Wiktionary, the free dictionary
-- Jump to navigation Jump to search

local export = {}
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Character-level conversion table: maps one Devanagari character (optionally
-- followed by a nukta) or other input symbol to its transcription.  It is
-- applied with gsub(text, ".़?", conv), so a key is either a single character
-- or a character plus nukta; characters without an entry are left unchanged.
local conv = {
    -- consonants
    ["क"] = "k",
    ["ख"] = "kʰ",
    ["ग"] = "ɡ",
    ["घ"] = "ɡʱ",
    ["ङ"] = "ŋ",
    ["च"] = "ʦ",
    ["छ"] = "ʦʰ",
    ["ज"] = "ʣ",
    ["झ"] = "ʣʱ",
    ["ञ"] = "n",
    ["ट"] = "ʈ",
    ["ठ"] = "ʈʰ",
    ["ड"] = "ɖ",
    ["ढ"] = "ɖʱ",
    ["ण"] = "ɳ",
    ["त"] = "t",
    ["थ"] = "tʰ",
    ["द"] = "d",
    ["ध"] = "dʱ",
    ["न"] = "n",
    ["प"] = "p",
    ["फ"] = "pʰ",
    ["ब"] = "b",
    ["भ"] = "bʱ",
    ["म"] = "m",
    ["य"] = "j",
    ["र"] = "ɾ",
    ["ल"] = "l",
    ["व"] = "v",
    ["श"] = "s",
    ["ष"] = "ʂ",
    ["स"] = "s",
    ["ह"] = "ɦ",
    -- consonants with nukta and other extended letters
    ["क़"] = "q",
    ["ख़"] = "x",
    ["ग़"] = "ɣ",
    ["ऴ"] = "ɭ",
    ["ळ"] = "ɭ",
    ["ज़"] = "z",
    ["श़"] = "ʒ",
    ["झ़"] = "ʒ",
    ["ड़"] = "ɾ",
    ["ढ़"] = "ɾʱ",
    ["फ़"] = "f",
    ["थ़"] = "θ",
    ["द़"] = "ð",
    ["ऩ"] = "n̪",
    ["ऱ"] = "ɹ",
    ["ॽ"] = "ʔ",
    ["य़"] = "ĕ",
    ["व़"] = "ŏ",
    ["ॹ"] = "ʒ",
    -- dependent vowel signs (matras) and related marks
    ["ि"] = "ɪ",
    ["ु"] = "ʊ",
    ["ॆ"] = "e",
    ["े"] = "ē",
    ["ॊ"] = "o",
    ["ो"] = "ō",
    ["ऺ"] = "ᵊ",
    ["ऻ"] = "a",
    ["꣱"] = "ɔ",
    ["ा"] = "ā",
    ["ी"] = "ī",
    ["ू"] = "ū",
    ["ृ"] = "rɪ",
    ["ॄ"] = "rī",
    ["ॢ"] = "lɪɾɪ",
    ["ॣ"] = "lɪɾī",
    ["ै"] = "əɪ",
    ["ौ"] = "əʊ",
    ["ॏ"] = "ǒ",
    ["ॉ"] = "ɔ",
    ["ॅ"] = "æ",
    ["ऽ"] = "ə",
    -- independent vowel letters (collected in `vowel_sign` elsewhere in this module)
    ["अ"] = "ə",
    ["ॳ"] = "ᵊ",
    ["ॴ"] = "a",
    ["इ"] = "ɪ",
    ["उ"] = "ʊ",
    ["ए"] = "ē",
    ["ओ"] = "ō",
    ["आ"] = "ā",
    ["ई"] = "ī",
    ["ऊ"] = "ū",
    ["ऋ"] = "rɪ",
    ["ॠ"] = "rī",
    ["ऌ"] = "lɪɾɪ",
    ["ॡ"] = "lɪɾī",
    ["ऐ"] = "əɪ",
    ["औ"] = "əʊ",
    ["ॵ"] = "ǒ",
    ["ऑ"] = "ɔ",
    ["ॲ"] = "æ",
    ["ऍ"] = "æ",
    -- chandrabindu
    ["ँ"] = "̃",
    -- anusvara
    ["ं"] = "ṃ",
    -- visarga
    ["ः"] = "",
    -- virama
    ["्"] = "",
    -- om
    ["ॐ"] = "oːm",
    -- zero-width non-joiner (U+200C); written as UTF-8 byte escapes because
    -- the literal character is invisible and easily lost when editing
    ["\226\128\140"] = "",
    -- zero-width joiner (U+200D); written as UTF-8 byte escapes for the same reason
    ["\226\128\141"] = "ə",
    -- diphthong marker
    ["ॱ"] = "̯",
    -- numerals
    ["०"] = "0",
    ["१"] = "1",
    ["२"] = "2",
    ["३"] = "3",
    ["४"] = "4",
    ["५"] = "5",
    ["६"] = "6",
    ["७"] = "7",
    ["८"] = "8",
    ["९"] = "9",
    -- punctuation
    ["।"] = ".", -- danda
    ["॥"] = ".", -- double danda
    ["+"] = "", -- compound separator
    -- abbreviation sign
    ["॰"] = "."
}

-- Anusvara assimilation table: keyed by a consonant, giving the nasal letter
-- (or chandrabindu) that export.tr substitutes for an anusvara adjacent to
-- that consonant.  Built from groups that share the same replacement.
local nasal_assim = {}
do
	local nasal_groups = {
		{ nasal = "ङ", consonants = { "क", "ख", "ग", "घ", "श", "स", "ह" } },
		{ nasal = "ञ", consonants = { "च", "छ", "ज", "झ" } },
		{ nasal = "ण", consonants = { "ट", "ठ", "ड", "ढ" } },
		{ nasal = "म", consonants = { "प", "फ", "ब", "भ", "म", "य", "र", "व" } },
		{ nasal = "न", consonants = { "त", "थ", "द", "ध", "न", "ष" } },
		{ nasal = "ँ", consonants = { "ल" } },
	}
	for _, group in ipairs(nasal_groups) do
		for _, consonant in ipairs(group.consonants) do
			nasal_assim[consonant] = group.nasal
		end
	end
end
-- Set of three-character sequences (consonant, virama, consonant) that
-- export.tr looks up on the *reversed* word; a word-final ə whose neighbouring
-- cluster is listed here is not retained by the special_cons rule.
local perm_cl = {}
for _, cluster in ipairs({ "म्ल", "व्ल", "न्ल" }) do
	perm_cl[cluster] = true
end

-- Character inventories used to build patterns (each string is spliced into a
-- [...] character class, so the order of characters is irrelevant).

-- Every plain consonant letter.
local all_cons = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह"
-- Consonants after which export.tr may keep a word-final ə when they close a
-- virama cluster.  NOTE(review): the string contains a literal space — confirm
-- whether that is intentional.
local special_cons = "छकखगतसहयथडढठपदणधरषटलवब भडचनशम"
-- Characters that, directly after a consonant, suppress insertion of the
-- inherent ə (dependent vowel signs plus a few extra symbols).
local vowel = "aिुृेोाीूैौॉॅॆॊऻऻॊॆॏ꣱’"
-- Independent vowel letters (plus nukta forms used as vowels).
local vowel_sign = "अइउएओआईऊऋॠऎऒव़य़ॵॳॴऐऔऑऍ"

-- Matches vowel + consonant + ə + consonant + vowel (consonants may carry a
-- nukta, the last vowel may carry anusvara/chandrabindu); used on the reversed
-- word to weaken the medial ə.
local syncope_pattern = table.concat({
	"([", vowel, vowel_sign, "])",
	"(़?[", all_cons, "])",
	"ə",
	"(़?[", all_cons, "])",
	"([ंँ]?[", vowel, vowel_sign, "])",
})

-- Returns `text` with its characters in reverse order.  Characters are counted
-- and extracted with mw.ustring (len/sub) rather than the byte-based string
-- library.
local function rev_string(text)
	local total = mw.ustring.len(text)
	local reversed = {}
	for offset = 0, total - 1 do
		local pos = total - offset
		reversed[#reversed + 1] = mw.ustring.sub(text, pos, pos)
	end
	return table.concat(reversed)
end
-- Converts Devanagari `text` into a phonetic transcription string
-- (presumably Maithili IPA, judging by the module name — TODO confirm).
--
-- Parameters:
--   text  string to convert.
--   lang  unused by this function.
--   sc    unused by this function.
-- Returns the converted string, normalised with mw.ustring.toNFC.
--
-- Stages:
--   1. Insert the inherent vowel "ə" after every consonant that is not
--      followed by a vowel sign or virama.
--   2. For every run of Devanagari characters: reverse it, apply word-final
--      schwa deletion, medial schwa weakening and anusvara handling, then
--      substitute the result back into `text`.
--   3. Map each character through `conv`.
--   4. Apply a series of context-dependent rewrites on the resulting string.
function export.tr(text, lang, sc)
	-- Stage 1: consonant (+ optional nukta) not followed by a vowel sign or
	-- virama receives an explicit "ə".
	text = gsub(
		text,
		"([" .. all_cons .. "]़?)([" .. vowel .. "्]?)",
		function(c, d)
			return c .. (d == "" and "ə" or d)
		end
	)

	-- Stage 2: per-word processing.  gmatch iterates over the string as it was
	-- when the loop started, while `text` itself is rewritten on each pass.
	for word in mw.ustring.gmatch(text, "[ऀ-ॿə]+") do
		local orig_word = word
		-- Work on the reversed word so that word-final rules can be anchored
		-- with "^".
		word = rev_string(word)

		-- Word-final ə: keep it only if the final consonant is in special_cons
		-- and closes a virama cluster not listed in perm_cl, or if the word
		-- ends in ी + य; otherwise drop it.
		word = gsub(word, '^ə(़?)([' .. all_cons .. '])(.)(.?)', function(opt, first, second, third)
			return (((match(first, '[' .. special_cons .. ']') and match(second, '्') and not perm_cl[first..second..third])
				or match(first .. second, 'य[ी]'))
				and 'ə' or "") .. opt .. first .. second .. third end)

		-- Medial ə between two consonants flanked by vowels becomes "ᵊ".
		-- Repeated because one gsub pass cannot rewrite overlapping matches.
		while match(word, syncope_pattern) do
			word = gsub(word, syncope_pattern, '%1%2ᵊ%3%4')
		end

		-- Anusvara.  In the reversed word, `succ` is the character that
		-- follows the anusvara in reading order and `prev` the one before it.
		--   * no following character and prev is "ə"  -> virama + म
		--   * no following character and prev is a vowel sign -> nasalisation
		--   * otherwise the nasal given by nasal_assim for the following
		--     character, falling back to nasalisation.
		word = gsub(
			word,
			"(.?)ं(.)",
			function(succ, prev)
				return succ ..
					(succ .. prev == "ə" and "्म" or
						(succ == "" and match(prev, "[" .. vowel .. "]") and "̃" or nasal_assim[succ] or "̃")) ..
						prev
			end
		)

		-- NOTE(review): orig_word is used as a pattern against the whole text,
		-- so every occurrence (including inside a longer word) is replaced,
		-- and the two substitutions below run on the whole text on every
		-- iteration, which can stop a later orig_word from matching.
		-- Confirm this is intended.
		text = gsub(text, orig_word, rev_string(word))
		text = gsub(text, "ज्ञ", "ɡj")
		text = gsub(text, "अ꣱", "ɔ")
	end

	-- Stage 3: character-by-character conversion (character + optional nukta).
	text = gsub(text, ".़?", conv)

	-- Stage 4: clean-up and contextual rules.
	text = gsub(text, "ə([ɪʊ])̯̃", "ə̃%1̯̃")
	text = gsub(text, "([ə])̃([iuɪʊ])̯", "%1̃%2̯")
	text = gsub(text, "[<>]", "")
	text = gsub(text, "ॱ", "")

	-- v -> b after a literal "#".
	-- NOTE(review): nothing in this function inserts "#", so these rules only
	-- fire when the input already contains it — confirm the intended marker.
	text = gsub(text, "(#)və([ɪ̯ʊ̯ɪ̃ʊ̃])", "bə%2")
	-- NOTE(review): this replacement re-emits the "#" (%1) after "b" and
	-- discards the optional "ː" capture — verify against the sibling rules.
	text = gsub(text, "(#)v([ieɪēōʊuē̃ō̃ojr])(ː?)", "b%1%2")
	text = gsub(text, "(#)([v])ə([krɾjtcʦʣçʐṅñysśdpɦhn])([tnrṇṣcśkghjɦsāēōçʐueoʌəayd])", "bə%3%4")
	-- v -> w between a consonant and a vowel that is followed by a consonant.
	text = gsub(
		text,
		"([śsnlcçʦʣʐjzkʰʱɦhpɡtdgb])([v])([aʌäəāiāɪʊɪ̃ʊ̃āēōīuūeoŏĕɔæɛʌ̃ä̃ĩũā̃ē̃ō̃ī̃ū̃ẽõɔ̃e̤])(ː?)([cspdtçʐnɡgkʦʣbɾrjyṇṣśṇɾṅñṃ])",
		"%1w%3%4%5"
	)
	text = gsub(text, "([ʌäəɪʊāiuāēōeoŏĕ])(ː?)([nl])([td]̪)", "%1%2%3̪%4") -- dental assimilation
	text = gsub(text, "([ʌəäaɪʊāiāēōueoŏĕ])(ː?)n([ʈɖ])", "%1%2ɳ%3") -- retroflex assimilation
	text = gsub(text, "([l])([ʈɖ])", "ɭ%2")
	-- The replacement reproduces the match unchanged.
	text = gsub(text, "([ʌʌ̃äaāiuāūɪʊəãā̃ī̃ĩũū̃ẽõeeāēōā̃ē̃ō̃o̯o ̤])(ː?)ɾ([ʌʌ̃äaāāiīɪuūʊā̃ē̃ō̃əãā̃ī̃ĩũū̃ẽõeeyo̯o])(ː?)", "%1%2ɾ%3%4")
	text = gsub(text, "([śsnlcjzʐçʦʣkhptdgb])([vw])([aāäɪʊəiīāēōuūoeĩ])(ː?)([cspdtngkbrjyṇṣśṇɾṅñṃ])", "%1w%3%4%5")

	text = gsub(text, "ʂp", "ɸp")
	-- Drops a literal "#" before an optional segment followed by "jə".
	text = gsub(text, "(#)([spdtzʱʰɦgkbrṇṣśʂʈɖçʐʦʣnṇʌāāäiuīūoɔæɛā̃ē̃ō̃ʌ̃ä̃ĩī̃āēōū̃īūũõɔ̃e̤ːɾṅñ]?)jə", "%2jə")

	text = gsub(text, "([aʌəäāiīɪuūʊeoŏāēōĕɔæā̃ē̃ō̃ɛāʌ̃ä̃ĩɪ̃ũʊ̃ī̃ū̃ẽõɔ̃e̤])(ː?)kʂ", "%1%2t̚t͡ɕʰ") -- kṣ ligature
	text = gsub(text, "()kʂ", "t͡ɕʰ") -- kṣ initial
	text = gsub(text, "ʂ", "s")
	text = gsub(text, "ɦɾɪ", "ɾɪ")
	text = gsub(text, "nɡj", "ŋɡj")

	-- Long-vowel shortening: ā ī ū ē ō become a ɪ ʊ e o when followed by two
	-- more vowels (each optionally preceded by a consonant with optional
	-- aspiration) and an optional final segment.  Each vowel has one rule
	-- anchored at the end of the text ("$") and one before a space.
	text = gsub(text, "ā([kɦgɕʑṅcjñṭḍṇɽtdʈɖnʦʣʰʱpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäīūeâôoʌāēōiuɪʊɨʉe̯eëəᵊ])([kɦgṅcjʰʱñṭḍṇɽtdʈɖnpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aīūāäeâāēōôoʌiuɪʊe̯eëəᵊ])([kɦgṅcjñṭḍṇɽtdʈɖnpbmɽ̃yrlɳwvɾjwśṣsʰʱhɕʑqxʦʣġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪʊ]?(̃?))$", "a%1%2%3%4%5%6")
	text = gsub(text, "ā([kɦgɕʑṅcjñṭḍṇɽtdʈɖnʦʣʰʱpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäīūeâôoʌāēōiuɪʊɨʉe̯eëəᵊ])([kɦgṅcjʰʱñṭḍṇɽtdʈɖnpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aīūāäeâāēōôoʌiuɪʊe̯eëəᵊ])([kɦgṅcjñṭḍṇɽtdʈɖnpbmɽ̃yrlɳwvɾjwśṣsʰʱhɕʑqxʦʣġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪʊ]?(̃?)) ", "a%1%2%3%4%5%6 ")

	text = gsub(text, "ī([kɦgɕʑṅcjñṭḍṇɽtdʈɖnʦʣʰʱpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeâôīūoʌāēōiuɪʊɨʉe̯eëəᵊ])([kɦgṅcjʰʱñṭḍṇɽtdʈɖnpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeâāīūēōôoʌiuɪʊe̯eëəᵊ])([kɦgṅcjñṭḍṇɽtdʈɖnpbmɽ̃yrlɳwvɾjwśṣsʰʱhɕʑqxʦʣġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪʊ]?(̃?))$", "ɪ%1%2%3%4%5%6")
	text = gsub(text, "ī([kɦgɕʑṅcjñṭḍṇɽtdʈɖnʦʣʰʱpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeâôīūoʌāēōiuɪʊɨʉe̯eëəᵊ])([kɦgṅcjʰʱñṭḍṇɽtdʈɖnpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeâāīūēōôoʌiuɪʊe̯eëəᵊ])([kɦgṅcjñṭḍṇɽtdʈɖnpbmɽ̃yrlɳwvɾjwśṣsʰʱhɕʑqxʦʣġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪʊ]?(̃?)) ", "ɪ%1%2%3%4%5%6 ")

	text = gsub(text, "ū([kɦgɕʑṅcjñṭḍṇɽtdʈɖnʦʣʰʱpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeâīūôoʌāēōiuɪʊɨʉe̯eëəᵊ])([kɦgṅcjʰʱñṭḍṇɽtdʈɖnpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeīūâāēōôoʌiuɪʊe̯eëəᵊ])([kɦgṅcjñṭḍṇɽtdʈɖnpbmɽ̃yrlɳwvɾjwśṣsʰʱhɕʑqxʦʣġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪʊ]?(̃?))$", "ʊ%1%2%3%4%5%6")
	text = gsub(text, "ū([kɦgɕʑṅcjñṭḍṇɽtdʈɖnʦʣʰʱpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeâīūôoʌāēōiuɪʊɨʉe̯eëəᵊ])([kɦgṅcjʰʱñṭḍṇɽtdʈɖnpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeīūâāēōôoʌiuɪʊe̯eëəᵊ])([kɦgṅcjñṭḍṇɽtdʈɖnpbmɽ̃yrlɳwvɾjwśṣsʰʱhɕʑqxʦʣġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪʊ]?(̃?)) ", "ʊ%1%2%3%4%5%6 ")

	text = gsub(text, "ē([kɦgṅʦʣcɕʑjñṭḍṇɽtdʈɖnpbmɽ̃yrʰʱlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeīūâôoʌāēōiuɪʊɨʉe̯eëəᵊ])([kɦgṅcjñṭḍɕʑʦʣṇɽtdʈɖnpbmʰʱɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeâôoʌīūiuɪʊe̯āēōeëəᵊ])([kɦgṅcjñṭḍṇɽtdʈɖnpbmɽ̃yɕʑrlɳʦʣwvɾjwśṣsʰʱhqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪʊ]?(̃?))$", "e%1%2%3%4%5%6")
	text = gsub(text, "ē([kɦgṅʦʣcɕʑjñṭḍṇɽtdʈɖnpbmɽ̃yrʰʱlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeīūâôoʌāēōiuɪʊɨʉe̯eëəᵊ])([kɦgṅcjñṭḍɕʑʦʣṇɽtdʈɖnpbmʰʱɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeâôoʌīūiuɪʊe̯āēōeëəᵊ])([kɦgṅcjñṭḍṇɽtdʈɖnpbmɽ̃yɕʑrlɳʦʣwvɾjwśṣsʰʱhqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪʊ]?(̃?)) ", "e%1%2%3%4%5%6 ")

	text = gsub(text, "ō([kɦgṅʦʣcɕʑjñṭḍṇɽtdʈɖnpbmɽ̃yrʰʱlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeâôoʌīūāēōiuɪʊɨʉe̯eëəᵊ])([kɦgṅcjñṭḍɕʑʦʣṇɽtdʈɖnpbmʰʱɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeâôoīūʌiuɪʊe̯āēōeëəᵊ])([kɦgṅcjñṭḍṇɽtdʈɖnpbmɽ̃yɕʑrlɳʦʣwvɾjwśṣsʰʱhqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪʊ]?(̃?))$", "o%1%2%3%4%5%6")
	text = gsub(text, "ō([kɦgṅʦʣcɕʑjñṭḍṇɽtdʈɖnpbmɽ̃yrʰʱlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeâôoʌīūāēōiuɪʊɨʉe̯eëəᵊ])([kɦgṅcjñṭḍɕʑʦʣṇɽtdʈɖnpbmʰʱɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([aāäeâôoīūʌiuɪʊe̯āēōeëəᵊ])([kɦgṅcjñṭḍṇɽtdʈɖnpbmɽ̃yɕʑrlɳʦʣwvɾjwśṣsʰʱhqxġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪʊ]?(̃?)) ", "o%1%2%3%4%5%6 ")

	return mw.ustring.toNFC(text)
end
return export