Module:User:Sinonquoi/ks-pa-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Sinonquoi, for their own experimentation. Items in this module may be added and removed at Sinonquoi's discretion; do not rely on this module's stability.


local u = mw.ustring.char -- unicode
local gsub = mw.ustring.gsub -- string manipulation

-- TODO
-- 1. [HACK] Aspirate aspirables
-- 2. Vowels with fixed characters (ā, ạ̄, ū, o, ō, e, ē, ọ, ọ̄)
-- 3. Initial variants of vowels (with alef)
-- 4. Medial variants of vowels (diacritics or standalone)
-- 5. Final variants of vowels (same as medial except: e, ē)
-- 6. Treat final yē and vāv differently depending on what comes before
-- 7. Treat final hē with vowel diacritic
-- 8. Fixed consonants and provided vowels
-- 9. [DONE] Disregard hat for nūn
-- 10. [DONE] Work around hat for rē
-- 11. [DONE] yē with hat is a palatal 
-- 12. [DONE] Kashmiri yē medial is 'a; final '
-- 12b. Check support for compound words
-- 13. Add vowels to consonants
-- 14. Add vowels to dual role characters

-- How to add vowels
-- 1. Make a list of consonants and a list of vowels
-- 2. Check whether one of the conditions is met (C+V; initial alef + V; vowel carrier + V)
-- 3. Replace the matched sequence with its transliteration

-- Module table; returned at the end of the file.
local export = {}

-- Letter sets. In the visible part of this file, `aspirable` and `aspirate_h`
-- are referenced only from the commented-out aspiration step, and `hattable`
-- is used by the "C2=r/palatal" rule in export.transliterate.
local aspirable = "پتٹچژک"
local aspirate_h = "ھ"
local hattable = "یر"

-- Combining vowel marks that may follow a consonant:
-- U+064E, U+064F, U+0650, U+0654, U+0655, U+065F (same order as before).
local vowel_diacritics = u(0x064E, 0x064F, 0x0650, 0x0654, 0x0655, 0x065F)


-- DIACRITICS
local v_sign = u(0x065A) -- V (U+065A ARABIC VOWEL SIGN SMALL V ABOVE)
local inverted_v_sign = u(0x065B) -- inverted V (U+065B)
-- Both hat marks as one string: V first, then inverted V.
local hats = u(0x065A, 0x065B)


-- Consonant letter -> Latin transliteration.
--
-- Every key must be exactly ONE letter with no invisible characters after it:
-- lookups are done with single characters captured by gsub, so a key such as
-- "<letter><U+202C>" never matches and the letter silently stays
-- untransliterated (or makes a `conv[c] .. x` concatenation fail on nil).
local conv = {
    ['ب'] = 'b', ['پ'] = 'p', ['ت'] = 't', ['ٹ'] = 'ṭ', ['ث'] = 's',
    ['ج'] = 'j', ['چ'] = 'c', ['ح'] = 'h', ['خ'] = 'kh',
    ['د'] = 'd', ['ڈ'] = 'ḍ', ['ذ'] = 'z',
    -- ڑ (retroflex flap) is ṛ; it used to duplicate the ḍ of ڈ.
    ['ر'] = 'r', ['ڑ'] = 'ṛ', ['ز'] = 'z', ['ژ'] = 'ċ',
    ['س'] = 's', ['ش'] = 'ś', ['ص'] = 's', ['ض'] = 'z',
    ['ط'] = 't', ['ظ'] = 'z',
    ['ع'] = 'ʿ', ['غ'] = 'ġ',
    ['ف'] = 'f', ['ق'] = 'q',
    ['ک'] = 'k', ['گ'] = 'g',
    ['ل'] = 'l', ['م'] = 'm', ['ن'] = 'n',

    ['ھ'] = 'h',

    -- treatment varies
    ['ه'] = 'h',

    -- extended set
    ['ی'] = 'y', ['و'] = 'v',
}

-- Combining vowel mark -> Latin vowel. The keys are the same six code points
-- that make up `vowel_diacritics`.
local vowels_conv = {
    [u(0x064E)] = 'a',  -- U+064E ARABIC FATHA
    [u(0x064F)] = 'u',  -- U+064F ARABIC DAMMA
    [u(0x0650)] = 'i',  -- U+0650 ARABIC KASRA
    [u(0x0654)] = 'ạ',  -- U+0654 ARABIC HAMZA ABOVE
    [u(0x0655)] = 'ụ',  -- U+0655 ARABIC HAMZA BELOW
    [u(0x065F)] = 'ụ̄',  -- U+065F ARABIC WAVY HAMZA BELOW
}

-- Letters treated purely as consonants; spliced into [...] pattern classes.
local consonants = 'بپتٹثجچحخدڈذرڑزژسشصضطظعغفقکگلمنھ'
-- The same letters followed by و and ی.
local consonants_extended = consonants .. 'وی'

--- Transliterate Kashmiri text written in Perso-Arabic script.
--
-- Work in progress (see the TODO list near the top of the file). The rules are
-- applied in order, each as one gsub pass over the whole string:
--   1. drop the inverted-V hat after nūn / rē;
--   2. drop the hat after (yē or rē) + vowel mark;
--   3. yē + hat, and final half-yē, become an apostrophe (palatalisation);
--   4. half-yē between two consonants becomes 'a;
--   5. consonant + vowel mark becomes consonant + vowel in Latin;
--   6. remaining plain consonants are converted through `conv`.
-- Characters not covered by any rule are left unchanged.
--
-- @param text string to transliterate (passed straight to mw.ustring.gsub,
--             so it must be a string)
-- @return the transliterated string
function export.transliterate(text)
    -- ASPIRATE (not implemented yet)
    -- text = gsub(text, aspirable .. aspirate_h, "hhhh")

    -- REMOVE HAT FROM NŪN and RĒ
    text = gsub(text, '([نر])' .. inverted_v_sign, '%1')

    -- C2=r/palatal: keep letter and vowel mark, drop the hat that follows
    text = gsub(
        text,
        '([' .. hattable .. '])([' .. vowel_diacritics .. '])' .. inverted_v_sign,
        '%1%2'
    )

    -- YĒ with INVERTED HAT
    text = gsub(text, 'ی' .. inverted_v_sign, "'")

    -- FINAL HALF-YĒ IS A PALATAL
    text = gsub(text, 'ؠ$', "'")
    -- BEFORE A SPACE (a run of spaces collapses to a single one)
    text = gsub(text, 'ؠ[ ]+', "' ")

    -- MEDIAL HALF-YĒ IS 'a
    text = gsub(text, '([' .. consonants .. '])ؠ([' .. consonants .. '])', "%1'a%2")

    -- CONSONANT + VOWEL
    -- Fall back to the original character when a lookup misses, so an
    -- incomplete mapping table leaves the letter as-is (the same thing a
    -- table-driven gsub does) instead of raising on a nil concatenation.
    text = gsub(
        text,
        '([' .. consonants_extended .. '])([' .. vowel_diacritics .. '])',
        function(consonant, vowel)
            return (conv[consonant] or consonant) .. (vowels_conv[vowel] or vowel)
        end
    )
    -- text = gsub(text, '([' .. vowel_diacritics .. '])', vowels_conv)

    -- FINAL HE + VOWEL
    --    text = gsub(text, 'ہ([' ..  short_vowels_list .. '])$', short_vowels)

    -- REMAINING CONSONANTS
    -- Uses the shared `consonants` set (و and ی are deliberately not in it),
    -- rather than a second copy of the same literal.
    text = gsub(text, '[' .. consonants .. ']', conv)
    return text
end

return export