-- Module:User:TagaSanPedroAko/tl-IPA
--
-- From Wiktionary, the free dictionary
-- Jump to navigation Jump to search
-- Table that collects the functions this module exposes; it is returned at the end of the file.
local export = {}
-- Vowel letters (plain, acute, circumflex, grave) that can occur in the
-- cleaned-up spelling; used to build the syllabification patterns.
local VOWEL_LETTERS = "aeiouáâàéêèíîìóôòúûù"
local V = "([" .. VOWEL_LETTERS .. "])"   -- one captured vowel
local C = "([^" .. VOWEL_LETTERS .. ".])" -- one captured non-vowel, non-dot character

-- Accented vowel -> plain vowel, applied once the diacritic has been turned
-- into a stress mark and/or glottal stop.
local PLAIN_VOWEL = {
	['á']='a', ['â']='a', ['à']='a',
	['é']='e', ['ê']='e', ['è']='e',
	['í']='i', ['î']='i', ['ì']='i',
	['ó']='o', ['ô']='o', ['ò']='o',
	['ú']='u', ['û']='u', ['ù']='u',
}

-- Inserts "." at syllable boundaries; dots already present in `word` are kept.
local function insert_syllable_breaks(word)
	local gsub = mw.ustring.gsub
	local rules = {
		{ V .. C .. V,           "%1.%2%3" },     -- V.CV
		{ V .. C .. C .. V,      "%1%2.%3%4" },   -- VC.CV
		{ V .. C .. C .. C .. V, "%1%2.%3%4%5" }, -- VC.CCV
	}
	for _, rule in ipairs(rules) do
		-- gsub matches never overlap, and the vowel closing one match is also
		-- the vowel opening the next, so every rule needs a second pass.
		for _pass = 1, 2 do
			word = gsub(word, rule[1], rule[2])
		end
	end
	-- A stop followed by l/r starts the next syllable together with it.
	word = gsub(word, "([pbktdg])%.([lr])", ".%1%2")
	-- An "s" caught between two consonants closes the preceding syllable.
	word = gsub(word, C .. "%.s" .. C, "%1s.%2")
	-- Adjacent non-high vowels, and doubled i / u, belong to separate syllables.
	word = gsub(word, "([aeoáâàéêèíîìóôòúûù])([aeoáâàéêèíîìóôòúûù])", "%1.%2")
	word = gsub(word, "([ií])([ií])", "%1.%2")
	word = gsub(word, "([uú])([uú])", "%1.%2")
	return word
end

-- Converts the diacritics of a dot-syllabified word into IPA stress marks and
-- glottal stops, strips the diacritics and returns the re-joined word.
local function mark_prosody(word, southern)
	local find, gsub = mw.ustring.find, mw.ustring.gsub
	local syllables = mw.text.split(word, "%.")
	local last = #syllables

	-- Decide which syllables carry primary stress.
	local stressed = {}
	if find(word, "[áéíóúâêîôû]") then
		-- Explicitly marked: acute, or circumflex (stress plus glottal stop).
		for i = 1, last do
			stressed[i] = find(syllables[i], "[áéíóúâêîôû]") ~= nil
		end
	elseif find(word, "[^aeiouàèìòùns]$") then
		-- NOTE(review): default rule kept from the original code (final stress
		-- when the word ends in a consonant other than n/s); grave vowels are
		-- now counted as vowels here. Confirm this default suits Tagalog.
		stressed[last] = true
	elseif last > 1 then
		stressed[last - 1] = true
	end

	for i = 1, last do
		local syl = syllables[i]
		local before = stressed[i] and "ˈ" or ""
		-- NOTE(review): the original prefixed "ʔ" to every acute-marked
		-- syllable under a "southern dialects" heading but never consulted
		-- the Southern argument; the rule is now gated on it. Confirm the
		-- rule itself.
		if southern and find(syl, "[áéíóú]") then
			before = before .. "ʔ"
		end
		-- NOTE(review): grave and circumflex are rendered as a glottal stop
		-- AFTER the syllable, following Tagalog orthography; the original
		-- (non-working) code tried to prefix it. Confirm.
		local after = find(syl, "[âêîôûàèìòù]") and "ʔ" or ""
		local bare = gsub(syl, "[áâàéêèíîìóôòúûù]", PLAIN_VOWEL)
		syllables[i] = before .. bare .. after
	end
	return table.concat(syllables, ".")
end

--- Builds an IPA transcription for a Tagalog spelling.
--
-- Stages: clean the input; apply the context-sensitive spelling rules (native
-- digraphs, then Spanish-derived spellings) while the word is still plain
-- orthography; split into syllables; turn diacritics into stress / glottal
-- marks; map the remaining letters to IPA symbols; derive secondary stress;
-- optionally apply the allophone rules.
--
-- Fixes relative to the previous revision:
--  * the function always reaches its `return` (a misplaced `end` made words
--    with a circumflex return nothing);
--  * stress and glottal marks are written back into the returned word;
--  * mw.ustring.find is called with (subject, pattern) and a numeric init;
--  * single-letter IPA mapping (e, g, r, y) and c -> k run AFTER the rules
--    that look for those letters, so those rules can actually match;
--  * "ngg" is handled before "ng", and the stray "%" in its replacement is gone;
--  * "dy" -> "dj" and "ty" -> "tj" are covered by the general y -> j mapping;
--  * the allophone rules see "j" (y is mapped before them), keep the stress
--    mark they capture, and the third one targets "tj" instead of repeating "dj";
--  * lower-casing is Unicode-aware, so capital accented letters are kept.
--
-- @param word      spelling to transcribe, or a frame-like table whose
--                  args[1] is the spelling and args[4] the debug switch;
--                  when no spelling is given the current page title is used
-- @param Southern  truthy to apply the southern-dialect glottal-stop rule
-- @param phonetic  truthy to apply the allophone rules as well
-- @param do_debug  'yes' to append the intermediate forms to the result
-- @return the transcription string (without enclosing slashes or brackets)
function export.show(word, Southern, phonetic, do_debug)
	local gsub = mw.ustring.gsub
	local trace = {} -- intermediate forms, only emitted when do_debug == 'yes'

	if type(word) == 'table' then
		do_debug = word.args[4]
		word = word.args[1]
	end
	-- mw.ustring.lower also lower-cases accented capitals, which the byte-based
	-- string.lower leaves alone (they would then be stripped on the next line).
	word = mw.ustring.lower(word or mw.title.getCurrentTitle().text)
	word = gsub(word, "[^abcdefghijklmnopqrstuvwxyzáâàéêèíîìóôòúûùüñ.]", "")
	table.insert(trace, word)

	-- native digraphs and trigraphs ("ngg" first, otherwise "ng" would eat it)
	word = gsub(word, "([aeiou])ngg([aeiou])", "%1ŋg%2")
	word = gsub(word, "([aeiou])ng([abcdefhijklmnopqrstuvwyz]?)", "%1ŋ%2")
	word = gsub(word, "kuw([aeiou])", "kw%1")
	word = gsub(word, "([akeinopu])sy([aeiouáâàéêèíîìóôòúûù])", "%1ʃ%2")
	word = gsub(word, "([aeiou])ny([aeiou])", "%1ñ%2") -- ñ becomes ɲ below

	-- assimilated Spanish spellings
	word = gsub(word, "qu", "k")
	word = gsub(word, "v", "b")
	word = gsub(word, "([aeinoru]?)ch([aeiou])", "%1ts%2")
	word = gsub(word, "([aeiou])ll([aeiou])", "%1ʎ%2") -- ʎ becomes lj below
	table.insert(trace, word)

	-- "c" and "g" before "i"/"e", and "cu" before a vowel (Spanish-derived
	-- proper nouns and Spanish-style spellings of native words)
	word = gsub(word, "([aeiknorsu])c([ieíé])", "%1s%2")
	word = gsub(word, "([aeioru])gü([ieíé])", "%1gw%2")
	word = gsub(word, "ü", "w")
	word = gsub(word, "([aeiou]?)gu([ieíé])", "%1g%2")
	word = gsub(word, "cu([aou])", "kw%1")
	-- remaining Spanish letters; must follow the "c" rules above
	word = gsub(word, "[cfjñxz]", {['c']='k', ['f']='p', ['j']='h', ['ñ']='ɲ', ['x']='h', ['z']='s'})
	table.insert(trace, word)

	word = insert_syllable_breaks(word)
	table.insert(trace, word)

	word = mark_prosody(word, Southern)
	table.insert(trace, word)

	-- alphabet-to-phoneme for the letters the rules above needed in plain form
	-- ('g' -> 'ɡ' is U+0067 LATIN SMALL LETTER G -> U+0261 LATIN SMALL LETTER SCRIPT G);
	-- "y" can become "j" directly because the letter "j" was already mapped to "h"
	word = gsub(word, "[egry]", {['e']='ɛ', ['g']='ɡ', ['r']='ɾ', ['y']='j'})
	word = gsub(word, "ʎ", "lj")

	-- secondary stress: only the last primary stress mark stays primary
	word = gsub(word, 'ˈ(.+)ˈ', 'ˌ%1ˈ')
	word = gsub(word, 'ˈ(.+)ˌ', 'ˌ%1ˌ')
	word = gsub(word, 'ˌ(.+)ˈ(.+)ˈ', 'ˌ%1ˌ%2ˈ')

	-- phonetic transcription
	if phonetic then
		-- allophones
		word = gsub(word, '([ˈ]?)dj', '%1d͡ʒ')
		word = gsub(word, '([ˈ]?)sj', '%1ʃ')
		word = gsub(word, '([ˈ]?)tj', '%1t͡ʃ')
		-- NOTE(review): kept as written; the captured "t" is re-emitted before
		-- the affricate ("tt͡ʃ") and the pattern does not allow a syllable dot
		-- between "t" and "s" — confirm the intent.
		word = gsub(word, '([aɛinortu][ˈ]?[t])s([aɛiou])', '%1t͡ʃ%2')
		word = gsub(word, '[ɾɲ]', {['ɾ']='r', ['ɲ']='ni'})
		-- NOTE(review): the original replacement was just 's', deleting the
		-- stress mark and the vowel; the captures are restored here, but the
		-- rule still turns ʃ back into s before a vowel — confirm the intent.
		word = gsub(word, '([ˈ]?)ʃ([aɛiou])', '%1s%2')
		word = gsub(word, '([aɛiou][ˈ]?)k', '%1x')
		word = gsub(word, 'k([aɛiouɾ])', 'kx%1')
	end

	if do_debug == 'yes' then
		return word .. table.concat(trace, "")
	end
	return word
end

--- Entry point that calls export.show with the Southern argument set to true.
-- The phonetic and do_debug arguments are left unset.
-- @param frame  passed through unchanged as the first argument of export.show
-- @return the value returned by export.show
function export.Southern(frame)
	local use_southern = true
	return export.show(frame, use_southern)
end

--- Entry point that calls export.show with Southern = false and phonetic = true.
-- @param frame  passed through unchanged as the first argument of export.show
-- @return the value returned by export.show
function export.phonetic(frame)
	local use_southern, use_phonetic = false, true
	return export.show(frame, use_southern, use_phonetic)
end

--- Entry point that calls export.show with both Southern and phonetic set to true.
-- @param frame  passed through unchanged as the first argument of export.show
-- @return the value returned by export.show
function export.phoneticSouthern(frame)
	local use_southern, use_phonetic = true, true
	return export.show(frame, use_southern, use_phonetic)
end

-- Hand the table of exported functions to whoever loaded this module.
return export