User:Flubot/Adding sort key to Greek nouns

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This script is tested.

Its main function is to add the "sort=" parameter to the inflection line of Greek nouns. For examples, see this, [1] and this diff.

A second function of this script is to replace the {{infl|el|noun}} template in the inflection line with the {{el-noun}} template (e.g. this diff). It generates the romanization and adds a note if the gender is missing (e.g. this diff).

We need:

  1. pywikipedia
  2. a list of entries to modify, in a file named list-sort placed in the same directory (pywikipedia). The headwords must be written without brackets, one headword per line.
  3. To run it, just type: python taxinomos.py

taxinomos.py[edit]

#!/usr/bin/python
# -*- coding: utf-8 -*-

import wikipedia
import sys, re, codecs

# Characters removed from a headword when building its sort key:
# spaces, '*', '.', ',', straight apostrophe, '-', '(' and ')'.
spaces_tag = re.compile('([ \*\.,\'\-\(\)]*)')
# Typographic apostrophe, also removed from the sort key.
apost_tag = re.compile(u'\’')
# Captures one line of the input list up to (not including) the newline.
page_tag = re.compile('(.*)')
# Leftover, unused pattern fragment:
# ((?P<arith>\|αριθ=[0-9]+)
# {{el-noun ...|sort=...}}: a template call that already carries a sort key.
# 'parms' is everything before the sort parameter, 'kleidi' the parameter itself.
pos2_tag = re.compile(u'\{\{el-noun(?P<parms>.*)(?P<kleidi>\|sort\=[^\}]*)\}\}')
# {{el-noun|...}}: a template call with at least one parameter ('param').
pos1_tag = re.compile(u'\{\{el-noun\|(?P<param>[^\}]*)(\|*)\}\}')
# Any line containing the opening of an el-noun template.
pos3_tag = re.compile(u'\{\{el-noun')
# A run of two or more consecutive pipes (left behind when a parameter is removed).
pos4_tag = re.compile(u'\|(\|+)')
# The script parameter 'sc=Grek|' (one or more 'e').
sc_tag = re.compile('sc=Gr(e+)k\|')
# Opening of the generic inflection template {{infl|el|noun|...
infl_tag = re.compile('\{\{infl\|el\|noun\|')
# Value of a 'tr=' parameter (not referenced by the code visible in this file).
tr_tag= re.compile('tr\=([^\|\}]*)')
# Value of the gender parameter 'g='.
genos_tag= re.compile('g=([^\|\}]*)')

# While True, the operator is asked to confirm every edit before it is saved.
debug_bul = True

# Transliteration tables.
# ch_apo lists Greek letters and letter combinations; ch_se holds the Latin
# replacement found at the same position.  Multi-letter combinations are
# listed before the single letters they contain, because roman() applies the
# replacements strictly in table order.
ch_apo = u'γγ γχ γξ αύω αύο αύε αυγ αυδ αυλ αυμ αυν αυρ αύγ αύδ αύζ αύλ αύμ αύν αύρ αυ αύ ευη ευή ευα ευά εύω εύε ευό ευο ευί ευβ ευγ ευδ ευζ ευλ ευμ ευν ευρ εύβ εύγ εύδ εύζ εύλ εύμ εύν εύρ ευ εύ ϊ α ά β γ δ ε έ ζ η ή θ ι ί κ λ μ ν ξ ου ού ο ό π ρ σ ς τ υ ύ ϋ φ χ ψ ω ώ ΐ ΰ'
ch_se = u'ng nch nx ávo ávo áve avg avd avl avm avn avr ávg ávd ávz ávl ávm ávn ávr af áf evi eví eva evá évo éve evó evo eví ev evg evd evz evl evm evn evr év évg évd évz évl évm évn évr ef éf ï a á v g d e é z i í th i í k l m n x ou oú o ó p r s s t y ý ÿ f ch ps o ó ḯ ÿ́'

trapo = ch_apo.split(u' ')
trse = ch_se.split(u' ')


def roman(trnsl):
    """Return the Latin transliteration of the Greek text *trnsl*.

    Every entry of ``trapo`` found in the text is replaced by the entry of
    ``trse`` at the same index, in table order.  Characters that do not
    appear in the table (for example upper-case letters, digits or
    punctuation) are left unchanged.
    """
    for greek, latin in zip(trapo, trse):
        trnsl = trnsl.replace(greek, latin)
    return trnsl


# Folding table used when building sort keys: each accented or dotted vowel
# in mtg_apo maps to the plain vowel at the same position in mtg_se, and the
# final sigma maps to the ordinary sigma.
mtg_apo = u'ά έ ή ί ϊ ΐ ό ύ ϋ ΰ ώ ς'
mtg_se =  u'α ε η ι ι ι ο υ υ υ ω σ'

# unicode.translate() expects a mapping keyed by code point, hence ord().
trkeys = [ord(letter) for letter in mtg_apo.split(u' ')]
trvals = mtg_se.split(u' ')
trtable = {}
for code_point, plain in zip(trkeys, trvals):
    trtable[code_point] = plain

# Main script: read the headwords from the file 'list-sort' (one per line),
# fetch each page, rewrite its Greek noun inflection line and save the result.


def _sort_key_param(title):
    """Return the ``|sort=...`` template parameter for *title*.

    The key is the lower-cased title with the characters of ``trtable``
    folded and the characters matched by ``spaces_tag`` / ``apost_tag``
    removed.  An empty string is returned when the key equals the title,
    i.e. when no explicit sort key is needed.
    """
    key = title.lower().translate(trtable)
    key = spaces_tag.sub('', key)
    key = apost_tag.sub('', key)
    if key != title:
        return '|sort=' + key
    return ''


def _rewrite_line(seira, title, kleida):
    """Return one line of wikitext with its noun template brought up to date.

    *seira* is the line, *title* the headword and *kleida* the sort
    parameter produced by ``_sort_key_param`` (possibly empty).
    """
    if infl_tag.search(seira):
        # Replace the generic {{infl|el|noun|...}} line by {{el-noun}},
        # keeping the gender if one was given.
        g1 = genos_tag.search(seira)
        if g1:
            gen = g1.group(1)
        else:
            gen = "gender missing"
        seira = '{{el-noun|' + gen + '|' + roman(title) + kleida + '}}'

    # Function replacements are used so that the sort key is inserted
    # literally and never interpreted as a regex replacement template.
    if pos2_tag.search(seira):
        # The template already has a sort key: replace (or drop) it.
        seira = pos2_tag.sub(
            lambda m: u'{{el-noun' + m.group('parms') + kleida + u'}}', seira)
    else:
        # No sort key yet: append ours after the existing parameters.
        seira = pos1_tag.sub(
            lambda m: u'{{el-noun|' + m.group('param') + kleida + u'}}', seira)

    if pos3_tag.search(seira):
        # Drop the sc=Grek parameter and collapse the doubled pipes it leaves.
        seira = sc_tag.sub(u'|', seira)
        seira = pos4_tag.sub(u'|', seira)
    return seira


# The 'with' block guarantees the list file is closed even if a page fails.
with codecs.open('list-sort', 'r', 'utf-8') as fin:
    for line in fin:
        t = line.strip()
        if not t:
            # Blank line in the list: there is no page to work on.
            continue
        print(t)

        kleida = _sort_key_param(t)
        if kleida:
            wikipedia.setAction('Adding sort key')
        else:
            wikipedia.setAction(u'remove wrong or reduntant sort key')

        page = wikipedia.Page(wikipedia.getSite(), t)
        try:
            arxiko = page.get(get_redirect=True)
        except wikipedia.NoPage:
            wikipedia.output(u'Skipping %s because it does not exist' % (page.title()))
            continue

        # Replacements, applied line by line.
        keimeno = "\n".join(
            _rewrite_line(seira, t, kleida) for seira in arxiko.split("\n"))

        # Compare against the text already fetched: calling page.get() again
        # without get_redirect=True would raise IsRedirectPage on redirects.
        if keimeno == arxiko:
            continue

        # Show the title of the page we're working on,
        # highlighted in purple, followed by what was changed.
        wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
        wikipedia.showDiff(arxiko, keimeno)

        if debug_bul:
            choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['yes', 'No', 'all'], ['y', 'N', 'a'], 'N')
            if choice == 'a':
                # Accept this edit and stop asking for the remaining pages.
                debug_bul = False
            accepted = choice in ('y', 'a')
        else:
            accepted = True
        if not accepted:
            continue

        try:
            # Save the page
            page.put(keimeno)
        except wikipedia.IsRedirectPage:
            wikipedia.output(u'Skipping %s because it is a redirect' % (page.title()))
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
        except wikipedia.SpamfilterError as error:
            wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (page.title(), error.url))