User:Flubot/Adding DEFAULTSORT key to Greek words
Jump to navigation
Jump to search
This script is an adaptation of the one used on el.wiktionary to add sort keys to Greek words. It inserts a {{DEFAULTSORT:...}} key immediately before the interwiki links.
defaultsort.py
# -*- coding: utf-8 -*-
u"""
Add a {{DEFAULTSORT:...}} sort key to pages whose titles are Greek words.

The key is the lower-cased page title with accents and breathings folded to
the bare letter, final sigma folded to sigma, and every character outside
the basic lower-case Greek alphabet dropped.  The template is placed right
before the interwiki links.

Much of this code was stolen from cosmetic_changes.py.

&warning;

These command line parameters can be used to specify which pages to work on:

&params;

Furthermore, the following command line parameters are supported:

-page[:title]    Work on a single page; prompts for the title if omitted.
                 May be given several times.

-except:XYZ      Skip pages whose text contains XYZ.

-regex           Treat the -except values as regular expressions.

-nocase          Match the -except values case-insensitively.

-namespace:n     Only work on pages in namespace n (number or name).
                 May be given several times.
"""
__version__ = '$Id: default_kleidaTaksinomhshs.py 4260 2007-09-12 22:12:11Z wikipedian $'

import re
import string
import sys

import pagegenerators
import wikipedia

warning = u"""If you are running this bot outside en.wiktionary, please reconsider"""

# Placeholders that wikipedia.showHelp() substitutes in the module docstring.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp,
    '&warning;': warning,
}

# Summary message when using this module as a stand-alone script
msg_standalone = {
    'en': u'Bot: defaultsort key',
}

# Accented / polytonic lower-case letters (plus final sigma) ...
_ACCENTED = (
    u'ά έ ή ί ϊ ΐ ό ύ ϋ ΰ ώ ς ά έ ή ί ό ύ ώ ᾴ ῄ ῴ ὰ ὲ ὴ ὶ ὸ ὺ ὼ ᾲ ῂ ῲ ᾶ ῆ ῖ ῦ ῶ ᾷ ῇ ῷ ῗ ῧ ῒ ῢ ΐ ΰ ᾳ ῃ ῳ ἀ ἐ ἠ'
    u' ἰ ὀ ὐ ὠ ᾀ ᾐ ᾠ ἄ ἔ ἤ ἴ ὄ ὔ ὤ ᾄ ᾔ ᾤ ἂ ἒ ἢ ἲ ὂ ὒ ὢ ᾂ ᾒ ᾢ ἆ ἦ ἶ ὖ ὦ ᾆ ᾖ ᾦ ἁ ἑ ἡ ἱ ὁ'
    u' ὑ ὡ ἅ ἕ ἥ ἵ ὅ ὕ ὥ ᾅ ᾕ ᾥ ἃ ἓ ἣ ἳ ὃ ὓ ὣ ᾃ ᾓ ᾣ ἇ ἧ ἷ ὗ ὧ ᾇ ᾗ ᾧ ᾰ ῐ ῠ ᾱ ῑ ῡ ῥ'
)
# ... and, position by position, the bare letter each one is folded to.
_PLAIN = (
    u'α ε η ι ι ι ο υ υ υ ω σ α ε η ι ο υ ω α η ω α ε η ι ο υ ω α η ω α η ι υ ω α η ω ι υ ι υ ι υ α η ω α ε η'
    u' ι ο υ ω α η ω α ε η ι ο υ ω α η ω α ε η ι ο υ ω α η ω α η ι υ ω α η ω α ε η ι ο'
    u' υ ω α ε η ι ο υ ω α η ω α ε η ι ο υ ω α η ω α η ι υ ω α η ω α ι υ α ι υ ρ'
)
# Code point -> replacement letter, in the form unicode.translate() expects.
# Built once at import time instead of on every page.
_FOLD_TABLE = dict(
    (ord(apo), se) for apo, se in zip(_ACCENTED.split(u' '), _PLAIN.split(u' '))
)

# Anything that is not one of the 24 bare lower-case letters is dropped
# from the sort key.
_NON_GREEK_RE = re.compile(u'[^αβγδεζηθικλμνξοπρστυφχψω]')

# An existing sort-key template; group 1 is its key.
_DEFAULTSORT_RE = re.compile(u'\\{\\{DEFAULTSORT:([^}]*)\\}\\}')

# The same template together with up to two line breaks that follow it, so
# that removing the template does not leave an empty line behind.  Both
# CRLF and bare LF line endings are accepted.
_DEFAULTSORT_REMOVAL_RE = re.compile(
    u'\\{\\{DEFAULTSORT:[^}]*\\}\\}(?:\\r?\\n){0,2}'
)


class EpiloghKleidaTaksinomhshsToolkit:
    """Compute the DEFAULTSORT template for one page and put it in its text."""

    def __init__(self, site, title, exceptions=None, debug=False):
        """
        site       -- site object the page belongs to; used to locate the
                      interwiki links and to get the text separator.
        title      -- the page title, either as a unicode string or as a
                      callable returning it (e.g. the bound ``page.title``).
        exceptions -- stored on the instance; not used by this class.
        debug      -- when true, allagh() shows a diff of its changes.
        """
        self.site = site
        self.debug = debug
        self.title = title
        self.exceptions = [] if exceptions is None else exceptions

    def allagh(self, keimeno):
        """
        Return the wiki text with an up-to-date DEFAULTSORT template.

        The text is returned unchanged when it already carries the expected
        key, or when no key can be derived from the title (no Greek letters
        in it).  Otherwise every existing DEFAULTSORT template is removed
        and a fresh one is inserted before the interwiki links.
        """
        kleida = self._kleida()
        if not kleida:
            # An empty {{DEFAULTSORT:}} would be worse than none at all.
            return keimeno

        yparxon = _DEFAULTSORT_RE.search(keimeno)
        if yparxon and yparxon.group(1) == kleida:
            return keimeno

        palioKeimeno = keimeno
        # Remove the old template, together with the blank line after it
        # if there is one.
        keimeno = self.removeKleidaTaksinomhshsProtypo(
            keimeno, _DEFAULTSORT_REMOVAL_RE)
        keimeno = self.addKleidaTaksinomhshsProtypo(
            keimeno, self.paragwghKleidaTaksinomhshsProtypo())
        if self.debug:
            wikipedia.showDiff(palioKeimeno, keimeno)
        return keimeno

    def _titlos(self):
        """Return the page title, calling self.title if it is a callable."""
        title = self.title
        if callable(title):
            return title()
        return title

    def _kleida(self):
        """Return the bare sort key derived from the page title."""
        parametros = self._titlos().lower().translate(_FOLD_TABLE)
        return _NON_GREEK_RE.sub(u'', parametros)

    def paragwghKleidaTaksinomhshsProtypo(self):
        """Return the complete {{DEFAULTSORT:key}} template for the title."""
        return u'{{DEFAULTSORT:' + self._kleida() + u'}}'

    def removeKleidaTaksinomhshsProtypo(self, keimeno, protypo):
        """
        Remove every match of the regular expression ``protypo`` (a pattern
        string or a compiled pattern) from ``keimeno`` and return the result.
        """
        return re.sub(protypo, u'', keimeno)

    def addKleidaTaksinomhshsProtypo(self, keimeno, protypo):
        """
        Return ``keimeno`` with ``protypo`` inserted right before the
        interwiki links.

        The interwiki links are taken out, the template is appended to the
        stripped remainder (surrounded by the site family's
        category_text_separator), and the links are put back.
        """
        site = self.site
        separator = site.family.category_text_separator
        interwiki = wikipedia.getLanguageLinks(keimeno, insite=site)
        textnoiws = wikipedia.removeLanguageLinks(keimeno.strip(), site=site)
        textnoiws = textnoiws + separator + protypo + separator
        return wikipedia.replaceLanguageLinks(textnoiws, interwiki, site=site)


class EpiloghKleidaTaksinomhshsBot:
    """Run the toolkit over every page of a generator, asking before saving."""

    def __init__(self, generator, exceptions=None, acceptall=False):
        """
        generator  -- iterable of page objects to work on.
        exceptions -- compiled regular expressions; a page whose text
                      matches any of them is skipped.
        acceptall  -- when true, changes are saved without asking.
        """
        self.generator = generator
        self.acceptall = acceptall
        self.exceptions = [] if exceptions is None else exceptions
        # Load default summary message.
        wikipedia.setAction(
            wikipedia.translate(wikipedia.getSite(), msg_standalone))

    def checkExceptions(self, original_text):
        """
        If one of the exceptions applies for the given text, returns the
        substring which matches the exception. Otherwise it returns None.
        """
        for exception in self.exceptions:
            hit = exception.search(original_text)
            if hit:
                return hit.group(0)
        return None

    def treat(self, page):
        """
        Update the sort key of one page.

        Missing, redirect and locked pages are reported and skipped.
        Unless acceptall is set the user is asked to confirm each change;
        answering 'All' sets acceptall for the rest of the run.
        """
        try:
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            ccToolkit = EpiloghKleidaTaksinomhshsToolkit(
                page.site(), page.title, debug=True)
            keimeno = page.get()

            # skip all pages that contain certain texts
            match = self.checkExceptions(keimeno)
            if match:
                wikipedia.output(u'Skipping %s because it contains %s'
                                 % (page.aslink(), match))
                return

            allages = ccToolkit.allagh(keimeno)
            if allages == keimeno:
                wikipedia.output(u"No changes for %s" % page.title())
                return

            if not self.acceptall:
                choice = wikipedia.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice in ['a', 'A']:
                    self.acceptall = True
                elif choice not in ['y', 'Y']:
                    return
            page.put(allages)
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist;!" % page.aslink())
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect, skipping."
                             % page.aslink())
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked?!" % page.aslink())

    def run(self):
        """Treat every page the generator yields."""
        for page in self.generator:
            self.treat(page)


def main():
    """Parse the command line, build the page generator and run the bot."""
    # page generator
    gen = None
    PageTitles = []
    exceptions = []
    namespaces = []
    regex = False
    caseInsensitive = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if arg.startswith('-except:'):
            exceptions.append(arg[8:])
        elif arg == '-regex':
            regex = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg.startswith('-namespace:'):
            # Namespaces may be given by number or by name.
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    wikipedia.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
            else:
                wikipedia.showHelp()

    # Compile the exceptions only now: -regex and -nocase may appear after
    # the -except arguments they apply to.
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if not regex:
        exceptions = [re.escape(exception) for exception in exceptions]
    exceptions = [re.compile(exception, flags) for exception in exceptions]

    if PageTitles:
        site = wikipedia.getSite()
        gen = iter([wikipedia.Page(site, PageTitle)
                    for PageTitle in PageTitles])

    if not gen:
        wikipedia.showHelp()
    elif wikipedia.inputChoice(warning + u'\nDo you want to continue?',
                               ['yes', 'no'], ['y', 'N'], 'N') == 'y':
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = EpiloghKleidaTaksinomhshsBot(preloadingGen, exceptions)
        bot.run()


if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()