User:Robert Ullmann/Pronunciation exceptions/code
Jump to navigation
Jump to search
#!/usr/bin/python # -*- coding: utf-8 -*- # wikipath en wiktionary User:Robert Ullmann/Pronunciation_exceptions/code """ This bot looks for and executes replacements, customized for each run This version looks unmatched wikisyntax and parens No command line arguments. """ import wikipedia import xmlreader import sys import re import pickle import xmldate import socket from mwapi import getwikitext def safe(s): return pickle.dumps(s)[1:-5] # work cache, record time last looked at entry # each record is key: lc:word, pickled with safe(), value is integer time() import shelve cache = shelve.open("pronex") from time import time # we want to identify trouble cases, line by line # they are applied after checking all the AF regex fixes Flags = set([ '<tt>', "{|", "//", "[[w:", "{{enPR|/", "[[Rhymes:", "[[rhymes:", "hymes:--", "hymes|-", "''US''", "''(US)''", "''UK''", "''(UK)''", "[[RP]]", "[[WEAE]]", "* [[", "* ''", "* (" ]) reenpr = re.compile(r'\{\{enPR\|(.*?)}}') reipa = re.compile(r'\{\{IPA\|(.*?)}}') resampa = re.compile(r'\{\{SAMPA\|(.*?)}}') rerfp = re.compile(r'\{\{rfp\|(.*?)}}') rederef = re.compile(r'<ref.*?/ref>') # not quite correct, but will do for now redecom = re.compile(r'<!--.*?-->') rehttp = re.compile(r'\[http:.*?\]') # match "stackable" format characters at start of lines, so we can have one space exactly restack = re.compile(r"^([:#\*]+)\s*") # match entire line is "blank" IPA, SAMPA, etc: reblank = re.compile(r"^\* ?\[\[(IPA|SAMPA|AHD)\]\]:? *//$") # exact copies of AF regex it will fix (manually copied) AFcount = 0 Prex = {} def preset(): # Pronunciate # like Regex, but applied line by line only in pronunciation sections # use ^ and $ as needed with re.M for prescreen Prex['template enPR/IPA/SAMPA'] = \ (re.compile(r'^\*? ?([^ \{\|\}/]+), /([^\{\|\}/]+)/, /<tt>([^\|\}/]+)</tt>/$', re.M), r'* {{enPR|\1}}, {{IPA|/\2/}}, {{SAMPA|/\3/}}') Prex['template enPR/IPA/SAMPA (RP, UK, US)'] = \ (re.compile(r"^\*? ?\(''(RP|UK|US)''\):? *" r'([^ \{\|\}/]+), /([^\{\|\}/]+)/, /<tt>([^\|\}/]+)</tt>/$', re.M), r'* {{a|\1}} {{enPR|\2}}, {{IPA|/\3/}}, {{SAMPA|/\4/}}') Prex['template enPR/IPA/SAMPA with {a}'] = \ (re.compile(r"^\*? ?(\{\{a\|[^\}]+\}\}):? *" r'([^ \{\|\}/]+), /([^\{\|\}/]+)/, /<tt>([^\|\}/]+)</tt>/$', re.M), r'* \1 {{enPR|\2}}, {{IPA|/\3/}}, {{SAMPA|/\4/}}') Prex['+rhymes template'] = (re.compile("'*Rhymes:'* *\[\[[Rr]hymes:English:-(?P<s>.+?)\|-(?P=s)\]\]"), r'{{rhymes|\1}}') # w/O "Rhymes:": Prex['+rhymes template w/Rhymes: in link'] = \ (re.compile("^([\*:]+) *\[\[[Rr]hymes:English:-(?P<s>.+?)\|Rhymes: -(?P=s)\]\]", re.M), r'\1 {{rhymes|\2}}') Prex['+rhymes template (Finnish)'] = (re.compile("'*Rhymes:'* *\[\[[Rr]hymes:Finnish:-(?P<s>.+?)\|-(?P=s)\]\]"), r'{{rhymes|\1|lang=fi}}') Prex['+rhymes template w/Rhymes: in link (Finnish)'] = \ (re.compile("^([\*:]+) *\[\[[Rr]hymes:Finnish:-(?P<s>.+?)\|Rhymes: -(?P=s)\]\]", re.M), r'\1 {{rhymes|\2|lang=fi}}') Prex['+rhymes template w/Rhymes: in link (French)'] = \ (re.compile("^([\*:]+) *\[\[[Rr]hymes:French:-(?P<s>.+?)\|Rhymes: -(?P=s)\]\]", re.M), r'\1 {{rhymes|\2|lang=fr}}') Prex['+rhymes template (Icelandic)'] = \ (re.compile("'*Rhymes:'* *\[\[[Rr]hymes:Icelandic:-(?P<s>.+?)\|-(?P=s)\]\]"), r'{{rhymes|\1|lang=is}}') Prex['template -Rhymes +rhymes'] = (re.compile(r'\{\{Rhymes([\|\}])'), r'{{rhymes\1') # multiple rhymes (assume language matches! ;-) Prex['add additional rhyme to template'] = \ (re.compile(r'(\{\{rhymes\|[^\}]+)\}\} *(,|or|) *\[\[[Rr]hymes:[A-Za-z -]+:-(?P<s>.+?)\| ?-(?P=s)\]\]'), r'\1|\3}}') Prex["rm /'s from enPR template"] = (re.compile(r'\{\{enPR\|/([^ /\[\]\{\}]+?)/\}\}'), r'{{enPR|\1}}') # RP, UK, and US in a wide variety of cases Prex['(RP) to {{a|RP}}'] = (re.compile(r"^\*? ?[\(\[\{']+RP[\]\)\}:']+", re.M), r'* {{a|RP}}') Prex['(UK) to {{a|UK}}'] = (re.compile(r"^\*? ?[\(\[\{']+UK[\]\)\}:']+", re.M), r'* {{a|UK}}') Prex['(US) to {{a|US}}'] = (re.compile(r"^\*? ?[\(\[\{']+US[\]\)\}:']+", re.M), r'* {{a|US}}') Prex['(italbrac RP) to {{a|RP}}'] = (re.compile(r"^\*? ?\{\{italbrac\|\[*RP\]*\}\}:?", re.M), r'* {{a|RP}}') Prex['(italbrac UK) to {{a|UK}}'] = (re.compile(r"^\*? ?\{\{italbrac\|\[*UK\]*\}\}:?", re.M), r'* {{a|UK}}') Prex['(italbrac US) to {{a|US}}'] = (re.compile(r"^\*? ?\{\{italbrac\|\[*US\]*\}\}:?", re.M), r'* {{a|US}}') Prex['IPA: [[WEAE]] to {{a|WEAE}} IPA:'] = \ (re.compile(r"^\*? ?IPA: [\(\[\{']+WEAE[\]\)\}:']+", re.M), r'* {{a|WEAE}} IPA:') Prex['(GenAm) to {{a|GenAm}}'] = (re.compile(r"^\*? ?\[\[w:G[^\|]+\|GenAm\]\]", re.M), r'* {{a|GenAM}}') Prex['(Canada) to {{a|Canada}}'] = (re.compile(r"^\*? ?[\(\[\{']+Canada[\]\)\}:']+", re.M), r'* {{a|Canada}}') Prex['(Australia) to {{a|Australia}}'] = \ (re.compile(r"^\*? ?[\(\[\{']+Australia[\]\)\}:']+", re.M), r'* {{a|Australia}}') Prex['(Aus) to {{a|Aus}}'] = (re.compile(r"^\*? ?[\(\[\{']+Aus[\]\)\}:']+", re.M), r'* {{a|Aus}}') Prex['(GenAm|US) to {{a|GenAm}}'] = \ (re.compile('^' + re.escape("* (''[[General American|US]]'')"), re.M), r'* {{a|GenAm}}') Prex['(RecPr|UK) to {{a|RP}}'] = \ (re.compile('^' + re.escape("* (''[[Received Pronunciation|UK]]'')"), re.M), r'* {{a|RP}}') # untemplated SAMPA and IPA, several combinations, also for "AHD", allow an {{a}} template in front Prex['template IPA'] = \ (re.compile(r"^\*? ?(\{\{a\|.+?\}\} *|)" r"\[*(w:IPA\||)IPA\]*:? *([/\[][^\{\|\}/\]]+?[/\]])$", re.M), r'* \1{{IPA|\3}}') Prex['template IPA -IPAchar'] = \ (re.compile(r"^\*? ?(\{\{a\|.+?\}\} *|)" r"\[*(w:IPA\||)IPA\]*:? *\{\{IPAchar\|([/\[][^\{\|\}/\]]+?[/\]])\}\}$", re.M), r'* \1{{IPA|\3}}') Prex['template SAMPA'] = \ (re.compile(r"^\*? ?(\{\{a\|.+?\}\} *|)" r"\[*(w:SAMPA\||)SAMPA\]*:? *([/\[])(<tt>|)([^\|\}/]+?)(</tt>|)([/\]])$", re.M), r'* \1{{SAMPA|\3\5\7}}') Prex['template enPR (was AHD)'] = \ (re.compile(r"^\*? ?(\{\{a\|.+?\}\} *|)\[*(w:AHD\||)AHD\]*:? *([^ \{\|\}/]+?)$", re.M), r'* \1{{enPR|\3}}') Prex['template X-SAMPA'] = \ (re.compile(r"^\*? ?(\{\{a\|.+?\}\} *|)" r"\[*(w:X-SAMPA\||)X-SAMPA\]*:? *([/\[])(<tt>|)([^\{\|\}/]+?)(</tt>|)([/\]])$", re.M), r'* \1{{X-SAMPA|\3\5\7}}') Prex['or/comma to multiple parameters in IPA template'] = \ (re.compile(r"\{\{IPA\|([^\}]+/)(, ?| or | ''or'' )(/[^\}]+)\}\}"), r'{{IPA|\1|\3}}') Prex['or/comma to multiple parameters in enPR template'] = \ (re.compile(r"\{\{enPR\|([^\}]+/)(, ?| or | ''or'' )(/[^\}]+)\}\}"), r'{{enPR|\1|\3}}') Prex['or/comma to multiple parameters in SAMPA template'] = \ (re.compile(r"\{\{SAMPA\|([^\}]+/)(, ?| or | ''or'' )(/[^\}]+)\}\}"), r'{{SAMPA|\1|\3}}') # accent templates, try to cover the A-cai/Min Nan cases and others, up to 4 Prex['+accent template 1'] = (re.compile(r"^\* \(''" r"\[*(w?:?[A-Za-z -]+\||)([A-Za-z -]+)\]*" r"''\):?", re.M), r'* {{a|\2}}') Prex['+accent template 2'] = (re.compile(r"^\* \(''" r"\[*(w?:?[A-Za-z -]+\||)([A-Za-z -]+)\]*" r", *\[*(w?:?[A-Za-z -]+\||)([A-Za-z -]+)\]*" r"''\):?", re.M), r'* {{a|\2|\4}}') Prex['+accent template 3'] = (re.compile(r"^\* \(''" r"\[*(w?:?[A-Za-z -]+\||)([A-Za-z -]+)\]*" r", *\[*(w?:?[A-Za-z -]+\||)([A-Za-z -]+)\]*" r", *\[*(w?:?[A-Za-z -]+\||)([A-Za-z -]+)\]*" r"''\):?", re.M), r'* {{a|\2|\4|\6}}') Prex['+accent template 4'] = (re.compile(r"^\* \(''" r"\[*(w?:?[A-Za-z -]+\||)([A-Za-z -]+)\]*" r", *\[*(w?:?[A-Za-z -]+\||)([A-Za-z -]+)\]*" r", *\[*(w?:?[A-Za-z -]+\||)([A-Za-z -]+)\]*" r", *\[*(w?:?[A-Za-z -]+\||)([A-Za-z -]+)\]*" r"''\):?", re.M), r'* {{a|\2|\4|\6|\8}}') # hyphenation ... Prex['+hyphenation template'] = (re.compile(r"'*Hyphenation:?'*:? *([^ \{\}]+)$", re.M), r'{{hyphenation|\1}}') Prex['middot to | in hyphenation template'] = (re.compile(r'(\{\{hyphenation\|.+?)' + u'\u00B7' + '(.+?\}\})'), r'\1|\2') Prex['hyphpt to | in hyphenation template'] = (re.compile(r'(\{\{hyphenation\|.+?)' + u'\u2027' + '(.+?\}\})'), r'\1|\2') Prex['bullet to | in hyphenation template'] = (re.compile(r'(\{\{hyphenation\|.+?)' + u'\u2022' + '(.+?\}\})'), r'\1|\2') Prex['middot (HTML) to | in hyphenation template'] = (re.compile(r'(\{\{hyphenation\|.+?)·(.+?\}\})'), r'\1|\2') # sorting enPR, IPA, (X-)SAMPA: Prex['enPR before SAMPA'] = (re.compile(r'\{\{(X-|)SAMPA\|([^\}]*)\}\}, \{\{enPR\|([^\}]*)\}\}'), r'{{enPR|\3}}, {{\1SAMPA|\2}}') Prex['IPA before SAMPA'] = (re.compile(r'\{\{(X-|)SAMPA\|([^\}]*)\}\}, \{\{IPA\|([^\}]*)\}\}'), r'{{IPA|\3}}, {{\1SAMPA|\2}}') Prex['enPR before IPA'] = (re.compile(r'\{\{IPA\|([^\}]*)\}\}, \{\{enPR\|([^\}]*)\}\}'), r'{{enPR|\2}}, {{IPA|\1}}') def trouble(s): global AFcount s2 = s = restack.sub(r'\1 ', s) # skip AF fix(es), do what it will do: for rx in Prex: s2 = Prex[rx][0].sub(Prex[rx][1], s2) if s2 != s: if AFcount < 500: print "AF will fix:" print " %s" % safe(s) print "to %s" % safe(s2) AFcount += 1 return False # as AF will do something # IPA, SAMPA, enPR are in AF.StarTemp: if s.startswith(('{{IPA|', '{{SAMPA|', '{{enPR|')): AFcount += 1 return False # "blank" non-templates, in general Regex in AF: if reblank.match(s): AFcount += 1 return False if "Manuel de Codage" in s: return False # remove rfp and contents, ref tags, comments, http links s = rerfp.sub(' ', s) if s == '{{rfap}}': return False s = rederef.sub(' ', s) s = redecom.sub(' ', s) if s.startswith('<!--') or s.endswith('-->'): return False s = rehttp.sub(' ', s) # non-templates (skip "[Aa]udio-IPA" for now) if "IPA" in s and "{{IPA" not in s and "udio-IPA" not in s: return "IPA not template" if "enPR" in s and "{{enPR" not in s: return "enPR not template" if "SAMPA" in s and "{{SAMPA" not in s and "{{X-SAMPA" not in s: return "SAMPA not template" if "AHD" in s: return "AHD found" # check sequence e = s.find("{{enPR|") i = s.find("{{IPA|") m = s.find("{{SAMPA|") if e > 0 and i > 0 and i < e: return "IPA before enPR" if i > 0 and m > 0 and m < i: return "SAMPA before IPA" if e > 0 and m > 0 and m < e: return "SAMPA before enPR" # a must be at start, and only follow wikisyntax a = s.find("{{a|") if a > 0 and s[0:a].strip(':* '): return "{a} template not at beginning" # now check templates for c in reenpr.findall(s): if not c.strip(): return 'empty enPR template' for p in c.split('|'): p = p.strip() if " or " in p: return '"or" should be multiple template parameters' if " ''or'' " in p: return '"or" should be multiple template parameters' # next is fixed by AF at present # if p.startswith('/') and p.endswith('/'): return "slashes in enPR template" for c in reipa.findall(s): if not c.strip(): return 'empty IPA template' for p in c.split('|'): p = p.strip() if p.startswith('lang='): continue if " or " in p: return '"or" should be multiple template parameters' if " ''or'' " in p: return '"or" should be multiple template parameters' if p.startswith('/'): if not p.endswith('/'): return "mismatched /'s in IPA template" elif p.startswith('['): if not p.endswith(']'): return "mismatched [ ]'s in IPA template" else: return "no / / or [ ]'s in IPA template" for c in resampa.findall(s): if not c.strip(): return 'empty SAMPA template' for p in c.split('|'): p = p.strip() if p.startswith('lang='): continue if p[:1].isdigit() and p[1:2] == '=': p = p[2:] if " or " in p: return '"or" should be multiple template parameters' if " ''or'' " in p: return '"or" should be multiple template parameters' if p.startswith('/'): if not p.endswith('/'): return "mismatched /'s in SAMPA template" elif p.startswith('['): if not p.endswith(']'): return "mismatched [ ]'s in SAMPA template" else: return "no / / or [ ]'s in SAMPA template" # some simple cases that are just flagged for flag in Flags: if flag in s: return "flag <tt><no" + "wiki>" + flag + "</no" + "wiki></tt>" # couple of other randoms if s.endswith('/'): return "line ends with /" # if s.startswith('[['): return "line starts with <no" + "wiki>[[</no" + "wiki>" # if s.startswith('('): return "line starts with (" # if s.startswith('{{'): return "line starts with <no" + "wiki>{{</no" + "wiki>" # if s.startswith("''") and not s.endswith("''"): return "line starts with <no" + "wiki>''</no" + "wiki>" # next rule is fixed by AF in most cases (one of these at start), fix this rule sometime # if ('{{enPR|' in s or '{{IPA|' in s or '{{SAMPA|' in s) and not s.startswith('*'): # return 'line does not start with *' return False # (sporked from Tbot/script, no need to keep up to date): # table of scripts, each is lowest character code point, highest code + 1, ISO script Scs = [ (0x0080, 0x0250, 'Latin'), (0x0250, 0x02B0, 'IPA'), (0x0370, 0x0400, 'Greek'), (0x0400, 0x0530, 'Cyrillic'), (0x0530, 0x0590, 'Armenian'), (0x0590, 0x0600, 'Hebrew'), (0x0600, 0x0700, 'Arabic'), (0x0700, 0x0750, 'Syriac'), (0x0750, 0x0780, 'Arabic Ext'), (0x0900, 0x0980, 'Devanagari'), (0x0980, 0x0A00, 'Bengali'), (0x0C00, 0x0C80, 'Telugu'), (0x0D00, 0x0D80, 'Malayalam'), (0x1A00, 0x1100, 'Georgian'), (0x1E00, 0x1F00, 'Latin Ext'), (0x1F00, 0x2000, 'Greek Ext'), (0x3040, 0x30A0, 'Hiragana'), (0x30A0, 0x3100, 'Katakana'), (0x3400, 0xA000, 'Han'), # Han Ext A and Unified (0xAC00, 0xD800, 'Hangeul'), (0x20000, 0x2A6D7, 'Han Ext B') ] # Han Ext B def tkey(word): # generate a TOC key for a given word # simple case first, also handles '' if word[:1] <= 'z': return word[:1] a = ord(word[0:1]) if a >= 0xd800 and a < 0xdc00: if len(word) < 2: return word # ouch! b = ord(word[1:2]) # "UTF-16" crap: a = (a - 0xd800) * 1024 + (b - 0xdc00) + 0x10000 sc = '' for low, high, scode in Scs: if a >= low and a < high: sc = scode break if not sc: print "no match for script for char code %x" % a return word[:1] return sc def main(): global AFcount socket.setdefaulttimeout(40) # list of entry names to ignore Stops = set() reports = { } preset() # make sure we are logged in site = wikipedia.getSite() site.forceLogin() # read Stops page = wikipedia.Page(site, "User:Robert Ullmann/Pronunciation exceptions/stops") text = page.get() for s in re.findall(r'\* \[\[(.*?)\]\]', text): Stops.add(s) print 'found %d stops' % len(Stops) # get XML dump dump = xmlreader.XmlDump("en-wikt.xml") entries = 0 probs = 0 fixed = 0 reps = 0 replimit = 1000 cis = 0 lasttab = 0 rem = """ remainder, one per link, not checked against current, one reason for exception ---- """ # testing test = False tmod = 20 if test: replimit /= tmod print "in test mode" for entry in dump.parse(): text = entry.text title = entry.title if title.find(':') >= 0: continue # if title.find('/') >= 0: continue if not title: continue # ? entries += 1 if entries % 10000 == 0: print "%d entries, %d problems" % (entries, probs) # if test and title[0:1] != 'c': continue if test and entries % tmod != 0: continue # skim a lot of the db for now # if entries % tmod != 0: continue if title in Stops: continue # screen entries: tag = False inPron = False for line in text.splitlines(): if '=Pronunciation=' in line: inPron = True continue if line.startswith('='): inPron = False if not inPron: continue a = trouble(line) if a: if line.startswith('{|') and entries < 300000: lasttab = entries tag = True break # now see if it is something that should be reported: if tag: ckey = safe(title) # must be string for bsd dbm if ckey in cache: last = cache[ckey] if last > time() - (70 * 24 * 3600): print "%s in 70 day cache, not checked" % safe(title) continue probs += 1 # ... pick up current version from en.wikt if reps < replimit: print '%s is possible problem, getting current entry' % safe(title) try: page = wikipedia.Page(site, title) # text = page.get() text = getwikitext(page) except wikipedia.NoPage: print "Can't get %s from en.wikt!" % safe(page.aslink()) text = '' except wikipedia.IsRedirectPage: print safe(title), 'is now a redirect page' text = '(redirect page)' # will be treated as fixed and added to cache except KeyboardInterrupt: raise KeyboardInterrupt except Exception, e: print "unknown exception, maybe timeout" continue # do this again next time else: print '%s is possible problem' % safe(title) rem += '* [[' + title + "]] ''" + a + "''\n" if not text: continue # check each line for trouble act = '' inPron = False for line in text.splitlines(): if '=Pronunciation=' in line: inPron = True continue if line.startswith('='): inPron = False if not inPron: continue a = trouble(line) if a and a not in act: act += ', ' + a # if fixed, add to cache so we don't keep re-checking if not act: print "%s has been fixed" % safe(title) cache[ckey] = time() # entry has been fixed for now cis += 1 if cis % 20 == 0: cache.sync() continue else: continue # don't write any change to entry, report: if act: act = " ''" + act.strip(', ') + "''" if reps < replimit: xp = wikipedia.Page(site, title) url = xp.urlname() repline = \ "* [[%s]] [http://en.wiktionary.org/w/index.php?title=%s&action=edit§ion=SECTXX (edit)] %s" % (title, url, act) # go isolate the lines s = 0 se = 0 ts = '' inPron = False for line in text.splitlines(): if line.startswith('='): s += 1 if '=Pronunciation=' in line: inPron = True continue if line.startswith('='): inPron = False if not inPron: continue if trouble(line): if not se: se = s ts += ', ' + trouble(line) repline += '\n*: <tt><no' + 'wiki>' + line + '</no' + 'wiki></tt>' print reps, safe(title), safe(line) if reps < replimit: repline = repline.replace('SECTXX', "%d"%se) reports[title] = repline reps += 1 if test and reps > replimit: break continue # no corrections here! print "%d entries, %d problems" % (entries, probs) cache.close() print "last table at entry %d" % lasttab if not test: page = wikipedia.Page(site, "User:Robert Ullmann/Pronunciation exceptions") else: page = wikipedia.Page(site, "User:Robert Ullmann/Pronunciation exceptions/test") try: oldrep = page.get() except wikipedia.NoPage: pass ss = ', '.join(sorted(Stops)) fs = '' for flag in sorted(Flags): fs += ", <tt><no" + "wiki>" + flag + "</no" + "wiki></tt>" fs = fs.lstrip(", ") report = """ '''occurances of pronunciation section exceptions''' ---- * from XML dump as of %s, checked against live wiki {{subst:CURRENTDAY}} {{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}} * see talk page for rules in effect * checks may not be perfect at this point * entries are not listed if [[User:AutoFormat|AutoFormat]] would fix something, though perhaps not entirely * total AF will fix: %d * some entries are listed as "stops" and not shown * stops in effect: %s *: from [[User:Robert Ullmann/Pronunciation exceptions/stops]] * specific strings flagged: %s * "blank" IPA, SAMPA, etc (i.e. "* [[SAMPA]]: //") are not reported * %d total problems, limit of %d shown, remainder listed in [[User:Robert Ullmann/Pronunciation exceptions/remains]] Please do section edit and remove completed entries, the automation will then recheck them. If you do most of a section but not quite all, feel free to just blank the section, any leftovers will get picked up again. ---- <div class="floatright" style="margin-top:1.5em">__TOC__</div><div class=plainlinks> """ % (xmldate.enXMLdate, AFcount, ss, fs, reps, replimit) if test: report += "'''this is a test run, you want to look at [[User:Robert Ullmann/Pronunciation exceptions]]'''\n" prev = '' s = 0 i = 1 for t in sorted(reports): if tkey(t) != prev: report += '\n==' + tkey(t) + '==\n\n' prev = tkey(t) s = 0 i = 1 s += 1 if s > 9: i += 1 report += '\n==' + tkey(t) + ' (%d)==\n\n' % i s = 0 report += reports[t] + '\n' wikipedia.setAction("regenerate, add more") page.put(report) if not test: wikipedia.setAction("updating remainder") page = wikipedia.Page(site, "User:Robert Ullmann/Pronunciation exceptions/remains") page.put(rem) # done if __name__ == "__main__": try: main() finally: wikipedia.stopme()