# User:Interwicket/code/iwlinks
#
# From Wiktionary, the free dictionary
# Jump to navigation Jump to search



#!/usr/bin/python
# -*- coding: utf-8  -*-
# wikipath en wiktionary User:Interwicket/code/iwlinks


import wikipedia
import re

# Spans of wikitext whose contents must not be scanned for interwiki links:
# <nowiki> sections, HTML comments and <includeonly> sections.  Matching is
# case-insensitive and, because of DOTALL, may run across line breaks.
# NOTE(review): the published copy of this pattern had lost its closing tag
# and its last two alternatives (it read '<nowiki>.*?||'); the comment and
# includeonly alternatives are reconstructed -- confirm against the bot's
# original source.
renotags = re.compile(
        r'<nowiki>.*?</nowiki>|<!--.*?-->|<includeonly>.*?</includeonly>',
        re.IGNORECASE | re.DOTALL)

# [[code:title]]: code is 2-10 characters of a-z or '-'; title is any
# non-empty run containing no square bracket and no newline (so a piped
# "|label" part, if present, is captured as part of the title).
reiwiki = re.compile(r'\[\[([a-z-]{2,10}):([^\[\]\n]+)\]\]')

# match link to a-z, any non-null title, (if |, included in title, to be removed)
# Various other errors ignored

# routine to get iwiki links from entry text
# return dict of code->title
# ignores unknown codes; ignores duplicate codes (returns last found)
# explicit deletes are returned so we can remove them and reflect that in edit summary

def getiwlinks(text, flws):
   """Return a dict of language code -> title for the interwiki links in text.

   text -- wikitext of the entry
   flws -- mapping of language code to an object exposing the attributes
           lockedwikt and deletecode

   Links whose code is not a key of flws are ignored.  Links to a code
   that is flagged lockedwikt are ignored too, unless that code is also
   flagged deletecode.  When a code occurs more than once the last
   occurrence wins.
   """

   # drop the spans matched by renotags so links inside them are not seen
   # (the replacement is the empty string)
   mt = renotags.sub('', text)
   links = { }
   for code, title in reiwiki.findall(mt):
      if code not in flws:
         continue
      if flws[code].lockedwikt and not flws[code].deletecode:
         continue
      links[code] = title
   return links

def replaceiwlinks(text, links, flw, flws):
   """Return text with its interwiki links replaced by the given links.

   text  -- wikitext of the entry
   links -- dict of language code -> title to write; not modified
   flw   -- object for the wikt being edited; this function reads
            flw.site.interwiki_putfirst(), flw.oneline and flw.attop
   flws  -- mapping of language code to an object exposing the attributes
            lockedwikt and deletecode

   Every existing [[code:title]] link whose code is a key of flws is
   removed from the text, together with the whitespace following it.
   An existing link to a locked wikt that is not flagged deletecode is
   carried over into the new set unless links already has that code.
   The new links are written as one block: codes from
   interwiki_putfirst() first, then the rest in sorted code order, on one
   line or one per line (flw.oneline), at the top or the bottom of the
   text (flw.attop).
   """

   links = links.copy() # private copy (shallow, okay)
   # proceed as in getiwlinks in finding old links, but different action
   # duplicate codes are silently elided (probably not best, but as before)
   mt = renotags.sub('', text)
   for code, title in reiwiki.findall(mt):
      if code not in flws:
         continue
      # delete the old link and any whitespace after it
      text = re.sub(r'\[\[' + code + ':' + re.escape(title) + r'\]\]\s*',
                    '', text)
      # no add or remove links to locked wikts (mostly harmless, but not worth it)
      # do remove explicit deletes
      if flws[code].lockedwikt and not flws[code].deletecode and code not in links:
         links[code] = title
   # strip WS at bottom (and top for pl.wikt)
   text = text.strip('\n ')
   # sort if needed
   linklist = [ ]
   pf = flw.site.interwiki_putfirst()
   if pf:
      for code in pf:
         if code in links:
            linklist.append("[[" + code + ':' + links[code] + "]]")
            del links[code]
   # remaining, or all in code order:
   for code in sorted(links):
      linklist.append("[[" + code + ':' + links[code] + "]]")
   # nothing to write: return the stripped text as is instead of padding it
   # with blank lines around an empty link block
   if not linklist:
      return text
   if flw.oneline:
      ls = ' '.join(linklist)
   else:
      ls = '\n'.join(linklist)
   if flw.attop:
      newt = ls + '\n' + text
   else:
      newt = text + '\n\n' + ls
   return newt


# test code

if __name__ == "__main__":

   # Manual smoke test: fetch one live page, read its interwiki links,
   # rebuild the text from them, show the diff and save the page.
   # NOTE: page.put() below writes to the live wiki.
   from reciprocal import flws
   # init all the flws: look each code up once
   # (presumably flws creates its entries on first access -- see reciprocal)
   for code in flws['en'].site.family.langs:
      flws[code]
   code = 'sw'
   title = 'cat'
   # single-argument print: same output under the Python 2 print statement
   print("sh status %s locked %s" % (flws['sh'].status, flws['sh'].lockedwikt))
   # get some page, try a few things
   page = wikipedia.Page(flws[code].site, title)
   text = page.get()
   links = getiwlinks(text, flws)
   print("%s : %s" % (title, repr(links)))
   # if 'ta' in links: del links['ta']
   # links['sw'] = title
   # so following should be no-op
   if 'sh' in links:
      del links['sh']
   newt = replaceiwlinks(text, links, flws[code], flws)
   wikipedia.showDiff(text, newt)
   page.put(newt)