# Source: "User:Visviva/transclusion.py" from en.wiktionary.org.
# (Wiki page chrome -- "Jump to navigation" / "Jump to search" -- removed;
# it was copy-paste residue, not part of the program.)
import re
import re
import xmlreader
import urllib
import sys
from htmlentitydefs import name2codepoint
# ---- module-level configuration ----
magicwords=[]  # reserved for MediaWiki magic words; currently unused
# Parser functions that rinse() knows how to evaluate.
parserfunctions=set(["#if","#ifeq","#switch","#expr","#ifexist","lc","ucfirst","ns"])
pipescape="(\[\[[^\]\{\}]*?(\{\{.*?\}\})*?[^\]\{\}]*?)\|([^\[\{\}]*?(\{\{.*?\}\})*?[^\[\{\}]*?\]\])" #regex for pipes in brackets
# Cached list of English entry titles, one per line; used by parse_list().
entitlesfile="D:\Code\\en_titles.txt"
if __name__ == '__main__':
    # NOTE(review): this guard sits *above* the function definitions, so it
    # cannot call parse_list() directly (NameError).  Importing the file as
    # the "transclusion" module executes the whole file a second time with
    # __name__ != '__main__', after which transclusion.parse_list exists.
    import transclusion
    transclusion.parse_list(sys.argv[1]) #will create a file called "wikt_EN_definitions.txt" in the current directory
def get_stuff(dumpfile="wikt.bz2"):
    """Scan a Wiktionary XML dump and build the module-global template store.

    Populates the globals ``Templates`` (template name -> utf-8 source text,
    plus a few magic-word entries) and ``alltitles`` (every page title in
    the dump), resolving one level of template redirects along the way.
    Returns the pair (Templates, alltitles).
    """
    import datetime
    dump=xmlreader.XmlDump(dumpfile)
    global Templates
    global alltitles
    Templates={}
    alltitles=set()
    for page in dump.parse():
        # capitalize() lowercases everything after the first character, so
        # this matches the "Template:" prefix case-insensitively.
        if "Template:" in page.title.capitalize():
            key=page.title.split("emplate:",1)[1].encode("utf-8","ignore")
            Templates[key]=page.text.encode("utf-8","ignore")
        alltitles.add(page.title)
    # Resolve one level of #REDIRECT [[Template:...]] indirection in place.
    for name in Templates:
        redirect=re.search("(?i)\#redirect\s*\[\[template\:(.*?)\]\]",Templates[name])
        if not redirect:
            continue
        try:
            print("Redirect from "+name+" to "+redirect.group(1))
            Templates[name]=str(Templates[redirect.group(1)])
        except Exception:
            # Redirect target transcludes a non-Template: page; these are
            # seldom found in definitions, so just report and move on.
            print("Error on "+name)
    # Minimal stand-ins for MediaWiki magic words.
    Templates["PAGENAME"]="page.name"
    Templates["CURRENTYEAR"]=str(datetime.date.today().year)
    Templates["NAMESPACE"]=""
    return Templates,alltitles
def rinse(text="",Templates={},alltitles=set()): #"text" must be a full, single template "{{...}}"
    """Expand one template invocation "{{...}}" into (rendered_text, args).

    Looks the template name up in ``Templates``, strips non-transcluded
    markup, parses the invocation's arguments, special-cases a few known
    templates, substitutes {{{...}}} parameters, recursively rinses nested
    templates, and finally evaluates the supported ParserFunctions
    (#if, #ifeq, #switch, #expr, #ifexist, lc, ucfirst, ns).

    NOTE(review): mutable default arguments are shared across calls; safe
    only because they are never mutated here.
    NOTE(review): indentation reconstructed from an unindented paste.
    """
    text=text.strip()
    args={}
    # Reject anything that is not a complete, single {{...}} wrapper.
    if text[0:2] != "{{" or text[-2:] != "}}":
        print "Invalid template.",Templates["PAGENAME"]
        return text,args
    # get & remove template name
    templatename=re.split("(?<=[^\\\]{1})\|",text)[0][2:].replace("}","").strip()
    text=text[2:-2]+"|" #chop off closing brackets for uniformity
    if templatename not in Templates:
        # Unknown template: leave a visible [[Template:...]] link, unless it
        # is a ParserFunction we can evaluate below.
        if templatename.split(":")[0] not in parserfunctions: return "[[Template:"+templatename+"]]",args
        elif ":" not in templatename and ("[" in templatename or "]" in templatename): return "{{"+text[:-1]+"}}",args
        else: template=text
    else: template=Templates[templatename]
    # Strip markup that is not transcluded.
    template=re.sub('\<noinclude\>[\s\S]*?\<\/noinclude\>','',template)
    template=re.sub('\<\!\-\-[\s\S]*?\-\-\>','',template)
    if "<onlyinclude>" in template and "</onlyinclude>" in template:
        template=template.split("<onlyinclude>")[1].split("</onlyinclude>")[0]
    template=template.replace("<includeonly>","").replace("</includeonly>","")
    #get args
    args=get_args(text)
    # print str(args)
    # some very special cases
    if templatename=="isValidPageName":
        # Empty or bracket-containing names are invalid.
        if not args["1"]: template=""
        for badchar in ["[","]","{","}"]:
            if badchar in args["1"]:
                template=""
                break
        if template: template="valid"
    elif templatename=="wlink": # Can get away with this because we just want plain text
        # NOTE(review): found is reset on every pass, so after the loop it
        # reflects only the last key ("w"); later keys also overwrite
        # template.  Looks buggy, but kept as-is.
        for arr in ["2","1","w"]:
            found=False
            if arr in args.keys():
                template=args[arr]
                found=True
        if not found: template=""
    elif templatename=="form of" or templatename=="form_of":
        if "2" in args.keys():
            template=args["1"]+" of "+args["2"]+"."
        else:
            template="Form of "+args["1"]+"."
        if "nocap" in args.keys():
            if args["nocap"]:
                # print "Nocap:"+ args["nocap"]
                template=template[0].lower()+template[1:]
            else:
                template=template[0].upper()+template[1:]
        if "nodot" in args.keys():
            if args["nodot"]:
                template=template[:-1]
    #Render passed args, substituting values gleaned above
    template=render_args(template,args)
    # Re-render while stray {{{ / }}} fragments remain, until a fixed point.
    while re.search("[^\{]+\{\{\{[^\{]+",template) or re.search("[^\}]+\}\}\}[^\}]+",template):
        formerly=template
        template=render_args(template,args)
        if template == formerly:
            break
    #second, templates and magic words
    for r in [x[1] for x in re.findall("((?<=[^\{]{1})|(?<=[\{]{2}))(\{\{[^\{\}]+?\}\})"," "+template+" ")]:
        r=re.sub(pipescape,"\\1\|\\3",r)
        rinsed,argybargy=rinse(r,Templates=Templates)
        if rinsed != template:
            template=template.replace(r,rinsed)
            template=template.replace("\|","|")
            # Return after the first successful nested expansion; the
            # caller's intake()/cycle() loop picks up the rest.
            # NOTE(review): placement of this return reconstructed -- it must
            # be conditional, or the ParserFunction code below is unreachable.
            return template,args
    #is this a ParserFunction?
    if templatename.split(":")[0] in parserfunctions:
        pfunction=templatename.split(":")[0]
        pargs=[n.strip() for n in re.split("(?<=[^\\\]{1})\|",template.split(":",1)[1])]
        if pfunction=="#if":
            # {{#if:test|then|else}} -- else defaults to "".
            if pargs[0].strip():
                template=pargs[1]
            else:
                try: template=pargs[2]
                except IndexError: template=""
        elif pfunction=="#ifeq":
            # {{#ifeq:a|b|then|else}}
            if pargs[0].strip()==pargs[1].strip():
                template=pargs[2]
            else:
                try: template=pargs[3]
                except IndexError: template=""
        elif pfunction=="#switch":
            # {{#switch:key|case=value|...|#default=value|fallback}}
            options=[p.split("=")[0] for p in pargs[1:]]
            if pargs[0] in options:
                reg=re.search("[\{\|]+?\s*"+pargs[0].strip()+"\s*\=(.*?)\s*[\}\|]+",template)
                if reg:
                    template=reg.group(1)
                else:
                    # Fall-through case syntax (|key|...=value).
                    reg=re.search("[\{\|]+?"+pargs[0].strip()+"\|[\{]+?\=(.*?)[\}\|]+",template)
                    try:
                        template=reg.group(1)
                    except:
                        template=""
            elif "#DEFAULT" in options:
                template=pargs[options.index("#DEFAULT")+1].split("=",1)[1]
            elif "#default" in options:
                template=pargs[options.index("#default")+1].split("=",1)[1]
            elif pargs[-1].strip():
                # A bare trailing value acts as the default case.
                if "=" not in pargs[-1]:
                    template=pargs[-1]
                else:
                    template=""
        elif pfunction=="lc":
            template=template.split(":",1)[1][:-1].lower()
        elif pfunction=="ucfirst":
            template=pargs[1].capitalize()
        elif pfunction=="ns":
            # Only namespace 0 (main, rendered as "") is modeled.
            if pargs[0]=="0": template=""
            else: template=pargs[0]
        elif pfunction=="#expr":
            # SECURITY NOTE(review): exec on wiki-supplied text is arbitrary
            # code execution; acceptable only for trusted local dumps.
            exec("template="+pargs[0])
            template=str(template)
        elif pfunction=="#ifexist":
            if pargs[0].strip() in alltitles:
                template=pargs[1]
            else:
                try:
                    template=pargs[2]
                    # print "template:" ,pargs[2]
                except IndexError:
                    template=""
    if template and template[-1]=="|": #undo the hack from the beginning
        template=template[0:-1]
    return template,args
def cycle(text="",Templates={},alltitles=set(),args={},limit=1000):
    """Run intake() over *text* until no template markup remains, a fixed
    point is reached, or *limit* passes have been made.

    Returns (clean, text): clean is False when any brace or underscore
    survives in the final text.
    """
    previous=""
    passes=0
    while "{{" in text and "}}" in text and passes < limit:
        text,args=intake(text,Templates,alltitles,args)
        if text == previous:
            break #nothing left that renders
        # A lone { or } mid-text means a rendering pass went wrong.
        if re.search("[^\{]+\{[^\{]+",text) or re.search("[^\}]+\}[^\}]+",text):
            print("Invalid output. "+Templates["PAGENAME"])
            # break
        previous=text
        passes+=1
    if "{" in text or "}" in text or "_" in text:
        return False,text
    return True,text
def intake(text="",Templates={},alltitles=set(),args={}): #For raw or returned text
    """Strip non-transcluded markup from *text*, then repeatedly rinse()
    the innermost template until nothing changes.

    Returns (text, args) where args is the argument dict of the last
    template rinsed, or {} when there was nothing to do.

    BUGFIX(review): the original early-exit test was
    ``len(text.split("{{"))==0`` -- str.split() always returns at least one
    element, so it could never fire and template-free text fell through to
    the (pointless) render/search machinery.  Replaced with a direct
    substring test; this also means brace-imbalance is no longer reported
    for texts with no "{{" at all, which avoids a KeyError on the default
    empty Templates dict.
    """
    text=re.sub('\<noinclude\>[\s\S]*?\<\/noinclude\>','',text)
    text=re.sub('\<\!\-\-[\s\S]*?\-\-\>','',text)
    text=text.replace("<includeonly>","").replace("</includeonly>","")
    if "{{" not in text:
        # print "Nothing to parse."
        return text,{}
    if text.count("{") != text.count("}"):
        print("Unmatched bracket. "+Templates["PAGENAME"])
        return text,{}
    text=render_args(text,args)
    # Matches an innermost (no nested braces) {{...}} invocation.
    templatecatcher="((?<=[^\{]{1})|(?<=[\{]{2}))(\{\{[^\{]+?\}\})"
    while re.search(templatecatcher," "+text):
        text=re.sub(pipescape,"\\1\|\\3",text) #escape any pipes inside bracketed links
        r=re.search(templatecatcher," "+text).group(2)
        rinsed,args=rinse(r,Templates)
        text=text.replace(r,rinsed)
        text=text.replace("\|","|")
        if r == rinsed: break  # no progress -- avoid an infinite loop
    return text,args
def balanced_triples(startval=3,str=""):
    """Return True when *startval* plus the number of "{{{" openers in the
    string equals the number of "}}}" closers.

    Used by render_args() to decide whether a reassembled parameter
    reference is complete.

    NOTE(review): the second parameter is (unfortunately) named ``str``,
    shadowing the builtin; the name is kept so existing keyword callers
    keep working.  The local previously named ``open`` (shadowing the
    builtin open()) has been renamed.
    """
    opening=startval+str.count("{{{")
    closing=str.count("}}}")
    return opening == closing
def render_arg(r,args,template):
    """Render one {{{...}}} parameter reference *r* inside *template*.

    Resolves the parameter's name (expanding any templates nested inside
    the name via cycle()), then substitutes the caller-supplied value from
    *args* -- or the reference's default, or a "_name_" placeholder -- into
    *template* and returns the result.

    NOTE(review): relies on module globals Templates/alltitles created by
    get_stuff(); raises NameError if called before they exist.
    NOTE(review): indentation reconstructed from an unindented paste.
    """
    argname=re.split("(?<=[^\\\]{1})\|",r)[0]
    # Peel off the surrounding triple braces, if present.
    if argname[-3:] == "}}}":
        argname=argname[:-3]
    if argname[:3] == "{{{":
        argname=argname[3:]
    if "{{" in argname or "}}" in argname:
        if argname.count("}}") == argname.count("{{"):
            null,argname=cycle(argname,Templates,alltitles,args) #Can't do anything until any functions/templates in the argument name are dealt with
        else: #if unbalanced, we must have cut too soon
            # Re-attach the pipe-separated pieces until braces balance.
            argparts=re.split("(?<=[^\\\]{1})\|",r)
            x=1
            while argname.count("{{") != argname.count("}}"):
                try:
                    argname=argname+"|"+argparts[x]
                except IndexError:
                    # NOTE(review): pieces exhausted while still unbalanced --
                    # the loop condition can no longer change, so this loops
                    # (and prints) forever.  Latent hazard, kept as-is.
                    print "We have a problem:", argname
                x+=1
            null,argname=cycle(argname,Templates,alltitles,args)
    # NOTE(review): re.escape() protects regex metacharacters for the re.sub
    # calls below, but the literal .replace() of "{{{"/"}}}" right after it
    # cannot match the escaped form; effective only for brace-free names.
    argname=re.escape(argname)
    argname=argname.replace("{{{","").replace("}}}","").strip()
    if argname in args:
        # Caller supplied a value: it wins over any default.
        template=re.sub("\{\{\{"+argname+"\|.*?\}\}\}",args[argname],template).strip()
        template=template.replace("{{{"+argname+"}}}",args[argname]).strip()
    else:
        # No value: fall back to the inline default, or mark it as _name_.
        template=re.sub("\{\{\{"+argname+"\|(.*?)\}\}\}","\\1",template).strip()
        template=template.replace("{{{"+argname+"}}}","_"+argname+"_").strip() #Bad hack
    return template
def parse_list(dumpfile="C:\Code\\wikt.bz2",limit=0):
    """Walk a Wiktionary XML dump and write tab-separated lines of
    (title, part-of-speech, cleaned definition) for English entries to
    wikt_EN_definitions.txt, then alphabetize a results file and prepend
    a license header.

    limit > 0 stops after that many dump pages (useful for testing).

    NOTE(review): indentation reconstructed from an unindented paste.
    NOTE(review): the alphabetize stage at the bottom reads and rewrites
    "parsed_titles.txt", NOT the "wikt_EN_definitions.txt" written above --
    looks like a stale filename; confirm before trusting the sorted output.
    """
    import xmlreader
    English=set()
    try: #in case the previous attempt was aborted, reuse data
        import transclusion
        Templates=transclusion.Templates
        alltitles=transclusion.alltitles
        print len(transclusion.Templates),len(transclusion.alltitles) #loaded as globals?
        if not len(transclusion.Templates) or not len(transclusion.alltitles): #Zeroed?
            print "Reloading templates and title list."
            Templates,alltitles=transclusion.get_stuff()
    except:
        print "Getting templates and title list."
        Templates,alltitles=transclusion.get_stuff()
    contemplates={"context":""} # for holding all members of the {{context}} family
    # Pass 1: templates that invoke {{context}} directly; remember their label.
    for t in Templates:
        if "{{context" in Templates[t]:
            temptext=re.sub("\[\[[^\]]*?\|(.*?)\]\]","\\1",Templates[t])
            labelmatch=re.search("label\=([^\|\}]*)",temptext)
            if not labelmatch:
                label=t
            else:
                label=labelmatch.group(1)
            contemplates[t]=label
    # Pass 2: templates whose body starts with a known context template.
    for t2 in Templates: #some templates use {{context}} at secondhand
        if t2 in contemplates: continue
        workingtemplate=re.sub("\<noinclude\>[\s\S]*?\<\/noinclude\>","",Templates[t2])
        workingtemplate=re.sub("\<.*?\>","",workingtemplate).strip()
        if workingtemplate[0:2] != "{{": continue
        else:
            if workingtemplate[2:].startswith(tuple(contemplates.keys())):
                labelmatch=re.search("label\=([^\|]+.*)",Templates[t2])
                if labelmatch:
                    label=re.sub("\[\[[^\]]*?\|","",labelmatch.group(1)).split("|")[0]
                else:
                    label=t2
                contemplates[t2]=label
    print "Identified "+str(len(contemplates))+" context templates."
    # Optional cache of known-English titles, one per line.
    try:
        English=set(open(entitlesfile).read().split("\n")) #just as a timesaver
    except:
        pass
    # print len(English)
    # Truncate the output file, then reopen it for appending.
    writefile=open("wikt_EN_definitions.txt","w")
    writefile.close()
    writefile2=open("wikt_EN_definitions.txt","a")
    # badfile=open("bad_pos.txt","w")
    limitcounter=0
    dump=xmlreader.XmlDump(dumpfile)
    for entry in dump.parse():
        limitcounter+=1
        if limit and limitcounter > limit: break
        if entry.title not in English and "==English==" not in entry.text: continue
        if ":" in entry.title: continue  # skip non-mainspace pages
        # try: print entry.title.encode('utf-8','ignore')
        # except: pass
        # Isolate the ==English== section.
        try:
            section=re.split("\n\=\=[^\=]{1}",entry.text.split("English==",1)[1])[0]
        except: continue
        Templates["PAGENAME"]=entry.title.encode("utf-8","ignore")
        # Split the section on ===...===== part-of-speech headers.
        posses=re.split("\n[\=]{3,5}(?=[^\=]{1})",section)[1:]
        valids=["noun","proper noun","verb","adjective","adverb","article","preposition","conjunction","determiner","letter","symbol","initialism","acronym","abbreviation","cardinal number","ordinal number","numeral","pronoun","particle","suffix","prefix","confix","infix","circumfix","interfix","interjection","phrase","proverb","number","contraction","idiom","affix"]
        print entry.title.encode("utf-8","ignore")
        for p in posses:
            if "\n#" not in p: continue  # no definition lines under this header
            pos=p.split("=")[0].replace("{","").replace("}","").capitalize().encode("utf-8","ignore")
            pos=pos.split("|")[0]
            if pos.lower() not in valids:
                continue
            # One definition per leading "#" line (sub-items #:, ##, #* excluded).
            defs=[e.split("\n")[0].strip() for e in re.split("\n[\#]+(?=[^\:\#\*]{1})",p)[1:]]
            for d in defs:
                d=re.sub("\[\[[^\]]*?\|","",d) #remove pipes now, since we're not going to want them anyway
                d=unescape(d)
                d=d.replace("etyl|","").replace("non-gloss definition|","") #change etyl to basic template.
                d=re.sub("\{\{i\|(.*?)\}\}","_\\1_",d)
                d=re.sub("\{\{.*term\|([^\|\}]*)(.*?)\}\}","_\\1_",d) # term and en-term
                begone=["jump\|","rf.*?","cattag.*?",]
                for begonia in begone:
                    d=re.sub("\{\{"+begonia+".*?\}\}","",d)
                d=re.sub("\{\{IPA\|(.*?)\}\}","\\1",d)
                d=re.sub("\<\!\-\-[\s\S]*?\-\-\>","",d)
                d=re.sub("\<ref.*?\>.*?\<\/ref\>","",d)
                # Replace context templates with their parenthesized labels.
                chunks=d.split("{{")
                rebuilt=chunks[0]
                for chunk in chunks[1:]: #context template hunting
                    tempname=chunk.split("|")[0].split("}")[0].strip()
                    if tempname in contemplates:
                        chunk=chunk.replace(tempname,contemplates[tempname])
                        # NOTE(review): result of the next line is immediately
                        # overwritten by the line after it; dead code kept as-is.
                        chunkparts=re.sub("[\|\{]*\=[\|\}]*","",chunk)
                        chunkparts=chunk.split("}}")
                        try:
                            chunkparts[0]=re.sub("\|.*?\=[^\|^\}]*","",chunkparts[0])
                            chunk=chunkparts[0].replace("|_|"," ").replace("|",", ")
                            rebuilt+="("+chunk+")"+chunkparts[1]
                        except IndexError:
                            continue
                    else: rebuilt+="{{"+chunk
                rebuilt=rebuilt.replace("(, ","(") #final tidy
                d=rebuilt
                # print d.encode('utf-8','ignore')
                try: #now we transclude what we can...
                    okay,newdefline=cycle(d,Templates,alltitles,limit=25)
                    if not okay:
                        if "}" not in newdefline or "{" not in newdefline:
                            d=newdefline.replace("}","").replace("{","") #the problem is probably stray brackets
                    else:
                        if "[[Template" not in newdefline: #this almost always means trouble, ergo if present, skip
                            d=newdefline
                except: pass #cycle() is still throwing some errors. If that happens, we just do it the hard way.
                d=d.replace("qualifier|","").replace("ib|","").replace("italbrac|","")#common forms of uglitude
                useless=["Template","Image","File","Category"] #template droppings, images
                for u in useless:
                    d=re.sub("(?i)\[\["+u+"\:[^\]]*?\]\]","",d)
                d=re.sub("\[\[[^\]]*?\|","",d) #visible text only, please
                d=re.sub("\[http[^\]\s]+?\s*(.*?)\]","\\1",d)
                d=re.sub("\(rf.*?\)","",d)
                d=re.sub("\|[^\|]+?\=[^\}\|]*","",d)
                d=re.sub("\<.*?\>","",d) # <span> et al.
                d=d.replace("[[","").replace("]]","")
                d=d.replace("{{","(").replace("}}",")")
                d=d.replace("\t"," ") # no valid use for tabs on definition line
                d=d.replace(" of|"," of ")
                d=d.replace("from=","from ") # Surname /given name templates
                d=d.replace("notcomparable","not comparable")
                d=d.replace("(,)", "")
                # Words after which a template-pipe comma is spurious.
                nocommas=["of","from","mostly","chiefly","usually","often","rarely","seldom","sometimes","extremely","markedly","or","and","except"]
                for n in nocommas:
                    d=d.replace(" "+n+","," "+n+" ").replace("("+n+",","("+n+" ")
                d=d.replace("'''",'"') #Most common use of explicit boldface (heaven knows why...)
                d=d.replace("''(","(").replace(")''",")").replace("(''","(").replace("'')",")")
                d=d.replace(":''",":").replace("):",")")
                d=d.replace("|_|"," ").replace("|",", ")
                d=d.replace('""','"')
                d=d.replace("''","_") #standard plain-text code
                d=d.replace(" )",")").replace("( ","(")
                d=re.sub("\s+"," ",d) #normalize spacing
                d=d.strip()
                try:
                    d=d[0].capitalize()+d[1:]
                except IndexError: d=d.capitalize()
                try:
                    d=d.encode('utf-8','ignore')
                except UnicodeDecodeError:
                    pass
                try:
                    line="\t".join([entry.title.encode('utf-8','ignore'),pos,d])
                    # print line
                except UnicodeDecodeError: continue
                writefile2.write(line+"\n")
    # badfile.close()
    writefile2.close()
    #alphabetize
    print "Alphabetizing..."
    lines=[(x.split("\t",1)[0],x) for x in open("parsed_titles.txt").read().split("\n")]
    lines.sort()
    fileheader="****\n\nThis is a text dump of definitions from Wiktionary, http://en.wiktionary.org, which is licensed under the GNU Free Documentation License. See http://en.wiktionary.org/w/index.php?title=PAGENAME&action=history for the full list of contributors to each entry. \n\n****\n\n"
    writefile=open("parsed_titles.txt","w")
    writefile.write(fileheader)
    for line in lines:
        writefile.write(line[1]+"\n")
    writefile.close()
    make_dictionarylike() # just for fun
def make_dictionarylike(file="parsed_titles.txt",outfile="dictionarylike.txt",searchterm=""):
    """Collapse the tab-separated (title, pos, definition) rows in *file*
    into one dictionary-style wikitext line per headword, sorted and
    written to *outfile*.

    NOTE(review): ``searchterm`` is accepted but never used.
    NOTE(review): ``file`` shadows the Python 2 builtin of the same name.
    NOTE(review): entries are flushed only when the headword changes, so
    the very first flush stores a stub under sortkey "" and the file's
    final entry is never written out.
    """
    currentword=""
    currentpos=""
    currententry=""
    sortkey=""
    outlines={}  # sortkey -> finished wikitext line
    # Skip the "****"-delimited license header when present.
    try:
        lines=open(file).read().split("****",2)[2].split("\n")
    except IndexError:
        lines=open(file).read().split("\n")
    for line in lines:
        parts=line.split("\t")
        if len(parts) != 3: continue  # malformed row
        if not parts[2].strip(): continue  # empty definition
        try:
            definition=parts[2][0].upper()+parts[2][1:]
        except IndexError:
            definition=parts[2].capitalize()
        if re.search("\(.*?\)\W*\n",parts[2]+"\n"):
            continue #skip any words that have a parenthesized (templated) definition only
        elif "participle of" in parts[2] or "tense of" in parts[2] or "past of" in parts[2] or re.search("form of .+\.",parts[2]) or "spelling of" in parts[2]: #Get thee gone, inflected forms!
            continue
        if currentword != parts[0]:
            # New headword: flush the previous entry with an inline edit link.
            outlines[sortkey]="*"+currententry+"<sup>[http://en.wiktionary.org/w/index.php?action=edit&title=%s e]</sup>\n" % urllib.quote(currentword)
            currentword=parts[0]
            currentpos=parts[1]
            currententry="'''"+currentword+"'''. ''"+currentpos+"''. "
            count=1
        elif currentpos != parts[1]:
            # Same headword, new part of speech: restart sense numbering.
            currentpos=parts[1]
            count=1
            currententry+="''%s''. " % currentpos
        else:
            count+=1
        currententry+="'''"+str(count)+".''' "+definition
        sortkey=re.sub("[^\w]","",currentword).lower().strip()+" "+currentword #Use fulll current word for tiebreaking only
        # Ensure every sense ends with ". ".
        if currententry.strip()[-1] != ".":
            currententry+=". "
        elif currententry[-1] != " ":
            currententry+=" "
    writefile=open(outfile,"w")
    sortkeys=outlines.keys()
    sortkeys.sort()  # Python 2 idiom: keys() returns a sortable list
    for s in sortkeys:
        writefile.write(outlines[s])
    writefile.close()
def page_from_word(word="",infile="dictionarylike.txt",outfile="daypage.txt"):
    """Build a two-column HTML "page of the day" centered on *word* from
    the file produced by make_dictionarylike(), writing it to *outfile*.

    With no word given, the current Word of the Day is scraped from the
    en.wiktionary main page, and (in that case only) the result is also
    uploaded to User:Visviva/Page of the day via the pywikibot framework.
    """
    if not word:
        import urllib2
        mainpage=urllib2.urlopen(urllib2.Request("http://en.wiktionary.org/wiki/Wiktionary:Main_Page",'',{'User-agent' : 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'})).read()
        word=mainpage.split('<span id="WOTD-rss-title">')[1].split("</span>")[0]
        print word
        daily=True
    else:
        daily=False
    intext=open(infile).read()
    # Everything from the word's entry onward.
    text=intext.split("\n*'''"+word+"'''",1)[1]
    pagenum=int((len(intext)-len(text))/20000) #length of text up to this point, divided by approx. page length
    # Stash edit links as tab-delimited titles so later markup passes
    # cannot mangle them; restored near the bottom.
    text=re.sub("\[[\S]+?title\=(.*?) .+?\]\W*\n","\t\\1\t\n",text) #escape temporarily
    # Take ~20000 chars, extended to the end of the line we landed in.
    edgetext=text[20000:].split("\n",1)[0]
    text=text[0:20000]+edgetext
    length=0
    firstpage=""
    # First column: roughly the first 10000 characters, whole lines only.
    for t in text.split("\n"):
        firstpage+=t+"\n"
        length+=len(t)
        if length > 10000: break
    secondpage=text.replace(firstpage,"")
    firstpage="*'''"+word+"'''"+firstpage #restore
    header='<!-- page of the day, an experimental service based on WOTD --><table><tr><th width="45%" align="left">'+word+'</th><th width="5%" align="center">'+str(pagenum)+'</th><th width="45%" align="right">'+text.split("\n*'''")[-1].split("'''")[0]+'</th></tr><tr valign="top"><td>'
    breaker="</td><td></td><td>"
    footer="</td></tr></table>"
    text="\n".join([header,firstpage,breaker,secondpage,footer])
    # Un-stash the edit links escaped above.
    text=text.replace("\t\n"," e]\n").replace("\t","[http://en.wiktionary.org/w/index.php?action=edit&title=")
    writefile=open(outfile,"w")
    writefile.write(text)
    writefile.close()
    if daily:
        import wikipedia
        site=wikipedia.getSite("en","wiktionary")
        page=wikipedia.Page(site,"User:Visviva/Page of the day")
        page.put(text)
def unescape(text):
#From code by Fredrik Lundh at http://effbot.org/zone/re-sub.htm#-html
# Licensed to the public domain at http://effbot.org/zone/copyright.htm
# Seems to work better than BeautifulSoup for this purpose
def fixup(m):
text = m.group(0)
if text.startswith("&#"):
try:
if text.startswith("&#x"):
return unichr(int(text[3:-1], 16))
else:
return unichr(int(text[2:-1]))
except ValueError:
pass
else:
try:
text = unichr(name2codepoint[text[1:-1]])
except KeyError:
pass
return text
return re.sub("\&\#?\w+\;", fixup, text)
def render_args(template,args):
    """Substitute {{{...}}} parameter references in *template* using *args*.

    First pass: every simple, complete {{{name}}} / {{{name|default}}}
    reference found by the regex is handed to render_arg().  Second pass:
    the text is split on "{{{" and reassembled piecewise so that nested or
    multi-part references -- whose triple braces did not balance within one
    fragment -- get rendered too.

    NOTE(review): indentation reconstructed from an unindented paste.
    """
    matches=re.findall("((?<=[^\{]{1})|(?<=[\{]{2}))(\{\{\{[^\{\}\#]+?\}\}\})"," "+template+" ")
    if not matches:
        return template
    for r in matches:
        template=render_arg(r[1],args,template)
    argparts=template.split("{{{")
    counter=0
    unfinished=""  # accumulates fragments until their triple braces balance
    while counter+1 < len(argparts):
        counter+=1
        workingarg=unfinished+argparts[counter]
        # print "2nd iter"
        if "}}}" not in workingarg:
            # No closer yet: keep accumulating.
            unfinished+="{{{"+workingarg
            continue
        elif balanced_triples(3,workingarg) or balanced_triples(3,workingarg.split("}}}")[0]+"}}}"):
            unfinished=""
            workingarg="{{{"+("}}}".join(workingarg.split("}}}")[:-1])) # We know triple is balanced, so chop off anything after the last "}}}"
            if "{{{" in workingarg[3:]: #possibility of unrendered sub-args?
                workingarg=render_args(workingarg,args)
            template=render_arg(workingarg,args,template)
        else:
            # Still unbalanced: keep accumulating.
            unfinished+="{{{"+workingarg
            continue
    return template
def get_args(text):
    """Parse the pipe-separated body of a template invocation into a dict.

    *text* is the invocation's inner text as prepared by rinse() --
    "name|a|b=c|...|" with a trailing pipe.  Named parameters (b=c) are
    keyed by name; anonymous parameters are keyed by their 1-based pipe
    slot number as a string.  Wiki-link markup is stripped from all values.

    NOTE(review): indentation reconstructed from an unindented paste; the
    "did we go too far?" trim is assumed to run once per outer iteration,
    after the brace-balancing inner loop.
    """
    workingtext=" "+text+" "
    # Named parameters: |name=value, where the value may contain balanced
    # {{...}} but no loose pipes or braces.
    args=dict((y[0].strip(),y[1].strip()) for y in re.findall("(?<=[^\\\]{1})\|([^\|\}\<\>\#]+?)\=(.{0}|[^\{\}\|]*?(\{\{.*?\}\})*[^\{\}\|]*?[^\\\\|]{1})(?=[\|\}]{1})",workingtext))
    x=0
    # Anonymous parameters: pipe-separated slots without an "=".
    anonyparts=re.findall("(?<=[^\\\]{1}\|)([^\{\}\|\=]*([\{\[]{2}[^\}\{]+?[\}\]]{1,2})*[^\{\}\\=|]*?[^\=\\\\|]*[^\\\\=\|]{1}|.{0})[\|]{1}",text)
    nextpart=""  # NOTE(review): unused
    while x < len(anonyparts):
        thispart=anonyparts[x][0]
        # Rejoin slots that were split inside a nested {{...}}.
        while thispart.count("{{") != thispart.count("}}"):
            thispart=thispart+"|"+anonyparts[x+1][0]
            # print thispart
            x+=1
        if "{{" in thispart and "}}" not in thispart.split("{{")[-1]: # did we go too far?
            thispart="}}".join(thispart.split("}}")[:-1])
        args[str(x+1)]=thispart.strip()
        x+=1
    for a in args:
        # Collapse piped links and strip bracket/section markup from values.
        args[a]=re.sub("\[\[.*?\|(.*?)\]\]","\\1",args[a])
        args[a]=args[a].replace("[","").replace("]","") #cheating... don't want this markup for now.
        args[a]=re.sub("(\w+)\#\w+","\\1",args[a]) # section links
    return args