User:MewBot/nlverbformbot.py

From Wiktionary, the free dictionary
Jump to navigation Jump to search
#!/usr/bin/env python
#coding: utf-8

# Copyright CodeCat 2010 - 2013

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
# 
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import wikipedia, re
from formbot import *


class NLVerbFormBot(GenericFormBot):
	"""A form bot for Dutch verb forms.
	
	generateEntries() expands a Dutch conjugation template with bot=1, parses
	the resulting "* key=value" lines and builds one {{nl-verb-form}}
	definition line per inflected form, grouping identical spellings.
	"""
	
	def __init__(self, head, cleanupCat, simulation = False, force = False, verbose = False):
		"""Set up the bot for the Dutch conjugation templates.
		
		All arguments are forwarded unchanged to GenericFormBot.__init__,
		together with the list of recognised conjugation templates, the
		language code 'nl' and the language name 'Dutch'.
		NOTE(review): the exact meaning of cleanupCat, simulation, force and
		verbose is defined by GenericFormBot -- confirm there.
		"""
		GenericFormBot.__init__(
			self, head, ['nl-conj-wk', 'nl-conj-wk-cht', 'nl-conj-st', 'nl-conj-irr'], 'nl', 'Dutch',
			cleanupCat, simulation, force, verbose)
	
	
	def generateEntries(self, template, params):
		"""Overrides base class method.
		
		Expands `template` with `params` plus bot=1 and converts the expanded
		text into verb form definitions.
		
		Note that `params` is modified in place: the key "bot" is set to "1".
		
		Returns whatever self.zipEntries() returns for the collected
		definitions, or None when a line of the expanded template does not
		have the form "* key=value".
		Raises KeyError when the expanded template lacks one of the required
		form keys (e.g. "pres_indc_1sg").
		"""
		
		# Add the bot=1 parameter and expand the template, which will return a machine-readable version of the table
		params["bot"] = "1"
		templatestring = makeTemplate(template, params)
		wikipedia.output(templatestring)
		contents = wikipedia.getSite('en', 'wiktionary').getExpandedString(templatestring)
		
		# Convert the returned wikitext into a dictionary.
		# Plain string methods are used instead of the `string` module, which
		# this file never imports itself.
		forms = {}
		
		for line in contents.strip().split('\n'):
			# The pattern is pure ASCII, so a plain raw string matches the same
			# text as the former ur"..." literal while staying valid syntax on
			# interpreters that reject the ur prefix.
			match = re.match(r"^\* ([^=]+)=(.+)", line)
			
			if not match:
				wikipedia.output("Template generated invalid output, aborting...")
				return None
			
			forms[match.group(1)] = match.group(2)
		
		# Make a dictionary of lists of the definitions, with the word as key
		# That way we automatically group cases where two forms are identical
		definitions = {}
		sub = ""
		
		if forms.get("sep") == "1":
			# The template reported sep=1: the "_main" keys are processed first
			# (without extra argument), and the unsuffixed keys are then tagged
			# with sub=1.
			sub = "|sub=1"
			self._addFiniteForms(definitions, forms, "_main", "")
		
		self._addFiniteForms(definitions, forms, "", sub)
		
		# Imperative
		definitions.setdefault(forms["impr_sg"], []).append('{{nl-verb-form|m=imp|' + self._head + '}}')
		
		if forms["impr_pl"] != forms["impr_sg"]:
			definitions.setdefault(forms["impr_pl"], []).append('{{nl-verb-form|n=pl|m=imp|' + self._head + '}}')
		
		# Participles
		definitions.setdefault(forms["pres_ptc"], []).append('{{nl-verb-form|t=pres|m=ptc|' + self._head + '}}')
		definitions.setdefault(forms["past_ptc"], []).append('{{nl-verb-form|t=past|m=ptc|' + self._head + '}}')
		
		return self.zipEntries(definitions, '===Verb===\n{{head|nl|verb form}}\n\n')
	
	
	def _addFiniteForms(self, definitions, forms, suffix, extra):
		"""Add the present and past finite forms to `definitions`.
		
		definitions -- dict mapping a spelling to its list of definition
		               lines; updated in place.
		forms       -- dict of form name -> spelling parsed from the template.
		suffix      -- appended to every form name looked up in `forms`
		               ("" or "_main").
		extra       -- text inserted right after the head word in each
		               generated template call ("" or "|sub=1").
		"""
		
		def add(name, args):
			# One definition line for the spelling stored under name + suffix.
			definitions.setdefault(forms[name + suffix], []).append(
				'{{nl-verb-form|' + args + '|' + self._head + extra + '}}')
		
		# Present singular: merge persons that share the same spelling
		first = forms["pres_indc_1sg" + suffix]
		second = forms["pres_indc_2sg" + suffix]
		third = forms["pres_indc_3sg" + suffix]
		
		if first == second and second == third:
			add("pres_indc_1sg", 'p=123|n=sg|t=pres|m=ind')
		elif first == second:
			add("pres_indc_1sg", 'p=12|n=sg|t=pres|m=ind')
			add("pres_indc_3sg", 'p=3|n=sg|t=pres|m=ind')
		elif second == third:
			add("pres_indc_1sg", 'p=1|n=sg|t=pres|m=ind')
			add("pres_indc_2sg", 'p=23|n=sg|t=pres|m=ind')
		elif first == third:
			add("pres_indc_1sg", 'p=13|n=sg|t=pres|m=ind')
			add("pres_indc_2sg", 'p=2|n=sg|t=pres|m=ind')
		else:
			add("pres_indc_1sg", 'p=1|n=sg|t=pres|m=ind')
			add("pres_indc_2sg", 'p=2|n=sg|t=pres|m=ind')
			add("pres_indc_3sg", 'p=3|n=sg|t=pres|m=ind')
		
		if forms["pres_indc_gij" + suffix] != second:
			add("pres_indc_gij", 'p=2-gij|n=sg|t=pres|m=ind')
		
		# Test for exactly the key that add() will read. The "_main" branch
		# used to test "pres_indc_u" while reading "pres_indc_u_main".
		if "pres_indc_u" + suffix in forms:
			add("pres_indc_u", 'p=2-u|n=sg|t=pres|m=ind')
		
		add("pres_subj_sg", 'n=sg|t=pres|m=subj')
		
		# Present plural
		add("pres_indc_pl", 'n=pl|t=pres|m=ind+subj')
		
		# Past singular
		if forms["past_indc_sg" + suffix] == forms["past_subj_sg" + suffix]:
			add("past_indc_sg", 'n=sg|t=past|m=ind+subj')
		else:
			add("past_indc_sg", 'n=sg|t=past|m=ind')
			add("past_subj_sg", 'n=sg|t=past|m=subj')
		
		if forms["past_indc_gij" + suffix] != forms["past_indc_sg" + suffix]:
			add("past_indc_gij", 'p=2-gij|n=sg|t=past|m=ind')
		
		# Past plural
		add("past_indc_pl", 'n=pl|t=past|m=ind+subj')