User:BuchmeierBot/code

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

I use a combination of bash and gawk scripts to analyze the page and write the wiki-code and mvs to upload the pages:

Bash code[edit]

#!/bin/bash
# Usage: <this script> VERB
#
# Fetches the en.wiktionary page for the Spanish infinitive VERB, extracts the
# conjugated forms with es-conj-verb-readconj.awk and writes the input file
# for pagefromfile.py with es-conj-verb-genpages.awk.
#
# Files written in the current directory:
#   VERB-wiki.txt         text dump of the wiki page
#   VERB-conjugation.txt  sorted "form<TAB>template" lines
#   VERB-ERRORS.txt       only present if the awk parser rejected the page
#   VERB-botinput.txt     input for pagefromfile.py
#
# Exit status: non-zero if VERB is missing or the parser reported an error.
verb=$1
if [ -z "$verb" ]
then
  # Without this guard the file names below would start with "-" and be
  # taken as options by rm, and the wrong wiki page would be fetched.
  echo "usage: $0 VERB" >&2
  exit 1
fi
TEMP=$verb-wiki.txt
TEMPCONJ=$verb-conjugation.txt
ERROUT=$verb-ERRORS.txt
BOTINPUT=$verb-botinput.txt
rm -f -- "$TEMP" "$TEMPCONJ" "$ERROUT" "$BOTINPUT"

# export the page to the text file $TEMP with lynx
lynx -width=1000 -nolist -underscore -dump "http://en.wiktionary.org/wiki/$verb" >"$TEMP"
recode latin1..utf8 "$TEMP"

# analyse the text file $TEMP with gawk
export LC_ALL=C
gawk --assign ERROUT="$ERROUT" --assign INF="$verb" -f es-conj-verb-readconj.awk "$TEMP" |sort >"$TEMPCONJ"

# Check for the file $ERROUT. The previous awk script writes it if the
# conjugation table contains lines with an unexpected number of words (or the
# verb is otherwise unsupported), in which case further execution is stopped.
if [ -f "$ERROUT" ]
then
  cat "$ERROUT"
  # report the failure to the caller instead of returning cat's status (0)
  exit 1
fi

# write the input file for pagefromfile.py using gawk
gawk --assign INF="$verb" -f es-conj-verb-genpages.awk "$TEMPCONJ" >>"$BOTINPUT"

Awk code[edit]

es-conj-verb-readconj.awk[edit]

The awk script es-conj-verb-readconj.awk looks like this:

# Initialise the parser state and derive the conjugation class ("ar", "er" or
# "ir") from the infinitive.
#   ERROUT - name of the error file, given on the command line
#   INF    - the infinitive being processed, given on the command line
BEGIN {
    section = "0"
    # NOTE(review): `none` is an unset awk variable here, so MOOD starts out
    # as the empty string rather than the text "none" - confirm the intent.
    MOOD = none
    errfile = ERROUT
    inf = INF

    # Take the last two characters of the infinitive as the ending ...
    inflength = length(inf)
    ending = substr(inf, inflength - 1, 2)
    #if(ending=="ír") ending="ir";
    # ... but any infinitive containing "ír" is filed under "ir".
    if (index(inf, "ír") > 0)
        ending = "ir"

    # Anything else is reported through the error file and stops the run.
    if (ending != "ar" && ending != "er" && ending != "ir") {
        print "unknown or unsupported ending: " ending " of infinitive: " inf > errfile
        exit
    }
}
 
# Section tracking. `section` moves from "0" to "S" at the Spanish heading,
# to "C" at the Conjugation heading that follows it, and to "T" once the
# infinitive row of the table names the requested verb.
/\[edit\] Spanish/ {
    section = "S"
    next
}

/\[edit\] Conjugation/ {
    if (section == "S")
        section = "C"
    next
}

# Any other [edit] heading reached after the conjugation section has started
# ends the run.
(section == "C" || section == "T") && /\[edit\]/ {
    exit
}

# Defective verbs are reported through the error file and stop the run.
section == "C" && /Defective verb/ {
    print "unsupported defective verb" > errfile
    exit
}

# Lines that are skipped outright.
/Rule/ || /Irregular in the past participle/ {
    next
}

# The table is only read when its infinitive row matches the requested verb.
section == "C" && /infinitive/ {
    inf2 = $2
    if (inf == inf2)
        section = "T"
}
 
# Gerund row: exactly two words, the form is the second one.
section == "T" && /gerund/ {
    if (NF != 2) {
        print "unexpected number of words on line:\n" $0 > errfile
        exit
    }
    print $2 "\t{{es-verb form of|ending=" ending "|mood=gerund|[[" inf "]]}}"
}

# "present participle" row: exactly three words, the form is the third one.
# `next` keeps the line away from the later rule that matches "present ".
section == "T" && /present participle/ {
    if (NF != 3) {
        print "unexpected number of words on line:\n" $0 > errfile
        exit
    }
    print $3 "\t{{es-verb form of|ending=" ending "|mood=gerund|[[" inf "]]}}"
    next
}

# "past participle" row: strip a final "o" from the third word to get the
# stem, then emit the four gender/number forms. These lines carry the stem
# as a third tab-separated column.
section == "T" && /past participle/ {
    if (NF != 3) {
        print "unexpected number of words on line:\n" $0 > errfile
        exit
    }
    partstem = $3
    sub(/o$/, "", partstem)

    # each entry is suffix:gender:number
    pp_count = split("o:m:s a:f:s os:m:p as:f:p", pp_variants, " ")
    for (pp_i = 1; pp_i <= pp_count; pp_i++) {
        split(pp_variants[pp_i], pp_part, ":")
        print partstem pp_part[1] "\t{{es-verb form of|ending=" ending "|mood=past participle|gender=" pp_part[2] "|number=" pp_part[3] "|[[" inf "]]}}\t" partstem
    }
}
 
# Inside the table, remember the mood named on the current line; the tense
# rules put it into the templates they print.
section == "T" && /indicative/  { MOOD = "indicative" }
section == "T" && /subjunctive/ { MOOD = "subjunctive" }
section == "T" && /imperative/  { MOOD = "imperative" }
#/conditional/ {if(section == "T") MOOD="conditional";}
 
# Stop the run with a message in the error file unless the current line has
# exactly `want` words. `label` is inserted into the message text (only the
# present-tense rule uses a non-empty label).
function check_field_count(want, label) {
    if (NF != want) {
        print "unexpected number of words on line" label ":\n" $0 > errfile
        exit
    }
}

# Print the eight "form<TAB>template" lines for a tense row whose words 2-7
# are the six person/number forms. The third-person forms ($4 and $7) are
# printed a second time as the formal second person. `extra` is inserted
# after the tense parameter (used for "|sera=ra" and "|sera=se").
function emit_person_forms(extra,    head, tail) {
    head = "\t{{es-verb form of|ending=" ending "|mood=" MOOD "|tense=" TENSE extra
    tail = "|[[" inf "]]}}"
    print $2 head "|pers=1|number=singular" tail
    print $3 head "|pers=2|formal=no|number=singular" tail
    print $4 head "|pers=3|number=singular" tail
    print $4 head "|pers=2|formal=yes|number=singular" tail
    print $5 head "|pers=1|number=plural" tail
    print $6 head "|pers=2|formal=no|number=plural|[[" inf "]]|region=Spain}}"
    print $7 head "|pers=3|number=plural" tail
    print $7 head "|pers=2|formal=yes|number=plural" tail
}

# All tense rules only act inside the conjugation table (section "T").

# A plain space replaces the former "\ ", which is not a defined regexp
# escape sequence; the set of matched lines is unchanged.
section == "T" && /present / {
    check_field_count(7, " present")
    TENSE = "present"
    emit_person_forms("")
}

# Lines matching "imperfect" are skipped while the mood is subjunctive; the
# "(ra)" and "(se)" rules below print those forms.
section == "T" && /imperfect/ {
    TENSE = "imperfect"
    if (MOOD == "subjunctive")
        next
    check_field_count(7, "")
    emit_person_forms("")
}

section == "T" && /\(ra\)/ {
    TENSE = "imperfect"
    check_field_count(7, "")
    emit_person_forms("|sera=ra")
}

section == "T" && /\(se\)/ {
    TENSE = "imperfect"
    check_field_count(7, "")
    emit_person_forms("|sera=se")
}

section == "T" && /preterite/ {
    TENSE = "preterite"
    check_field_count(7, "")
    emit_person_forms("")
}

section == "T" && /future/ {
    TENSE = "future"
    check_field_count(7, "")
    emit_person_forms("")
}

section == "T" && /conditional/ {
    TENSE = "conditional"
    check_field_count(7, "")
    emit_person_forms("")
}
 
# "affirmative" row: exactly six words; words 2-6 are printed in order.
# Only the two informal second-person forms carry the sense parameter.
section == "T" && /affirmative/ {
    TENSE = "affirmative"
    if (NF != 6) {
        print "unexpected number of words on line:\n" $0 > errfile
        exit
    }
    imp_head = "\t{{es-verb form of|ending=" ending "|mood=" MOOD
    imp_tail = "|[[" inf "]]}}"
    print $2 imp_head "|sense=" TENSE "|pers=2|formal=no|number=singular" imp_tail
    print $3 imp_head "|pers=2|formal=yes|number=singular" imp_tail
    print $4 imp_head "|pers=1|number=plural" imp_tail
    print $5 imp_head "|sense=" TENSE "|pers=2|formal=no|number=plural|[[" inf "]]|region=Spain}}"
    print $6 imp_head "|pers=2|formal=yes|number=plural" imp_tail
}

# "negative" row: exactly eleven words, of which only words 3 and 9 are
# printed. This is the last row that is read: the script exits afterwards.
section == "T" && /negative/ {
    TENSE = "negative"
    if (NF != 11) {
        print "unexpected number of words on line:\n" $0 > errfile
        exit
    }
    imp_head = "\t{{es-verb form of|ending=" ending "|mood=" MOOD "|sense=" TENSE "|pers=2|formal=no"
    print $3 imp_head "|number=singular|[[" inf "]]}}"
    print $9 imp_head "|number=plural|[[" inf "]]|region=Spain}}"
    exit
}

es-conj-verb-genpages.awk[edit]

The awk script es-conj-verb-genpages.awk looks like this:

# Turns tab-separated lines "page<TAB>definition[<TAB>participle stem]" into
# page blocks delimited by {{-start-}} / {{-stop-}}.
#   INF - the infinitive, given on the command line.
# Consecutive lines with the same page name are merged into one page, each
# contributing one "# definition" line.
BEGIN {
    FS = "\t"
    inf = INF
    oldpage = "0"
}

{
    page = $1

    # Same page as the previous line: just add another definition line.
    if (page == oldpage) {
        print "# " $2
        next
    }

    # A new page begins: close the previous one (if any) and open this one.
    if (FNR > 1)
        print "{{-stop-}}"
    print "{{-start-}}"
    print "<<<" $1 ">>>"

    # Two columns get the verb-form header; three columns (the third being
    # the participle stem) get the participle header.
    if (NF == 2)
        print "==Spanish==\n\n===Verb===\n{{es-verb-form|" inf "}}\n"
    if (NF == 3)
        print "==Spanish==\n\n===Verb===\n{{es-pp|" $3 "|" inf "}}\n"

    print "# " $2
    oldpage = page
}

# Close the last page. With no input at all nothing was opened, so nothing
# is closed either (previously a lone {{-stop-}} was written).
END {
    if (NR > 0)
        print "{{-stop-}}"
}


The file $BOTINPUT is then uploaded using SemperBlotto's pagefromfile.py, modified for Spanish.