# User:Scsbot/wikised
#
# Definition from Wiktionary, the free dictionary
# Jump to: navigation, search
# ---- settings ----
# An empty value means "not specified yet".  The command line is parsed
# first; the login configuration and the master script only fill in values
# that are still empty at that point.

host=                   # host of the wiki (used to derive baseurl if that is empty)
baseurl=                # full base URL of the wiki
login="?title=Special:Userlogin"        # appended to baseurl to fetch the login page

driverscript=           # driver/data file
reason=                 # edit summary message
minoredit=              # yes/no: mark edits as minor

okaytocreate=           # yes/no: okay to create new pages
mustcreate=             # yes/no: don't overwrite old pages (must create)

delay=                  # sleep between edits (seconds)

checkscript=            # pre-check script
editscript=             # main edit script
filter=                 # yes/no: edit script is a pure filter
postcheckscript=        # post-check script

# Deliberately empty, like the other yes/no flags: the master script only
# applies its "checkdiffs" line when this is still empty, and the "no"
# default is filled in later by the settings check.
checkdiffs=
expecteddelete=         # expected number of deleted lines in the diff
expectedinsert=         # expected number of inserted lines in the diff

rm="/bin/rm -f"

username=
password=

loginconfig=            # name of an entry in $configs

# Only ever assigned from the login configuration; cleared here so that
# same-named environment variables cannot leak into the run.
defaultlogin=
confirm=

masterscript=

configs="wikised.configs"

# argument parse

# need_value N OPT
# Abort with a diagnostic unless at least N >= 2 words remain, i.e. unless
# option OPT is followed by a value.  Callers pass their own "$#" as N.
need_value() {
        if test "$1" -lt 2
        then    echo "$0: option $2 requires an argument" 1>&2
                exit 1
        fi
}

# parse_options ARG...
# Consume the leading words of ARG... that start with "-" and store their
# values in the corresponding settings.  Parsing stops at the first word
# that does not start with "-".
# Sets:  optcount    - number of words consumed
#        printedhelp - "yes" if help was requested, else empty
# An unrecognized option is reported on stderr and skipped.
parse_options() {
        nargs=$#
        printedhelp=
        while test $# -gt 0
        do      case "$1" in
                        -\?|-h|-help|--help)
                                echo "usage: $0 [options] [masterscript]"
                                echo "options:"
                                echo "     -host h set host of wiki"
                                echo "     -url u  set full base URL of wiki"
                                echo "     -why m  set edit summary message"
                                echo "     -m      mark edits as minor"
                                echo "     -d f    set driver/data file"
                                echo "     -cre    okay to create new pages"
                                echo "     -new    don't overwrite old pages (must create)"
                                echo "     -s s    sleep for s seconds between edits"
                                echo "     -pre s  set pre-check script"
                                echo "     -post s set post-check script"
                                echo "     -edit s set main edit script"
                                echo "     -filter edit script is a pure filter"
                                echo "     -chkd   check diffs"
                                echo "     -ins n  expected insert"
                                echo "     -del n  expected delete"
                                echo "     -user u set username"
                                echo "     -pass p set password"
                                echo "     -cfg c  login configuration c"
                                echo "     -f s    master setup script s"
                                echo "     -?,-h   print this help"
                                printedhelp=yes;;

                        -host)  need_value $# "$1"
                                host=$2
                                shift;;
                        -u|-url|-baseurl)
                                need_value $# "$1"
                                baseurl=$2
                                shift;;
                        -why|-reason|-message|-summary|-editsummary)
                                need_value $# "$1"
                                reason=$2
                                shift;;
                        -m|-minor)
                                minoredit=yes;;
                        -d|-driver|-driverscript|-data)
                                need_value $# "$1"
                                driverscript=$2
                                shift;;
                        -cre|-create|-okaytocreate)
                                okaytocreate=yes;;
                        -new|-mustcreate)
                                mustcreate=yes;;
                        -s|-sleep|-delay)
                                need_value $# "$1"
                                delay=$2
                                shift;;
                        -pre|-precheck|-precheckscript|-checkscript)
                                need_value $# "$1"
                                checkscript=$2
                                shift;;
                        -post|-postcheckscript)
                                need_value $# "$1"
                                postcheckscript=$2
                                shift;;
                        -edit|-editscript)
                                need_value $# "$1"
                                editscript=$2
                                shift;;
                        -filter)
                                filter=yes;;
                        -chkd|-checkdiff|-checkdiffs)
                                checkdiffs=yes;;
                        -ins|-nins|-ninsert|-expectedinsert)
                                need_value $# "$1"
                                expectedinsert=$2
                                shift;;
                        -del|-ndel|-ndelete|-expecteddelete)
                                need_value $# "$1"
                                expecteddelete=$2
                                shift;;
                        -user|-username)
                                need_value $# "$1"
                                username=$2
                                shift;;
                        -pass|-password)
                                need_value $# "$1"
                                password=$2
                                shift;;
                        -cfg|-config)
                                need_value $# "$1"
                                loginconfig=$2
                                shift;;
                        -f|-masterscript)
                                need_value $# "$1"
                                masterscript=$2
                                shift;;
                        -*)     echo "$0: unrecognized option $1" 1>&2;;
                        *)      break;;         # first non-option word ends parsing
                esac
                shift
        done
        optcount=`expr $nargs - $#`
}

parse_options "$@"
shift $optcount         # drop the words parse_options consumed

# Asking for help is a request for information only; stop here rather than
# going on to validate settings and possibly start editing.
if test "$printedhelp" = yes
then    exit 0
fi

# The first remaining word, if any, names the master script; it is an
# alternative to -f, so giving both is an error.
if test $# -ge 1
then
        if test -n "$masterscript"
        then    echo "master script specified twice (use -f or argument)" >&2
                exit 1
        fi
        masterscript=$1
        shift
fi

# login config shortcut (but don't override cmd line)

# apply_login_config ENTRY
# ENTRY is the text of one configuration entry, one "key value" pair per
# line, key and value separated by blanks or tabs.  host and baseurl are
# taken only if still empty (the command line wins); defaultlogin and
# confirm are always taken.  Other keys are ignored.
apply_login_config() {
        # The here-document feeds the loop without a pipeline, so the
        # assignments below are made in the current shell.
        while read -r k v
        do
                case "$k" in
                        host)   if test -z "$host"; then host=$v; fi;;
                        baseurl)
                                if test -z "$baseurl"; then baseurl=$v; fi;;
                        defaultlogin)
                                defaultlogin=$v;;
                        confirm)
                                confirm=$v;;
                esac
        done <<EOF
$1
EOF
}

if test -n "$loginconfig"
then
        # Look the named entry up in the configurations file.
        ent=`dbgrep -i -k name "$loginconfig" "$configs"`
        if test -z "$ent"
        then
                echo "no such configuration $loginconfig" >&2
                # exit?
        else
                apply_login_config "$ent"
        fi
fi

# "master script" (but don't override cmd line)

# master_flag VALUE
# Print "no" if VALUE is 0, no or false; print "yes" for anything else
# (including an empty value).
master_flag() {
        case "$1" in
                0|no|false)     echo no;;
                *)              echo yes;;
        esac
}

# load_master_script FILE
# Read "key value" lines from FILE and fill in every setting that is still
# empty; settings that already have a value are left alone.  Unknown keys
# are ignored.
load_master_script() {
        # Redirecting the whole loop (rather than piping into it) keeps the
        # assignments in the current shell.  The "|| test -n" picks up a
        # last line that lacks a trailing newline.
        while read -r k v || test -n "$k"
        do
                case "$k" in
                        host)   if test -z "$host"; then host=$v; fi;;
                        baseurl) if test -z "$baseurl"; then baseurl=$v; fi;;
                        driverscript)   if test -z "$driverscript"; then driverscript=$v; fi;;
                        reason) if test -z "$reason"; then reason=$v; fi;;
                        minoredit)
                                if test -z "$minoredit"; then minoredit=`master_flag "$v"`; fi;;
                        okaytocreate)
                                if test -z "$okaytocreate"; then okaytocreate=`master_flag "$v"`; fi;;
                        mustcreate)
                                if test -z "$mustcreate"; then mustcreate=`master_flag "$v"`; fi;;
                        delay)  if test -z "$delay"; then delay=$v; fi;;
                        checkscript) if test -z "$checkscript"; then checkscript=$v; fi;;
                        editscript) if test -z "$editscript"; then editscript=$v; fi;;
                        filter)
                                if test -z "$filter"; then filter=`master_flag "$v"`; fi;;
                        postcheckscript) if test -z "$postcheckscript"; then postcheckscript=$v; fi;;
                        checkdiffs)
                                if test -z "$checkdiffs"; then checkdiffs=`master_flag "$v"`; fi;;
                        expecteddelete) if test -z "$expecteddelete"; then expecteddelete=$v; fi;;
                        expectedinsert) if test -z "$expectedinsert"; then expectedinsert=$v; fi;;
                        username)       if test -z "$username"; then username=$v; fi;;
                        password)       if test -z "$password"; then password=$v; fi;;
                esac
        done < "$1"
}

if test -n "$masterscript"
then
        if test ! -r "$masterscript"
        then
                echo "master script $masterscript does not exist or is not readable" >&2
                exit 1          # a bare "exit" here would report success
        fi

        load_master_script "$masterscript"
fi

# Most args/flags are required.  Check them all.

# Validate the collected settings.  Every missing required setting is
# reported (so the user sees all problems at once) and counted in errs;
# optional settings get their defaults.  Exits 1 if anything required is
# missing.
errs=0

# With no explicit base URL, build one from the host if that was given.
if test -z "$baseurl"
then
        if test -n "$host"
        then    baseurl="http://$host/w/index.php"
        fi
fi

if test -z "$baseurl"
then
        # The host option is -host; -h prints the help.
        echo "base url not specified; use -url, or -host to specify host" >&2
        errs=`expr $errs + 1`
fi

if test -z "$driverscript"
then
        echo "driver script not specified; use -d to specify" >&2
        errs=`expr $errs + 1`
fi

# Defaults for optional settings that are still empty.
minoredit=${minoredit:-no}
okaytocreate=${okaytocreate:-no}
mustcreate=${mustcreate:-no}
delay=${delay:-60}

if test -z "$editscript"
then
        echo "edit script not specified; use -edit to specify" >&2
        errs=`expr $errs + 1`
fi

filter=${filter:-no}
checkdiffs=${checkdiffs:-no}

# Fall back to the login configuration's default user name.
username=${username:-$defaultlogin}

if test -z "$username"
then
        echo "login user name not specified; use -user to specify" >&2
        errs=`expr $errs + 1`
fi

# Placeholder: a password lookup could go here.  Nothing is attempted, and
# a missing password is not counted as an error.
if test -z "$password"
then
        : attempt to look up
fi

if test $errs -gt 0
then    exit 1
fi

# Ask for whatever is still needed before touching the wiki: the password
# if none was supplied, otherwise (when the login configuration sets
# "confirm" to yes) an explicit go-ahead.  When the password is prompted
# for, the prompt text itself carries the live-wiki warning and no separate
# confirmation is asked.
if test -z "$password"
then
        if test "$confirm" = yes
        then    printf '%s' "enter password for $username to edit live wiki: "
        else    printf '%s' "enter password for $username: "
        fi
        # -r: keep backslashes in the password exactly as typed
        read -r password
elif test "$confirm" = yes
then    printf '%s' "Confirm editing live wiki: "
        read -r ans
        # Anything not starting with y/Y (including end of input) declines.
        case "$ans" in
                [Yy]*)  : ;;
                *)      exit 1;;
        esac
fi

# Accept/reject logs are named after the driver script plus a
# YYYYMMDD.HHMM timestamp.  (%m and %d are date(1) conversions; they must
# not be written as shell variables.)
logfilebase=$driverscript.`date +%Y%m%d.%H%M`
acceptlog=$logfilebase.accept
rejectlog=$logfilebase.reject

# Scratch files, made unique per run by the process id.  $tf.cook is also
# used later on as the cookie file.
tf=/tmp/tmpfile$$
tf4=/tmp/tmpfile$$.4

# Log in: fetch the login page, fill in the userlogin form and submit it.
# Cookies are kept in $tf.cook for the requests that follow.  The form*
# commands are external helpers; any failure among them ends the script.
printf '%s' "Fetching login page..."
httpget -cookies -cookiefile $tf.cook "$baseurl$login" > $tf
echo " got it."

formsetup formtag$$ $tf || exit

formselect formtag$$ userlogin || exit

formcheckfields formtag$$ wpName wpPassword || exit

# Write the credentials verbatim: quoting keeps the shell from splitting or
# glob-expanding them, and printf (unlike echo) never treats a value such
# as "-n" as an option.
printf '%s\n' "$username" > "`formgettmpfile formtag$$ wpName`"
printf '%s\n' "$password" > "`formgettmpfile formtag$$ wpPassword`"

formsubmitbutton formtag$$ wpLoginattempt || exit

printf '%s' "Submitting login information..."
formsubmit formtag$$ "$baseurl" $tf.cook > $tf4
stat=$?
echo " got it."

if test $stat -ne 0; then exit 1; fi

# The reply page is searched for the wiki's bad-password message.
if grep -q "Incorrect password entered" $tf4
then
        echo "bad password; login failed" >&2
        exit 1
fi


formfinish formtag$$

# ---

# Main loop.  Each driver-file line is an article name followed by optional
# parameters for the edit script.  For every article: fetch its edit form,
# apply the create/redirect/check-script gates, run the edit script,
# optionally verify the resulting diff, then save.  Rejected articles are
# appended to $rejectlog with a reason, saved ones to $acceptlog.
#
# The loop is the tail of a pipeline, so in most shells it runs in a
# subshell: an "exit" inside it ends the loop, and the cleanup after "done"
# still runs.
cat "$driverscript" |
while read -r article editscriptparms
do

# Expected insertion count for this article.  It starts from the configured
# value and may be overridden by this article's edit script output; keeping
# it separate stops one article's override from leaking into the next.
wantinsert=$expectedinsert

sleep "$delay"

printf '%s' "Fetching \"$article\" to edit..."
httpget -cookies -cookiefile $tf.cook "$baseurl?title=$article&action=edit" > $tf
echo " got it."

if grep -q "<title>User is blocked" $tf ||
                grep -q "Your user name or IP address has been blocked" $tf
then
        echo "bot user $username blocked!" >&2
        exit
fi

formsetup formtag$$ $tf || exit

formselect formtag$$ editform || exit

formcheckfields formtag$$ wpTextbox1 || exit

# File holding the page text from the edit form.
tftextarea=`formgettmpfile formtag$$ wpTextbox1`

# ---

# Imperfect test for missing page, since it depends on text that can
# be and typically is customized per wiki.  I'm using two different
# strings from each of generic mediawiki, Wikipedia, and Wiktionary.
# The failure mode if it fails to notice that the page doesn't exist
# isn't too bad, because the check and/or edit scripts will usually
# fail on a blank textarea.

missing=no

if grep -q "does not yet have an entry for $article" $tf ||
        grep -q "start .* entry.* type .* box .* click .*[Ss]ave.*changes .* visible immediately" $tf ||
        grep -q "followed a link to a page that doesn't exist yet" $tf ||
        grep -q "To create the page, start typing in the box below" $tf ||
        grep -q "does not have an article with this exact name" $tf ||
        grep -q "To start a page called $article, type .* box .* and .* [Ss]ave .* changes .* visible immediately" $tf
then
        missing=yes
fi

if test "$mustcreate" = yes -a "$missing" = no
then
        echo "$article: page already exists" >&2
        # XXX duplicated
        formfinish formtag$$
        echo "$article     $editscriptparms        already exists" >> $rejectlog
        continue
elif test "$okaytocreate" = no -a "$missing" = yes
then
        echo "$article: no such page yet" >&2
        # XXX duplicated
        formfinish formtag$$
        echo "$article     $editscriptparms        no such page" >> $rejectlog
        continue
fi

# Redirect pages are never edited.
if grep -i -q '^#redirect' < $tftextarea
then
        # XXX duplicated
        formfinish formtag$$
        echo "$article is a redirect" >&2
        echo "$article     $editscriptparms        is redirect" >> $rejectlog
        continue
fi

# Optional pre-check script; it reads the page text on stdin and vetoes the
# edit by exiting non-zero.
# NOTE(review): checkscriptparms is not assigned anywhere in the visible
# script, so it normally expands to nothing -- confirm whether
# editscriptparms was intended.
if test -z "$checkscript" || sh $checkscript $checkscriptparms < $tftextarea
then    :
else
        # XXX duplicated
        formfinish formtag$$
        echo "$article     $editscriptparms        check script failure" >> $rejectlog
        continue
fi

# ---

# XXX questionable who/where should do this unescaping, and how aggressively
filter "sed -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/&amp;/\&/g' -e 's/&quot;/\"/g'" $tftextarea

edit_script_output=""

# A "filter" edit script is run through the filter helper on the text file;
# otherwise the script gets the file name as its first argument and its
# output is captured.
if test "$filter" = "yes"
then    filter "sh $editscript $editscriptparms" $tftextarea
else    edit_script_output=`sh $editscript $tftextarea $editscriptparms`
fi

# $? is still the status of whichever branch ran above; nothing may be
# inserted between that "if" and this test.
if test $? -ne 0
then
        echo "$edit_script_output" >&2
        # XXX duplicated
        formfinish formtag$$
        echo "$article     $editscriptparms        edit script failure" >> $rejectlog
        continue
fi

# The edit script may announce how many insertions to expect for this
# article, by starting its output with "expect N insertions".
if test -n "$edit_script_output"
then
        if echo "$edit_script_output" | grep -q 'expect.*insertions'
        then
                wantinsert=`expr "$edit_script_output" : 'expect \([0-9]*\) insertions'`
        fi
fi

# ---

if test "$checkdiffs" = "yes"
then

formsubmitbutton formtag$$ wpDiff || exit

printf '%s' "Fetching diffs..."
formsubmit formtag$$ "$baseurl" $tf.cook > $tf4
stat=$?
echo " done."

if test $stat -ne 0; then exit 1; fi

formfinish formtag$$

# Count the deleted/added line cells in the diff page.
ndel=`grep -c "td class=['\\"]diff-deletedline['\\"]" $tf4`
nadd=`grep -c "td class=['\\"]diff-addedline['\\"]" $tf4`

# The edit goes ahead only if both expected counts are plain numbers and
# both match.  A missing or malformed expectation counts as a failure; it
# must never let the edit through unchecked.
diffok=yes
case "$expecteddelete" in
        ''|*[!0-9]*)    diffok=no;;
esac
case "$wantinsert" in
        ''|*[!0-9]*)    diffok=no;;
esac
if test $diffok = yes
then
        if test "$ndel" -ne "$expecteddelete" -o "$nadd" -ne "$wantinsert"
        then    diffok=no
        fi
fi

if test $diffok = no
then
        echo "edit would cause $nadd/$ndel insertions/deletions, but expected $wantinsert/$expecteddelete" >&2
        echo "Canceling." >&2
        echo "$article     $editscriptparms        diff check failure" >> $rejectlog
        continue
fi

# Continue from the edit form contained in the diff page.
formsetup formtag$$ $tf4 || exit

formselect formtag$$ editform || exit

formcheckfields formtag$$ wpTextbox1 || exit

tftextarea=`formgettmpfile formtag$$ wpTextbox1`

# XXX questionable who/where should do this unescaping, and how aggressively
# XXX also this is in two places, though I missed it at first, leading
# to a bad edit *not* caught by the checkdiffs bad-edit check :-( # :-( :-(
filter "sed -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/&amp;/\&/g' -e 's/&quot;/\"/g'" $tftextarea

fi

# ---

# Drop the "watch this page" field so it is not submitted.
tfwatch=`formgettmpfile formtag$$ wpWatchthis`
if test -n "$tfwatch"
then
        $rm $tfwatch
fi

# Set or drop the "minor edit" field according to the minoredit setting.
tfminor=`formgettmpfile formtag$$ wpMinoredit`
if test -n "$tfminor"
then
        if test "$minoredit" = yes
        then    echo 1 > $tfminor
        else    $rm $tfminor
        fi
fi

# Edit summary: "edited by robot", or "edited by robot:" followed by the
# reason on the next line when a reason was given.
tfsummary=`formgettmpfile formtag$$ wpSummary`
if test -n "$tfsummary"
then
        echo "edited by robot" > $tfsummary
        if test -n "$reason"
        then
                (echo 1s/$/:/; echo w) | ed - $tfsummary
                echo "$reason" >> $tfsummary
        fi
fi

formsubmitbutton formtag$$ wpSave || exit

printf '%s' "Submitting edits..."
formsubmit formtag$$ "$baseurl" $tf.cook > $tf4
stat=$?
echo " done."

if test $stat -ne 0; then exit 1; fi

formfinish formtag$$

if grep -q "<title>.*[Ee]dit [Cc]onflict" $tf4
then
        echo "edit conflict" >&2
        echo "$article     $editscriptparms        edit conflict" >> $rejectlog
        continue
fi

echo "$article     $editscriptparms" >> $acceptlog

done

# Remove the scratch files and the cookie file.
$rm $tf $tf4 $tf.cook