# User:Surjection/probotector.py
#
# From Wiktionary, the free dictionary
# Jump to navigation Jump to search

from pywikibot import Site, Page, Category, Timestamp
from datetime import datetime, timedelta


# Site handle used by the scan loop at the bottom of this script.
# Both the construction and the login run at module import time.
enwikt = Site('en', fam='wiktionary')
enwikt.login()


# --- what to scan ---------------------------------------------------------

# Namespace ids whose pages are examined, in this order.
NAMESPACES = (828, 10)  # Module, Template

# A page is protected once its weighted transclusion score reaches this value.
SCORE_TO_PROTECT = 1000

# Weight of one transcluding page, keyed by that page's namespace id.
# Namespaces not listed here count once.
TRANSCLUDED_MULTIPLIER = {
    # content pages (mainspace, reconstruction) count two-fold
    0: 2,
    118: 2,
}


# --- run mode ---------------------------------------------------------------

# When either DEBUG or LOG_ONLY is true, the intended protection is printed
# instead of being applied.
DEBUG = False
LOG_ONLY = True
# When true, the title of every scanned page is printed.
LOG_ALL = False

# Protection levels; the position in this list is used as the rank when two
# levels are compared.
LEVELS = [
    "all",
    "autoconfirmed",
    "editautopatrolprotected",
    "templateeditor",
    "sysop",
]
# Minimum levels that a qualifying page is raised to.
PROTECT_EDIT_AT = "autoconfirmed"
PROTECT_MOVE_AT = "autoconfirmed"


def insufficient_protection(page):
    """Return True when *page*'s edit protection is weaker than PROTECT_EDIT_AT.

    A page with no edit protection entry, or with the level ``'all'``, is
    always considered insufficiently protected. A level that does not appear
    in LEVELS is left alone (returns False).

    :param page: object exposing ``protection()``, which returns a mapping
        from action name to the current protection.
    :return: bool
    """
    edit_entry = page.protection().get('edit')
    if not edit_entry:
        # no edit protection recorded at all
        return True

    # pywikibot reports each protection as a (level, expiry) tuple; comparing
    # the raw tuple against LEVELS would never match, so pull the level out.
    # A plain string is still accepted as-is.
    edit_level = edit_entry if isinstance(edit_entry, str) else edit_entry[0]

    if not edit_level or edit_level == 'all':
        return True
    return edit_level in LEVELS and LEVELS.index(edit_level) < LEVELS.index(PROTECT_EDIT_AT)


def do_protect(page, score):
    """Raise *page*'s edit and move protection to the configured minimums.

    Existing levels are only ever raised: a level that already ranks at or
    above the configured minimum, or that is not listed in LEVELS, is passed
    through unchanged. When DEBUG or LOG_ONLY is set, the intended call is
    printed instead of being executed.

    :param page: object exposing ``protection()`` and ``protect()``.
    :param score: weighted transclusion score; only used in the log reason.
    """
    reason = f'(bot) automatically protect highly visible templates/modules (reference score: {score}+ >= {SCORE_TO_PROTECT})'
    current = page.protection()

    def current_level(action):
        # pywikibot reports each protection as a (level, expiry) tuple; reduce
        # it to the bare level name so it can be ranked against LEVELS and
        # handed back to page.protect(). A plain string is accepted as-is and
        # a missing/empty entry means unprotected.
        entry = current.get(action)
        if not entry:
            return 'all'
        level = entry if isinstance(entry, str) else entry[0]
        return level or 'all'

    edit_level = current_level('edit')
    move_level = current_level('move')

    if edit_level in LEVELS and LEVELS.index(edit_level) < LEVELS.index(PROTECT_EDIT_AT):
        edit_level = PROTECT_EDIT_AT
    if move_level in LEVELS and LEVELS.index(move_level) < LEVELS.index(PROTECT_MOVE_AT):
        move_level = PROTECT_MOVE_AT

    protections = {'edit': edit_level, 'move': move_level}

    if DEBUG or LOG_ONLY:
        # dry run: show what would have been done
        print('\t', dict(page=page, reason=reason, protections=protections))
    else:
        # NOTE(review): no expiry is passed here, so an existing expiry on a
        # kept protection is not forwarded - confirm this is intended.
        page.protect(reason=reason, protections=protections)


def ignore_page_by_title(title):
    """Return True when the page with this full title must never be protected.

    Skipped are documentation/sandbox/testcases subpages, user templates and
    modules, the shared sandboxes and their subpages, ``zh/data`` subpages and
    titles containing ``":User "`` together with a hyphen.

    :param title: full page title including the namespace prefix.
    :return: bool
    """
    # Only the subpage check depends on a '/' being present. The remaining
    # rules must also run for titles without one, otherwise e.g.
    # 'Template:sandbox' would never be skipped.
    if '/' in title:
        last_token = title.rsplit('/', 1)[-1]
        if last_token in {'documentation', 'sandbox', 'testcases'}:
            # don't mess with documentations, sandboxes or testcases
            return True
    if title.startswith(('Template:User:', 'Module:User:')):
        # don't mess with user templates/modules
        return True
    if title in ('Template:sandbox', 'Module:sandbox'):
        # don't mess with sandboxes
        return True
    if title.startswith(('Template:sandbox/', 'Module:sandbox/')):
        # don't mess with sandboxes
        return True
    if ':zh/data/' in title:
        # literally thousands of these - just skip them all
        return True
    if ':User ' in title and '-' in title:
        # literally thousands of these - just skip them all
        return True
    return False


# Walk every page of each configured namespace and protect those whose
# weighted transclusion score reaches SCORE_TO_PROTECT.
for namespace in NAMESPACES:
    print(namespace)
    # a start title (e.g. start='a') can be added to allpages() to resume a scan
    for page in enwikt.allpages(namespace=namespace):
        try:
            title = page.title()
            if LOG_ALL:
                print(title)

            # skip excluded titles and pages that are already protected enough
            if ignore_page_by_title(title) or not insufficient_protection(page):
                continue

            # Only transclusions are requested, capped at SCORE_TO_PROTECT
            # results.
            transcluders = page.getReferences(
                follow_redirects=True,
                with_template_inclusion=True,
                only_template_inclusion=True,
                filter_redirects=False,
                total=SCORE_TO_PROTECT,
                content=False,
            )
            score = sum(
                TRANSCLUDED_MULTIPLIER.get(transcluder.namespace().id, 1)
                for transcluder in transcluders
            )
            if score >= SCORE_TO_PROTECT:
                do_protect(page, score)
        except KeyboardInterrupt:
            # report where the scan stopped before propagating the interrupt
            print(page.title())
            raise