User:Commander Keane/Audio workflow/xPutInWiki7.py

From Wiktionary, the free dictionary
Jump to navigation Jump to search
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import pywikibot
from pywikibot import pagegenerators, page, diff
from pywikibot import editor as editarticle
from pywikibot.specialbots import BaseUnlinkBot  
from solve_disambiguation import EditOption, ShowPageOption
from pywikibot.bot import (
    CurrentPageBot, SingleSiteBot, ExistingPageBot, NoRedirectPageBot, AutomaticTWSummaryBot, Bot, StandardOption)
import re

#########################################################################
import codecs
from itertools import chain
import os
import re

import pywikibot
from pywikibot import editor as editarticle
from pywikibot.tools import first_lower, first_upper as firstcap
from pywikibot import pagegenerators, config, i18n
from pywikibot.bot import (
    SingleSiteBot,
    StandardOption, HighlightContextOption, ListOption, OutputProxyOption,
)
from pywikibot.tools.formatter import SequenceOutputter
#########################################################################
#TODO: etymology 1 matching
#Put pronun after pics and WP template in English section.
#start tk window at top of page
#Create an exceptions list
#Put AU file after IPA (if line contains "IPA") but before "{{rhymes" and after "audio" lines
#Make sure only english pronun section is used, eg combo used Spanish pronun section
#Re-record g'day / check on File:EN-AU ck1 cab off the rank.ogg
#after Alternate forms
#Turn off comma and bracket removal in other script
#Example to work on: 'battle axe'
#Re-record: "First in", "g'day", "it's the truth"
#Already done: "bag out"
#Idea: split entry into lines, remove whitespace
#launch notepad++ if exception occurs

class EditOption(StandardOption):

    """Option that opens a text in an editor so the user can change it.

    The text handed back by the editor is stored in ``new_text`` once
    ``result`` has run; until then ``new_text`` is ``None``.
    """

    def __init__(self, option, shortcut, text, start, title):
        """Initializer.

        @param option: option label, forwarded to StandardOption
        @type option: str
        @param shortcut: option shortcut, forwarded to StandardOption
        @type shortcut: str
        @param text: the text that will be offered for editing
        @type text: str
        @param start: passed to the editor as ``jumpIndex``
        @type start: int
        @param title: passed to the editor as ``highlight``
        @type title: str
        @rtype: None

        """
        super(EditOption, self).__init__(option, shortcut)
        self._text = text
        self._start = start
        self._title = title
        # Defined up front so that ``stop`` and callers can read it before
        # ``result`` has been invoked without hitting an AttributeError.
        self.new_text = None

    @property
    def stop(self):
        """Return whether the editor produced a non-empty, changed text.

        False when ``result`` has not run yet, when the editor returned
        nothing, or when the returned text equals the original text.

        @rtype: bool

        """
        return bool(self.new_text) and self.new_text != self._text

    def result(self, value):
        """Open a text editor, store its outcome in ``new_text``.

        @param value: forwarded unchanged to StandardOption.result
        @return: whatever StandardOption.result returns for *value*
        """
        editor = editarticle.TextEditor()
        self.new_text = editor.edit(self._text, jumpIndex=self._start,
                                    highlight=self._title)
        return super(EditOption, self).result(value)



class ShowPageOption(StandardOption):

    """Option that displays a page's text in an editor."""

    def __init__(self, option, shortcut, start, page):
        """Initializer.

        @param option: option label, forwarded to StandardOption
        @param shortcut: option shortcut, forwarded to StandardOption
        @param start: passed to the editor as ``jumpIndex``
        @param page: page to display; when it is a redirect, its target
            is stored and displayed instead
        """
        # The third positional argument handed to StandardOption is False.
        super(ShowPageOption, self).__init__(option, shortcut, False)
        self._start = start
        self._page = page.getRedirectTarget() if page.isRedirectPage() else page

    def result(self, value):
        """Open a text editor on the stored page's text.

        The value returned by the editor is discarded and nothing is
        returned.
        """
        shown = self._page
        viewer = editarticle.TextEditor()
        viewer.edit(shown.text, jumpIndex=self._start, highlight=shown.title())

    
# Site handles: one for Wikimedia Commons and one for English Wiktionary.
siteCom=pywikibot.Site("commons","commons")
siteWikt=pywikibot.Site("en","wiktionary") 
# Counter that the scan loop increments once per page it visits.
fileCount=1
# Time window handed to the upload-log generator as `start` / `end`.
# NOTE(review): startDate is later than endDate; presumably the log is walked
# newest-first so `start` is the most recent bound — confirm against the
# LogeventsPageGenerator documentation.
startDate = pywikibot.Timestamp(2019,1,18,6,10)  ##end date 9, 18   2019,1,6,6,32
endDate =   pywikibot.Timestamp(2019,1,18,1,11)    #start date 9,8   2019,1,5,0,0 #05:34, 7 January 2019 

# Two page sources: upload log entries of user "Commander Keane" on Commons
# (at most 2000, within the window above) and the members of the Commons
# category "Australian English pronunciation".
cat = pywikibot.Category(siteCom,"Australian English pronunciation")   
uploadsList=pagegenerators.LogeventsPageGenerator(logtype='upload', total=2000, user="Commander Keane", site=siteCom, start=startDate, end=endDate)
categoryList = pagegenerators.CategorizedPageGenerator(cat)

# NOTE(review): the results of the first getCombinedGenerator() call and of
# handleArg('-intersect') are overwritten straight away, so only the final
# assignment to `generator` is the one that gets iterated. Presumably the
# earlier calls are kept for their side effects on the factory (registering
# uploadsList and switching it to intersection mode), so that the final
# generator yields only pages present in both sources — confirm against the
# pywikibot GeneratorFactory documentation before reordering these lines.
generator_factory = pagegenerators.GeneratorFactory()
generator = generator_factory.getCombinedGenerator(gen=uploadsList)
generator = generator_factory.handleArg('-intersect')
generator = generator_factory.getCombinedGenerator(gen=categoryList)

# Entry titles whose audio file is not yet used; filled by the scan loop.
listUnused = []

# Scan every page from `generator` and collect, in `listUnused`, the entry
# titles whose audio file does not appear to be used on Wiktionary yet.
for page in generator:
    fileCount += 1

    # str(page) minus its first 15 and last 2 characters is treated as the
    # bare file name.
    # NOTE(review): presumably this drops '[[commons:File:' and ']]' —
    # confirm the exact str() format of the installed pywikibot version.
    shortFilename = str(page)[15:-2]
    # Dropping 6 leading and 4 trailing characters gives the entry title;
    # this mirrors the 'en-au-<title>.ogg' name built by the edit loop.
    entryTitle = shortFilename[6:-4]

    # A single Wiktionary usage is enough, so stop at the first hit.
    usedOnWikt = any("wiktionary" in str(usage)
                     for usage in page.globalusage())
    print(entryTitle)

    # Only fetch the entry when global usage did not settle the question;
    # an entry that already carries an "Audio (AU)" caption counts as used.
    if not usedOnWikt:
        usedOnWikt = "Audio (AU)" in pywikibot.Page(siteWikt, entryTitle).text

    if not usedOnWikt:
        listUnused.append(entryTitle)

print(listUnused)

# A level-2 heading once spaces are removed: "==" followed by a capital.
# Level-3 headings ("===Noun===") do not match because their third
# character is "=".
_LEVEL2_HEADING = re.compile(r"==[A-Z]")


def _lines_until_next_language(lines):
    """Return space-stripped copies of the leading lines of an entry.

    Copying stops after the first level-2 heading that is not the English
    heading; that heading itself is included as the last element. When no
    such heading exists every line is returned.
    """
    compactLines = []
    for line in lines:
        compact = line.replace(" ", "")
        compactLines.append(compact)
        # Tested on the stripped line so "== English ==" is recognised too.
        if _LEVEL2_HEADING.match(compact) and "==English==" not in compact:
            break
    return compactLines


def _pronunciation_bounds(compactLines):
    """Locate the Pronunciation section.

    Returns ``(heading, sectionEnd)`` where *heading* is the index of the
    last line containing "===Pronunciation===" and *sectionEnd* is the index
    of the first later line containing "===" (or ``len(compactLines)`` when
    there is none). Returns ``None`` when no Pronunciation heading exists.
    """
    heading = -1
    for idx, compact in enumerate(compactLines):
        if "===Pronunciation===" in compact:
            heading = idx
    if heading == -1:
        return None

    sectionEnd = len(compactLines)
    for idx in range(heading + 1, len(compactLines)):
        if "===" in compactLines[idx]:
            sectionEnd = idx
            break
    return heading, sectionEnd


def _audio_row(compactLines, heading, sectionEnd):
    """Return the index of the line the audio line is inserted after.

    That is the last line strictly between *heading* and *sectionEnd* that
    contains "IPA" or "Audio", or *heading* itself when there is none.
    """
    row = heading
    for idx in range(heading + 1, sectionEnd):
        if "IPA" in compactLines[idx] or "Audio" in compactLines[idx]:
            row = idx
    return row


def _new_section_row(compactLines):
    """Return the index of the line a new Pronunciation section follows.

    The anchor is the last line containing "===Etymology===" or
    "===Alternative" (index 0 when neither occurs). The result is the line
    just before the first "===" line after the anchor, or 0 when no such
    line exists.
    """
    anchor = 0
    for idx, compact in enumerate(compactLines):
        if "===Etymology===" in compact or "===Alternative" in compact:
            anchor = idx
    for idx in range(anchor + 1, len(compactLines)):
        if "===" in compactLines[idx]:
            return idx - 1
    return 0


def _insert_after(lines, index, addition):
    """Return the joined *lines* with *addition* placed after lines[index].

    When *index* is outside *lines* (e.g. an empty page) nothing is
    inserted. A missing line break before the insertion point is supplied
    so the addition always starts on its own line.
    """
    if index >= len(lines):
        return "".join(lines)
    head = "".join(lines[:index + 1])
    if not head.endswith("\n"):
        head += "\n"
    return head + addition + "".join(lines[index + 1:])


def _review_and_save(wiktPage, origText, proposedText, fileTitle):
    """Let the user review *proposedText* in an editor and save the outcome.

    Nothing is saved when the editor fails, when it hands back ``None``, or
    when the text it hands back equals *origText*.
    """
    try:                            #https://stackoverflow.com/questions/53417668/getting-a-tkinter-tclerror-when-i-try-to-cut-and-paste-in-a-custom-tkinter-text
        edit = EditOption(u'edit page', u'e', proposedText, 0, wiktPage.title())
        edit.result(6)
    except Exception as err:
        # Best effort: an editor failure skips this entry, but the cause is
        # reported instead of being silently discarded.
        print("exception occurred")
        print(repr(err))
        return

    reviewed = edit.new_text
    if reviewed is None or reviewed == origText:
        return

    pywikibot.output(u'\nThe following changes have been made:\n')
    pywikibot.showDiff(origText, reviewed)
    pywikibot.output(u'')

    wiktPage.text = reviewed
    wiktPage.save(u"Adding EN-AU audio file: " + fileTitle)


# For every entry still lacking the audio file: build a proposed text with
# the audio line added, show it to the user for review, and save the result.
for fileTitle in listUnused:
    wiktPage = pywikibot.Page(siteWikt, fileTitle)
    origText = wiktPage.text

    # NOTE(review): the trailing ' ***' makes this guard very unlikely to
    # trigger; the original carried a commented-out variant without it.
    # Kept as found — confirm which form is intended.
    if 'Audio (AU) ***' in origText:
        continue

    origSplit = origText.splitlines(True)
    strippedList = _lines_until_next_language(origSplit)
    print("line to stop at: " + str(len(strippedList)))

    bounds = _pronunciation_bounds(strippedList)
    if bounds is not None:
        # Existing Pronunciation section: add one audio line to it.
        heading, sectionEnd = bounds
        print("found pronun and findnext: " + str(heading + 2) + " " + str(sectionEnd))
        newTextLine = "* {{audio|en|en-au-" + fileTitle + ".ogg|Audio (AU)}}\n"
        print(newTextLine)
        rowNum = _audio_row(strippedList, heading, sectionEnd)
        masterString = _insert_after(origSplit, rowNum, newTextLine)
    else:
        # No Pronunciation section: add a complete new one.
        newText = u"===Pronunciation===\n* {{audio|en|en-au-" + fileTitle + ".ogg|Audio (AU)}}\n\n"
        masterString = _insert_after(origSplit, _new_section_row(strippedList), newText)

    _review_and_save(wiktPage, origText, masterString, fileTitle)
            
            

#LAST RESORT - place at top - needs code