# Utilisateur:PamputtBot/homo.py

#!/usr/bin/env python
# coding: utf-8
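# This script finds « homophones » sections that are not in the right place
# and moves them under the « prononciation » section of the same language.
# Process the whole work list:  python3 pwb.py homo.py
# Process a single entry:       python3 pwb.py homo.py ailé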

import sys, codecs

from unidecode import unidecode
import re
import pywikibot

# Dry-run switch: when True, sauvegarde() prints the new text instead of
# saving it to the wiki.
test = False
# Trace switch: when True, modification() prints every line together with
# the state of its flags. A dry run always turns tracing on.
debug = True if test else False

def getCodeLangue(ligne):
        """Return the text between the first ``|`` and the next ``}}`` of *ligne*.

        For a header such as ``== {{langue|fr}} ==`` this is the language
        code (``fr``). The search relies on ``str.find``: when there is no
        ``|`` the slice starts at the beginning of the line, and when there is
        no ``}}`` it stops one character before the end of the line.
        """
        debut = 1 + ligne.find("|")
        return ligne[debut:ligne.find("}}", debut)]

# Wiki modification: relocate the homophones section of one entry
def modification(titre, codeLangueATraiter, mot_a_traiter=""):
        """Put the homophones section of one language entry under « prononciation ».

        The page text is scanned line by line. Inside the language section
        whose ``{{langue|…}}`` code equals ``codeLangueATraiter``, the lines of
        the homophones section are buffered instead of being copied. They are
        written back just before the first later line that has to follow the
        pronunciation block (paronyms, anagrams, see-also, references, a
        category or a sort key), before the header of the next language, or
        at the end of the page. A ``=== {{S|prononciation}} ===`` header is
        written in front of them when no pronunciation header was seen in the
        language section.

        Nothing is inserted when no homophones section was buffered, so a
        page that does not need the fix is left untouched.

        Args:
            titre: title of the page to process.
            codeLangueATraiter: language code of the section to fix.
            mot_a_traiter: accepted for the caller's convenience; not used here.

        Returns:
            None. Side effects: progress is printed; ``titre,code`` is
            appended to ``double_section_homo.txt`` when a second homophones
            header is met while one is still buffered, and to
            ``a_verifier.txt`` when the result is more than 5 characters
            shorter than the original; ``sauvegarde`` is called when the text
            changed.
        """
        print ("Traitement de " + titre + " en " + codeLangueATraiter + "!")

        try:
                page = pywikibot.Page(pywikibot.Site(), titre)
        except UnicodeDecodeError:
                print ("UnicodeDecodeError l 30")
                return

        # Only existing pages of the main namespace are processed.
        if page.exists() and page.namespace() == 0:
                # NOTE(review): recent Pywikibot releases name these exceptions
                # pywikibot.exceptions.NoPageError / LockedPageError /
                # IsRedirectPageError - confirm against the installed version.
                try:
                        PageBegin = page.get()
                except pywikibot.NoPage:
                        print ("NoPage l 36")
                        return
                except pywikibot.LockedPage:
                        print ("Locked l 40")
                        return
                except pywikibot.IsRedirectPage:
                        print ("IsRedirect l 43")
                        return
        else:
                print ("NoPage l 46")
                return

        PageEnd = ""
        resume = "ajout d'une section prononciation manquante"

        # State of the line scanner:
        #   first               - the target language header has not been seen yet
        #   bonneSectionLangue  - the current line belongs to the target language
        #   finSection          - the current line is the header of another
        #                         language met after the target one
        #   homoSection         - a homophones section is buffered and has not
        #                         been written back yet
        #   pronSection         - a pronunciation header has been seen
        #   apresSection        - a line that must follow the pronunciation
        #                         block has been reached
        #   autreSection        - another {{S|...}} header followed the
        #                         homophones header
        #   sectionAjoutee      - the homophones were written back, or were
        #                         already located after a pronunciation header
        homoSection = False
        pronSection = False
        apresSection = False
        autreSection = False
        finSection = False
        sectionAjoutee = False
        bonneSectionLangue = False
        contenuHomophone = ""
        first = True

        for ligne in PageBegin.splitlines():
                # finSection only ever applies to the language header line itself.
                finSection = False

                # Language header: remember whether it opens the target language.
                if '{{langue|' in ligne and '==' in ligne:
                        autreSection = False
                        apresSection = False
                        if getCodeLangue(ligne) == codeLangueATraiter:
                                first = False
                                bonneSectionLangue = True
                        else:
                                bonneSectionLangue = False
                                if not first:
                                        # the target language section just ended
                                        finSection = True
                                        if not homoSection:
                                                # nothing is waiting to be written
                                                # back: start from a clean state
                                                pronSection = False
                                                contenuHomophone = ""

                # Lines of other languages are copied verbatim, except the header
                # that closes the target language, which is handled below.
                if not bonneSectionLangue and not finSection:
                        PageEnd += ligne + "\n"
                        if debug :
                                print(f"{ligne}")
                                print(f"homoSection: {homoSection}, apresSection: {apresSection}, sectionAjoutee: {sectionAjoutee}, autreSection: {autreSection}, finSection: {finSection}")
                        continue

                # ==== {{S|homophones|fr}} ====
                if "{{S|homo" in ligne and "===" in ligne:
                        if homoSection:
                                print("Il y a deux sections « homophones » dans cette entrée")
                                # titles are not ASCII: do not depend on the locale encoding
                                with open("double_section_homo.txt", "a", encoding="utf-8") as fichier:
                                        fichier.write(titre + "," + codeLangueATraiter + "\n")
                                return

                        if pronSection:
                                # already placed after a pronunciation header
                                sectionAjoutee = True
                        else:
                                homoSection = True

                # Pronunciation header ({{S|pronom...}} is a different section).
                if ("{{S|pron" in ligne and
                    "{{S|pronom" not in ligne and
                    "===" in ligne):
                        pronSection = True

                # Sections and lines that must come after the pronunciation block.
                if ('{{S|paro' in ligne or
                    '{{S|anagr' in ligne or
                    '{{S|voir' in ligne or
                    '{{S|réf' in ligne):
                        apresSection = True

                # Any other section header ends the buffered homophones section.
                if ('{{S|' in ligne and
                    '{{S|homo' not in ligne and
                    homoSection):
                        autreSection = True

                if '[[Catégorie:' in ligne:
                        apresSection = True

                if '{{clé de tri' in ligne:
                        apresSection = True

                # Still inside the homophones section: buffer the line.
                if (homoSection and
                    not pronSection and
                    not apresSection and
                    not autreSection and
                    not finSection):
                        contenuHomophone += ligne + "\n"

                if debug:
                        print(f"{ligne}")
                        print(f"homoSection: {homoSection}, apresSection: {apresSection}, sectionAjoutee: {sectionAjoutee}, autreSection: {autreSection}, finSection: {finSection}, pronSection: {pronSection}")

                # Build the new text from the existing one: the homophones
                # section is moved under the pronunciation section if there is
                # one, otherwise a pronunciation header is added first.
                if not homoSection:
                        # no homophones section pending: plain copy
                        PageEnd += ligne + "\n"
                elif apresSection or finSection:
                        if not sectionAjoutee:
                                sectionAjoutee = True
                                homoSection = False
                                finSection = False
                                if not pronSection:
                                        PageEnd += "=== {{S|prononciation}} ===\n"
                                else:
                                        resume = "déplacement de la section « homophone » dans la section « prononciation »"
                                if debug:
                                        print(f"On ajoute:\n<<<{contenuHomophone}>>>")
                                PageEnd += contenuHomophone

                        if not finSection:
                                PageEnd += ligne + "\n"
                elif autreSection:
                        PageEnd += ligne + "\n"
                # otherwise the line belongs to the homophones section and
                # already sits in contenuHomophone

        # End of the page reached while a homophones section is still
        # buffered: append it, preceded by a pronunciation header if needed.
        # Without a buffered section there is nothing to add.
        if homoSection and not sectionAjoutee:
                if not pronSection:
                        PageEnd += "=== {{S|prononciation}} ===\n"
                else:
                        resume = "déplacement de la section « homophone » dans la section « prononciation »"
                PageEnd += contenuHomophone

        # drop the trailing empty line
        PageEnd = PageEnd.strip()

        # Safety net: the fix only moves or adds text, so a noticeably shorter
        # result is logged for manual review and not saved.
        if len(PageEnd) < len(PageBegin) - 5:
                print("Suppression trop importante de texte. Il y a peut-être un problème.")
                with open("a_verifier.txt", "a", encoding="utf-8") as fichier:
                        fichier.write(titre + "," + codeLangueATraiter + "\n")
                return

        if PageEnd != PageBegin:
                print(u'<<<<<<<<<<<<<<<<<<<<')
                print(PageBegin)
                print(u'>>>>>>>>>>>>>>>>>>>>')
                print(PageEnd)
                print(u'<<<<<<<<<<<<<<<<<<<<')
                sauvegarde(page, PageEnd, resume)



# Read the work list: one "title; language code" entry per line
def crawlerFile(source, mot_a_traiter=""):
        """Run ``modification`` on every entry listed in the file *source*.

        Each line is split at its first ``;``: the text before it is the page
        title and the text after it is the language code, both stripped of
        surrounding whitespace (so a missing final newline, a CRLF line ending
        or a missing space after ``;`` do not damage the code). Reading stops
        at the first line that contains no ``;`` or at the end of the file.

        Args:
            source: path of the UTF-8 encoded list; nothing is done when empty.
            mot_a_traiter: when non-empty, only the entries whose title equals
                this word are processed.

        Returns:
            None.
        """
        if not source:
                return

        # "with" guarantees the file is closed even if modification() raises.
        with open(source, "r", encoding="utf-8") as PagesHS:
                for ligne in PagesHS:
                        titre, separateur, reste = ligne.partition(";")
                        if not separateur:
                                # first line without ';' ends the list
                                break

                        titre = titre.strip()
                        codeLangue = reste.strip()

                        if mot_a_traiter and titre != mot_a_traiter:
                                continue

                        modification(titre, codeLangue, mot_a_traiter)
                                        

                                        

# Lets anyone stop the bot by writing on its talk page
def ArretDUrgence():
        """Exit the program when the bot's talk page is not exactly ``{{/Stop}}``.

        The talk page of the account returned by ``pywikibot.Site().username()``
        is read. The function returns without doing anything when no user name
        is available, when the page does not exist or when it cannot be read.
        If the page text differs from ``{{/Stop}}``, a message is printed and
        the process exits with status 0.
        """
        site = pywikibot.Site()
        nom_du_bot = site.username()
        if not nom_du_bot:
                # no account configured: there is no talk page to watch
                return

        page = pywikibot.Page(site, u'User talk:' + nom_du_bot)
        if not page.exists():
                return

        try:
                contenu = page.get()
        except pywikibot.exceptions.Error:
                # best effort: an unreadable talk page must not stop the run
                return

        if contenu != u"{{/Stop}}":
                pywikibot.output (u"\n*** \03{lightyellow}Arrêt d'urgence demandé\03{default} ***")
                sys.exit(0)

def sauvegarde(PageCourante, Contenu, resume):
        """Save *Contenu* to *PageCourante*, or only display it in test mode.

        Args:
            PageCourante: page object providing ``put()`` and ``title()``.
            Contenu: new text of the page.
            resume: edit summary passed to ``put()``.
        """
        if test:
                # dry run: show what would have been written
                pywikibot.output(f"{PageCourante.title()} :\n{Contenu}")
                return
        PageCourante.put(Contenu, summary=resume)
		
# Launch: with no command-line argument the whole list is processed,
# otherwise only the entry whose title is the first argument.
if len(sys.argv) <= 1:
        TraitementFichier = crawlerFile(u'liste_mots_avec_section_homophone_sans_section_prononciation.txt')
else:
        mot_a_traiter = sys.argv[1].strip()
        TraitementFichier = crawlerFile(u'liste_mots_avec_section_homophone_sans_section_prononciation.txt', mot_a_traiter)