MediaWiki:Gadget-translation editor.js/Statistiques/code/partie 2

Définition, traduction, prononciation, anagramme et synonyme sur le dictionnaire libre Wiktionnaire.
Sauter à la navigation Sauter à la recherche
  1 # -*- coding: utf-8 -*-
  2 import re
  3 import time
  4 import pywikibot
  5 import languages_list # Copie de [[MediaWiki:Gadget-translation editor.js/langues.json]] (la liste étant préfixée de languages =)
  6 import ast
  7 from subprocess import call
  8 
# pywikibot site object; count_trads_per_diff() uses it to fetch revision diffs.
# NOTE(review): Site() is called without arguments, so it presumably targets the
# wiki configured in the local pywikibot user configuration - confirm.
site = pywikibot.Site()

# Language table taken from the local languages_list module. It is queried with
# language codes as keys and also holds a 'redirects' entry for aliased codes.
languages = languages_list.languages
 12 
 13 // Dossier qui contient les résultats générés précédemment et notamment le fichier
 14 // stats-trads-diffs-to-check.txt qui contient les infos sur les diffs à analyser.
 15 folder = "C:\\Users\\automatik\\Wiktionnaire\\Stats_translation_editor\\"
 16 file_langs = 'stats-trads-langs-after-diffs.txt'
 17 file_dates = 'stats-trads-dates-after-diffs.txt'
 18 file_contributors = 'stats-trads-contributors-after-diffs.txt'
 19 file_ips = 'stats-trads-ips-after-diffs.txt'
 20 
 21 def sort_dict_by_value(d):
 22   return sorted(d.items(), key=lambda x: x[1])
 23   
 24 def first_ten_items_dict(d):
 25   return first_n_items_dict(d, 10)
 26   
 27 def first_n_items_dict(d, n, first_index):
 28   '''Returns a list of n top-valued items from index first_index'''
 29   return sorted(d.items(), key=lambda x: x[1], reverse=True)[first_index:first_index+n]
 30   
 31 def dict_from_file(file, folder=None):
 32   with open(folder+file, encoding='utf-8') as f:
 33     dict_results = ast.literal_eval(f.read())
 34   return dict_results
 35   
 36 def generate_graph(results, file_graph='graph_template.txt', cat='langs', label='lang'):
 37   '''
 38   @param results: an object containing the results
 39   @type results: str representing a file,
 40                 or dict or list of tuples
 41   '''
 42   input_file = folder + file_graph
 43   output_file = folder + 'graph-{}.txt'.format(cat)
 44   if isinstance(results, str):
 45     with open(folder+results) as f:
 46       results = ast.literal_eval(f.read())
 47   with open(input_file, 'r', encoding='utf-8') as f:
 48     content = f.read()
 49     specific_results = ''
 50     if isinstance(results, dict):
 51       for k in results:
 52         specific_results += '        {{"{}": "{}", "amount": {} }},\n'.format(label, k, str(results[k]))
 53     elif isinstance(results, list):
 54       for item in results:
 55         specific_results += '        {{"{}": "{}", "amount": {} }},\n'.format(label, item[0], str(item[1]))
 56     else:
 57       print('results objects should be either a string, a list or a dict, but it is a {}'.format(type(results))); return
 58     specific_results = specific_results[:-2] # removing last ",\n"
 59     content = content.replace('__TO_REPLACE__', specific_results)
 60     content = content.replace('__LABEL__', label)
 61     oblique_labels = '__OBLIQUE_LABELS__'
 62     if cat == 'dates':
 63         content = content.replace(oblique_labels, ', "properties": { "labels": {"angle": {"value": -45}, "dx": {"value": -20} } } ')
 64     else:
 65       content = content.replace(oblique_labels, '')
 66   with open(output_file, 'w', encoding='utf-8') as f:
 67     f.write(content)
 68   print('Graph saved to {}'.format(output_file))
 69   choice = pywikibot.inputChoice(u"Open file?", ["yes", "no"], ["y", "n"], default="n")
 70   if choice == 'y':
 71     call(["notepad", output_file])
 72     
 73 def generate_langs_graph(nbLangs=15, first_index=0, filename_var='langs'):
 74     d = dict_from_file(file_langs)
 75     data = first_n_items_dict(d, nbLangs, first_index)
 76     generate_graph(data, file_graph='graph_template.txt', cat=filename_var, label='lang')
 77     
 78 def generate_dates_graph():
 79     generate_graph(file_dates, file_graph='graph_template.txt', cat='dates', label='date')
 80     
 81 def results_to_wikitable(dict, header1='Langues', header2='Traductions ajoutées'):
 82   wikitext = '{{| class="wikitable sortable mw-collapsible"\n! {} !! {}'.format(header1, header2)
 83   for k in dict:
 84     wikitext += '\n|-\n| {} || {}'.format(k, str(dict[k]))
 85   wikitext += '\n|}'
 86   return wikitext
 87   
 88 def raw_results_to_wikitable(filename, dest=None, header1='Langues', header2='Traductions ajoutées'):
 89   if dest is None:
 90     print('A destination file must be provided')
 91     return
 92   input_file = folder + filename
 93   output_file = folder + dest
 94   with open(input_file, encoding='utf-8') as f:
 95     for line in f:
 96       dict_results = ast.literal_eval(line)
 97       break
 98   with open(output_file, 'w+', encoding='utf-8') as f:
 99     f.write(results_to_wikitable(dict_results, header1, header2))
100   print('Wikitable saved to {}'.format(output_file))
101   choice = pywikibot.inputChoice(u"Open file?", ["yes", "no"], ["y", "n"], default="n")
102   if choice == 'y':
103     call(["notepad", output_file])
104 
# "<key> : <number>" lines of the statistics files produced by the previous step.
_COUNT_LINE_RE = re.compile(r'(.+) : (\d+)')
# Fields of a line of stats-trads-diffs-to-check.txt, e.g.
# "title=lire;prev_rev_id=18770633;rev_id=18908602;contrib=Test;is_ip=false;date=2015-01".
# The title is not needed and is deliberately left unparsed; the contributor
# name is matched as a whole so that a ';' or '=' inside it does not shift or
# truncate the fields.
_DIFF_LINE_RE = re.compile(
    r';prev_rev_id=(?P<from_rev>\d+);rev_id=(?P<to_rev>\d+)'
    r';contrib=(?P<contrib>.*);is_ip=(?P<is_ip>[^;]*);date=(?P<date>[^;]*)$'
)
# Text of an added line in the HTML diff returned by the wiki.
_ADDED_LINE_RE = re.compile(
    r'<td class="diff-addedline"><div>(?:.+<ins class="diffchange diffchange-inline">)?(.+)(?:</ins>)?</div></td>'
)
# Language code of a {{trad}}, {{trad+}}, {{trad-}} or {{trad--}} template.
_TRAD_CODE_RE = re.compile(r'{{trad[+-]{0,2}\|([^|]+)\|')


def _load_counts(filename):
    '''Reads the "<key> : <number>" lines of folder + filename into a dict'''
    counts = {}
    with open(folder + filename, encoding='utf-8') as f:
        for line in f:
            match = _COUNT_LINE_RE.search(line)
            if match is None:
                # Headers and blank lines carry no count.
                continue
            counts[match[1]] = int(match[2])
    return counts


def _save_text(filename, text):
    '''Writes text to folder + filename, replacing any previous content'''
    with open(folder + filename, 'w', encoding='utf-8') as f:
        f.write(text)


def count_trads_per_diff():
    '''
    Generates statistics about added translations when the edit summaries are
    truncated and the diffs have to be analysed

    The counts already obtained from the edit summaries (stats-trads-*.txt)
    are loaded, completed with the translations found in every diff listed in
    stats-trads-diffs-to-check.txt, then written to the *-after-diffs.txt
    files. A short summary goes to stats-trads-res-after-diffs.txt.
    '''
    stats_langs = _load_counts('stats-trads-langs.txt')
    stats_dates = _load_counts('stats-trads-dates.txt')
    stats_contribs = _load_counts('stats-trads-contributors.txt')
    stats_ips = _load_counts('stats-trads-ips.txt')

    cpt = 0
    cpt_trads = 0
    count_contributors = 0
    count_ips = 0

    with open(folder + 'stats-trads-diffs-to-check.txt', encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            fields = _DIFF_LINE_RE.search(stripped)
            if fields is None:
                if stripped:
                    print('LINE NOT UNDERSTOOD (and ignored) - ' + stripped)
                continue
            contrib = fields['contrib']
            is_ip = fields['is_ip'] == 'true'
            date = fields['date'].strip()
            diff_html = site.compare(old=int(fields['from_rev']), diff=int(fields['to_rev']))

            codes = []
            for added_text in _ADDED_LINE_RE.findall(diff_html):
                codes += _TRAD_CODE_RE.findall(added_text)

            for code in codes:
                if code in languages:
                    lang_name = languages[code]
                elif code in languages['redirects']:
                    lang_name = languages[languages['redirects'][code]]
                else:
                    # Language codes missing from the language list are ignored
                    # (a single case as of 2018-09-01: an addition in zh-tc, a
                    # code that has been removed since).
                    print('CODE ' + code + ' NOT FOUND (and translation ignored) - ' + stripped)
                    continue
                if lang_name:
                    stats_langs[lang_name] = stats_langs.get(lang_name, 0) + 1
                if is_ip:
                    stats_ips[contrib] = stats_ips.get(contrib, 0) + 1
                    count_ips += 1
                else:
                    stats_contribs[contrib] = stats_contribs.get(contrib, 0) + 1
                    count_contributors += 1
                stats_dates[date] = stats_dates.get(date, 0) + 1
                cpt_trads += 1

            cpt += 1
            if cpt % 100 == 0:
                print(str(cpt) + " diffs traites (" + str(cpt_trads) + " traductions)")

    print(stats_dates)
    print(stats_langs)
    print(stats_contribs)
    print(str(stats_ips)[:1000] + '...')  # The list of IPs is too long, so it is truncated

    res = "Résultats des stats sur les résumés d'édition tronqués (ajout de traductions par lots) :\n"
    res += "Traductions ajoutées : " + str(cpt_trads) + "\n"
    res += "Traductions ajoutées par des utilisateurs inscrits : " + str(count_contributors) + "\n"
    res += "Traductions ajoutées par des utilisateurs non inscrits : " + str(count_ips) + "\n"
    _save_text('stats-trads-res-after-diffs.txt', res)

    # The dicts are stored as Python literals; they are read back with
    # ast.literal_eval by the table and graph generators.
    _save_text(file_langs, str(stats_langs))
    _save_text(file_dates, str(stats_dates))
    _save_text(file_contributors, str(stats_contribs))
    _save_text(file_ips, str(stats_ips))
222     
if __name__ == '__main__':
    started_at = time.time()
    count_trads_per_diff()
    print("--- %s seconds ---" % (time.time() - started_at))
    # The following functions create the files and offer to open each of them
    # (with notepad, see above) so that the result can be copied and pasted
    # straight to the wiki.
    wikitable_jobs = (
        (file_dates, 'wikitable-dates.txt', 'Mois'),
        (file_langs, 'wikitable-langs.txt', 'Langue'),
        (file_ips, 'wikitable-ips.txt', 'Utilisateur non enregistré'),
        (file_contributors, 'wikitable-contributors.txt', 'Utilisateur enregistré'),
    )
    for source_file, target_file, first_header in wikitable_jobs:
        raw_results_to_wikitable(source_file, dest=target_file, header1=first_header, header2='Traductions ajoutées')
    generate_langs_graph()
    generate_langs_graph(15, 15, filename_var='langs2')
    generate_dates_graph()

Ce script a pris moins de 10 minutes à s’exécuter (Windows 10, processeur quadricœur à 2,3 GHz, 8 Go de RAM).

Il fait référence au fichier graph_template.txt suivant pour la génération des graphiques :

{{#tag:graph|
{
  "version": 4,
  "width": 1000,
  "height": 200,
  "padding": {"top": 20, "left": 65, "bottom": 60, "right": 10},
 
  "data": [
    {
      "name": "table",
      "values": [
__TO_REPLACE__
      ]
    }
  ],
 
  "signals": [
    {
      "name": "tooltip",
      "init": {},
      "streams": [
        {"type": "rect:mouseover", "expr": "datum"},
        {"type": "rect:mouseout", "expr": "{}"}
      ]
    }
  ],
 
  "predicates": [
    {
      "name": "tooltip", "type": "==",
      "operands": [{"signal": "tooltip._id"}, {"arg": "id"}]
    }
  ],
 
  "scales": [
    { "name": "xscale", "type": "ordinal", "range": "width",
      "domain": {"data": "table", "field": "__LABEL__"} },
    { "name": "yscale", "type": "linear", "range": "height",
      "domain": {"data": "table", "field": "amount"} }
  ],
 
  "axes": [
    { "type": "x", "scale": "xscale"__OBLIQUE_LABELS__},
    { "type": "y", "scale": "yscale" }
  ],
 
  "marks": [
    {
      "type": "rect",
      "from": {"data":"table"},
      "properties": {
        "enter": {
          "x": {"scale": "xscale", "field": "__LABEL__"},
          "width": {"scale": "xscale", "band": true, "offset": -1},
          "y": {"scale": "yscale", "field": "amount"},
          "y2": {"field": {"group": "height"} }
        },
        "update": { "fill": {"value": "steelblue"} },
        "hover": { "fill": {"value": "red"} }
      }
    },
    {
      "type": "text",
      "properties": {
        "enter": {
          "align": {"value": "center"},
          "fill": {"value": "#333"}
        },
        "update": {
          "x": {"scale": "xscale", "signal": "tooltip.__LABEL__"},
          "dx": {"scale": "xscale", "band": true, "mult": 0.5},
          "y": {"scale": "yscale", "signal": "tooltip.amount", "offset": -5},
          "text": {"signal": "tooltip.amount"},
          "fillOpacity": {
            "rule": [
              {
                "predicate": {"name": "tooltip", "id": {"value": null} },
                "value": 0
              },
              {"value": 1}
            ]
          }
        }
      }
    }
  ]
}
| mode=interactive }}