Passed
Branch dev (8d7b92)
by Dispositif
03:12
created

OuvrageEditWorker::initialize()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 12
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 8
nc 1
nop 0
dl 0
loc 12
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Application;
11
12
use App\Domain\RefGoogleBook;
13
use App\Domain\Utils\WikiTextUtil;
14
use App\Infrastructure\DbAdapter;
15
use App\Infrastructure\Memory;
16
use App\Infrastructure\ServiceFactory;
17
use Exception;
18
use LogicException;
19
use Mediawiki\Api\UsageException;
20
use Mediawiki\DataModel\EditInfo;
21
use Normalizer;
22
use Throwable;
23
24
/**
25
 * Class OuvrageEditWorker
26
 *
27
 * @package App\Application\Examples
28
 */
29
class OuvrageEditWorker
30
{
31
    /** Task label written into every edit summary. */
    public const TASK_NAME = 'Amélioration bibliographique';

    /** When true, a message listing the errors left to fix is posted on the article talk page. */
    public const EDIT_SIGNALEMENT = true;

    /** Limit handed to the database adapter when fetching the rows to edit. */
    public const CITATION_LIMIT = 150;

    /** Pause, in seconds, after a successful edit made with the bot flag. */
    public const DELAY_BOTFLAG_SECONDS = 30;

    /** Pause, in seconds, after a successful edit made without the bot flag. */
    public const DELAY_NOBOT_IN_SECONDS = 120;

    /** Path of the wiki-text template used for the talk-page error message. */
    public const ERROR_MSG_TEMPLATE = __DIR__.'/templates/message_errors.wiki';

    /** @var bool Print extra progress output (memory usage, sleeps, edit result) */
    public $verbose = false;

    /** @var DbAdapter */
    private $db;

    /** @var WikiBotConfig */
    private $bot;

    /** @var mixed Wiki API client obtained from ServiceFactory::wikiApi() */
    private $wiki;

    /** @var string|null Wiki-text of the article currently processed */
    private $wikiText;

    /** @var array|null 'modifs' value of every citation replaced in the current article */
    private $citationSummary;

    /** @var string 'version' value of the last citation replaced */
    private $citationVersion = '';

    /** @var array Error snippets to report, indexed by page title */
    private $errorWarning = [];

    /** @var array Tags of the changes a human should double-check */
    private $importantSummary = [];

    /** @var int|null Number of citations replaced in the current article */
    private $nbRows;

    /** @var bool Minor flag of the edit */
    private $minorFlag = true;

    /** @var bool Bot flag of the edit */
    private $botFlag = true;

    /** @var Memory */
    private $memory;

    /** @var RefGoogleBook */
    private $refGooConverter;

    /** @var DataAnalysis|null */
    private $dataAnalysis;
72
73
    /**
     * Stores the injected collaborators, then logs in to the wiki right away.
     *
     * @param DbAdapter         $dbAdapter
     * @param WikiBotConfig     $bot
     * @param Memory            $memory
     * @param RefGoogleBook     $refGoogleBook
     * @param DataAnalysis|null $dataAnalysis optional analyser run on each article text
     *
     * @throws UsageException
     */
    public function __construct(
        DbAdapter $dbAdapter,
        WikiBotConfig $bot,
        Memory $memory,
        RefGoogleBook $refGoogleBook,
        ?DataAnalysis $dataAnalysis = null
    ) {
        // A null analyser simply leaves the property at its null default.
        $this->dataAnalysis = $dataAnalysis;
        $this->refGooConverter = $refGoogleBook;
        $this->memory = $memory;
        $this->bot = $bot;
        $this->db = $dbAdapter;

        $this->wikiLogin(true);
    }
90
91
    /**
     * Obtains the wiki API client from the service factory and keeps it
     * for all later page operations.
     *
     * @param bool $forceLogin forwarded as-is to ServiceFactory::wikiApi()
     *
     * @throws UsageException
     */
    private function wikiLogin(bool $forceLogin = false): void
    {
        $this->wiki = ServiceFactory::wikiApi($forceLogin);
    }
100
101
    /**
     * Endless worker loop: prints a separator and the current date, optionally
     * the memory usage, then processes one article.
     *
     * The loop has no exit condition of its own; it only ends when
     * pageProcess() (or something it calls) throws.
     */
    public function run(): void
    {
        for (;;) {
            echo "\n-------------------------------------\n\n", date("Y-m-d H:i"), "\n";

            if ($this->verbose) {
                $this->memory->echoMemory(true);
            }

            $this->pageProcess();
        }
    }
112
113
    /**
     * Processes one article from the edit queue.
     *
     * Steps: reset the per-article state, fetch the queued citation rows,
     * load the wiki page, apply the skip rules (last editor is the bot or its
     * owner, namespace other than 0, empty text, restricted edition, edit in
     * the last 15 minutes, labelled article), replace every citation, convert
     * Google Books references, save the page, mark the rows as edited and
     * optionally post the error report on the talk page.
     *
     * @return bool false when the article was skipped or the edit failed;
     *              otherwise the value returned by WikiPageAction::editPage()
     *
     * @throws Exception when the queue is empty or the wiki rejects the CSRF token
     */
    private function pageProcess()
    {
        $this->initialize();

        // Rows to edit, JSON-encoded; the first row's 'page' is used as the article title.
        $json = $this->db->getAllRowsToEdit(self::CITATION_LIMIT);
        $data = json_decode($json, true);

        if (empty($data)) {
            echo "SKIP : no row to process\n";
            throw new Exception('no row to process');
        }

        try {
            $title = $data[0]['page'];
            echo "$title \n";
            $page = new WikiPageAction($this->wiki, $title);
        } catch (Exception $e) {
            echo "*** WikiPageAction error : $title \n";
            sleep(20);

            return false;
        }

        // HACK: do not edit again a page whose last editor is the bot or its owner.
        if (in_array($page->getLastEditor(), [getenv('BOT_NAME'), getenv('BOT_OWNER')])) {
            echo "SKIP : édité recemment par bot/dresseur.\n";
            $this->db->skipArticle($title);

            return false;
        }
        if ($page->getNs() !== 0) {
            echo "SKIP : page n'est pas dans Main (ns 0)\n";
            $this->db->skipArticle($title);

            return false;
        }
        $this->wikiText = $page->getText();

        if (empty($this->wikiText)) {
            return false;
        }
        if (WikiBotConfig::isEditionRestricted($this->wikiText)) {
            echo "SKIP : protection/3R.\n";
            $this->db->skipArticle($title);

            return false;
        }

        // Recently edited page: try again later (the article is NOT flagged as skipped).
        if ($this->bot->minutesSinceLastEdit($title) < 15) {
            echo "SKIP : édition humaine dans les dernières 15 minutes.\n";

            return false;
        }

        // Skip articles carrying the "En-tête label" header template (AdQ / BA).
        if (preg_match('#{{ ?En-tête label#i', $this->wikiText) > 0) {
            echo "SKIP : AdQ ou BA.\n";
            $this->db->skipArticle($title);

            return false;
        }

        // External data analysis (not needed for this process): best effort, failures are ignored.
        try {
            if (null !== $this->dataAnalysis) {
                $this->dataAnalysis->process($this->wikiText, $title);
            }
        } catch (Throwable $e) {
            unset($e);
        }

        echo sprintf(">> %s rows to process\n", count($data));

        // Replace every citation of the article.
        $changed = false;
        foreach ($data as $dat) {
            // Temporary hack: give up on the whole article when one row has not
            // been fully completed (missing or outdated optimisation).
            if (empty($dat['opti']) || empty($dat['optidate']) || $dat['optidate'] < DbAdapter::OPTI_VALID_DATE) {
                echo "SKIP : Complètement incomplet de l'article \n";

                return false;
            }
            if ($this->dataProcess($dat)) {
                $changed = true;
            }
        }
        if (!$changed) {
            echo "Rien à changer...\n\n";
            $this->db->skipArticle($title);

            return false;
        }

        // Conversion of <ref>http//books.google : best effort, the message is printed on failure.
        try {
            $this->wikiText = $this->refGooConverter->process($this->wikiText);
        } catch (Throwable $e) {
            echo $e->getMessage();
            unset($e);
        }

        // EDIT THE PAGE
        if (!$this->wikiText) {
            return false;
        }

        $miniSummary = $this->generateSummary();
        echo $miniSummary."\n\n";

        // Pause before saving; the printed value now matches the real delay.
        $preEditDelay = 30;
        if ($this->verbose) {
            echo "sleep $preEditDelay...\n";
        }
        sleep($preEditDelay);

        try {
            // NOTE(review): the 4th EditInfo argument (5) is presumably the maxlag value — confirm.
            $editInfo = new EditInfo($miniSummary, $this->minorFlag, $this->botFlag, 5);
            $success = $page->editPage(Normalizer::normalize($this->wikiText), $editInfo);
        } catch (Throwable $e) {
            // An invalid CSRF token is fatal for the worker: rethrow to stop the loop.
            if (strpos($e->getMessage(), 'Invalid CSRF token') !== false) {
                echo "*** Invalid CSRF token \n";
                throw new Exception('Invalid CSRF token');
            }

            dump($e); // todo log
            sleep(60);

            return false;
        }

        if ($this->verbose) {
            echo ($success) ? "Ok\n" : "***** Erreur edit\n";
        }

        if ($success) {
            // Update the database: every row of the article is now edited.
            foreach ($data as $dat) {
                $this->db->sendEditedData(['id' => $dat['id']]);
            }

            // Talk-page report is best effort: a failure must not undo the edit bookkeeping.
            try {
                if (self::EDIT_SIGNALEMENT) {
                    $this->sendErrorMessage($data);
                }
            } catch (Throwable $e) {
                dump($e);
                unset($e);
            }

            // Longer pause after an edit made without the bot flag.
            $delay = $this->botFlag ? self::DELAY_BOTFLAG_SECONDS : self::DELAY_NOBOT_IN_SECONDS;
            if ($this->verbose) {
                echo "sleep ".$delay."\n";
            }
            sleep($delay);
        }

        return $success;
    }
281
282
    /**
     * Resets the per-article state before a new page is processed, then asks
     * the bot configuration to check for a stop request on its talk page.
     *
     * @throws UsageException
     */
    private function initialize(): void
    {
        // Text and counters of the previous article
        $this->wikiText = null;
        $this->nbRows = 0;

        // Summary material
        $this->citationSummary = [];
        $this->importantSummary = [];
        $this->errorWarning = [];

        // Edit flags back to their defaults
        $this->minorFlag = true;
        $this->botFlag = true;

        $this->bot->checkStopOnTalkpage(true);
    }
298
299
    /**
     * Replaces one citation template in the current wiki-text.
     *
     * The row is flagged as skipped in the database when the raw template
     * contains an HTML comment or cannot be found in the text. The error and
     * summary checks run before the replacement, so their effects (bot flag,
     * summary tags, error list) remain even if the replacement then fails.
     *
     * @param array $data queue row; reads 'raw', 'opti', 'modifs', 'version', 'id', 'major'
     *
     * @return bool true when the wiki-text was actually modified
     */
    private function dataProcess(array $data): bool
    {
        $rawTemplate = $data['raw'];
        $optimizedTemplate = $data['opti'];

        dump($rawTemplate, $optimizedTemplate, $data['modifs'], $data['version']);

        if (WikiTextUtil::isCommented($rawTemplate)) {
            echo "SKIP: template avec commentaire HTML\n";
            $this->db->skipRow((int) $data['id']);

            return false;
        }

        if (false === mb_strpos($this->wikiText, $rawTemplate)) {
            echo "String non trouvée. \n\n";
            $this->db->skipRow((int) $data['id']);

            return false;
        }

        $this->checkErrorWarning($data);

        // Swap the raw template for its completed version.
        $replacedText = WikiPageAction::replaceTemplateInText($this->wikiText, $rawTemplate, $optimizedTemplate);
        if (!$replacedText || $replacedText === $this->wikiText) {
            echo "newText error\n";

            return false;
        }

        $this->wikiText = $replacedText;
        // One major change is enough to drop the minor flag for the whole edit.
        if ('1' === $data['major']) {
            $this->minorFlag = false;
        }
        $this->citationVersion = $data['version'];
        $this->citationSummary[] = $data['modifs'];
        ++$this->nbRows;

        return true;
    }
339
340
    /**
     * Looks for human-error markers in the completed template and for notable
     * changes in the modification list.
     *
     * Depending on what is found it records error snippets for the talk-page
     * report, adds tags to the edit summary and may drop the bot flag.
     *
     * @param array $data queue row; reads 'opti', 'modifs' and 'page'
     *
     * @throws LogicException when 'opti' is missing or null
     */
    private function checkErrorWarning(array $data): void
    {
        if (!isset($data['opti'])) {
            throw new LogicException('Opti NULL');
        }

        $optimized = $data['opti'];
        $page = $data['page'];

        // Unknown parameter, unnamed value or explicit error comment.
        // The bot flag is kept here (the assignment dropping it is disabled).
        $unknownParamPattern =
            "#\|[^|]+<!-- ?(PARAMETRE [^>]+ N'EXISTE PAS|VALEUR SANS NOM DE PARAMETRE|ERREUR [^>]+) ?-->#";
        if (preg_match_all($unknownParamPattern, $optimized, $found) > 0) {
            foreach ($found[0] as $snippet) {
                $this->addErrorWarning($page, $snippet);
            }
            $this->addSummaryTag('paramètre non corrigé');
        }

        // Invalid ISBN
        if (1 === preg_match("#isbn invalide ?=[^|}]+#i", $optimized, $found)) {
            $this->addErrorWarning($page, $found[0]);
            $this->botFlag = false;
            $this->addSummaryTag('ISBN invalide');
        }

        // Edits adding a substantial amount of data: no bot flag.
        if (1 === preg_match('#distinction des auteurs#', $data['modifs'])) {
            $this->botFlag = false;
            $this->addSummaryTag('distinction des auteurs');
        }
        // Predicted correct parameter name ("old => new")
        if (1 === preg_match('#[^,]+(=>|⇒)[^,]+#', $data['modifs'], $found)) {
            $this->botFlag = false;
            $this->addSummaryTag($found[0]);
        }
        if (1 === preg_match('#\+\+sous-titre#', $data['modifs'])) {
            $this->botFlag = false;
            $this->addSummaryTag('+sous-titre');
        }

        // Plain tags: pattern searched in 'modifs' => tag added to the summary.
        // (A '+langue' rule existed here but is disabled.)
        $plainTags = [
            '#\+lieu#' => '+lieu',
            '#tracking#' => 'tracking',
            '#présentation en ligne#' => '+présentation en ligne',
            '#distinction auteurs#' => 'distinction auteurs',
            '#\+lire en ligne#' => '+lire en ligne',
            '#\+lien #' => 'wikif',
            '#\+éditeur#' => 'éditeur',
        ];
        foreach ($plainTags as $pattern => $tag) {
            if (1 === preg_match($pattern, $data['modifs'])) {
                $this->addSummaryTag($tag);
            }
        }

        // BnF credit only when data was added along with a BnF identifier.
        if (!empty($this->importantSummary) && 1 === preg_match('#BnF#i', $data['modifs'])) {
            $this->addSummaryTag('©BnF');
        }
    }
419
420
    /**
     * Records an error snippet for a page, ignoring snippets already recorded
     * for that page so the report contains no duplicates.
     *
     * @param string $page
     * @param string $text
     */
    private function addErrorWarning(string $page, string $text): void
    {
        $alreadyRecorded = isset($this->errorWarning[$page]) && in_array($text, $this->errorWarning[$page]);
        if ($alreadyRecorded) {
            return;
        }

        $this->errorWarning[$page][] = $text;
    }
432
433
    /**
     * Registers a tag describing a substantive or ambiguous modification.
     * Each tag is kept only once; tags replace the basic modification list
     * in the generated edit summary.
     *
     * @param string $tag
     */
    private function addSummaryTag(string $tag): void
    {
        // Strict comparison: tags are strings and must match exactly.
        if (in_array($tag, $this->importantSummary, true)) {
            return;
        }

        $this->importantSummary[] = $tag;
    }
444
445
    /**
     * Builds the edit summary of the current article.
     *
     * Layout: "<prefix> [<bot version>/<citation version>] <task> <rows> : <details>".
     * When tags to verify exist they replace the basic modification list and
     * "..." is appended; otherwise the summary is cut to 80 characters and
     * "…" is appended when something was cut.
     *
     * @return string
     */
    public function generateSummary(): string
    {
        $hasImportantTags = !empty($this->importantSummary);

        // "bot" with the bot flag, a star otherwise
        $prefix = $this->botFlag ? 'bot' : '☆'; //🧐 🤖
        // warning sign when errors will be reported
        if (!empty($this->errorWarning)) {
            $prefix .= ' ⚠️';
        }
        // Covid :)
        if ('20:00' === date('H:i')) {
            $prefix .= '🦠'; // 🏥
        }

        // Basic modifications, replaced by the list humans should verify when there is one
        $details = implode(' ', $this->citationSummary);
        if ($hasImportantTags) {
            $details = implode(', ', $this->importantSummary);
        }

        $botVersion = str_replace('v', '', $this->bot::getGitVersion());
        $citationVersion = str_replace(['v0.', 'v1.'], '', $this->citationVersion);

        $summary = sprintf(
            '%s [%s/%s] %s %s : %s',
            $prefix,
            $botVersion,
            $citationVersion,
            self::TASK_NAME,
            $this->nbRows,
            $details
        );

        if ($hasImportantTags) {
            return $summary.'...';
        }

        // Nothing important to verify: shrink a long summary.
        $fullByteLength = strlen($summary);
        $summary = mb_substr($summary, 0, 80);
        if ($fullByteLength > strlen($summary)) {
            $summary .= '…';
        }

        return $summary;
    }
490
491
    /**
     * Posts, on the talk page of the article, the list of errors recorded for
     * it while processing the citations.
     *
     * @param array $rows Collection of citations; only the 'page' of the first row is used
     *
     * @return bool false when there is nothing to report, when the message
     *              template cannot be read or when the talk-page edit throws;
     *              otherwise the result of addToBottomOrCreatePage()
     */
    private function sendErrorMessage(array $rows): bool
    {
        if (!isset($rows[0]) || empty($this->errorWarning[$rows[0]['page']])) {
            return false;
        }
        $mainTitle = $rows[0]['page'];

        // Fail fast: without the template there is no message to post. Without this
        // check a read failure would flow into str_replace() as `false`.
        $template = file_get_contents(self::ERROR_MSG_TEMPLATE);
        if (false === $template) {
            echo "*** Error message template not readable \n";

            return false;
        }

        if (!$this->botFlag) {
            echo "** Send Error Message on talk page. Wait 3... \n";
        }
        sleep(3);

        // Format the wiki list of errors.
        $errorList = '';
        foreach ($this->errorWarning[$mainTitle] as $error) {
            $errorList .= sprintf("* <span style=\"background:#FCDFE8\"><nowiki>%s</nowiki></span> \n", $error);
        }

        // Link to the diff, only when the last revision was made by the bot. Best effort.
        $diffStr = '';
        try {
            $main = new WikiPageAction($this->wiki, $mainTitle);
            $lastRevision = $main->getLastRevision();
            if (getenv('BOT_NAME') === $lastRevision->getUser()) {
                $diffStr = sprintf(
                    ' ([https://fr.wikipedia.org/w/index.php?title=%s&diff=%s diff])',
                    // URL-encode the title: characters such as & ? # ] would break the link.
                    rawurlencode(str_replace(' ', '_', $mainTitle)),
                    $lastRevision->getId()
                );
            }
        } catch (Throwable $e) {
            unset($e);
        }

        $errorCategoryName = sprintf('Signalement %s', getenv('BOT_NAME'));

        // Placeholders are replaced one after the other, in this order.
        $errorMessage = str_replace(
            ['##CATEGORY##', '##ERROR LIST##', '##ARTICLE##', '##DIFF##'],
            [$errorCategoryName, trim($errorList), $mainTitle, $diffStr],
            $template
        );

        // Edit the wiki talk page.
        try {
            $talkPage = new WikiPageAction($this->wiki, 'Discussion:'.$mainTitle);
            $editInfo = new EditInfo('Signalement erreur {ouvrage}', false, false, 5);

            return $talkPage->addToBottomOrCreatePage($errorMessage, $editInfo);
        } catch (Throwable $e) {
            dump($e);

            return false;
        }
    }
549
550
}
551