Test Failed
Push — master ( 3a8da6...f575e4 )
by Dispositif
05:38
created

OuvrageEditWorker::isTextCreatingError()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 4
ccs 0
cts 0
cp 0
crap 2
rs 10
1
<?php
2
3
/*
4
 * This file is part of dispositif/wikibot application (@github)
5
 * 2019/2020 © Philippe/Irønie  <[email protected]>
6
 * For the full copyright and MIT license information, view the license file.
7
 */
8
9
declare(strict_types=1);
10
11
namespace App\Application;
12
13
use App\Domain\Utils\WikiTextUtil;
14
use App\Infrastructure\DbAdapter;
15
use App\Infrastructure\Memory;
16
use App\Infrastructure\ServiceFactory;
17
use Codedungeon\PHPCliColors\Color;
18
use Exception;
19
use LogicException;
20
use Mediawiki\Api\UsageException;
21
use Normalizer;
22
use Psr\Log\LoggerInterface;
23
use Psr\Log\NullLogger;
24
use Throwable;
25
26
/**
27
 * Class OuvrageEditWorker
28
 *
29
 * @package App\Application
30
 */
31
class OuvrageEditWorker
32
{
33
    use EditSummaryTrait, TalkPageEditTrait;
34
35
    const TASK_NAME = '📗 Amélioration bibliographique'; // 📖📔📘📗
36
    /**
37
     * Whether to post a message on the talk page ("PD") reporting the errors to resolve.
38
     */
39
    const EDIT_SIGNALEMENT = true;
40
41
    const CITATION_LIMIT                 = 150;
42
    const DELAY_BOTFLAG_SECONDS          = 60;
43
    const DELAY_NO_BOTFLAG_SECONDS       = 60;
44
    const DELAY_MINUTES_AFTER_HUMAN_EDIT = 10;
45
    const ERROR_MSG_TEMPLATE             = __DIR__.'/templates/message_errors.wiki';
46
47
    private $db;
48
    private $bot;
49
    private $wikiText;
50
51
    private $citationSummary;
52
    private $errorWarning = [];
53
    public $importantSummary = [];
54
55
    private $nbRows;
56
57
    // Minor flag on edit
58
    private $minorFlag = true;
59
    // Bot flag on edit
60
    private $botFlag = true;
61
62
    /**
63
     * @var Memory
64
     */
65
    private $memory;
66
67
    /**
68
     * @var LoggerInterface|NullLogger
69
     */
70
    private $log;
71
    /**
72
     * @var mixed
73
     */
74
    private $citationVersion;
75
76
    /**
     * Store the collaborators used by the worker.
     *
     * @param DbAdapter            $dbAdapter adapter used to fetch queued rows and record results
     * @param WikiBotConfig        $bot       bot configuration/helper (stop check, last-edit delay)
     * @param Memory               $memory    helper whose getMemory() output is logged on each loop
     * @param LoggerInterface|null $log       optional logger; a NullLogger is used when omitted
     */
    public function __construct(
        DbAdapter $dbAdapter,
        WikiBotConfig $bot,
        Memory $memory,
        ?LoggerInterface $log = null
    ) {
        $this->log = ($log === null) ? new NullLogger() : $log;
        $this->memory = $memory;
        $this->bot = $bot;
        $this->db = $dbAdapter;
    }
87
88
    /**
     * Endless worker loop.
     *
     * Each iteration prints a separator and a timestamp, logs the value returned by
     * Memory::getMemory(true), processes one page and then pauses for two seconds.
     * The loop has no exit condition of its own: it only stops when an exception
     * escapes from pageProcess().
     *
     * @throws Exception
     */
    public function run(): void
    {
        for (;;) {
            echo "\n-------------------------------------\n\n", date("Y-m-d H:i:s"), " ";
            $this->log->info($this->memory->getMemory(true));
            $this->pageProcess();
            sleep(2); // safety pause so that the endless loop never spins tightly
        }
    }
101
102
    /**
     * Process one article: load its queued rows, apply every replacement to the
     * wikitext, publish the edit and record the result in the database.
     *
     * The article is skipped (return false) when the page cannot be loaded, has no
     * last revision, was last edited by the bot itself, is outside namespace 0, has
     * empty text, is edition-restricted, was edited too recently, has rows whose
     * optimisation is incomplete, or when no row changed the text.
     *
     * @return bool false when the article was skipped or the edit failed; otherwise
     *              the value returned by editPage() (presumably a boolean - confirm)
     * @throws UsageException
     * @throws Exception when there is no row to process or the CSRF token is invalid
     */
    private function pageProcess()
    {
        $this->initialize();

        // Fetch the queued rows (at most CITATION_LIMIT) of the next article to edit.
        $json = $this->db->getAllRowsToEdit(self::CITATION_LIMIT);
        $data = json_decode($json, true);

        if (empty($data)) {
            $this->log->alert("SKIP : OuvrageEditWorker / getAllRowsToEdit() no row to process\n");
            sleep(60);
            throw new Exception('no row to process');
        }

        // Assigned outside the try block so that it is always defined in the catch below.
        $title = $data[0]['page'];

        try {
            echo Color::BG_CYAN.$title.Color::NORMAL." \n";
            $page = ServiceFactory::wikiPageAction($title, false); // , true ?
        } catch (Exception $e) {
            $this->log->warning("*** WikiPageAction error : ".$title." \n");
            sleep(20);

            return false;
        }

        // Deleted page?
        if ($page->getLastRevision() === null) {
            $this->log->warning("SKIP : page supprimée !\n");
            $this->db->deleteArticle($title);

            return false;
        }

        // HACK: strict comparison, so that an unset BOT_NAME (getenv() === false)
        // can never loosely match a null/empty last editor and skip every page.
        if ($page->getLastEditor() === getenv('BOT_NAME')) {
            $this->log->notice("SKIP : édité recemment par bot.\n");
            $this->db->skipArticle($title);

            return false;
        }
        // todo include a sandbox page ?
        if ($page->getNs() !== 0) {
            $this->log->notice("SKIP : page n'est pas dans Main (ns 0)\n");
            $this->db->skipArticle($title);

            return false;
        }
        $this->wikiText = $page->getText();

        if (empty($this->wikiText)) {
            $this->log->warning("SKIP : this->wikitext vide\n");
            $this->db->skipArticle($title);

            return false;
        }

        // Labelled articles ("AdQ" / "BA" header templates) are edited without the bot flag.
        if (preg_match('#{{ ?En-tête label ?\| ?AdQ#i', $this->wikiText)) {
            $this->db->setLabel($title, 2);
            $this->log->warning("Article de Qualité !\n");
            $this->botFlag = false;
        }
        if (preg_match('#{{ ?En-tête label ?\| ?BA#i', $this->wikiText)) {
            $this->db->setLabel($title, 1);
            $this->botFlag = false;
            $this->log->warning("Bon article !!\n");
        }

        if (WikiBotConfig::isEditionRestricted($this->wikiText)) {
            // TODO handle a new pass in X days
            $this->log->info("SKIP : protection/3R/travaux.\n");
            $this->db->skipArticle($title);

            return false;
        }

        // Same constant as the message and the sleep below (was a hard-coded 10).
        if ($this->bot->minutesSinceLastEdit($title) < self::DELAY_MINUTES_AFTER_HUMAN_EDIT) {
            // TODO handle a new pass in X days
            $this->log->notice(
                sprintf(
                    "SKIP : édition humaine dans les dernières %s minutes.\n",
                    self::DELAY_MINUTES_AFTER_HUMAN_EDIT
                )
            );
            sleep(60 * self::DELAY_MINUTES_AFTER_HUMAN_EDIT); // hack: waiting cycles

            return false;
        }

        $this->log->info(sprintf("%s rows to process\n", count($data)));

        // Apply every queued row to the in-memory wikitext.
        $changed = false;
        foreach ($data as $dat) {
            // Temporary hack: avoid articles whose optimisation process is incomplete.
            if (empty($dat['opti']) || empty($dat['optidate']) || $dat['optidate'] < DbAdapter::OPTI_VALID_DATE) {
                $this->log->notice("SKIP : Amélioration incomplet de l'article. sleep 10min");
                sleep(600);

                return false;
            }
            if ($this->dataProcess($dat)) {
                $changed = true;
            }
        }
        if (!$changed) {
            $this->log->debug("Rien à changer...");
            $this->db->skipArticle($title);

            return false;
        }

        // EDIT THE PAGE
        if (!$this->wikiText) {
            return false;
        }

        $miniSummary = $this->generateSummary();
        $this->log->notice($miniSummary);
        $this->log->debug("sleep 2...");
        sleep(2); // todo ???

        // Normalizer::normalize() returns false on failure: never send that to editPage().
        $normalizedText = Normalizer::normalize($this->wikiText);
        if (!is_string($normalizedText) || $normalizedText === '') {
            $this->log->warning('SKIP : Normalizer::normalize() failed on the new wikitext');
            sleep(10);

            return false;
        }

        try {
            $editInfo = ServiceFactory::editInfo($miniSummary, $this->minorFlag, $this->botFlag, 5);
            $success = $page->editPage($normalizedText, $editInfo);
        } catch (Throwable $e) {
            // An invalid CSRF token is fatal for the worker: propagate it.
            if (strpos($e->getMessage(), 'Invalid CSRF token') !== false) {
                $this->log->alert("*** Invalid CSRF token \n");
                throw new Exception('Invalid CSRF token');
            }
            $this->log->warning('Exception in editPage() '.$e->getMessage());
            sleep(10);

            return false;
        }

        $this->log->info($success ? "Edition Ok\n" : "***** Edition KO !\n");

        if ($success) {
            // Mark every processed row as edited in the database.
            foreach ($data as $dat) {
                $this->db->sendEditedData(['id' => $dat['id']]);
            }

            // Best effort: a failure while posting on the talk page must not undo the edit.
            try {
                if (self::EDIT_SIGNALEMENT && !empty($this->errorWarning[$title])) {
                    $this->sendOuvrageErrorsOnTalkPage($data, $this->log);
                }
            } catch (Throwable $e) {
                $this->log->warning('Exception in sendOuvrageErrorsOnTalkPage() '.$e->getMessage());
            }

            // Throttle before the next edit; the delay depends on the bot flag used.
            $delay = $this->botFlag ? self::DELAY_BOTFLAG_SECONDS : self::DELAY_NO_BOTFLAG_SECONDS;
            $this->log->debug("sleep ".$delay);
            sleep($delay);
        }

        return $success;
    }
280
281
    /**
     * Reset the per-article state before a new page is processed, then run the
     * bot's talk-page stop check.
     *
     * NOTE(review): checkStopOnTalkpage(true) presumably aborts the run when a stop
     * request is found on the bot talk page - confirm against WikiBotConfig.
     *
     * @throws UsageException
     */
    private function initialize(): void
    {
        // Edit flags go back to their defaults.
        $this->minorFlag = true;
        $this->botFlag = true;

        // Per-article text, counter and accumulators are emptied.
        $this->wikiText = null;
        $this->nbRows = 0;
        $this->errorWarning = [];
        $this->citationSummary = [];
        $this->importantSummary = [];

        $this->bot->checkStopOnTalkpage(true);
    }
297
298
    /**
     * Apply one queued row to the current wikitext.
     *
     * The row is skipped in the database when its raw template is commented,
     * contains a problematic template, or cannot be found in the wikitext. On
     * success the wikitext, the minor flag, the citation version, the citation
     * summary and the row counter are updated.
     *
     * @param array $data row with the keys 'raw', 'opti', 'modifs', 'version', 'major', 'id'
     *                    (and 'page', read by checkErrorWarning())
     *
     * @return bool true when the wikitext was changed by this row
     * @throws Exception
     */
    private function dataProcess(array $data): bool
    {
        $rawTemplate = $data['raw'];
        $optimizedTemplate = $data['opti'];

        $this->log->debug('origin: '.$rawTemplate);
        $this->log->debug('completed: '.$optimizedTemplate);
        $this->log->debug('modifs: '.$data['modifs']);
        $this->log->debug('version: '.$data['version']);

        $isUnsafe = WikiTextUtil::isCommented($rawTemplate) || $this->isTextCreatingError($rawTemplate);
        if ($isUnsafe) {
            $this->log->notice("SKIP: template avec commentaire HTML ou modèle problématique.");
            $this->db->skipRow((int) $data['id']);

            return false;
        }

        // The raw template must still be present verbatim in the page.
        if (mb_strpos($this->wikiText, $rawTemplate) === false) {
            $this->log->notice("String non trouvée.");
            $this->db->skipRow((int) $data['id']);

            return false;
        }

        $this->checkErrorWarning($data);

        // Swap the raw template for its completed version.
        $updatedText = WikiPageAction::replaceTemplateInText($this->wikiText, $rawTemplate, $optimizedTemplate);

        $hasReplaced = $updatedText && $updatedText !== $this->wikiText;
        if (!$hasReplaced) {
            $this->log->warning("newText error");

            return false;
        }

        $this->wikiText = $updatedText;
        if ('1' === $data['major']) {
            // A major change removes the minor flag for the whole edit.
            $this->minorFlag = false;
        }
        $this->citationVersion = $data['version'];
        $this->citationSummary[] = $data['modifs'];
        ++$this->nbRows;

        return true;
    }
347
348
    /**
     * todo extract
     * Inspect one row for human errors and noteworthy changes.
     *
     * Error comments and invalid ISBN found in the completed template ('opti') are
     * recorded for the page through addErrorWarning(). The change description
     * ('modifs') is scanned to add summary tags and, for the most significant
     * changes, to drop the bot flag.
     *
     * @param array $data row with the keys 'opti', 'modifs' and 'page'
     *
     * @throws LogicException when 'opti' is missing or null
     * @throws Exception
     */
    private function checkErrorWarning(array $data): void
    {
        if (!isset($data['opti'])) {
            throw new LogicException('Opti NULL');
        }

        // A null/missing 'modifs' would raise a TypeError in preg_match() under strict_types.
        $modifs = (string) ($data['modifs'] ?? '');

        // Unknown parameter / unnamed value / error comments left in the template.
        $errorCommentPattern
            = "#\|[^|]+<!-- ?(PARAMETRE [^>]+ N'EXISTE PAS|VALEUR SANS NOM DE PARAMETRE|ERREUR [^>]+) ?-->#";
        if (preg_match_all($errorCommentPattern, $data['opti'], $matches) > 0) {
            foreach ($matches[0] as $line) {
                $this->addErrorWarning($data['page'], $line);
            }
            $this->addSummaryTag('paramètre non corrigé');
        }

        // Invalid ISBN
        if (preg_match("#isbn invalide ?=[^|}]+#i", $data['opti'], $matches) > 0) {
            $this->addErrorWarning($data['page'], $matches[0]);
            $this->botFlag = false;
            $this->addSummaryTag('ISBN invalide 💩');
        }

        // Edits adding a significant amount of data
        if (preg_match('#distinction des auteurs#', $modifs) > 0) {
            $this->botFlag = false;
            $this->addSummaryTag('distinction auteurs 🧠');
        }
        // Predicted parameter correction ("old => new"): the matched text is the tag itself.
        if (preg_match('#[^,]+(=>|⇒)[^,]+#', $modifs, $matches) > 0) {
            $this->botFlag = false;
            $this->addSummaryTag($matches[0]);
        }

        // Ordered list of [pattern searched in 'modifs', summary tag, drop the bot flag?].
        $tagRules = [
            ['#\+\+sous-titre#', '+sous-titre', true],
            ['#\+lieu#', '+lieu', false],
            ['#tracking#', 'tracking', false],
            ['#présentation en ligne#', '+présentation en ligne✨', false],
            ['#distinction auteurs#', 'distinction auteurs 🧠', false],
            ['#\+lire en ligne#', '+lire en ligne✨', false],
            ['#\+lien #', 'wikif', false],
            ['#\+éditeur#', 'éditeur', false],
        ];
        foreach ($tagRules as [$pattern, $tag, $dropBotFlag]) {
            if (preg_match($pattern, $modifs) > 0) {
                if ($dropBotFlag) {
                    $this->botFlag = false;
                }
                $this->addSummaryTag($tag);
            }
        }

        // Mention the BnF only when an important change was already recorded and
        // 'modifs' refers to the BnF.
        if (!empty($this->importantSummary) && preg_match('#BnF#i', $modifs) > 0) {
            $this->addSummaryTag('©[[BnF]]');
        }
    }
428
429
    /**
     * todo extract
     * Record an error text for a page, ignoring texts already recorded for that page.
     *
     * The duplicate check is strict: two different numeric-looking strings
     * (e.g. '1e3' and '1000') are no longer considered identical.
     *
     * @param string $page page title used as key of the error list
     * @param string $text error text to record
     */
    private function addErrorWarning(string $page, string $text): void
    {
        $recorded = $this->errorWarning[$page] ?? [];

        if (!in_array($text, $recorded, true)) {
            $this->errorWarning[$page][] = $text;
        }
    }
442
443
    /**
     * Tell whether the text contains a template known to break the replacement.
     *
     * Detects a call to the "sp", "s" or "sap" templates, with an optional dash
     * before and/or after the name (e.g. "{{sp|", "{{s-|", "{{-s|").
     * NOTE(review): presumably the French Wikipedia century templates - confirm.
     *
     * The match is case-insensitive and tolerates whitespace around the template
     * name, so "{{Sp|" and "{{ sp |" are detected as well.
     *
     * @param string $string wikitext to inspect
     *
     * @return bool true when such a template call is found
     */
    private function isTextCreatingError(string $string): bool
    {
        return preg_match('#\{\{\s*-?(?:sp|s|sap)-?\s*\|#i', $string) === 1;
    }
448
449
}
450