Passed
Push — master ( 9ad278...1bcf8b )
by Dispositif
08:55
created

OuvrageEditWorker::addErrorWarning()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
cc 3
eloc 2
nc 2
nop 2
dl 0
loc 4
ccs 0
cts 4
cp 0
crap 12
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * This file is part of dispositif/wikibot application (@github)
5
 * 2019/2020 © Philippe M. <[email protected]>
6
 * For the full copyright and MIT license information, please view the license file.
7
 */
8
9
declare(strict_types=1);
10
11
namespace App\Application;
12
13
use App\Domain\Utils\WikiTextUtil;
14
use App\Infrastructure\DbAdapter;
15
use App\Infrastructure\Memory;
16
use App\Infrastructure\ServiceFactory;
17
use Codedungeon\PHPCliColors\Color;
18
use Exception;
19
use LogicException;
20
use Mediawiki\Api\UsageException;
21
use Normalizer;
22
use Psr\Log\LoggerInterface;
23
use Psr\Log\NullLogger;
24
use Throwable;
25
26
/**
27
 * Class OuvrageEditWorker
28
 *
29
 * @package App\Application
30
 */
31
class OuvrageEditWorker
32
{
33
    use EditSummaryTrait, TalkPageEditTrait;
34
35
    public const TASK_NAME = 'Amélioration bibliographique';
    /**
     * Whether to post a talk-page message reporting the errors left to fix.
     */
    public const EDIT_SIGNALEMENT = true;

    public const CITATION_LIMIT           = 150;
    public const DELAY_BOTFLAG_SECONDS    = 20;
    public const DELAY_NO_BOTFLAG_SECONDS = 50;
    public const ERROR_MSG_TEMPLATE       = __DIR__.'/templates/message_errors.wiki';

    /**
     * @var DbAdapter
     */
    private $db;
    /**
     * @var WikiBotConfig
     */
    private $bot;
    /**
     * Wikitext of the page being processed; reset to null by initialize().
     *
     * @var string|null
     */
    private $wikiText;

    /**
     * 'modifs' value of every row applied to the current page.
     *
     * @var array
     */
    private $citationSummary;
    /**
     * Error snippets to report, grouped by page title.
     *
     * @var array
     */
    private $errorWarning = [];
    /**
     * @var array
     */
    private $importantSummary = [];

    /**
     * Number of rows successfully applied to the current page.
     *
     * @var int
     */
    private $nbRows;

    /**
     * Minor flag on edit.
     *
     * @var bool
     */
    private $minorFlag = true;
    /**
     * Bot flag on edit.
     *
     * @var bool
     */
    private $botFlag = true;

    /**
     * @var Memory
     */
    private $memory;

    /**
     * @var LoggerInterface
     */
    private $log;
    /**
     * 'version' value of the last row applied.
     *
     * @var mixed
     */
    private $citationVersion;
74
75
    /**
     * Stores the collaborators; a NullLogger is used when no logger is given.
     *
     * @param DbAdapter            $dbAdapter
     * @param WikiBotConfig        $bot
     * @param Memory               $memory
     * @param LoggerInterface|null $log
     */
    public function __construct(
        DbAdapter $dbAdapter,
        WikiBotConfig $bot,
        Memory $memory,
        ?LoggerInterface $log = null
    ) {
        $this->log = (null === $log) ? new NullLogger() : $log;
        $this->memory = $memory;
        $this->bot = $bot;
        $this->db = $dbAdapter;
    }
86
87
    /**
     * Endless worker loop: prints a separator and timestamp, logs the memory
     * usage, then processes one page. Only an exception ends the loop.
     *
     * @throws Exception
     */
    public function run(): void
    {
        for (;;) {
            echo "\n-------------------------------------\n\n", date("Y-m-d H:i"), " ";

            $memoryReport = $this->memory->getMemory(true);
            $this->log->info($memoryReport);

            $this->pageProcess();
        }
    }
99
100
    /**
     * Processes one article: loads its queued rows, applies each completed
     * template to the page wikitext, saves the page and, on success, marks the
     * rows as edited and optionally reports errors on the talk page.
     *
     * @return bool false when the article is skipped or the edit fails,
     *              otherwise the result returned by the page edit
     * @throws UsageException
     * @throws Exception when there is no row to process or the API rejects the CSRF token
     */
    private function pageProcess()
    {
        $this->initialize();

        // get a random queue line
        $json = $this->db->getAllRowsToEdit(self::CITATION_LIMIT);
        $data = json_decode($json, true);

        if (empty($data)) {
            $this->log->alert("SKIP : no row to process\n");
            throw new Exception('no row to process');
        }

        try {
            $title = $data[0]['page'];
            echo Color::BG_CYAN.$title.Color::NORMAL." \n";
            $page = ServiceFactory::wikiPageAction($title, false); // , true ?
        } catch (Exception $e) {
            $this->log->warning("*** WikiPageAction error : ".$title." \n");
            sleep(20);

            return false;
        }

        // Page deleted?
        if ($page->getLastRevision() === null) {
            $this->log->warning("SKIP : page supprimée !\n");
            $this->db->deleteArticle($title);

            return false;
        }

        // HACK: skip pages last edited by the bot or its owner.
        // Strict comparison: getenv() returns false for an unset variable, which
        // would loosely equal a null/empty last editor and skip the page by mistake.
        if (in_array($page->getLastEditor(), [getenv('BOT_NAME'), getenv('BOT_OWNER')], true)) {
            $this->log->notice("SKIP : édité recemment par bot/dresseur.\n");
            $this->db->skipArticle($title);

            return false;
        }
        if ($page->getNs() !== 0) {
            $this->log->notice("SKIP : page n'est pas dans Main (ns 0)\n");
            $this->db->skipArticle($title);

            return false;
        }
        $this->wikiText = $page->getText();

        if (empty($this->wikiText)) {
            return false;
        }

        // Skip AdQ (featured-article header template)
        if (preg_match('#{{ ?En-tête label ?\| ?AdQ#i', $this->wikiText)) {
            $this->db->setLabel($title, 2);
            $this->log->info("SKIP : AdQ.\n"); // BA ??
            $this->db->skipArticle($title);

            return false;
        }
        // BA pages are still edited, but without the bot flag.
        if (preg_match('#{{ ?En-tête label ?\| ?BA#i', $this->wikiText)) {
            $this->db->setLabel($title, 1);
            $this->botFlag = false;
            $this->log->warning("Bon article !!\n");
        }

        if (WikiBotConfig::isEditionRestricted($this->wikiText)) {
            $this->log->info("SKIP : protection/3R/travaux.\n");
            $this->db->skipArticle($title);

            return false;
        }

        if ($this->bot->minutesSinceLastEdit($title) < 20) {
            $this->log->info("SKIP : édition humaine dans les dernières 20 minutes.\n");

            return false;
        }

        $this->log->info(sprintf("%s rows to process\n", count($data)));

        // Apply every row; the page is saved only if at least one row changed the text.
        $changed = false;
        foreach ($data as $dat) {
            // Temporary hack: avoid articles whose CompleteProcess is incomplete.
            if (empty($dat['opti']) || empty($dat['optidate']) || $dat['optidate'] < DbAdapter::OPTI_VALID_DATE) {
                $this->log->notice("SKIP : Complètement incomplet de l'article. sleep 10min");
                sleep(600);

                return false;
            }
            if ($this->dataProcess($dat)) {
                $changed = true;
            }
        }
        if (!$changed) {
            $this->log->debug("Rien à changer...");
            $this->db->skipArticle($title);

            return false;
        }

        // EDIT THE PAGE
        if (!$this->wikiText) {
            return false;
        }

        $miniSummary = $this->generateSummary();
        $this->log->notice($miniSummary);
        $this->log->debug("sleep 2...");
        sleep(2); // todo ???

        try {
            $editInfo = ServiceFactory::editInfo($miniSummary, $this->minorFlag, $this->botFlag, 5);
            $success = $page->editPage(Normalizer::normalize($this->wikiText), $editInfo);
        } catch (Throwable $e) {
            // An invalid CSRF token is fatal for the worker: rethrow to stop the loop.
            if (strpos($e->getMessage(), 'Invalid CSRF token') !== false) {
                $this->log->alert("*** Invalid CSRF token \n");
                throw new Exception('Invalid CSRF token');
            }

            $this->log->warning('Exception in editPage() '.$e->getMessage());
            sleep(10);

            return false;
        }

        $this->log->info($success ? "Edition Ok\n" : "***** Edition KO !\n");

        if ($success) {
            // update DB
            foreach ($data as $dat) {
                $this->db->sendEditedData(['id' => $dat['id']]);
            }

            // Best effort: a talk-page failure must not undo a successful edit.
            try {
                if (self::EDIT_SIGNALEMENT && !empty($this->errorWarning[$title])) {
                    $this->sendOuvrageErrorsOnTalkPage($data, $this->log);
                }
            } catch (Throwable $e) {
                $this->log->warning('Exception in sendOuvrageErrorsOnTalkPage() '.$e->getMessage());
            }

            // Wait longer after an edit made without the bot flag.
            $delay = $this->botFlag ? self::DELAY_BOTFLAG_SECONDS : self::DELAY_NO_BOTFLAG_SECONDS;
            $this->log->debug("sleep ".$delay);
            sleep($delay);
        }

        return $success;
    }
268
269
    /**
     * Resets the per-page state, then asks the bot configuration to check
     * for a stop request on the talk page.
     *
     * @throws UsageException
     */
    private function initialize(): void
    {
        // edit flags back to their defaults
        $this->minorFlag = true;
        $this->botFlag = true;

        // per-page accumulators
        $this->wikiText = null;
        $this->nbRows = 0;
        $this->errorWarning = [];
        $this->citationSummary = [];
        $this->importantSummary = [];

        $this->bot->checkStopOnTalkpage(true);
    }
285
286
    /**
     * Applies one queued row to the current wikitext: replaces the raw
     * template ('raw') with its completed version ('opti') and records the
     * summary data of the row.
     *
     * @param array $data row with the keys 'id', 'raw', 'opti', 'modifs', 'version' and 'major'
     *
     * @return bool true when the wikitext was modified, false when the row was skipped or the replacement failed
     * @throws Exception
     */
    private function dataProcess(array $data): bool
    {
        $rawTemplate = $data['raw'];
        $optimizedTemplate = $data['opti'];

        $this->log->debug('origin: '.$rawTemplate);
        $this->log->debug('completed: '.$optimizedTemplate);
        $this->log->debug('modifs: '.$data['modifs']);
        $this->log->debug('version: '.$data['version']);

        // Templates containing an HTML comment are not touched.
        if (WikiTextUtil::isCommented($rawTemplate)) {
            $this->log->notice("SKIP: template avec commentaire HTML.");
            $this->db->skipRow((int) $data['id']);

            return false;
        }

        // The raw template must still be present in the page.
        if (false === mb_strpos($this->wikiText, $rawTemplate)) {
            $this->log->notice("String non trouvée.");
            $this->db->skipRow((int) $data['id']);

            return false;
        }

        $this->checkErrorWarning($data);

        $replaced = WikiPageAction::replaceTemplateInText($this->wikiText, $rawTemplate, $optimizedTemplate);

        if ($replaced === $this->wikiText || !$replaced) {
            $this->log->warning("newText error");

            return false;
        }

        $this->wikiText = $replaced;
        if ('1' === $data['major']) {
            $this->minorFlag = false;
        }
        $this->citationVersion = $data['version'];
        $this->citationSummary[] = $data['modifs'];
        ++$this->nbRows;

        return true;
    }
335
336
    /**
     * todo extract
     * Scans a row for human-error markers and notable changes: records error
     * snippets through addErrorWarning(), clears the bot flag for the cases
     * below that do so, and adds summary tags through addSummaryTag().
     *
     * @param array $data row with the keys 'opti', 'modifs' and 'page'
     *
     * @throws LogicException when 'opti' is missing or null
     */
    private function checkErrorWarning(array $data): void
    {
        if (!isset($data['opti'])) {
            throw new LogicException('Opti NULL');
        }

        // unknown parameter / unnamed value / error marker left in the template
        $unknownParamPattern
            = "#\|[^|]+<!-- ?(PARAMETRE [^>]+ N'EXISTE PAS|VALEUR SANS NOM DE PARAMETRE|ERREUR [^>]+) ?-->#";
        if (preg_match_all($unknownParamPattern, $data['opti'], $found) > 0) {
            foreach ($found[0] as $snippet) {
                $this->addErrorWarning($data['page'], $snippet);
            }
            $this->addSummaryTag('paramètre non corrigé');
        }

        // invalid ISBN
        if (1 === preg_match("#isbn invalide ?=[^|}]+#i", $data['opti'], $found)) {
            $this->addErrorWarning($data['page'], $found[0]);
            $this->botFlag = false;
            $this->addSummaryTag('ISBN invalide');
        }

        $modifs = $data['modifs'];

        // edits adding a substantial amount of data: no bot flag
        if (1 === preg_match('#distinction des auteurs#', $modifs)) {
            $this->botFlag = false;
            $this->addSummaryTag('distinction des auteurs');
        }
        // predicted correct parameter: the "a => b" fragment itself becomes the tag
        if (1 === preg_match('#[^,]+(=>|⇒)[^,]+#', $modifs, $found)) {
            $this->botFlag = false;
            $this->addSummaryTag($found[0]);
        }
        if (1 === preg_match('#\+\+sous-titre#', $modifs)) {
            $this->botFlag = false;
            $this->addSummaryTag('+sous-titre');
        }

        // plain "pattern => summary tag" checks, evaluated in this order
        $simpleTags = [
            '#\+lieu#'                => '+lieu',
            '#tracking#'              => 'tracking',
            '#présentation en ligne#' => '+présentation en ligne',
            '#distinction auteurs#'   => 'distinction auteurs',
            '#\+lire en ligne#'       => '+lire en ligne',
            '#\+lien #'               => 'wikif',
            '#\+éditeur#'             => 'éditeur',
        ];
        foreach ($simpleTags as $pattern => $tag) {
            if (1 === preg_match($pattern, $modifs)) {
                $this->addSummaryTag($tag);
            }
        }

        // BnF mention only when data was added and 'modifs' mentions BnF
        if (!empty($this->importantSummary) && 1 === preg_match('#BnF#i', $modifs)) {
            $this->addSummaryTag('©[[BnF]]');
        }
    }
416
417
    /**
     * todo extract
     * Records an error snippet for a page, ignoring exact duplicates.
     *
     * @param string $page page title used as grouping key
     * @param string $text error snippet to record
     */
    private function addErrorWarning(string $page, string $text): void
    {
        // Strict comparison: loose in_array() treats numeric-looking strings
        // such as "1e3" and "1000" as equal and would drop a distinct entry.
        if (!in_array($text, $this->errorWarning[$page] ?? [], true)) {
            $this->errorWarning[$page][] = $text;
        }
    }
430
431
}
432