Passed
Push — master ( dafac1...9ad278 )
by Dispositif
08:36
created

OuvrageEditWorker::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 10
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 4
nc 1
nop 4
dl 0
loc 10
ccs 0
cts 0
cp 0
crap 2
rs 10
c 1
b 0
f 0
1
<?php
2
3
/**
4
 * This file is part of dispositif/wikibot application (@github)
5
 * 2019/2020 © Philippe M. <[email protected]>
6
 * For the full copyright and MIT license information, please view the license file.
7
 */
8
9
declare(strict_types=1);
10
11
namespace App\Application;
12
13
use App\Domain\Utils\WikiTextUtil;
14
use App\Infrastructure\DbAdapter;
15
use App\Infrastructure\Memory;
16
use App\Infrastructure\ServiceFactory;
17
use Codedungeon\PHPCliColors\Color;
18
use Exception;
19
use LogicException;
20
use Mediawiki\Api\UsageException;
21
use Normalizer;
22
use Psr\Log\LoggerInterface;
23
use Psr\Log\NullLogger;
24
use Throwable;
25
26
/**
27
 * Class OuvrageEditWorker
28
 *
29
 * @package App\Application
30
 */
31
class OuvrageEditWorker
32
{
33
    use EditSummaryTrait, TalkPageEditTrait;
34
35
    const TASK_NAME = 'Amélioration bibliographique';
36
    /**
37
     * Whether to post a message on the talk page reporting the errors to fix.
38
     */
39
    const EDIT_SIGNALEMENT = true;
40
41
    const CITATION_LIMIT         = 150;
42
    const DELAY_BOTFLAG_SECONDS    = 20;
43
    const DELAY_NO_BOTFLAG_SECONDS = 50;
44
    const ERROR_MSG_TEMPLATE       = __DIR__.'/templates/message_errors.wiki';
45
46
    private $db;
47
    private $bot;
48
    private $wikiText;
49
50
    private $citationSummary;
51
    private $errorWarning = [];
52
    private $importantSummary = [];
53
54
    private $nbRows;
55
56
    // Minor flag on edit
57
    private $minorFlag = true;
58
    // Bot flag on edit
59
    private $botFlag = true;
60
61
    /**
62
     * @var Memory
63
     */
64
    private $memory;
65
66
    /**
67
     * @var LoggerInterface|NullLogger
68
     */
69
    private $log;
70
    /**
71
     * @var mixed
72
     */
73
    private $citationVersion;
74
75
    /**
     * Wire the worker with its collaborators.
     *
     * @param DbAdapter            $dbAdapter adapter stored in $this->db
     * @param WikiBotConfig        $bot       bot configuration stored in $this->bot
     * @param Memory               $memory    memory helper stored in $this->memory
     * @param LoggerInterface|null $log       optional logger; a NullLogger is used when omitted
     */
    public function __construct(
        DbAdapter $dbAdapter,
        WikiBotConfig $bot,
        Memory $memory,
        ?LoggerInterface $log = null
    ) {
        // Fall back to a silent logger so the rest of the class can log unconditionally.
        if (null === $log) {
            $log = new NullLogger();
        }

        $this->log = $log;
        $this->memory = $memory;
        $this->bot = $bot;
        $this->db = $dbAdapter;
    }
86
87
    /**
     * Endless worker loop: print a separator and timestamp, log the memory
     * report, then process one page. The loop has no exit condition of its own;
     * it only stops when pageProcess() lets an exception escape.
     *
     * @throws Exception
     */
    public function run(): void
    {
        for (;;) {
            echo "\n-------------------------------------\n\n", date("Y-m-d H:i")." ";

            $memoryReport = $this->memory->getMemory(true);
            $this->log->info($memoryReport);

            $this->pageProcess();
        }
    }
99
100
    /**
     * Process the queued citation rows of one article and, if anything changed,
     * publish the new wikitext.
     *
     * Steps: reset per-page state, fetch the queued rows, load the page, apply the
     * skip rules (recent bot/owner edit, non-main namespace, empty text, AdQ label,
     * restricted edition, recent edit), apply every row through dataProcess(),
     * then edit the page, flag the rows as edited and optionally report the
     * collected errors on the talk page.
     *
     * NOTE(review): the title is taken from the first row only, so this assumes
     * every row returned by getAllRowsToEdit() belongs to the same page — confirm.
     *
     * @return bool the result of the page edit; false whenever the page was skipped
     *              or the edit could not be attempted
     * @throws UsageException
     * @throws Exception when there is no row to process or the wiki answers
     *                   "Invalid CSRF token"
     */
    private function pageProcess()
    {
        $this->initialize();

        // Fetch the rows queued for editing (at most CITATION_LIMIT of them).
        $json = $this->db->getAllRowsToEdit(self::CITATION_LIMIT);
        $data = json_decode($json, true);

        if (empty($data)) {
            $this->log->alert("SKIP : no row to process\n");
            throw new Exception('no row to process');
        }

        $title = $data[0]['page'];

        try {
            echo Color::BG_CYAN.$title.Color::NORMAL." \n";
            $page = ServiceFactory::wikiPageAction($title, true);
        } catch (Exception $e) {
            $this->log->warning("*** WikiPageAction error : ".$title." \n");
            sleep(20);

            return false;
        }

        // HACK: do not re-edit a page whose last editor is the bot or its owner.
        // Strict comparison: getenv() yields false for an unset variable, which a
        // loose in_array() would match against a null/empty last editor.
        if (in_array($page->getLastEditor(), [getenv('BOT_NAME'), getenv('BOT_OWNER')], true)) {
            $this->log->notice("SKIP : édité recemment par bot/dresseur.\n");
            $this->db->skipArticle($title);

            return false;
        }
        if ($page->getNs() !== 0) {
            $this->log->notice("SKIP : page n'est pas dans Main (ns 0)\n");
            $this->db->skipArticle($title);

            return false;
        }
        $this->wikiText = $page->getText();

        if (empty($this->wikiText)) {
            return false;
        }

        // Skip AdQ-labelled articles (label value 2 is stored for them).
        if (preg_match('#{{ ?En-tête label ?\| ?AdQ#i', $this->wikiText)) {
            $this->db->setLabel($title, 2);
            $this->log->info("SKIP : AdQ.\n"); // BA ??
            $this->db->skipArticle($title);

            return false;
        }
        // BA-labelled articles are only recorded (label value 1), not skipped.
        if (preg_match('#{{ ?En-tête label ?\| ?BA#i', $this->wikiText)) {
            $this->db->setLabel($title, 1);
            $this->log->info("BA !!\n");
        }

        if (WikiBotConfig::isEditionRestricted($this->wikiText)) {
            $this->log->info("SKIP : protection/3R/travaux.\n");
            $this->db->skipArticle($title);

            return false;
        }

        // Not marked as skipped in the DB: the article stays in the queue for later.
        if ($this->bot->minutesSinceLastEdit($title) < 20) {
            $this->log->info("SKIP : édition humaine dans les dernières 20 minutes.\n");

            return false;
        }

        $this->log->info(sprintf("%s rows to process\n", count($data)));

        // Apply each row to the in-memory wikitext.
        $changed = false;
        foreach ($data as $dat) {
            // Temporary hack: avoid articles whose completion process is unfinished.
            if (empty($dat['opti']) || empty($dat['optidate']) || $dat['optidate'] < DbAdapter::OPTI_VALID_DATE) {
                $this->log->notice("SKIP : Complètement incomplet de l'article. sleep 10min");
                sleep(600);

                return false;
            }
            if ($this->dataProcess($dat)) {
                $changed = true;
            }
        }
        if (!$changed) {
            $this->log->debug("Rien à changer...");
            $this->db->skipArticle($title);

            return false;
        }

        // EDIT THE PAGE
        if (!$this->wikiText) {
            return false;
        }

        $miniSummary = $this->generateSummary();
        $this->log->notice($miniSummary);
        $this->log->debug("sleep 2...");
        sleep(2); // todo ???

        try {
            $editInfo = ServiceFactory::editInfo($miniSummary, $this->minorFlag, $this->botFlag, 5);
            $success = $page->editPage(Normalizer::normalize($this->wikiText), $editInfo);
        } catch (Throwable $e) {
            // An invalid CSRF token is escalated to the caller; any other failure
            // is logged and the page is simply abandoned for this round.
            if (strpos($e->getMessage(), 'Invalid CSRF token') !== false) {
                $this->log->alert("*** Invalid CSRF token \n");
                throw new Exception('Invalid CSRF token');
            }

            $this->log->warning('Exception in editPage() '.$e->getMessage());
            sleep(10);

            return false;
        }

        $this->log->info($success ? "Edition Ok\n" : "***** Edition KO !\n");

        if ($success) {
            // Flag every processed row as edited in the DB.
            foreach ($data as $dat) {
                $this->db->sendEditedData(['id' => $dat['id']]);
            }

            // Best effort: a failure to post on the talk page must not undo the edit.
            try {
                if (self::EDIT_SIGNALEMENT && !empty($this->errorWarning[$title])) {
                    $this->sendOuvrageErrorsOnTalkPage($data, $this->log);
                }
            } catch (Throwable $e) {
                $this->log->warning('Exception in sendOuvrageErrorsOnTalkPage() '.$e->getMessage());
            }

            // Wait between edits; the pause is longer when the bot flag was dropped.
            $delay = $this->botFlag ? self::DELAY_BOTFLAG_SECONDS : self::DELAY_NO_BOTFLAG_SECONDS;
            $this->log->debug("sleep ".$delay);
            sleep($delay);
        }

        return $success;
    }
259
260
    /**
     * Reset the per-page state before a new page is processed, then ask the bot
     * configuration to run its talk-page stop check.
     *
     * @throws UsageException
     */
    private function initialize(): void
    {
        // Edit flags return to their defaults.
        $this->minorFlag = true;
        $this->botFlag = true;

        // Page buffers and counters start empty.
        $this->wikiText = null;
        $this->nbRows = 0;
        $this->citationSummary = $this->importantSummary = $this->errorWarning = [];

        $this->bot->checkStopOnTalkpage(true);
    }
276
277
    /**
     * Apply one queued row to the in-memory wikitext.
     *
     * The row's 'raw' string is looked up in $this->wikiText and replaced by its
     * 'opti' string. On success the edit bookkeeping is updated (minor flag,
     * citation version, summary entries, row counter).
     *
     * @param array $data one queue row; the keys read here are 'raw', 'opti',
     *                    'modifs', 'version', 'id' and 'major'
     *
     * @return bool true when the wikitext was modified, false when the row was
     *              skipped or the replacement produced no change
     * @throws Exception
     */
    private function dataProcess(array $data): bool
    {
        $origin = $data['raw'];
        $completed = $data['opti'];

        $this->log->debug('origin: '.$origin);
        $this->log->debug('completed: '.$completed);
        $this->log->debug('modifs: '.$data['modifs']);
        $this->log->debug('version: '.$data['version']);

        if (WikiTextUtil::isCommented($origin)) {
            $this->log->notice("SKIP: template avec commentaire HTML.");
            $this->db->skipRow(intval($data['id']));

            return false;
        }

        // An empty needle is treated as "not found": mb_strpos() warns and returns
        // false for it before PHP 8 but returns 0 from PHP 8 on, which would let an
        // empty 'raw' string reach the replacement step.
        if ('' === $origin || false === mb_strpos($this->wikiText, $origin)) {
            $this->log->notice("String non trouvée.");
            $this->db->skipRow(intval($data['id']));

            return false;
        }

        $this->checkErrorWarning($data);

        // Replace text
        $newText = WikiPageAction::replaceTemplateInText($this->wikiText, $origin, $completed);

        if (!$newText || $newText === $this->wikiText) {
            $this->log->warning("newText error");

            return false;
        }

        $this->wikiText = $newText;
        // A single major row is enough to drop the minor flag for the whole edit.
        if ('1' === $data['major']) {
            $this->minorFlag = false;
        }
        $this->citationVersion = $data['version'];
        $this->citationSummary[] = $data['modifs'];
        $this->nbRows++;

        return true;
    }
326
327
    /**
     * todo extract
     * Inspect one row for human-error markers and notable changes.
     *
     * Error markers found in the 'opti' text are recorded through
     * addErrorWarning(); keywords found in the 'modifs' text add summary tags
     * and, for some of them, switch the bot flag off.
     *
     * @param array $data one queue row; the keys read here are 'opti', 'modifs'
     *                    and 'page'
     *
     * @throws Exception
     */
    private function checkErrorWarning(array $data): void
    {
        if (!isset($data['opti'])) {
            throw new LogicException('Opti NULL');
        }

        // Unknown parameter / unnamed value / generic error markers left as HTML comments.
        $markerCount = preg_match_all(
            "#\|[^|]+<!-- ?(PARAMETRE [^>]+ N'EXISTE PAS|VALEUR SANS NOM DE PARAMETRE|ERREUR [^>]+) ?-->#",
            $data['opti'],
            $markers
        );
        if ($markerCount > 0) {
            foreach ($markers[0] as $markerLine) {
                $this->addErrorWarning($data['page'], $markerLine);
            }
            // The bot flag is deliberately left untouched for this case.
            $this->addSummaryTag('paramètre non corrigé');
        }

        // Invalid ISBN
        if (preg_match("#isbn invalide ?=[^|}]+#i", $data['opti'], $isbnMatch) > 0) {
            $this->addErrorWarning($data['page'], $isbnMatch[0]);
            $this->botFlag = false;
            $this->addSummaryTag('ISBN invalide');
        }

        $modifs = $data['modifs'];

        // Ordered rules applied to the 'modifs' text: [pattern, summary tag, drops bot flag].
        // A null tag means "use the matched text itself" (predicted parameter fix).
        $rules = [
            ['#distinction des auteurs#', 'distinction des auteurs', true],
            ['#[^,]+(=>|⇒)[^,]+#', null, true],
            ['#\+\+sous-titre#', '+sous-titre', true],
            ['#\+lieu#', '+lieu', false],
            ['#tracking#', 'tracking', false],
            ['#présentation en ligne#', '+présentation en ligne', false],
            ['#distinction auteurs#', 'distinction auteurs', false],
            ['#\+lire en ligne#', '+lire en ligne', false],
            ['#\+lien #', 'wikif', false],
            ['#\+éditeur#', 'éditeur', false],
        ];

        foreach ($rules as [$pattern, $tag, $dropsBotFlag]) {
            if (preg_match($pattern, $modifs, $found) > 0) {
                if ($dropsBotFlag) {
                    $this->botFlag = false;
                }
                $this->addSummaryTag($tag ?? $found[0]);
            }
        }

        // Mention BnF only when important summary entries exist and 'modifs' cites BnF.
        if (!empty($this->importantSummary) && preg_match('#BnF#i', $modifs) > 0) {
            $this->addSummaryTag('©[[BnF]]');
        }
    }
407
408
    /**
     * todo extract
     * Record an error snippet for a page, ignoring snippets already recorded
     * for that page so the error report contains no duplicates.
     *
     * @param string $page page title used as key in $this->errorWarning
     * @param string $text error snippet to record
     */
    private function addErrorWarning(string $page, string $text): void
    {
        $known = $this->errorWarning[$page] ?? [];

        // Strict comparison: loose in_array() compares numeric-looking strings by
        // value, which could wrongly treat two different snippets as duplicates.
        if (!in_array($text, $known, true)) {
            $this->errorWarning[$page][] = $text;
        }
    }
421
422
}
423