Completed
Push — master ( 0349ad...e37ca9 )
by Dispositif
02:28
created

EditProcess::initialize()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 12
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 8
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 12
rs 10
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 : Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Application\Examples;
11
12
use App\Application\Bot;
13
use App\Application\Memory;
14
use App\Application\WikiPageAction;
15
use App\Domain\Utils\WikiTextUtil;
16
use App\Infrastructure\DbAdapter;
17
use App\Infrastructure\ServiceFactory;
18
use Exception;
19
use LogicException;
20
use Mediawiki\DataModel\EditInfo;
21
use Normalizer;
22
use Throwable;
23
24
//use App\Application\CLI;
25
26
include __DIR__.'/../myBootstrap.php';
27
28
// Supervisor loop: build a fresh EditProcess and run it. When the run ends
// (in practice by throwing, the Throwable being dumped), wait one hour and
// start again with a new instance.
for (;;) {
    try {
        echo "*** NEW EDIT PROCESS\n";
        $process = new EditProcess();
        $process->run();
    } catch (\Throwable $failure) {
        dump($failure);
        unset($failure);
    }
    // Drop the reference to the finished process before sleeping.
    unset($process);
    echo "Sleep 1h\n";
    sleep(3600);
}
42
43
/**
44
 * TODO refac
45
 * Class EditProcess
46
 *
47
 * @package App\Application\Examples
48
 */
49
class EditProcess
50
{
51
    /** Task label inserted in every edit summary. */
    const TASK_NAME        = 'Amélioration bibliographique';
    /** When true, sendErrorMessage() is called after a successful edit. */
    const EDIT_SIGNALEMENT = true;

    /** Limit passed to DbAdapter::getAllRowsToEdit(). */
    const CITATION_LIMIT         = 150;
    /** Pause (seconds) after every successful edit. */
    const DELAY_BOTFLAG_SECONDS  = 10;
    /** Extra pause (seconds) after a successful edit made without the bot flag. */
    const DELAY_NOBOT_IN_SECONDS = 60;
    /** Wikitext template of the talk-page error report. */
    const ERROR_MSG_TEMPLATE     = __DIR__.'/../templates/message_errors.wiki';

    /** @var DbAdapter */
    private $db;
    /** @var Bot */
    private $bot;
    /** @var mixed API service returned by ServiceFactory::wikiApi() */
    private $wiki;
    /** @var string|null Wikitext of the page being processed */
    private $wikiText;
    /** @var array 'modifs' values of the rows applied to the current page */
    private $citationSummary = [];
    /** @var string 'version' value of the last applied row */
    private $citationVersion = '';
    /** @var array Error texts to report, indexed by page title */
    private $errorWarning = [];
    /** @var string[] Tags that replace the basic summary when non-empty */
    private $importantSummary = [];
    /** @var int Number of rows applied to the current page */
    private $nbRows = 0;

    // Minor flag on edit
    private $minorFlag = true;
    // Bot flag on edit
    private $botFlag = true;
74
75
    /**
     * Builds the database adapter and the bot helper, then obtains the wiki
     * API service with a forced login.
     */
    public function __construct()
    {
        $this->db = new DbAdapter();
        $this->bot = new Bot();

        // true => $forceLogin
        $this->wikiLogin(true);
    }
82
83
    /**
     * Endless loop: prints a separator and the current date, calls
     * Memory::echoMemory(), then processes one page.
     *
     * The loop has no exit condition; it only stops when something throws.
     */
    public function run(): void
    {
        $memory = new Memory();
        for (;;) {
            echo "\n-------------------------------------\n\n";
            printf("%s\n", date("Y-m-d H:i"));
            $memory->echoMemory(true);

            $this->pageProcess();
        }
    }
94
95
    /**
     * Processes one article: loads its queued citation rows, applies them to
     * the page wikitext and publishes the result.
     *
     * @return bool|mixed false when the page is skipped or the edit fails;
     *                    otherwise whatever WikiPageAction::editPage() returned
     *                    (presumably a bool — confirm against WikiPageAction)
     *
     * @throws \Exception when there is no row to process or the wiki rejects
     *                    the CSRF token
     */
    private function pageProcess()
    {
        $this->initialize();

        // Queued rows as JSON; the title of the first row is used for the
        // whole batch (presumably all rows belong to one page — verify
        // against DbAdapter::getAllRowsToEdit()).
        $json = $this->db->getAllRowsToEdit(self::CITATION_LIMIT);
        $data = json_decode($json, true);

        if (empty($data)) {
            echo "SKIP : no row to process\n";
            throw new \Exception('no row to process');
        }

        try {
            $title = $data[0]['page'];
            echo "$title \n";
            $page = new WikiPageAction($this->wiki, $title);
        } catch (Exception $e) {
            echo "*** WikiPageAction error : $title \n";
            sleep(20);

            return false;
        }

        // TODO : HACK
        // Strict comparison: an unset env var (false) must not match an
        // empty/unknown last editor.
        if (in_array($page->getLastEditor(), [getenv('BOT_NAME'), getenv('BOT_OWNER')], true)) {
            echo "SKIP : édité recemment par bot/dresseur.\n";
            $this->db->skipArticle($title);

            return false;
        }
        $this->wikiText = $page->getText();

        if (Bot::isEditionRestricted($this->wikiText)) {
            echo "SKIP : protection/3R.\n";
            $this->db->skipArticle($title);

            // Without this return the restricted page would still be edited.
            return false;
        }

        if ($this->bot->minutesSinceLastEdit($title) < 15) {
            echo "SKIP : édition humaine dans les dernières 15 minutes.\n";

            return false;
        }

        // Skip labelled articles (AdQ / BA)
        if (preg_match('#{{ ?En-tête label#i', $this->wikiText) > 0) {
            echo "SKIP : AdQ ou BA.\n";
            $this->db->skipArticle($title);

            return false;
        }

        echo sprintf(">> %s rows to process\n", count($data));

        // Apply every row to the wikitext
        $changed = false;
        foreach ($data as $dat) {
            // Temporary hack: give up on articles whose completion step is
            // missing or older than DbAdapter::OPTI_VALID_DATE
            if (empty($dat['opti']) || empty($dat['optidate']) || $dat['optidate'] < DbAdapter::OPTI_VALID_DATE) {
                echo "SKIP : Complètement incomplet de l'article \n";

                return false;
            }
            if ($this->dataProcess($dat)) {
                $changed = true;
            }
        }
        if (!$changed) {
            echo "Rien à changer...\n\n";
            $this->db->skipArticle($title);

            return false;
        }

        // EDIT THE PAGE
        if (!$this->wikiText) {
            return false;
        }

        $miniSummary = $this->generateSummary();
        echo "Edition ?\n".$miniSummary."\n\n";
        echo "sleep 20...\n";
        sleep(20);

        try {
            $editInfo = new EditInfo($miniSummary, $this->minorFlag, $this->botFlag);
            $success = $page->editPage(Normalizer::normalize($this->wikiText), $editInfo);
        } catch (\Throwable $e) {
            // An invalid CSRF token aborts the whole process: rethrow,
            // keeping the original error as "previous".
            if (strpos($e->getMessage(), 'Invalid CSRF token') !== false) {
                echo "*** Invalid CSRF token \n";
                throw new \Exception('Invalid CSRF token', 0, $e);
            }

            dump($e); // todo log
            sleep(60);

            return false;
        }

        echo ($success) ? "Ok\n" : "***** Erreur edit\n";

        if ($success) {
            // update DB
            foreach ($data as $dat) {
                $this->db->sendEditedData(['id' => $dat['id']]);
            }

            // The talk-page report is best effort: a failure must not
            // invalidate the edit that was just published.
            try {
                if (self::EDIT_SIGNALEMENT) {
                    $this->sendErrorMessage($data);
                }
            } catch (Throwable $e) {
                dump($e);
                unset($e);
            }

            // Edits without bot flag wait the extra delay in addition to the
            // standard one below.
            if (!$this->botFlag) {
                echo "sleep ".self::DELAY_NOBOT_IN_SECONDS."\n";
                sleep(self::DELAY_NOBOT_IN_SECONDS);
            }
            echo "sleep ".self::DELAY_BOTFLAG_SECONDS."\n";
            sleep(self::DELAY_BOTFLAG_SECONDS);
        }

        return $success;
    }
224
225
    /**
     * Applies one queued citation row to the current wikitext.
     *
     * The raw template ($data['raw']) is replaced by its completed version
     * ($data['opti']). On success the minor flag, citation version, summary
     * parts and row counter are updated.
     *
     * @param array $data One decoded row; reads the 'raw', 'opti', 'modifs',
     *                    'version', 'id' and 'major' keys
     *
     * @return bool true when the wikitext was modified, false when the row
     *              was skipped or the replacement changed nothing
     */
    private function dataProcess(array $data): bool
    {
        $rawTemplate = $data['raw'];
        $optimized = $data['opti'];

        dump($rawTemplate, $optimized, $data['modifs'], $data['version']);

        if (WikiTextUtil::isCommented($rawTemplate)) {
            echo "SKIP: template avec commentaire HTML\n";
            $this->db->skipRow((int) $data['id']);

            return false;
        }

        // The raw template must still be present in the page
        if (false === mb_strpos($this->wikiText, $rawTemplate)) {
            echo "String non trouvée. \n\n";
            $this->db->skipRow((int) $data['id']);

            return false;
        }

        $this->checkErrorWarning($data);

        $updatedText = WikiPageAction::replaceTemplateInText($this->wikiText, $rawTemplate, $optimized);

        if (!$updatedText || $updatedText === $this->wikiText) {
            echo "newText error\n";

            return false;
        }

        $this->wikiText = $updatedText;
        // A row flagged as major clears the minor flag for the whole edit
        if ('1' === $data['major']) {
            $this->minorFlag = false;
        }
        $this->citationVersion = $data['version'];
        $this->citationSummary[] = $data['modifs'];
        ++$this->nbRows;

        return true;
    }
265
266
    /**
     * Generate wiki edition summary.
     *
     * Format: "<prefix> [<git version>/<citation version>] <task> <nb rows> : <details>".
     * When important tags exist they replace the basic details and "..." is
     * appended; otherwise the summary is shortened to 80 characters.
     *
     * @return string
     */
    public function generateSummary(): string
    {
        // Start summary with "bot" when using botflag, else "☆"
        $prefix = ($this->botFlag) ? 'bot' : '☆';
        // add a warning sign when errors were detected
        $prefix .= (!empty($this->errorWarning)) ? ' ⚠' : '';

        // Tags to be verified by humans take precedence over basic modifs
        $hasImportant = !empty($this->importantSummary);
        $citeSummary = $hasImportant
            ? implode(', ', $this->importantSummary)
            : implode(' ', $this->citationSummary ?? []);

        $summary = sprintf(
            '%s [%s/%s] %s %s : %s',
            $prefix,
            str_replace('v', '', $this->bot::getGitVersion()),
            str_replace('v0.', '', $this->citationVersion),
            self::TASK_NAME,
            $this->nbRows,
            $citeSummary
        );

        if ($hasImportant) {
            return $summary.'...';
        }

        // Shrink long summaries. Character-based (mb_*) functions are
        // required: a byte-based cut could split a multi-byte UTF-8
        // character and yield an invalid summary.
        if (mb_strlen($summary, 'UTF-8') > 80) {
            $summary = mb_substr($summary, 0, 80, 'UTF-8').'…';
        }

        return $summary;
    }
309
310
    /**
     * Scans one citation row for human-error markers and noteworthy changes.
     *
     * Error markers found in the completed template ('opti') are stored for
     * the talk-page report. Changes listed in 'modifs' add a tag to the edit
     * summary and, for some of them, turn the bot flag off.
     *
     * @param array $data Citation row; reads the 'opti', 'modifs' and 'page' keys
     *
     * @throws LogicException when 'opti' is missing or null
     */
    private function checkErrorWarning(array $data): void
    {
        if (!isset($data['opti'])) {
            throw new LogicException('Opti NULL');
        }

        // Unknown parameter / value without parameter name
        // (the bot flag is left untouched in this case)
        $unknownParamPattern = "#\|[^|]+<!-- ?(PARAMETRE [^>]+ N'EXISTE PAS|VALEUR SANS NOM DE PARAMETRE) ?-->#";
        if (1 === preg_match($unknownParamPattern, $data['opti'], $matches)) {
            $this->addErrorWarning($data['page'], $matches[0]);
            $this->addSummaryTag('paramètre non corrigé');
        }

        // Invalid ISBN
        if (1 === preg_match("#isbn invalide ?=[^|}]+#i", $data['opti'], $matches)) {
            $this->addErrorWarning($data['page'], $matches[0]);
            $this->botFlag = false;
            $this->addSummaryTag('ISBN invalide');
        }

        // Edits adding a substantial amount of data
        if (1 === preg_match('#distinction des auteurs#', $data['modifs'])) {
            $this->botFlag = false;
            $this->addSummaryTag('distinction des auteurs');
        }

        // Predicted parameter name ("old => new"): the matched text is the tag
        if (1 === preg_match('#[^,]+(=>|⇒)[^,]+#', $data['modifs'], $matches)) {
            $this->botFlag = false;
            $this->addSummaryTag($matches[0]);
        }

        // Simple rules, evaluated in order: [pattern, summary tag, clears bot flag]
        $tagRules = [
            ['#\+\+sous-titre#', '+sous-titre', true],
            ['#\+lieu#', '+lieu', false],
            ['#présentation en ligne#', '+présentation en ligne', false],
            ['#\+lire en ligne#', '+lire en ligne', false],
            ['#\+éditeur#', 'éditeur', false],
        ];
        foreach ($tagRules as $rule) {
            list($pattern, $tag, $clearsBotFlag) = $rule;
            if (1 !== preg_match($pattern, $data['modifs'])) {
                continue;
            }
            if ($clearsBotFlag) {
                $this->botFlag = false;
            }
            $this->addSummaryTag($tag);
        }

        // Mention BnF only when data was added AND a bnf= identifier was added
        if (!empty($this->importantSummary) && 1 === preg_match('#\+bnf#i', $data['modifs'])) {
            $this->addSummaryTag('[[BnF]]');
        }
    }
378
379
    /**
     * Records an error text for a page, ignoring texts already recorded for
     * that page (avoids duplicates in the error report).
     *
     * @param string $page Page title used as index
     * @param string $text Error text to record
     */
    private function addErrorWarning(string $page, string $text): void
    {
        $known = $this->errorWarning[$page] ?? [];
        // Strict comparison: numeric-looking strings such as "10" and "10.0"
        // must not be treated as duplicates.
        if (!in_array($text, $known, true)) {
            $this->errorWarning[$page][] = $text;
        }
    }
391
392
    /**
     * Adds a tag describing a substantive or ambiguous modification to the
     * important summary, once per distinct tag.
     *
     * @param string $tag
     */
    private function addSummaryTag(string $tag): void
    {
        // Strict comparison so that distinct numeric-looking tags are kept apart.
        if (!in_array($tag, $this->importantSummary, true)) {
            $this->importantSummary[] = $tag;
        }
    }
403
404
    /**
     * Posts the errors collected for an article on its talk page.
     *
     * @param array $rows Collection of citations; only $rows[0]['page'] is read
     *
     * @return bool false when there is nothing to report, when the message
     *              template cannot be read or when the talk-page edit throws;
     *              otherwise the result of addToBottomOrCreatePage()
     */
    private function sendErrorMessage(array $rows): bool
    {
        if (!isset($rows[0]) || empty($this->errorWarning[$rows[0]['page']])) {
            return false;
        }
        $mainTitle = $rows[0]['page'];
        echo "** Send Error Message on talk page. Wait 3... \n";
        sleep(3);

        // format wiki message
        $errorList = '';
        foreach ($this->errorWarning[$mainTitle] as $error) {
            $errorList .= sprintf("* <span style=\"background:#FCDFE8\"><nowiki>%s</nowiki></span> \n", $error);
        }

        // Link to the bot's diff, only when the bot is still the last editor.
        // Best effort: on any failure the message is sent without the link.
        $diffStr = '';
        try {
            $main = new WikiPageAction($this->wiki, $mainTitle);
            $lastRevision = $main->getLastRevision();
            if (getenv('BOT_NAME') === $lastRevision->getUser()) {
                $diffStr = sprintf(
                    ' ([https://fr.wikipedia.org/w/index.php?title=%s&diff=%s diff])',
                    // URL-encode so that characters such as "&", "?" or '"'
                    // in a title cannot break the link
                    rawurlencode(str_replace(' ', '_', $mainTitle)),
                    $lastRevision->getId()
                );
            }
        } catch (\Throwable $e) {
            unset($e);
        }

        $errorMessage = file_get_contents(self::ERROR_MSG_TEMPLATE);
        if (false === $errorMessage) {
            // Never post an empty message when the template is unreadable
            echo "*** Error message template unreadable : ".self::ERROR_MSG_TEMPLATE."\n";

            return false;
        }
        $errorMessage = str_replace(
            ['##ERROR LIST##', '##ARTICLE##', '##DIFF##'],
            [trim($errorList), $mainTitle, $diffStr],
            $errorMessage
        );

        // Edit wiki talk page
        try {
            $talkPage = new WikiPageAction($this->wiki, 'Discussion:'.$mainTitle);
            $editInfo = new EditInfo('Signalement erreur {ouvrage}', false, false);

            return $talkPage->addToBottomOrCreatePage($errorMessage, $editInfo);
        } catch (Throwable $e) {
            dump($e);

            return false;
        }
    }
457
458
    /**
     * Resets the per-page state (edit flags, wikitext, row counter, summary
     * parts, collected errors), then calls Bot::checkStopOnTalkpage(true).
     *
     * Note: $citationVersion is not reset here.
     */
    private function initialize(): void
    {
        // Edit flags back to their defaults
        $this->minorFlag = true;
        $this->botFlag = true;

        // Per-page buffers
        $this->wikiText = null;
        $this->nbRows = 0;
        $this->citationSummary = [];
        $this->importantSummary = [];
        $this->errorWarning = [];

        $this->bot->checkStopOnTalkpage(true);
    }
471
472
    /**
     * Stores the wiki API service obtained from ServiceFactory::wikiApi().
     *
     * @param bool $forceLogin Forwarded unchanged to ServiceFactory::wikiApi()
     *
     * @throws \Mediawiki\Api\UsageException
     */
    private function wikiLogin($forceLogin = false): void
    {
        $api = ServiceFactory::wikiApi($forceLogin);
        $this->wiki = $api;
    }
481
482
}
483