Passed
Push — master ( fee625...9b7863 )
by Dispositif
05:51
created

OuvrageEditWorker::generateSummary()   B

Complexity

Conditions 7
Paths 48

Size

Total Lines 36
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 56

Importance

Changes 0
Metric Value
cc 7
eloc 20
nc 48
nop 0
dl 0
loc 36
ccs 0
cts 25
cp 0
crap 56
rs 8.6666
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * This file is part of dispositif/wikibot application (@github)
5
 * 2019/2020 © Philippe M. <[email protected]>
6
 * For the full copyright and MIT license information, please view the license file.
7
 */
8
9
declare(strict_types=1);
10
11
namespace App\Application;
12
13
use App\Domain\Utils\WikiTextUtil;
14
use App\Infrastructure\DbAdapter;
15
use App\Infrastructure\Memory;
16
use App\Infrastructure\ServiceFactory;
17
use Exception;
18
use LogicException;
19
use Mediawiki\Api\UsageException;
20
use Normalizer;
21
use Psr\Log\LoggerInterface;
22
use Psr\Log\NullLogger;
23
use Throwable;
24
25
/**
26
 * Class OuvrageEditWorker
27
 *
28
 * @package App\Application
29
 */
30
class OuvrageEditWorker
31
{
32
    use EditSummaryTrait, TalkPageEditTrait;
33
34
    // Task label (French for "bibliographic improvement"). Not referenced in the visible
    // part of this class — presumably consumed by the imported traits; TODO confirm.
    const TASK_NAME = 'Amélioration bibliographique';
35
    /**
     * Whether to post a message on the article talk page ("PD") reporting the
     * errors that remain to be fixed. Checked in pageProcess() after a successful edit.
     */
38
    const EDIT_SIGNALEMENT = true;
39
40
    // Limit passed to DbAdapter::getAllRowsToEdit() in pageProcess().
    const CITATION_LIMIT         = 150;
41
    // Seconds slept after a successful edit made with the bot flag.
    const DELAY_BOTFLAG_SECONDS  = 30;
42
    // Seconds slept after a successful edit made without the bot flag.
    const DELAY_NOBOT_IN_SECONDS = 120;
43
    // Path of a wiki template file. Not referenced in the visible part of this class —
    // presumably used by TalkPageEditTrait; TODO confirm.
    const ERROR_MSG_TEMPLATE     = __DIR__.'/templates/message_errors.wiki';
44
45
    // DbAdapter injected by the constructor (rows to edit, skip/edited bookkeeping).
    private $db;
46
    // WikiBotConfig injected by the constructor.
    private $bot;
47
    // Wikitext of the page being processed; null until pageProcess() loads it.
    private $wikiText;
48
49
    // List of the 'modifs' values of the rows applied to the current page.
    private $citationSummary;
50
    // Map of page title => list of distinct error fragments (see addErrorWarning()).
    private $errorWarning = [];
51
    // Reset in initialize() and read in checkErrorWarning(); presumably filled by
    // addSummaryTag() from EditSummaryTrait — TODO confirm.
    private $importantSummary = [];
52
53
    // Number of rows successfully replaced in the current page (see dataProcess()).
    private $nbRows;
54
55
    // Minor flag on edit; cleared by dataProcess() when a row has 'major' === '1'.
    private $minorFlag = true;
57
    // Bot flag on edit; cleared by checkErrorWarning() for some kinds of changes
    // (e.g. an invalid ISBN).
    private $botFlag = true;
59
60
    /**
     * Used by run() to log a memory report at each iteration.
     *
     * @var Memory
     */
63
    private $memory;
64
65
    /**
     * Logger; a NullLogger when none is given to the constructor.
     *
     * @var LoggerInterface|NullLogger
     */
68
    private $log;
69
    /**
     * 'version' value of the last row applied by dataProcess().
     *
     * @var mixed
     */
72
    private $citationVersion;
73
74
    /**
     * Stores the collaborators the worker needs.
     *
     * @param DbAdapter            $dbAdapter provides the rows to edit and records skipped/edited ones
     * @param WikiBotConfig        $bot       bot helper (stop-page check, last-edit delay)
     * @param Memory               $memory    used by run() to log a memory report
     * @param LoggerInterface|null $log       optional logger; a NullLogger is used when omitted
     */
    public function __construct(
        DbAdapter $dbAdapter,
        WikiBotConfig $bot,
        Memory $memory,
        ?LoggerInterface $log = null
    ) {
        if (null === $log) {
            $log = new NullLogger();
        }

        $this->log = $log;
        $this->memory = $memory;
        $this->bot = $bot;
        $this->db = $dbAdapter;
    }
93
94
    /**
     * Endless processing loop: prints a separator and a timestamp, logs the
     * memory report, then processes one page. The loop only ends when
     * pageProcess() (or one of its callees) throws.
     *
     * @throws Exception
     */
    public function run(): void
    {
        $separator = "\n-------------------------------------\n\n";

        for (;;) {
            echo $separator;
            echo sprintf("%s\n", date("Y-m-d H:i"));

            $memoryReport = $this->memory->getMemory(true);
            $this->log->notice($memoryReport);

            $this->pageProcess();
        }
    }
106
107
    /**
     * Processes one article: loads the queued rows, applies each completed
     * citation to the page wikitext, saves the page and updates the database.
     *
     * The article is skipped (false is returned) when the page cannot be loaded,
     * was last edited by the bot or its owner, is outside namespace 0, is empty,
     * is edit-restricted, was edited less than 15 minutes ago, carries a
     * "En-tête label" template, has a row whose completion is missing/outdated,
     * or when no row changed the wikitext.
     *
     * @return bool result of editPage() when the edit was attempted, false otherwise
     *
     * @throws UsageException
     * @throws Exception when there is no row to process or the API reports an invalid CSRF token
     */
    private function pageProcess()
    {
        $this->initialize();

        // Rows to edit, JSON-encoded (presumably all rows of one article, since only
        // the first row's 'page' is used as title — verify against DbAdapter).
        $json = $this->db->getAllRowsToEdit(self::CITATION_LIMIT);
        $data = json_decode($json, true);

        if (empty($data)) {
            $this->log->alert("SKIP : no row to process\n");
            throw new Exception('no row to process');
        }

        // Resolved before the try block so the catch clause can always reference it.
        $title = $data[0]['page'];
        echo $title." \n";

        try {
            $page = ServiceFactory::wikiPageAction($title, true);
        } catch (Exception $e) {
            $this->log->warning("*** WikiPageAction error : ".$title." \n");
            sleep(20);

            return false;
        }

        // HACK: do not re-edit a page whose last editor is the bot or its owner.
        // Strict comparison: getenv() returns false for an unset variable, which a
        // loose in_array() would match against a null/empty last editor.
        $botAccounts = [getenv('BOT_NAME'), getenv('BOT_OWNER')];
        if (in_array($page->getLastEditor(), $botAccounts, true)) {
            $this->log->notice("SKIP : édité recemment par bot/dresseur.\n");
            $this->db->skipArticle($title);

            return false;
        }
        if ($page->getNs() !== 0) {
            $this->log->notice("SKIP : page n'est pas dans Main (ns 0)\n");
            $this->db->skipArticle($title);

            return false;
        }
        $this->wikiText = $page->getText();

        if (empty($this->wikiText)) {
            return false;
        }
        if (WikiBotConfig::isEditionRestricted($this->wikiText)) {
            $this->log->info("SKIP : protection/3R.\n");
            $this->db->skipArticle($title);

            return false;
        }

        // Not marked as skipped in the database: the article stays in the queue.
        if ($this->bot->minutesSinceLastEdit($title) < 15) {
            $this->log->info("SKIP : édition humaine dans les dernières 15 minutes.\n");

            return false;
        }

        // Skip labelled articles (AdQ / BA)
        if (preg_match('#{{ ?En-tête label#i', $this->wikiText) > 0) {
            $this->log->info("SKIP : AdQ ou BA.\n");
            $this->db->skipArticle($title);

            return false;
        }

        $this->log->info(sprintf("%s rows to process\n", count($data)));

        // Apply every row; the page is edited only if at least one row changed the text.
        $changed = false;
        foreach ($data as $dat) {
            // Temporary hack: avoid articles whose completion process is incomplete
            if (empty($dat['opti']) || empty($dat['optidate']) || $dat['optidate'] < DbAdapter::OPTI_VALID_DATE) {
                $this->log->notice("SKIP : Complètement incomplet de l'article");

                return false;
            }
            if ($this->dataProcess($dat)) {
                $changed = true;
            }
        }
        if (!$changed) {
            $this->log->debug("Rien à changer...");
            $this->db->skipArticle($title);

            return false;
        }

        // NOTE: conversion of <ref>http//books.google references (refGooConverter)
        // is currently disabled.

        // EDIT THE PAGE
        if (!$this->wikiText) {
            return false;
        }

        $miniSummary = $this->generateSummary();
        $this->log->notice($miniSummary);
        $this->log->info("sleep 30...");
        sleep(30);

        try {
            // corona Covid :)
            $miniSummary .= (date('H:i') === '20:00') ? ' 🏥' : ''; // 🏥🦠

            $editInfo = ServiceFactory::editInfo($miniSummary, $this->minorFlag, $this->botFlag, 5);
            $success = $page->editPage(Normalizer::normalize($this->wikiText), $editInfo);
        } catch (Throwable $e) {
            // An invalid CSRF token is fatal: rethrow so that run() stops.
            if (strpos($e->getMessage(), 'Invalid CSRF token') !== false) {
                $this->log->alert("*** Invalid CSRF token \n");
                throw new Exception('Invalid CSRF token');
            }

            $this->log->warning('Exception in editPage() '.$e->getMessage());
            sleep(10);

            return false;
        }

        $this->log->info($success ? "Ok\n" : "***** Erreur edit\n");

        if ($success) {
            // update DB
            foreach ($data as $dat) {
                $this->db->sendEditedData(['id' => $dat['id']]);
            }

            // Best effort: a failure to post on the talk page must not fail the edit.
            try {
                if (self::EDIT_SIGNALEMENT && !empty($this->errorWarning[$title])) {
                    $this->sendOuvrageErrorsOnTalkPage($data, $this->log);
                }
            } catch (Throwable $e) {
                $this->log->warning('Exception in sendOuvrageErrorsOnTalkPage() '.$e->getMessage());
            }

            // Longer pause after an edit made without the bot flag.
            $delay = $this->botFlag ? self::DELAY_BOTFLAG_SECONDS : self::DELAY_NOBOT_IN_SECONDS;
            $this->log->info("sleep ".$delay);
            sleep($delay);
        }

        return $success;
    }
266
267
    /**
     * Resets the per-page state before a new page is processed, then asks the
     * bot helper to check the stop request on its talk page.
     *
     * @throws UsageException
     */
    private function initialize(): void
    {
        // Page content and row counter
        $this->wikiText = null;
        $this->nbRows = 0;

        // Edit flags
        $this->minorFlag = true;
        $this->botFlag = true;

        // Summary and error collectors
        $this->citationSummary = [];
        $this->importantSummary = [];
        $this->errorWarning = [];

        $this->bot->checkStopOnTalkpage(true);
    }
283
284
    /**
     * Applies one database row to the current wikitext: replaces the raw
     * template ('raw') by its completed version ('opti').
     *
     * The row is marked as skipped in the database when the raw template
     * contains an HTML comment or cannot be found in the wikitext.
     *
     * @param array $data row with (at least) the keys raw, opti, modifs, version, id, major, page
     *
     * @return bool true when the wikitext was modified
     * @throws Exception
     */
    private function dataProcess(array $data): bool
    {
        $origin = $data['raw'];
        $completed = $data['opti'];

        $debugLines = [
            'origin: '.$origin,
            'completed: '.$completed,
            'modifs: '.$data['modifs'],
            'version: '.$data['version'],
        ];
        foreach ($debugLines as $debugLine) {
            $this->log->debug($debugLine);
        }

        if (WikiTextUtil::isCommented($origin)) {
            $this->log->notice("SKIP: template avec commentaire HTML.");
            $this->db->skipRow((int) $data['id']);

            return false;
        }

        if (false === mb_strpos($this->wikiText, $origin)) {
            $this->log->notice("String non trouvée.");
            $this->db->skipRow((int) $data['id']);

            return false;
        }

        $this->checkErrorWarning($data);

        // Replace text
        $replaced = WikiPageAction::replaceTemplateInText($this->wikiText, $origin, $completed);
        $isUnchanged = ($replaced === $this->wikiText);

        if (!$replaced || $isUnchanged) {
            $this->log->warning("newText error");

            return false;
        }

        $this->wikiText = $replaced;
        if ('1' === $data['major']) {
            // a single major row makes the whole edit non-minor
            $this->minorFlag = false;
        }
        $this->citationVersion = $data['version'];
        $this->citationSummary[] = $data['modifs'];
        ++$this->nbRows;

        return true;
    }
333
334
    /**
     * todo extract
     * Looks for human-error alerts in the completed template ('opti') and for
     * noteworthy changes in the modification list ('modifs').
     *
     * Error fragments are recorded with addErrorWarning(); noteworthy changes
     * are recorded with addSummaryTag(), and some of them clear the bot flag.
     *
     * @param array $data row with the keys opti, modifs and page
     *
     * @throws LogicException when 'opti' is missing or null
     */
    private function checkErrorWarning(array $data): void
    {
        if (!isset($data['opti'])) {
            throw new LogicException('Opti NULL');
        }
        $opti = $data['opti'];

        // Unknown parameter / unnamed value / error marker left as HTML comment
        $markerCount = preg_match_all(
            "#\|[^|]+<!-- ?(PARAMETRE [^>]+ N'EXISTE PAS|VALEUR SANS NOM DE PARAMETRE|ERREUR [^>]+) ?-->#",
            $opti,
            $markers
        );
        if ($markerCount > 0) {
            foreach ($markers[0] as $fragment) {
                $this->addErrorWarning($data['page'], $fragment);
            }
            // the bot flag is deliberately left untouched here
            $this->addSummaryTag('paramètre non corrigé');
        }

        // Invalid ISBN
        if (preg_match("#isbn invalide ?=[^|}]+#i", $opti, $isbn) > 0) {
            $this->addErrorWarning($data['page'], $isbn[0]);
            $this->botFlag = false;
            $this->addSummaryTag('ISBN invalide');
        }

        $modifs = $data['modifs'];

        // Edits adding a significant amount of data
        if (preg_match('#distinction des auteurs#', $modifs) > 0) {
            $this->botFlag = false;
            $this->addSummaryTag('distinction des auteurs');
        }

        // Predicted correct parameter name ("old => new"): the matched text is the tag
        if (preg_match('#[^,]+(=>|⇒)[^,]+#', $modifs, $prediction) > 0) {
            $this->botFlag = false;
            $this->addSummaryTag($prediction[0]);
        }

        // Ordered rules: pattern => [summary tag, whether the bot flag is cleared].
        // ('+langue' is intentionally not tagged: it was disabled in the original code.)
        $tagRules = [
            '#\+\+sous-titre#'        => ['+sous-titre', true],
            '#\+lieu#'                => ['+lieu', false],
            '#tracking#'              => ['tracking', false],
            '#présentation en ligne#' => ['+présentation en ligne', false],
            '#distinction auteurs#'   => ['distinction auteurs', false],
            '#\+lire en ligne#'       => ['+lire en ligne', false],
            '#\+lien #'               => ['wikif', false],
            '#\+éditeur#'             => ['éditeur', false],
        ];
        foreach ($tagRules as $pattern => [$tag, $clearsBotFlag]) {
            if (preg_match($pattern, $modifs) <= 0) {
                continue;
            }
            if ($clearsBotFlag) {
                $this->botFlag = false;
            }
            $this->addSummaryTag($tag);
        }

        // Mention BnF when data was added and the modifications mention BnF
        if (!empty($this->importantSummary) && preg_match('#BnF#i', $modifs) > 0) {
            $this->addSummaryTag('©[[BnF]]');
        }
    }
414
415
    /**
     * todo extract
     * Records an error fragment for a page, avoiding duplicates in the error report.
     *
     * The duplicate check is strict, so two different strings that only compare
     * equal loosely (e.g. numeric-looking strings) are both kept.
     *
     * @param string $page title of the page the error belongs to
     * @param string $text error fragment to record
     */
    private function addErrorWarning(string $page, string $text): void
    {
        $alreadyRecorded = $this->errorWarning[$page] ?? [];

        if (!in_array($text, $alreadyRecorded, true)) {
            $this->errorWarning[$page][] = $text;
        }
    }
428
429
}
430