Passed
Push — master ( d644e9...e142f8 )
by Dispositif
15:55
created

AbstractBotTaskWorker::fixGenericWikiSyntax()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 1
c 0
b 0
f 0
dl 0
loc 3
ccs 0
cts 0
cp 0
rs 10
cc 1
nc 1
nop 1
crap 2
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Application;
11
12
use App\Application\InfrastructurePorts\PageListForAppInterface as PageListInterface;
13
use App\Application\Traits\BotWorkerTrait;
14
use App\Application\Traits\WorkerAnalyzedTitlesTrait;
15
use App\Application\Traits\WorkerCLITrait;
16
use App\Domain\Exceptions\ConfigException;
17
use App\Domain\Exceptions\StopActionException;
18
use App\Domain\InfrastructurePorts\InternetDomainParserInterface;
19
use App\Domain\Models\Summary;
20
use App\Domain\Utils\WikiTextUtil;
21
use App\Infrastructure\ServiceFactory;
22
use Exception;
23
use Mediawiki\Api\MediawikiFactory;
24
use Mediawiki\DataModel\EditInfo;
25
use Psr\Log\LoggerInterface;
26
use Throwable;
27
28
/**
 * Base class of the bot task workers.
 *
 * A worker iterates over a list of page titles, loads the wikitext of each page,
 * delegates the transformation to {@see processWithDomainWorker()} (implemented by
 * concrete workers), and publishes the new text when something changed.
 *
 * Several helpers called below (printTitle(), checkAlreadyAnalyzed(), canProcessTitleArticle(),
 * isSomethingToChange(), autoOrYesConfirmation(), memorizeAndSaveAnalyzedTitle(),
 * initializePastAnalyzedTitles()) are not defined in this class body; presumably they are
 * provided by the imported traits — verify there.
 */
abstract class AbstractBotTaskWorker
{
    use WorkerCLITrait, BotWorkerTrait, WorkerAnalyzedTitlesTrait;

    /** Value given to Summary::setBotFlag() for each edit summary. */
    public const TASK_BOT_FLAG = false;
    /** Seconds slept after each edition attempt that did not throw. */
    public const SLEEP_AFTER_EDITION = 60;
    // The four constants flagged "not referenced" below are presumably consumed by the traits — TODO confirm.
    public const MINUTES_DELAY_AFTER_LAST_HUMAN_EDIT = 15; // not referenced in this class body
    /** Passed as third argument to WikiPageAction::editPage(). */
    public const CHECK_EDIT_CONFLICT = true;
    public const ARTICLE_ANALYZED_FILENAME = __DIR__ . '/resources/article_edited.txt'; // not referenced in this class body
    public const SKIP_LASTEDIT_BY_BOT = true; // not referenced in this class body
    /** When true, pages whose namespace is not 0 are rejected by getTextFromWikiAction(). */
    public const SKIP_NOT_IN_MAIN_WIKISPACE = true;
    public const SKIP_ADQ = false; // not referenced in this class body
    /** Seconds slept after each processed title. */
    public const THROTTLE_DELAY_AFTER_EACH_TITLE = 2;
    protected const GIT_COMMIT_HASH_PATH = __DIR__ . '/resources/commithash.txt';

    // NOTE: properties are intentionally left without native types, so that subclasses
    // redeclaring them (without a type) keep working.

    /**
     * @var PageListInterface|null
     */
    protected $pageListGenerator;
    /**
     * @var WikiBotConfig
     */
    protected $bot;
    /**
     * @var MediawikiFactory
     */
    protected $wiki;
    /**
     * @var WikiPageAction
     */
    protected $pageAction;
    protected $defaultTaskname;
    protected $modeAuto = false;
    /** Passed as fourth argument to EditInfo when editing. */
    protected $maxLag = 5;
    /**
     * @var LoggerInterface
     */
    protected $log;
    /**
     * @var array titles previously processed
     */
    protected $pastAnalyzed = [];
    /**
     * @var Summary
     */
    protected $summary;

    /**
     * @var InternetDomainParserInterface|null
     */
    protected $domainParser;

    /**
     * Stores the collaborators, then immediately starts the worker with run().
     *
     * @throws ConfigException
     * @throws StopActionException
     * @throws Throwable
     */
    public function __construct(
        WikiBotConfig      $bot,
        MediawikiFactory   $wiki,
        ?PageListInterface $pagesGen = null
    )
    {
        $this->log = $bot->getLogger();
        $this->wiki = $wiki;
        $this->bot = $bot;
        $this->defaultTaskname = $bot->getTaskName();
        if ($pagesGen !== null) {
            $this->pageListGenerator = $pagesGen;
        }

        $this->initializePastAnalyzedTitles();

        // @throw exception on "Invalid CSRF token"
        $this->run();//todo delete that and use (Worker)->run($duration) or process management
    }

    /**
     * Processes every title returned by getTitles(), sleeping between titles.
     *
     * A StopActionException ends the loop silently (after being logged); any other
     * Exception is logged and rethrown.
     *
     * @throws ConfigException
     * @throws Throwable
     * @throws StopActionException
     */
    final public function run(): void
    {
        $this->log->notice(
            '*** ' . date('Y-m-d H:i') . ' New BotTaskWorker: ' . $this->defaultTaskname,
            ['stats' => 'bottaskworker.instance']
        );
        $this->log->notice(sprintf(
            '*** Bot: %s - commit: %s',
            $this->bot::getBotName(),
            $this->bot->getCurrentGitCommitHash() ?? '??'
        ));
        // `stats` is not part of LoggerInterface: do not fail when the logger has none.
        $stats = $this->getLoggerStats();
        $this->log->notice('*** Stats: ' . ($stats !== null ? $stats::class : 'none'));

        foreach ($this->getTitles() as $title) {
            try {
                $this->titleProcess($title);
            } catch (Exception $exception) {
                $this->log->error($exception->getMessage());
                if ($exception instanceof StopActionException) {
                    // just stop without fatal error, when "stop" action from talk page
                    return;
                }

                throw $exception;
            }

            // static:: (not self::) because run() is final: overriding the constant is
            // the only way for a concrete worker to tune the throttle.
            sleep(static::THROTTLE_DELAY_AFTER_EACH_TITLE);
        }
    }

    /**
     * Returns the page titles to process.
     *
     * @throws ConfigException when no page list generator was given to the constructor
     */
    protected function getTitles(): array
    {
        if ($this->pageListGenerator === null) {
            throw new ConfigException('Empty PageListGenerator');
        }

        return $this->pageListGenerator->getPageTitles();
    }

    /**
     * Full processing of one title: skip checks, text loading, domain processing,
     * generic syntax fix, memorization of the title and, if needed, edition.
     *
     * Exceptions raised while loading the text are logged and the title is skipped.
     */
    protected function titleProcess(string $title): void
    {
        $this->printTitle($title);

        // move up ?
        if ($this->checkAlreadyAnalyzed($title)) {
            $this->log->notice("Skip : déjà analysé", ['stats' => 'bottaskworker.skip.dejaanalyse']);

            return;
        }

        try {
            $text = $this->getTextFromWikiAction($title);
        } catch (Exception $e) {
            $this->log->error($e->getMessage());

            return;
        }

        if (!$this->canProcessTitleArticle($title, $text)) {
            return;
        }

        // getTextFromWikiAction() may return null, but processWithDomainWorker() only accepts
        // a string: under strict_types a null would raise a TypeError that run() does not catch.
        if ($text === null) {
            $this->log->warning('Skip : no text for this page');

            return;
        }

        $this->summary = new Summary($this->defaultTaskname);
        $this->summary->setBotFlag(static::TASK_BOT_FLAG);
        $newText = $this->processWithDomainWorker($title, $text);

        if ($newText === null) {
            // The domain worker produced no text: nothing can be published.
            // The title is still memorized, as on the regular "no change" path.
            $this->log->debug('Skip : no new text from domain worker');
            $this->memorizeAndSaveAnalyzedTitle($title);

            return;
        }

        $newText = $this->fixGenericWikiSyntax($newText);
        $this->memorizeAndSaveAnalyzedTitle($title); // improve : optional ?

        if ($this->isSomethingToChange($text, $newText) && $this->autoOrYesConfirmation()) {
            $this->doEdition($newText);
        }
    }

    /**
     * Creates the page action for $title (stored in $this->pageAction) and returns the page text.
     *
     * todo DI
     *
     * @throws Exception when SKIP_NOT_IN_MAIN_WIKISPACE is enabled and the page namespace is not 0
     */
    protected function getTextFromWikiAction(string $title): ?string
    {
        $this->pageAction = ServiceFactory::wikiPageAction($title);
        if (static::SKIP_NOT_IN_MAIN_WIKISPACE && $this->pageAction->getNs() !== 0) {
            throw new Exception("La page n'est pas dans Main (ns!==0)");
        }

        return $this->pageAction->getText();
    }

    /**
     * return $newText for editing
     */
    abstract protected function processWithDomainWorker(string $title, string $text): ?string;

    /**
     * Publishes $newText on the current page, then sleeps SLEEP_AFTER_EDITION seconds.
     *
     * Edition errors are logged as warnings and swallowed, except "Invalid CSRF token"
     * which is rethrown as an Exception (with the original error as previous).
     *
     * @throws Exception on "Invalid CSRF token"
     */
    protected function doEdition(string $newText): void
    {
        try {
            $result = $this->pageAction->editPage(
                $newText,
                new EditInfo(
                    $this->generateSummaryText(),
                    $this->summary->isMinorFlag(),
                    $this->summary->isBotFlag(),
                    $this->maxLag
                ),
                static::CHECK_EDIT_CONFLICT
            );
        } catch (Throwable $e) {
            if (str_contains($e->getMessage(), 'Invalid CSRF token')) {
                // Guarded: a logger without `stats` must not hide the CSRF error below.
                $stats = $this->getLoggerStats();
                if ($stats !== null) {
                    $stats->increment('bottaskworker.exception.invalidCSRFtoken');
                }

                // Cast: some Throwable return a non-int code, which Exception rejects.
                throw new Exception('Invalid CSRF token', (int) $e->getCode(), $e);
            }

            // If not a critical edition error
            // example : Wiki Conflict : Page has been edited after getText()
            $this->log->warning($e->getMessage());

            return;
        }

        $this->log->notice($result ? '>> EDIT OK' : '>>  NOCHANGE');
        $this->log->debug("Sleep " . static::SLEEP_AFTER_EDITION);
        sleep(static::SLEEP_AFTER_EDITION);
    }

    /**
     * Minimalist summary as "bot: taskname".
     * Warning: overridden by some workers (ex: ExternRefWorker).
     */
    protected function generateSummaryText(): string
    {
        return $this->summary->serializePrefixAndTaskname();
    }

    /**
     * todo @notused
     * First instantiation on a new commit: prepend the short git commit hash to the taskname,
     * and store the hash in GIT_COMMIT_HASH_PATH so that it is done only once per commit.
     * Example: "[4a1b2c] Améliorations bibliographiques"
     */
    protected function appendOneTimeGitCommitToTaskname(string $taskname): string
    {
        $commitHash = $this->bot->getCurrentGitCommitHash();
        if (!$commitHash) {
            return $taskname;
        }

        // A missing/unreadable file is the normal "first run" case: no error suppression needed.
        $commitHashFromFile = is_readable(self::GIT_COMMIT_HASH_PATH)
            ? file_get_contents(self::GIT_COMMIT_HASH_PATH)
            : false;

        if ($commitHashFromFile !== $commitHash) {
            if (file_put_contents(self::GIT_COMMIT_HASH_PATH, $commitHash) === false) {
                // Otherwise the hash would be silently prepended again on every run.
                $this->log->warning('Unable to write ' . self::GIT_COMMIT_HASH_PATH);
            }
            $taskname = sprintf('[%s] %s', substr($commitHash, 0, 6), $taskname);
        }

        return $taskname;
    }

    /**
     * Applies WikiTextUtil::fixConcatenatedRefsSyntax() to the new text.
     *
     * A null text is returned as an empty string instead of being forwarded: the
     * utility only accepts a string, so null would raise a TypeError under strict_types.
     */
    protected function fixGenericWikiSyntax(?string $newText): string
    {
        if ($newText === null) {
            return '';
        }

        return WikiTextUtil::fixConcatenatedRefsSyntax($newText);
    }

    /**
     * Returns the statistics collector exposed by the logger as a `stats` property, or null.
     *
     * `stats` is not declared by Psr\Log\LoggerInterface: it only exists on some concrete
     * loggers, hence the defensive access.
     */
    private function getLoggerStats(): ?object
    {
        $stats = $this->log->stats ?? null;

        return is_object($stats) ? $stats : null;
    }
}
265