Issues (106)

src/Application/AbstractBotTaskWorker.php (4 issues)

Labels
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Application;
11
12
use App\Application\InfrastructurePorts\PageListForAppInterface as PageListInterface;
13
use App\Application\Traits\BotWorkerTrait;
14
use App\Application\Traits\WorkerAnalyzedTitlesTrait;
15
use App\Application\Traits\WorkerCLITrait;
16
use App\Domain\Exceptions\ConfigException;
17
use App\Domain\Exceptions\StopActionException;
18
use App\Domain\InfrastructurePorts\InternetDomainParserInterface;
19
use App\Domain\Models\Summary;
20
use App\Domain\Utils\WikiRefsFixer;
21
use App\Infrastructure\ServiceFactory;
22
use Exception;
23
use Mediawiki\Api\MediawikiFactory;
24
use Mediawiki\DataModel\EditInfo;
25
use Psr\Log\LoggerInterface;
26
use Throwable;
27
28
/**
 * Base class of the bot task workers.
 *
 * A worker iterates over the titles supplied by a page list generator, fetches the
 * wiki text of each page, delegates the transformation to the concrete subclass
 * (see processWithDomainWorker()) and, when the text changed and the edit is
 * confirmed, saves the new text on the wiki.
 *
 * Tunable constants are read with late static binding (static::) so that concrete
 * workers can override them.
 */
abstract class AbstractBotTaskWorker
{
    use WorkerCLITrait, BotWorkerTrait, WorkerAnalyzedTitlesTrait;

    /** Bot flag set on the edit summary of each processed title. */
    public const TASK_BOT_FLAG = false;
    /** Seconds to sleep after an edit attempt that did not throw. */
    public const SLEEP_AFTER_EDITION = 60;
    // NOTE(review): not read in this class; presumably consumed by one of the traits.
    public const MINUTES_DELAY_AFTER_LAST_HUMAN_EDIT = 15;
    /** Passed as third argument of WikiPageAction::editPage(). */
    public const CHECK_EDIT_CONFLICT = true;
    // NOTE(review): not read in this class; presumably used by WorkerAnalyzedTitlesTrait.
    public const ARTICLE_ANALYZED_FILENAME = __DIR__ . '/resources/article_edited.txt';
    // NOTE(review): not read in this class; presumably consumed by one of the traits.
    public const SKIP_LASTEDIT_BY_BOT = true;
    /** When true, pages whose namespace is not 0 are rejected by getTextFromWikiAction(). */
    public const SKIP_NOT_IN_MAIN_WIKISPACE = true;
    // NOTE(review): not read in this class; presumably consumed by one of the traits.
    public const SKIP_ADQ = false;
    /** Seconds to sleep after each processed title. */
    public const THROTTLE_DELAY_AFTER_EACH_TITLE = 2;
    /** File memorizing the last git commit hash seen by appendOneTimeGitCommitToTaskname(). */
    protected const GIT_COMMIT_HASH_PATH = __DIR__ . '/resources/commithash.txt';

    /**
     * @var PageListInterface|null source of the titles to process (null when not injected)
     */
    protected $pageListGenerator;
    /**
     * @var WikiBotConfig
     */
    protected $bot;
    /**
     * @var MediawikiFactory
     */
    protected $wiki;
    /**
     * @var WikiPageAction page action of the title currently processed
     */
    protected $pageAction;
    protected $defaultTaskname;
    protected $modeAuto = false;
    protected $maxLag = 5;
    /**
     * @var LoggerInterface
     */
    protected $log;
    /**
     * @var array titles previously processed
     */
    protected $pastAnalyzed = [];
    /**
     * @var Summary edit summary of the title currently processed
     */
    protected $summary;

    /**
     * @var InternetDomainParserInterface|null
     */
    protected $domainParser;

    /**
     * Stores the collaborators, loads the previously analyzed titles and
     * immediately starts the processing loop.
     *
     * @throws ConfigException
     * @throws StopActionException
     * @throws Throwable
     */
    public function __construct(
        WikiBotConfig      $bot,
        MediawikiFactory   $wiki,
        ?PageListInterface $pagesGen = null
    ) {
        $this->log = $bot->getLogger();
        $this->wiki = $wiki;
        $this->bot = $bot;
        $this->defaultTaskname = $bot->getTaskName();
        // The parameter is already typed: null simply leaves the generator unset.
        $this->pageListGenerator = $pagesGen;

        $this->initializePastAnalyzedTitles();

        // @throw exception on "Invalid CSRF token"
        $this->run();//todo delete that and use (Worker)->run($duration) or process management
    }

    /**
     * Processes every title returned by getTitles(), sleeping between titles.
     *
     * A StopActionException ends the loop silently; any other Exception is logged
     * and re-thrown.
     *
     * @throws ConfigException
     * @throws Throwable
     * @throws StopActionException
     */
    final public function run(): void
    {
        $this->log->notice(
            '*** '.date('Y-m-d H:i').' New BotTaskWorker: ' . $this->defaultTaskname,
            ['stats' => 'bottaskworker.instance']
        );
        $this->log->notice(sprintf(
            '*** Bot: %s - commit: %s',
            $this->bot::getBotName(),
            $this->bot->getCurrentGitCommitHash() ?? '??'
        ));
        // NOTE(review): `stats` is not part of Psr\Log\LoggerInterface; this assumes the
        // concrete logger returned by WikiBotConfig::getLogger() exposes it.
        $this->log->notice('*** Stats: ' . $this->log->stats::class);

        foreach ($this->getTitles() as $title) {
            try {
                $this->titleProcess($title);
            } catch (Exception $exception) {
                $this->log->error($exception->getMessage());
                if ($exception instanceof StopActionException) {
                    // just stop without fatal error, when "stop" action from talk page
                    return;
                }

                throw $exception;
            }

            // Late static binding, like the other tunable constants, so that a
            // subclass overriding the delay is honored.
            sleep(static::THROTTLE_DELAY_AFTER_EACH_TITLE);
        }
    }

    /**
     * Returns the titles provided by the injected page list generator.
     *
     * @throws ConfigException when no page list generator was injected
     */
    protected function getTitles(): array
    {
        if ($this->pageListGenerator === null) {
            throw new ConfigException('Empty PageListGenerator');
        }

        return $this->pageListGenerator->getPageTitles();
    }

    /**
     * Full processing of one title: skip checks, text retrieval, domain processing,
     * memorization of the title and, when needed and confirmed, edition of the page.
     *
     * @throws Exception
     */
    protected function titleProcess(string $title): void
    {
        $this->printTitle($title);

        // move up ?
        if ($this->checkAlreadyAnalyzed($title)) {
            $this->log->notice("Skip : déjà analysé", ['stats' => 'bottaskworker.skip.dejaanalyse']);

            return;
        }

        try {
            $text = $this->getTextFromWikiAction($title);
        } catch (Exception $e) {
            $this->log->error($e->getMessage());

            return;
        }

        if (!$this->canProcessTitleArticle($title, $text)) {
            return;
        }

        if ($text === null) {
            // getTextFromWikiAction() is nullable while processWithDomainWorker() requires
            // a string: under strict_types a null would raise a TypeError, which run()
            // does not catch (it only catches Exception).
            $this->log->warning('Skip : no text available for this page');

            return;
        }

        $this->summary = new Summary($this->defaultTaskname);
        $this->summary->setBotFlag(static::TASK_BOT_FLAG);
        $newText = $this->processWithDomainWorker($title, $text);
        $this->memorizeAndSaveAnalyzedTitle($title); // improve : optionnal ?

        if (!$this->isSomethingToChange($text, $newText) || !$this->autoOrYesConfirmation()) {
            return;
        }

        // The generic syntax fix is only applied when an edit is actually going to happen.
        $fixedText = $this->fixGenericWikiSyntax($newText);
        if ($fixedText === null) {
            // fixGenericWikiSyntax() returns null for an empty text and doEdition()
            // requires a string: nothing to save.
            return;
        }

        $this->doEdition($fixedText);
    }

    /**
     * Creates the page action for $title and returns the page text.
     *
     * todo DI
     *
     * @throws Exception when SKIP_NOT_IN_MAIN_WIKISPACE is enabled and the page namespace is not 0
     */
    protected function getTextFromWikiAction(string $title): ?string
    {
        $this->pageAction = ServiceFactory::wikiPageAction($title);
        if (static::SKIP_NOT_IN_MAIN_WIKISPACE && $this->pageAction->getNs() !== 0) {
            throw new Exception("La page n'est pas dans Main (ns!==0)");
        }

        return $this->pageAction->getText();
    }

    /**
     * return $newText for editing
     */
    abstract protected function processWithDomainWorker(string $title, string $text): ?string;

    /**
     * Saves $newText on the current page, then sleeps SLEEP_AFTER_EDITION seconds.
     *
     * An "Invalid CSRF token" failure is counted in the stats and re-thrown as an
     * Exception; any other failure is logged as a warning and the method returns
     * without sleeping.
     *
     * @throws Exception on "Invalid CSRF token"
     */
    protected function doEdition(string $newText): void
    {
        try {
            $result = $this->pageAction->editPage(
                $newText,
                new EditInfo(
                    $this->generateSummaryText(),
                    $this->summary->isMinorFlag(),
                    $this->summary->isBotFlag(),
                    $this->maxLag
                ),
                static::CHECK_EDIT_CONFLICT
            );
        } catch (Throwable $e) {
            if (str_contains($e->getMessage(), 'Invalid CSRF token')) {
                // NOTE(review): `stats` is not part of Psr\Log\LoggerInterface; this assumes
                // the concrete logger exposes it.
                $this->log->stats->increment('bottaskworker.exception.invalidCSRFtoken');

                // Some throwables report a non-integer code; Exception only accepts an int.
                throw new Exception('Invalid CSRF token', (int) $e->getCode(), $e);
            }

            // If not a critical edition error
            // example : Wiki Conflict : Page has been edited after getText()
            $this->log->warning($e->getMessage());

            return;
        }

        $this->log->notice($result ? '>> EDIT OK' : '>>  NOCHANGE');
        $this->log->debug("Sleep " . static::SLEEP_AFTER_EDITION);
        sleep(static::SLEEP_AFTER_EDITION);
    }

    /**
     * Minimalist summary as "bot: taskname".
     * WARNING: overridden by some workers (ex: ExternRefWorker).
     */
    protected function generateSummaryText(): string
    {
        return $this->summary->serializePrefixAndTaskname();
    }

    /**
     * todo @notused
     * First instantiation on a new commit: prefix the taskname with the first
     * 6 characters of the git commit hash and memorize the hash in GIT_COMMIT_HASH_PATH.
     * Example: "[4a1b2c] Améliorations bibliographiques"
     */
    protected function appendOneTimeGitCommitToTaskname(string $taskname): string
    {
        $commitHash = $this->bot->getCurrentGitCommitHash();
        // Warning suppressed on purpose: a read failure yields false, which differs from
        // any hash and therefore triggers the write below.
        $commitHashFromFile = @file_get_contents(self::GIT_COMMIT_HASH_PATH);
        if ($commitHash && $commitHashFromFile !== $commitHash) {
            file_put_contents(self::GIT_COMMIT_HASH_PATH, $commitHash);
            $taskname = sprintf('[%s] %s', substr($commitHash, 0, 6), $taskname);
        }

        return $taskname;
    }

    /**
     * Applies WikiRefsFixer::fixRefWikiSyntax() to $text.
     *
     * @return string|null null when $text is empty in the sense of empty()
     *                     (null, '' and also '0')
     */
    protected function fixGenericWikiSyntax(?string $text): ?string
    {
        if (empty($text)) {
            return null;
        }

        return WikiRefsFixer::fixRefWikiSyntax($text);
    }
}
269