Passed
Push — master ( 69b6a3...2b67eb )
by Dispositif
02:36
created

appendOneTimeGitCommitToTaskname()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 10
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
eloc 6
c 0
b 0
f 0
dl 0
loc 10
ccs 0
cts 0
cp 0
rs 10
cc 3
nc 2
nop 1
crap 12
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Application;
11
12
use App\Application\InfrastructurePorts\PageListForAppInterface as PageListInterface;
13
use App\Application\Traits\BotWorkerTrait;
14
use App\Application\Traits\WorkerAnalyzedTitlesTrait;
15
use App\Application\Traits\WorkerCLITrait;
16
use App\Domain\Exceptions\ConfigException;
17
use App\Domain\Exceptions\StopActionException;
18
use App\Domain\InfrastructurePorts\InternetDomainParserInterface;
19
use App\Domain\Models\Summary;
20
use App\Infrastructure\ServiceFactory;
21
use Exception;
22
use Mediawiki\Api\MediawikiFactory;
23
use Mediawiki\DataModel\EditInfo;
24
use Psr\Log\LoggerInterface;
25
use Throwable;
26
27
/**
 * Base class for bot task workers.
 *
 * The visible methods implement one loop: run() walks the titles returned by
 * getTitles(), titleProcess() fetches each page's text, hands it to the
 * subclass hook processWithDomainWorker(), and doEdition() saves the result
 * when something changed. Several helpers called here (printTitle(),
 * checkAlreadyAnalyzed(), canProcessTitleArticle(), ...) are not defined in
 * this class; presumably they come from the three traits below.
 */
abstract class AbstractBotTaskWorker
28
{
29
    use WorkerCLITrait, BotWorkerTrait, WorkerAnalyzedTitlesTrait;
30
31
    // Value given to Summary::setBotFlag() for each processed title (read via static::).
    public const TASK_BOT_FLAG = false;
32
    // Seconds slept by doEdition() after an edit attempt that did not throw.
    public const SLEEP_AFTER_EDITION = 60;
33
    // Not read in this class; presumably used by a trait — TODO confirm.
    public const MINUTES_DELAY_AFTER_LAST_HUMAN_EDIT = 15;
34
    // Passed as the third argument of editPage() in doEdition().
    public const CHECK_EDIT_CONFLICT = true;
35
    // Not read in this class; presumably the analyzed-titles storage used by WorkerAnalyzedTitlesTrait — TODO confirm.
    public const ARTICLE_ANALYZED_FILENAME = __DIR__ . '/resources/article_edited.txt';
36
    // Not read in this class; presumably used by a trait — TODO confirm.
    public const SKIP_LASTEDIT_BY_BOT = true;
37
    // When true, getTextFromWikiAction() throws for a page whose namespace is not 0.
    public const SKIP_NOT_IN_MAIN_WIKISPACE = true;
38
    // Not read in this class; presumably used by a trait — TODO confirm.
    public const SKIP_ADQ = true;
39
    // Seconds slept by run() after each title processed without exception.
    public const THROTTLE_DELAY_AFTER_EACH_TITLE = 1; //secs
40
    // File in which appendOneTimeGitCommitToTaskname() stores the last seen commit hash.
    protected const GIT_COMMIT_HASH_PATH = __DIR__ . '/resources/commithash.txt';
41
42
    /**
43
     * @var PageListInterface
     * Source of titles for getTitles(); only assigned when the constructor receives a generator.
44
     */
45
    protected $pageListGenerator;
46
    /**
47
     * @var WikiBotConfig
     * Supplies the logger, the task name, the bot name and the current git commit hash.
48
     */
49
    protected $bot;
50
    /**
51
     * @var MediawikiFactory
     * Stored by the constructor; not used by the methods visible in this class.
52
     */
53
    protected $wiki;
54
    /**
55
     * @var WikiPageAction
     * Page handle created by getTextFromWikiAction() and reused by doEdition() for the same title.
56
     */
57
    protected $pageAction;
58
    // Task name read from $bot->getTaskName() in the constructor; used to build each Summary.
    protected $defaultTaskname;
59
    // Not read in this class; presumably consulted by autoOrYesConfirmation() — TODO confirm.
    protected $modeAuto = false;
60
    // Passed as the fourth argument of EditInfo in doEdition().
    protected $maxLag = 5;
61
    /**
62
     * @var LoggerInterface
     * Logger obtained from $bot->getLogger() in the constructor.
63
     */
64
    protected $log;
65
    /**
66
     * @var array titles previously processed
67
     */
68
    protected $pastAnalyzed = [];
69
    /**
70
     * @var Summary
     * Rebuilt by titleProcess() for every title before the domain worker is called.
71
     */
72
    protected $summary;
73
74
    /**
75
     * @var InternetDomainParserInterface|null
     * Never assigned in this class; presumably set by subclasses that need it.
76
     */
77
    protected $domainParser;
78
79
    /**
     * Store the collaborators, call initializePastAnalyzedTitles(), then
     * immediately start the task.
     *
     * NOTE: construction has side effects — run() is invoked before the
     * constructor returns, so whatever run() throws escapes from `new`.
     *
     * @param WikiBotConfig          $bot      provides the logger and the task name
     * @param MediawikiFactory       $wiki     kept on the worker as $this->wiki
     * @param PageListInterface|null $pagesGen optional source of page titles
     */
    public function __construct(
        WikiBotConfig      $bot,
        MediawikiFactory   $wiki,
        ?PageListInterface $pagesGen = null
    ) {
        $this->bot = $bot;
        $this->wiki = $wiki;
        $this->log = $bot->getLogger();
        $this->defaultTaskname = $bot->getTaskName();

        // Leave the property untouched when no generator is supplied.
        if (null !== $pagesGen) {
            $this->pageListGenerator = $pagesGen;
        }

        $this->initializePastAnalyzedTitles();

        // @throw exception on "Invalid CSRF token"
        // todo delete that and use (Worker)->run($duration) or process management
        $this->run();
    }
98
99
    /**
     * Process every title returned by getTitles(), one after the other.
     *
     * Logs a start banner (task name, bot name, commit hash), calls
     * titleProcess() for each title, and sleeps
     * THROTTLE_DELAY_AFTER_EACH_TITLE seconds after each title that completed
     * without throwing.
     *
     * Every Exception is logged at error level. A StopActionException ends the
     * run silently; any other Exception is re-thrown. Errors that are not
     * Exception instances (e.g. TypeError) are not caught here.
     *
     * @throws ConfigException
     * @throws Throwable
     * @throws StopActionException
     */
    final public function run(): void
    {
        $this->log->notice('*** New BotTaskWorker: ' . $this->defaultTaskname, ['stats' => 'bottaskworker.instance']);
        $this->log->notice(sprintf(
            '*** Bot: %s - commit: %s',
            $this->bot::getBotName(),
            $this->bot->getCurrentGitCommitHash() ?? '??'
        ));

        foreach ($this->getTitles() as $title) {
            try {
                $this->titleProcess($title);
            } catch (Exception $exception) {
                $this->log->error($exception->getMessage());
                if ($exception instanceof StopActionException) {
                    // just stop without fatal error, when "stop" action from talk page
                    return;
                }

                throw $exception;
            }

            // static:: (not self::) so a subclass overriding the public
            // constant takes effect, as with the other tunable constants.
            sleep(static::THROTTLE_DELAY_AFTER_EACH_TITLE);
        }
    }
130
131
    /**
     * Return the page titles to process, as given by the page list generator.
     *
     * @return array result of getPageTitles() on the generator
     *
     * @throws ConfigException when $pageListGenerator is null
     */
    protected function getTitles(): array
    {
        $generator = $this->pageListGenerator;

        if (null === $generator) {
            throw new ConfigException('Empty PageListGenerator');
        }

        return $generator->getPageTitles();
    }
142
143
    /**
     * Handle a single page title.
     *
     * Steps: print the title, skip it when already analyzed, fetch the page
     * text, ask canProcessTitleArticle() whether to continue, build a fresh
     * Summary, run the domain worker, record the title as analyzed and,
     * when there is something to change and the edit is confirmed, save it.
     *
     * @param string $title page title to process
     *
     * @throws Exception propagated from getTextFromWikiAction() and doEdition()
     */
    protected function titleProcess(string $title): void
    {
        $this->printTitle($title);

        // move up ?
        if ($this->checkAlreadyAnalyzed($title)) {
            $this->log->notice("Skip : déjà analysé", ['stats' => 'bottaskworker.skip.dejaanalyse']);

            return;
        }

        $text = $this->getTextFromWikiAction($title);
        if (!$this->canProcessTitleArticle($title, $text)) {
            return;
        }

        // getTextFromWikiAction() is nullable but processWithDomainWorker()
        // requires a string: with strict_types a null would raise a TypeError,
        // which run() does not catch (it only catches Exception).
        if ($text === null) {
            $this->log->warning('Skip : no text retrieved for this page');

            return;
        }

        $this->summary = new Summary($this->defaultTaskname);
        $this->summary->setBotFlag(static::TASK_BOT_FLAG);
        $newText = $this->processWithDomainWorker($title, $text);
        $this->memorizeAndSaveAnalyzedTitle($title); // improve : optional ?

        // The null check sits after isSomethingToChange() so that call keeps
        // running as before, and before the confirmation so nobody is asked
        // to confirm an edit that cannot be made (doEdition() needs a string).
        if (
            $this->isSomethingToChange($text, $newText)
            && $newText !== null
            && $this->autoOrYesConfirmation()
        ) {
            $this->doEdition($newText);
        }
    }
168
169
    /**
     * Create the page action for $title, keep it in $this->pageAction, and
     * return the page text.
     *
     * todo DI
     *
     * @param string $title page title passed to ServiceFactory::wikiPageAction()
     *
     * @return string|null value of getText() on the page action
     *
     * @throws Exception when SKIP_NOT_IN_MAIN_WIKISPACE is true and the page namespace is not 0
     */
    protected function getTextFromWikiAction(string $title): ?string
    {
        $this->pageAction = ServiceFactory::wikiPageAction($title);

        // The namespace is only queried when the restriction is enabled.
        $mainNamespaceOnly = static::SKIP_NOT_IN_MAIN_WIKISPACE;
        if (!$mainNamespaceOnly || $this->pageAction->getNs() === 0) {
            return $this->pageAction->getText();
        }

        throw new Exception("La page n'est pas dans Main (ns!==0)");
    }
183
184
    /**
185
     * Hook implemented by each concrete worker.
     *
     * titleProcess() calls it with the page title and the fetched page text,
     * and uses the returned value as the candidate new text for the edit.
     *
     * return $newText for editing
186
     */
187
    abstract protected function processWithDomainWorker(string $title, string $text): ?string;
188
189
    /**
     * Save $newText on the current page ($this->pageAction) with the summary
     * built by generateSummaryText(), then sleep SLEEP_AFTER_EDITION seconds.
     *
     * An "Invalid CSRF token" failure is counted in the stats (when the logger
     * exposes them) and re-thrown as an Exception. Any other failure is logged
     * as a warning and the method returns without sleeping.
     *
     * @param string $newText text to save
     *
     * @throws Exception on "Invalid CSRF token"
     */
    protected function doEdition(string $newText): void
    {
        try {
            $result = $this->pageAction->editPage(
                $newText,
                new EditInfo(
                    $this->generateSummaryText(),
                    $this->summary->isMinorFlag(),
                    $this->summary->isBotFlag(),
                    $this->maxLag
                ),
                static::CHECK_EDIT_CONFLICT
            );
        } catch (Throwable $e) {
            if (preg_match('#Invalid CSRF token#', $e->getMessage())) {
                // `stats` is not part of LoggerInterface: only use it when the
                // injected logger provides it, so a missing property cannot
                // mask the CSRF failure with an unrelated error.
                if (isset($this->log->stats)) {
                    $this->log->stats->increment('bottaskworker.exception.invalidCSRFtoken');
                }

                // getCode() is not guaranteed to be an int for every Throwable,
                // while Exception::__construct() requires one.
                throw new Exception('Invalid CSRF token', (int) $e->getCode(), $e);
            }

            // If not a critical edition error
            // example : Wiki Conflict : Page has been edited after getText()
            $this->log->warning($e->getMessage());

            return;
        }

        $this->log->notice($result ? '>> OK' : '>>  NOCHANGE');
        $this->log->debug("Sleep " . static::SLEEP_AFTER_EDITION);
        sleep(static::SLEEP_AFTER_EDITION);
    }
223
224
    /**
     * Build the edit summary from the current Summary object
     * (minimalist form, as "bot: taskname").
     *
     * Caution: some workers override this method (e.g. ExternRefWorker).
     *
     * @return string value of Summary::serializePrefixAndTaskname()
     */
    protected function generateSummaryText(): string
    {
        $summary = $this->summary;

        return $summary->serializePrefixAndTaskname();
    }
232
233
    /**
     * todo @notused
     * First instantiation on a new commit: prefix the task name with the
     * abbreviated git commit hash and remember that hash in
     * GIT_COMMIT_HASH_PATH so the prefix is only added once per commit.
     * Example: "[4a1b2c] Améliorations bibliographiques"
     *
     * @param string $taskname task name to decorate
     *
     * @return string "[<first 6 chars of hash>] <taskname>" when the current
     *                hash differs from the stored one, otherwise $taskname unchanged
     */
    protected function appendOneTimeGitCommitToTaskname(string $taskname): string
    {
        $commitHash = $this->bot->getCurrentGitCommitHash();
        if (!$commitHash) {
            return $taskname;
        }

        // Read the stored hash without error suppression; a missing or
        // unreadable file simply counts as "no hash stored yet".
        $storedHash = is_readable(self::GIT_COMMIT_HASH_PATH)
            ? file_get_contents(self::GIT_COMMIT_HASH_PATH)
            : false;

        // trim(): a trailing newline in the file must not defeat the comparison.
        if (is_string($storedHash) && trim($storedHash) === $commitHash) {
            return $taskname;
        }

        if (file_put_contents(self::GIT_COMMIT_HASH_PATH, $commitHash) === false) {
            // Without the stored hash the prefix will be added again next time.
            $this->log->warning('Unable to write ' . self::GIT_COMMIT_HASH_PATH);
        }

        return sprintf('[%s] %s', substr($commitHash, 0, 6), $taskname);
    }
249
}
250