Issues (106)

src/Application/WikiPageAction.php (1 issue)

Severity
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Application;
11
12
use App\Domain\Enums\Language;
13
use App\Infrastructure\Mediawiki\ExtendedMediawikiFactory;
14
use App\Infrastructure\TagParser;
15
use DomainException;
16
use Exception;
17
use Mediawiki\Api\MediawikiFactory;
18
use Mediawiki\DataModel\Content;
19
use Mediawiki\DataModel\EditInfo;
20
use Mediawiki\DataModel\Page;
21
use Mediawiki\DataModel\PageIdentifier;
22
use Mediawiki\DataModel\Revision;
23
use Mediawiki\DataModel\Title;
24
use Throwable;
25
26
/**
 * Read and write operations on a single wiki page, addressed by its title.
 *
 * The page is fetched once at construction time through the injected
 * MediawikiFactory; the other methods read from that fetched Page or save
 * new revisions for the same title.
 */
class WikiPageAction
{
    final public const SKIP_LANG_INDICATOR = 'fr'; // skip {{fr}} before template

    /**
     * Page fetched in the constructor (public for debug).
     *
     * @var Page
     */
    public $page;

    /**
     * API factory used to fetch pages and to save new revisions.
     *
     * @var MediawikiFactory
     */
    public $wiki;

    /**
     * Wiki namespace of the fetched page.
     */
    private ?int $ns = null;

    /**
     * Revision seen by the most recent getText() call; compared against the
     * live page by isPageEditedAfterGetText() to detect edit conflicts.
     */
    private ?Revision $lastTextRevision = null;

    /**
     * Fetch the page for the given title and remember its namespace.
     *
     * @throws Exception wrapping any error raised while fetching the page
     */
    public function __construct(
        MediawikiFactory        $wiki, // api ?
        private readonly string $title
    )
    {
        $this->wiki = $wiki;

        try {
            $this->page = $wiki->newPageGetter()->getFromTitle($title);
            $this->ns = $this->page->getPageIdentifier()->getTitle()->getNs();
        } catch (Throwable $e) {
            // Throwable::getCode() is not guaranteed to be an int, while
            // Exception's constructor requires one under strict_types.
            throw new Exception(
                'Erreur construct WikiPageAction ' . $e->getMessage() . $e->getFile() . $e->getLine(),
                (int) $e->getCode(),
                $e
            );
        }
    }

    /**
     * Namespace of the page as read in the constructor.
     */
    public function getNs(): ?int
    {
        return $this->ns;
    }

    /**
     * todo Move to WikiTextUtil ?
     * Replace a serialized template by another one in a wikitext and manage
     * a language indicator prefix such as {{en}} placed before the template.
     * Don't delete {{fr}} on frwiki.
     *
     * The text is returned unchanged (and a notice may be echoed) when:
     * - the template is preceded by two language indicators;
     * - the indicator contradicts the "langue" parameter of the new template;
     * - there is a non-{{fr}} indicator but the new template has no "langue"
     *   parameter at all.
     *
     * @param string $text       wikitext to modify
     * @param string $tplOrigin  serialized template to look for
     * @param string $tplReplace serialized template to put in place
     *
     * @return string|null the resulting wikitext (the current code paths never return null)
     */
    public static function replaceTemplateInText(string $text, string $tplOrigin, string $tplReplace): ?string
    {
        // An empty needle would make the patterns below match at every offset.
        if ('' === $tplOrigin) {
            return $text;
        }

        $quotedOrigin = preg_quote($tplOrigin, '#');

        // "{{en}} {{zh}} {{ouvrage...}}"
        // todo test U
        if (preg_match('#(?<langTemp>{{[a-z][a-z]}} ?{{[a-z][a-z]}}) ?' . $quotedOrigin . '#i', $text)) {
            // Skip double lang prefix (like in "{{fr}} {{en}} {template}")
            echo 'SKIP ! double lang prefix !';

            return $text;
        }

        // hack // todo: other {{en}} patterns ?
        // OK : {{en}} \n {{ouvrage}}
        $found = preg_match_all(
            "#(?<langTemp>{{(?<lang>[a-z][a-z])}} *\n?)?" . $quotedOrigin . '#i',
            $text,
            $matches
        );
        if (!$found) {
            return $text;
        }

        foreach ($matches[0] as $num => $mention) {
            $lang = $matches['lang'][$num] ?? '';
            if (!empty($lang)) {
                $lang = Language::all2wiki($lang);
            }
            $hasLang = !empty($lang);
            $isSkippedLang = self::SKIP_LANG_INDICATOR === $lang;

            // detect inconsistency between lang indicator and lang param
            // example : {{en}} {{template|lang=ru}}
            if ($hasLang && !$isSkippedLang
                && preg_match('#langue *=#', $tplReplace)
                && !preg_match('#langue *= ?' . preg_quote((string) $lang, '#') . '#i', $tplReplace)
                && !preg_match('#\| ?langue *= ?\n?\|#', $tplReplace)
            ) {
                printf(
                    'prefix %s incompatible avec langue de %s',
                    $matches['langTemp'][$num],
                    $tplReplace
                );

                // skip all the replacements of that template
                return $text; // return null ?
            }

            // Dirty fix (July 2020): {{en}} indicator but the new template
            // has no "langue" parameter at all.
            if ($hasLang && !$isSkippedLang && !preg_match('#\| ?langue *=#', $tplReplace)) {
                // skip all the replacements of that template
                return $text;
            }

            // Dirty fix: {{en}} indicator and a "langue=" parameter left
            // without value in the new template: fill it with the indicator.
            if ($hasLang && preg_match('#\| ?(langue *=) ?\n? ?\|#', $tplReplace, $matchLangue) > 0) {
                $previousTpl = $tplReplace;
                $tplReplace = str_replace($matchLangue[1], 'langue=' . $lang, $tplReplace);
                // The unfilled new template may already be present in the text.
                $text = str_replace($previousTpl, $tplReplace, $text);
            }

            // don't delete {{fr}} before {template} on frwiki
            if ($isSkippedLang) {
                $text = str_replace($tplOrigin, $tplReplace, $text);

                continue;
            }

            // replace {template} and {{lang}} {template}
            $text = str_replace($mention, $tplReplace, $text);
            // If an earlier global replacement was done without the {{en}}
            // prefix, the prefix is still in front of the new template.
            $text = str_replace(
                $matches['langTemp'][$num] . $tplReplace,
                $tplReplace,
                $text
            );
        }

        return $text;
    }

    /**
     * User name attached to the latest revision, or null if the page doesn't exist.
     */
    public function getLastEditor(): ?string
    {
        return $this->getLastRevision()?->getUser();
    }

    /**
     * Check if a frwiki disambiguation page.
     */
    public function isPageHomonymie(): bool
    {
        // Case-insensitive search of the template opening in the wikitext.
        return false !== stripos($this->getText() ?? '', '{{homonymie');
    }

    /**
     * Get wiki text from the page.
     *
     * Side effect: remembers the revision read, for the conflict check done
     * by editPage().
     *
     * @return string|null null if the page has no revision
     */
    public function getText(): ?string
    {
        $latest = $this->getLastRevision();
        $this->lastTextRevision = $latest;

        return (null === $latest) ? null : $latest->getContent()->getData();
    }

    /**
     * Latest revision of the fetched page, or null if the page doesn't exist.
     */
    public function getLastRevision(): ?Revision
    {
        $latest = $this->page->getRevisions()->getLatest();

        return empty($latest) ? null : $latest;
    }

    /**
     * Is it page with a redirection link ?
     */
    public function isRedirect(): bool
    {
        return !empty($this->getRedirect());
    }

    /**
     * Get redirection page title or null.
     */
    public function getRedirect(): ?string
    {
        $text = $this->getText();
        if ($text && preg_match('/^#REDIRECT(?:ION)? ?\[\[([^]]+)]]/i', $text, $matches)) {
            return trim($matches[1]);
        }

        return null;
    }

    /**
     * Append text to the page, or create the page with that text if it
     * doesn't exist yet.
     *
     * @return bool success
     * @throws Exception
     */
    public function addToBottomOrCreatePage(string $addText, EditInfo $editInfo): bool
    {
        return $this->pageExists()
            ? $this->addToBottomOfThePage($addText, $editInfo)
            : $this->createPage($addText, $editInfo);
    }

    /**
     * Create a new page.
     *
     * @return bool success
     * @throws Exception if the page already has a revision
     */
    public function createPage(string $text, ?EditInfo $editInfo = null): bool
    {
        if ($this->pageExists()) {
            throw new Exception('That page already exists');
        }

        // The identifier is built from the title given to the constructor.
        $identifier = new PageIdentifier(new Title($this->title));
        $revision = new Revision(new Content($text), $identifier);

        return $this->wiki->newRevisionSaver()->save($revision, $editInfo);
    }

    /**
     * Add text to the bottom of the article.
     *
     * @return bool success
     * @throws Exception if the page doesn't exist
     */
    public function addToBottomOfThePage(string $addText, EditInfo $editInfo): bool
    {
        if (!$this->pageExists()) {
            throw new Exception('That page does not exist');
        }

        return $this->editPage($this->getText() . "\n" . $addText, $editInfo);
    }

    /**
     * Edit the page with new text.
     * Opti : EditInfo optional param ?
     *
     * On failure, the saver's errors are printed to the standard output.
     *
     * @param bool|null $checkConflict when true, refuse to save if the page
     *                                 changed since the last getText()
     *
     * @return bool success
     * @throws DomainException on edit conflict (only when $checkConflict is true)
     */
    public function editPage(string $newText, EditInfo $editInfo, ?bool $checkConflict = false): bool
    {
        if ($checkConflict && $this->isPageEditedAfterGetText()) {
            throw new DomainException('Wiki Conflict : Page has been edited after getText()');
            // return false ?
        }

        $identifier = $this->page->getPageIdentifier();
        $revision = new Revision(new Content($newText), $identifier);

        // TODO try/catch UsageExceptions badtoken
        // captchaId=12345&captchaWord=MediaWikiIsCool
        $revisionSaver = ExtendedMediawikiFactory::newRevisionSaverExtended();
        $result = $revisionSaver->save($revision, $editInfo);
        if (false === $result) {
            echo "Error editPage\n";
            print_r($revisionSaver->getErrors());
        }

        return $result;
    }

    /**
     * Check if wiki has been edited by someone since bot's getText().
     *
     * The page is fetched again and its latest revision is compared with the
     * one remembered by getText(). Also returns true when getText() was never
     * called or when the page has no revision.
     */
    private function isPageEditedAfterGetText(): bool
    {
        $updatedPage = $this->wiki->newPageGetter()->getFromTitle($this->title);
        $updatedLastRevision = $updatedPage->getRevisions()->getLatest();
        // Non-strict object equality comparison, on purpose: two distinct
        // objects carrying the same revision data must be considered equal.
        /** @noinspection PhpNonStrictObjectEqualityInspection */
        return !($updatedLastRevision && $updatedLastRevision == $this->lastTextRevision);
    }

    /**
     * Add text to the top of the page.
     *
     * @return bool success
     * @throws Exception if the page doesn't exist
     */
    public function addToTopOfThePage(string $addText, EditInfo $editInfo): bool
    {
        if (!$this->pageExists()) {
            throw new Exception('That page does not exist');
        }

        return $this->editPage($addText . $this->getText(), $editInfo);
    }

    /**
     * Extract <ref> data from text.
     *
     * Parser exceptions are caught: a notice is echoed and an empty array is
     * returned instead.
     */
    public function extractRefFromText(string $text): ?array
    {
        $parser = new TagParser(); // todo ParserFactory
        try {
            return $parser->importHtml($text)->getRefValues();
        } catch (Exception $e) {
            echo "Error extractRefFromText: " . $e->getMessage() . "\n";

            return [];
        }
    }

    /**
     * TODO? refactor with : parse_str() + parse_url($url, PHP_URL_QUERY)
     * Keep only the refs containing an http(s) URL of the targeted website.
     *
     * @param array  $refs   refs, each one castable to string
     * @param string $domain host to look for, without scheme nor "www."
     *
     * @return array list of ['url' => matched URL, 'raw' => original ref]
     */
    public function filterRefByURL(array $refs, string $domain = 'lemonde.fr'): array
    {
        // The URL stops at the first space or closing bracket.
        $pattern = '#(?<url>https?://(?:www\.)?' . preg_quote($domain, '#') . '/[^ \]]+)#i';

        $validRef = [];
        foreach ($refs as $ref) {
            if (preg_match($pattern, (string) $ref, $matches) > 0) {
                $validRef[] = ['url' => $matches['url'], 'raw' => $ref];
            }
        }

        return $validRef;
    }

    /**
     * Whether the fetched page has at least one revision.
     */
    private function pageExists(): bool
    {
        return null !== $this->getLastRevision();
    }
}
362