1 | <?php |
||
2 | /* |
||
3 | * This file is part of dispositif/wikibot application (@github) |
||
4 | * 2019-2023 © Philippe M./Irønie <[email protected]> |
||
5 | * For the full copyright and MIT license information, view the license file. |
||
6 | */ |
||
7 | |||
8 | declare(strict_types=1); |
||
9 | |||
10 | namespace App\Application; |
||
11 | |||
12 | use App\Domain\Enums\Language; |
||
13 | use App\Infrastructure\Mediawiki\ExtendedMediawikiFactory; |
||
14 | use App\Infrastructure\TagParser; |
||
15 | use DomainException; |
||
16 | use Exception; |
||
17 | use Mediawiki\Api\MediawikiFactory; |
||
18 | use Mediawiki\DataModel\Content; |
||
19 | use Mediawiki\DataModel\EditInfo; |
||
20 | use Mediawiki\DataModel\Page; |
||
21 | use Mediawiki\DataModel\PageIdentifier; |
||
22 | use Mediawiki\DataModel\Revision; |
||
23 | use Mediawiki\DataModel\Title; |
||
24 | use Throwable; |
||
25 | |||
/**
 * Read and edit operations on a single wiki page, fetched by title at construction time.
 *
 * The page snapshot is loaded once in the constructor; the read helpers
 * (getText(), getLastEditor(), ...) work on that snapshot.
 */
class WikiPageAction
{
    final public const SKIP_LANG_INDICATOR = 'fr'; // skip {{fr}} before template

    /**
     * @var Page
     */
    public $page; // public for debug

    /**
     * @var MediawikiFactory
     */
    public $wiki;

    /**
     * Wiki namespace
     */
    private ?int $ns = null;

    /**
     * Revision returned by the most recent getText() call; used for edit-conflict detection.
     */
    private ?Revision $lastTextRevision = null;

    /**
     * Fetch the page for the given title and memorize its namespace.
     *
     * @throws Exception wrapping any Throwable raised while fetching the page
     */
    public function __construct(
        MediawikiFactory $wiki, // api ?
        private readonly string $title
    ) {
        $this->wiki = $wiki;

        try {
            $this->page = $wiki->newPageGetter()->getFromTitle($title);
            $this->ns = $this->page->getPageIdentifier()->getTitle()->getNs();
        } catch (Throwable $e) {
            throw new Exception(
                'Erreur construct WikiPageAction ' . $e->getMessage() . $e->getFile() . $e->getLine(),
                // some throwables (e.g. PDOException) carry a string code, which Exception rejects
                (int) $e->getCode(),
                $e
            );
        }
    }

    /**
     * Namespace of the page as read in the constructor.
     */
    public function getNs(): ?int
    {
        return $this->ns;
    }

    /**
     * todo Move to WikiTextUtil ?
     * Replace serialized template and manage {{en}} prefix.
     * Don't delete {{fr}} on frwiki.
     *
     * Diagnostic messages are echoed when a replacement is skipped.
     * Every path below returns a string; the nullable return type is kept for
     * backward compatibility.
     *
     * @param string $text       wikitext to edit
     * @param string $tplOrigin  serialized template currently present in $text
     * @param string $tplReplace serialized template to put in place of $tplOrigin
     *
     * @return string|null the edited text, or $text unchanged when the replacement is skipped
     */
    public static function replaceTemplateInText(string $text, string $tplOrigin, string $tplReplace): ?string
    {
        $quotedOrigin = preg_quote($tplOrigin, '#');

        // "{{en}} {{zh}} {{ouvrage...}}"
        // todo test U
        if (preg_match_all(
            '#(?<langTemp>{{[a-z][a-z]}} ?{{[a-z][a-z]}}) ?' . $quotedOrigin . '#i',
            $text,
            $matches
        )
        ) {
            // Skip double lang prefix (like in "{{fr}} {{en}} {template}")
            echo 'SKIP ! double lang prefix !';

            return $text;
        }

        // hack // todo: other {{en}} patterns ?
        // OK : {{en}} \n {{ouvrage}}
        $found = preg_match_all(
            "#(?<langTemp>{{(?<lang>[a-z][a-z])}} *\n?)?" . $quotedOrigin . '#i',
            $text,
            $matches
        );
        if (!($found > 0)) {
            return $text;
        }

        foreach ($matches[0] as $num => $mention) {
            $lang = $matches['lang'][$num] ?? '';
            if (!empty($lang)) {
                // presumably normalizes the prefix to the wiki language code — see Language::all2wiki()
                $lang = Language::all2wiki($lang);
            }
            $hasLang = !empty($lang);

            // detect inconsistency between lang indicator and lang param
            // example : {{en}} {{template|lang=ru}}
            if ($hasLang && self::SKIP_LANG_INDICATOR !== $lang
                && preg_match('#langue *=#', $tplReplace)
                // $lang is quoted so it can never be interpreted as regex syntax
                && !preg_match('#langue *= ?' . preg_quote((string) $lang, '#') . '#i', $tplReplace)
                && !preg_match('#\| ?langue *= ?\n?\|#', $tplReplace)
            ) {
                echo sprintf(
                    'prefix %s incompatible avec langue de %s',
                    $matches['langTemp'][$num],
                    $tplReplace
                );

                // skip all the replacements of that template
                return $text; // return null ?
            }

            // dirty FIX (July 2020) : {{en}} prefix but no "langue" param at all on the new template
            if ($hasLang
                && self::SKIP_LANG_INDICATOR !== $lang
                && 1 !== preg_match('#\| ?langue *=#', $tplReplace)
            ) {
                // skip all the replacements of that template
                return $text;
            }

            // dirty FIX : {{en}} prefix but "langue=" has an empty value on the new template
            if ($hasLang && preg_match('#\| ?(langue *=) ?\n? ?\|#', $tplReplace, $matchLangue) > 0) {
                $previousTpl = $tplReplace;
                // $tplReplace stays modified for the following iterations too
                $tplReplace = str_replace($matchLangue[1], 'langue=' . $lang, $tplReplace);
                $text = str_replace($previousTpl, $tplReplace, $text);
            }

            // don't delete {{fr}} before {template} on frwiki
            if (self::SKIP_LANG_INDICATOR === $lang) {
                $text = str_replace($tplOrigin, $tplReplace, $text);

                continue;
            }

            // replace {template} and {{lang}} {template}
            $text = str_replace($mention, $tplReplace, $text);
            // needed when a previous global replacement ran without the {{en}} prefix
            $text = str_replace(
                $matches['langTemp'][$num] . $tplReplace,
                $tplReplace,
                $text
            );
        }

        return $text;
    }

    /**
     * User name of the latest revision, or null when the page has no revision.
     */
    public function getLastEditor(): ?string
    {
        return $this->getLastRevision()?->getUser();
    }

    /**
     * Check if a frwiki disambiguation page.
     */
    public function isPageHomonymie(): bool
    {
        // case-insensitive search of the disambiguation template opening
        return false !== stripos($this->getText() ?? '', '{{homonymie');
    }

    /**
     * Get wiki text from the page.
     *
     * Also memorizes the revision read, for the conflict check done by editPage().
     *
     * @return string|null null when the page has no revision
     */
    public function getText(): ?string
    {
        $this->lastTextRevision = $this->getLastRevision();

        if (null === $this->lastTextRevision) {
            return null;
        }

        return $this->lastTextRevision->getContent()->getData();
    }

    /**
     * Latest revision of the loaded page, or null when the page doesn't exist.
     */
    public function getLastRevision(): ?Revision
    {
        // any empty value is normalized to null
        return $this->page->getRevisions()->getLatest() ?: null;
    }

    /**
     * Is it page with a redirection link ?
     */
    public function isRedirect(): bool
    {
        return !empty($this->getRedirect());
    }

    /**
     * Get redirection page title or null.
     */
    public function getRedirect(): ?string
    {
        $text = $this->getText();
        if ($text && preg_match('/^#REDIRECT(?:ION)? ?\[\[([^]]+)]]/i', $text, $matches)) {
            return trim($matches[1]);
        }

        return null;
    }

    /**
     * Append text to the page, or create the page with that text when it has no revision.
     *
     * @return bool success
     * @throws Exception
     */
    public function addToBottomOrCreatePage(string $addText, EditInfo $editInfo): bool
    {
        if (null === $this->getLastRevision()) {
            return $this->createPage($addText, $editInfo);
        }

        return $this->addToBottomOfThePage($addText, $editInfo);
    }

    /**
     * Create a new page.
     *
     * @return bool result of the revision saver
     * @throws Exception when the page already has a revision
     */
    public function createPage(string $text, ?EditInfo $editInfo = null): bool
    {
        if (null !== $this->getLastRevision()) {
            throw new Exception('That page already exists');
        }

        // the identifier is built from the requested title
        $identifier = new PageIdentifier(new Title($this->title));
        $revision = new Revision(new Content($text), $identifier);

        return $this->wiki->newRevisionSaver()->save($revision, $editInfo);
    }

    /**
     * Add text to the bottom of the article.
     *
     * @return bool success
     * @throws Exception when the page has no revision
     */
    public function addToBottomOfThePage(string $addText, EditInfo $editInfo): bool
    {
        if (null === $this->getLastRevision()) {
            throw new Exception('That page does not exist');
        }

        $newText = $this->getText() . "\n" . $addText;

        return $this->editPage($newText, $editInfo);
    }

    /**
     * Edit the page with new text.
     * Opti : EditInfo optional param ?
     *
     * On failure, the saver errors are printed to the output.
     *
     * @param bool|null $checkConflict when true, refuse to save if the latest revision
     *                                 differs from the one read by getText()
     *
     * @return bool result of the revision saver
     * @throws DomainException on detected edit conflict
     */
    public function editPage(string $newText, EditInfo $editInfo, ?bool $checkConflict = false): bool
    {
        if ($checkConflict && $this->isPageEditedAfterGetText()) {
            throw new DomainException('Wiki Conflict : Page has been edited after getText()');
            // return false ?
        }

        $identifier = $this->page->getPageIdentifier();
        $revision = new Revision(new Content($newText), $identifier);

        // TODO try/catch UsageExceptions badtoken
        // captchaId=12345&captchaWord=MediaWikiIsCool
        $revisionSaver = ExtendedMediawikiFactory::newRevisionSaverExtended();
        $result = $revisionSaver->save($revision, $editInfo);
        if (false === $result) {
            echo "Error editPage\n";
            print_r($revisionSaver->getErrors());
        }

        return $result;
    }

    /**
     * Check if wiki has been edited by someone since bot's getText().
     *
     * The page is fetched again by title and its latest revision is compared with
     * the one memorized by getText(). Returns true when they differ, or when the
     * freshly fetched page has no latest revision.
     */
    private function isPageEditedAfterGetText(): bool
    {
        $updatedPage = $this->wiki->newPageGetter()->getFromTitle($this->title);
        $updatedLastRevision = $updatedPage->getRevisions()->getLatest();

        // Non-strict object equality comparison
        /** @noinspection PhpNonStrictObjectEqualityInspection */
        return !($updatedLastRevision && $updatedLastRevision == $this->lastTextRevision);
    }

    /**
     * Add text to the top of the page.
     *
     * @return bool success
     * @throws Exception when the page has no revision
     */
    public function addToTopOfThePage(string $addText, EditInfo $editInfo): bool
    {
        if (null === $this->getLastRevision()) {
            throw new Exception('That page does not exist');
        }

        $newText = $addText . $this->getText();

        return $this->editPage($newText, $editInfo);
    }

    /**
     * Extract <ref> data from text.
     *
     * An Exception raised by the parser is reported on the output and an empty
     * array is returned instead.
     */
    public function extractRefFromText(string $text): ?array
    {
        $parser = new TagParser(); // todo ParserFactory
        try {
            $refs = $parser->importHtml($text)->getRefValues();
        } catch (Exception $e) {
            echo "Error extractRefFromText: " . $e->getMessage() . "\n";

            return [];
        }

        return $refs;
    }

    /**
     * TODO $url parameter
     * TODO? refactor with : parse_str() + parse_url($url, PHP_URL_QUERY)
     * check if any ref contains a targeted website/URL.
     *
     * Only lemonde.fr URLs are currently targeted.
     *
     * @param array $refs reference contents; each item is cast to string before matching
     *
     * @return array list of ['url' => first matching URL, 'raw' => original ref item]
     */
    public function filterRefByURL(array $refs): array
    {
        $validRef = [];
        foreach ($refs as $ref) {
            if (preg_match(
                '#(?<url>https?://(?:www\.)?lemonde\.fr/[^ \]]+)#i',
                (string)$ref,
                $matches
            ) > 0
            ) {
                $validRef[] = ['url' => $matches['url'], 'raw' => $ref];
            }
        }

        return $validRef;
    }
}
||
362 |