Dispositif /
Wikibot
| 1 | <?php |
||
| 2 | /* |
||
| 3 | * This file is part of dispositif/wikibot application (@github) |
||
| 4 | * 2019-2023 © Philippe M./Irønie <[email protected]> |
||
| 5 | * For the full copyright and MIT license information, view the license file. |
||
| 6 | */ |
||
| 7 | |||
| 8 | declare(strict_types=1); |
||
| 9 | |||
| 10 | namespace App\Application; |
||
| 11 | |||
| 12 | use App\Domain\Enums\Language; |
||
| 13 | use App\Infrastructure\Mediawiki\ExtendedMediawikiFactory; |
||
| 14 | use App\Infrastructure\TagParser; |
||
| 15 | use DomainException; |
||
| 16 | use Exception; |
||
| 17 | use Mediawiki\Api\MediawikiFactory; |
||
| 18 | use Mediawiki\DataModel\Content; |
||
| 19 | use Mediawiki\DataModel\EditInfo; |
||
| 20 | use Mediawiki\DataModel\Page; |
||
| 21 | use Mediawiki\DataModel\PageIdentifier; |
||
| 22 | use Mediawiki\DataModel\Revision; |
||
| 23 | use Mediawiki\DataModel\Title; |
||
| 24 | use Throwable; |
||
| 25 | |||
/**
 * Read/write helper bound to one wiki page, identified by its title.
 *
 * The page is fetched once in the constructor through the injected factory.
 * Text helpers (template replacement, <ref> extraction, URL filtering) live
 * here as well.
 */
class WikiPageAction
{
    final public const SKIP_LANG_INDICATOR = 'fr'; // skip {{fr}} before template

    /**
     * Page fetched from the wiki in the constructor.
     *
     * @var Page
     */
    public $page; // public for debug

    /**
     * Factory used to fetch the page and to create new pages.
     *
     * @var MediawikiFactory
     */
    public $wiki;

    /**
     * Wiki namespace of the page, read from its title in the constructor.
     */
    private ?int $ns = null;

    /**
     * Revision returned by the most recent getText() call (conflict detection).
     */
    private ?Revision $lastTextRevision = null;

    /**
     * Fetch the page and its namespace from the wiki.
     *
     * @param MediawikiFactory $wiki  factory providing the page getter / revision saver
     * @param string           $title title of the page to work on
     *
     * @throws Exception wrapping any Throwable raised while fetching the page
     */
    public function __construct(
        MediawikiFactory $wiki, // api ?
        private readonly string $title
    ) {
        $this->wiki = $wiki;

        try {
            $this->page = $wiki->newPageGetter()->getFromTitle($title);
            $this->ns = $this->page->getPageIdentifier()->getTitle()->getNs();
        } catch (Throwable $e) {
            // Exception::__construct() requires an int code, but some throwables
            // carry a string code: cast so the wrapper itself cannot fail.
            throw new Exception(
                'Erreur construct WikiPageAction ' . $e->getMessage() . $e->getFile() . $e->getLine(),
                (int) $e->getCode(),
                $e
            );
        }
    }

    /**
     * Namespace number of the page, as read in the constructor.
     */
    public function getNs(): ?int
    {
        return $this->ns;
    }

    /**
     * todo Move to WikiTextUtil ?
     * Replace a serialized template in a text and manage a {{xx}} language prefix.
     *
     * Rules applied, in order:
     *  - a double language prefix ("{{fr}} {{en}} {tpl}") leaves the text untouched;
     *  - a prefix that contradicts the "langue=" value of the new template, or a
     *    non-"fr" prefix when the new template has no "langue" parameter, aborts
     *    and returns the text as it is at that point;
     *  - an empty "langue=" parameter of the new template is filled with the prefix language;
     *  - a {{fr}} prefix is kept in front of the replaced template;
     *  - any other prefix is removed together with the old template.
     *
     * Diagnostic messages are echoed when a replacement is skipped.
     *
     * @param string $text       wikitext to modify
     * @param string $tplOrigin  serialized template currently in the text
     * @param string $tplReplace serialized template to put instead
     *
     * @return string|null the resulting text (the visible code never returns null)
     */
    public static function replaceTemplateInText(string $text, string $tplOrigin, string $tplReplace): ?string
    {
        $quotedOrigin = preg_quote($tplOrigin, '#');

        // "{{en}} {{zh}} {{ouvrage...}}"
        // todo test U
        if (preg_match('#(?<langTemp>{{[a-z][a-z]}} ?{{[a-z][a-z]}}) ?' . $quotedOrigin . '#i', $text)) {
            // Skip double lang prefix (like in "{{fr}} {{en}} {template}")
            echo 'SKIP ! double lang prefix !';

            return $text;
        }

        // hack // todo: other {{en}} patterns ?
        // OK : {{en}} \n {{ouvrage}}
        $found = preg_match_all(
            "#(?<langTemp>{{(?<lang>[a-z][a-z])}} *\n?)?" . $quotedOrigin . '#i',
            $text,
            $matches
        );
        if (!$found) {
            // no occurrence of the template (or regex failure): nothing to replace
            return $text;
        }

        foreach ($matches[0] as $num => $mention) {
            $lang = $matches['lang'][$num] ?? '';
            if (!empty($lang)) {
                // NOTE(review): presumably normalizes the prefix to the wiki language code — confirm
                $lang = Language::all2wiki($lang);
            }
            $hasLang = !empty($lang);

            // detect inconsistency between lang indicator and lang param
            // example : {{en}} {{template|lang=ru}}
            if ($hasLang && self::SKIP_LANG_INDICATOR !== $lang
                && preg_match('#langue *=#', $tplReplace)
                && !preg_match('#langue *= ?' . preg_quote((string) $lang, '#') . '#i', $tplReplace)
                && !preg_match('#\| ?langue *= ?\n?\|#', $tplReplace)
            ) {
                printf(
                    'prefix %s incompatible avec langue de %s',
                    $matches['langTemp'][$num],
                    $tplReplace
                );

                // skip all the replacements of that template
                return $text; // return null ?
            }

            // Dirty fix (July 2020): {{en}} prefix but no "langue" param on the new template
            if ($hasLang && self::SKIP_LANG_INDICATOR !== $lang && !preg_match('#\| ?langue *=#', $tplReplace)) {
                // skip all the replacements of that template
                return $text;
            }

            // Dirty fix: {{en}} prefix but "langue=" has no value on the new template
            if ($hasLang && preg_match('#\| ?(langue *=) ?\n? ?\|#', $tplReplace, $matchLangue) > 0) {
                $previousTpl = $tplReplace;
                $tplReplace = str_replace($matchLangue[1], 'langue=' . $lang, $tplReplace);
                // keep the text in sync if the previous form was already inserted
                $text = str_replace($previousTpl, $tplReplace, $text);
            }

            // don't delete {{fr}} before {template} on frwiki
            if (self::SKIP_LANG_INDICATOR === $lang) {
                $text = str_replace($tplOrigin, $tplReplace, $text);

                continue;
            }

            // replace {template} and {{lang}} {template}
            $text = str_replace($mention, $tplReplace, $text);
            // needed when an earlier global replacement already swapped the
            // template but left the {{en}} prefix in front of it
            $text = str_replace(
                $matches['langTemp'][$num] . $tplReplace,
                $tplReplace,
                $text
            );
        }

        return $text;
    }

    /**
     * User of the latest revision, or null when the page has no revision.
     */
    public function getLastEditor(): ?string
    {
        return $this->getLastRevision()?->getUser();
    }

    /**
     * Check if a frwiki disambiguation page.
     *
     * True when the page text contains "{{homonymie" (case-insensitive).
     */
    public function isPageHomonymie(): bool
    {
        return false !== stripos($this->getText() ?? '', '{{homonymie');
    }

    /**
     * Get wiki text from the page.
     *
     * Also remembers the revision that was read, which editPage() uses for
     * its optional conflict check.
     *
     * @return string|null null when the page has no revision
     */
    public function getText(): ?string
    {
        $latest = $this->getLastRevision();
        $this->lastTextRevision = $latest;

        return $latest?->getContent()->getData();
    }

    /**
     * Latest revision of the page fetched in the constructor, or null when
     * the page doesn't exist.
     */
    public function getLastRevision(): ?Revision
    {
        return $this->page->getRevisions()->getLatest() ?: null;
    }

    /**
     * Is it page with a redirection link ?
     */
    public function isRedirect(): bool
    {
        return !empty($this->getRedirect());
    }

    /**
     * Get redirection page title or null.
     *
     * The text must start with "#REDIRECT [[...]]" or "#REDIRECTION [[...]]"
     * (case-insensitive).
     */
    public function getRedirect(): ?string
    {
        $text = $this->getText();
        if ($text && preg_match('/^#REDIRECT(?:ION)? ?\[\[([^]]+)]]/i', $text, $matches)) {
            return trim($matches[1]);
        }

        return null;
    }

    /**
     * Append text to the page, or create the page with that text when it has no revision.
     *
     * @return bool success
     * @throws Exception
     */
    public function addToBottomOrCreatePage(string $addText, EditInfo $editInfo): bool
    {
        return $this->pageExists()
            ? $this->addToBottomOfThePage($addText, $editInfo)
            : $this->createPage($addText, $editInfo);
    }

    /**
     * Create a new page.
     *
     * The revision is saved with the revision saver of the injected factory.
     *
     * @return bool success
     * @throws Exception when the page already has a revision
     */
    public function createPage(string $text, ?EditInfo $editInfo = null): bool
    {
        if ($this->pageExists()) {
            throw new Exception('That page already exists');
        }

        $identifier = new PageIdentifier(new Title($this->title));
        $revision = new Revision(new Content($text), $identifier);

        return $this->wiki->newRevisionSaver()->save($revision, $editInfo);
    }

    /**
     * Add text to the bottom of the article, separated by a newline.
     *
     * @return bool success
     * @throws Exception when the page has no revision
     */
    public function addToBottomOfThePage(string $addText, EditInfo $editInfo): bool
    {
        if (!$this->pageExists()) {
            throw new Exception('That page does not exist');
        }

        return $this->editPage(($this->getText() ?? '') . "\n" . $addText, $editInfo);
    }

    /**
     * Edit the page with new text.
     * Opti : EditInfo optional param ?
     *
     * The save goes through ExtendedMediawikiFactory::newRevisionSaverExtended();
     * on failure the saver's errors are printed and false is returned.
     *
     * @param bool|null $checkConflict when true, refuse to save if the latest
     *                                 revision changed since getText()
     *
     * @return bool success
     * @throws DomainException on detected edit conflict
     */
    public function editPage(string $newText, EditInfo $editInfo, ?bool $checkConflict = false): bool
    {
        if ($checkConflict && $this->isPageEditedAfterGetText()) {
            throw new DomainException('Wiki Conflict : Page has been edited after getText()');
            // return false ?
        }

        $revision = new Revision(new Content($newText), $this->page->getPageIdentifier());

        // TODO try/catch UsageExceptions badtoken
        // captchaId=12345&captchaWord=MediaWikiIsCool
        $revisionSaver = ExtendedMediawikiFactory::newRevisionSaverExtended();
        $result = $revisionSaver->save($revision, $editInfo);
        if (false === $result) {
            echo "Error editPage\n";
            print_r($revisionSaver->getErrors());
        }

        return $result;
    }

    /**
     * Check if wiki has been edited by someone since bot's getText().
     *
     * The page is fetched again and its latest revision compared (loosely)
     * with the one stored by getText(). Also returns true when getText() was
     * never called or when the fresh page has no revision.
     */
    private function isPageEditedAfterGetText(): bool
    {
        $updatedPage = $this->wiki->newPageGetter()->getFromTitle($this->title);
        $updatedLastRevision = $updatedPage->getRevisions()->getLatest();

        // Non-strict object equality comparison
        /** @noinspection PhpNonStrictObjectEqualityInspection */
        return !($updatedLastRevision && $updatedLastRevision == $this->lastTextRevision);
    }

    /**
     * Add text to the top of the page (no separator is inserted).
     *
     * @return bool success
     * @throws Exception when the page has no revision
     */
    public function addToTopOfThePage(string $addText, EditInfo $editInfo): bool
    {
        if (!$this->pageExists()) {
            throw new Exception('That page does not exist');
        }

        return $this->editPage($addText . ($this->getText() ?? ''), $editInfo);
    }

    /**
     * Extract <ref> data from text.
     *
     * @return array|null values returned by the tag parser; an empty array
     *                    (with an echoed message) when the parser throws an Exception
     */
    public function extractRefFromText(string $text): ?array
    {
        $parser = new TagParser(); // todo ParserFactory
        try {
            return $parser->importHtml($text)->getRefValues();
        } catch (Exception $e) {
            echo "Error extractRefFromText: ".$e->getMessage()."\n";

            return [];
        }
    }

    /**
     * Keep the refs that contain an http(s) URL on the targeted website.
     * TODO? refactor with : parse_str() + parse_url($url, PHP_URL_QUERY)
     *
     * @param array  $refs   raw refs (each one is cast to string for matching)
     * @param string $domain host to look for, without scheme nor "www." prefix
     *
     * @return array list of ['url' => first matching URL, 'raw' => original ref]
     */
    public function filterRefByURL(array $refs, string $domain = 'lemonde.fr'): array
    {
        // URL stops at the first space or closing bracket (wiki external link syntax)
        $pattern = '#(?<url>https?://(?:www\.)?' . preg_quote($domain, '#') . '/[^ \]]+)#i';

        $validRef = [];
        foreach ($refs as $ref) {
            if (preg_match($pattern, (string) $ref, $matches) > 0) {
                $validRef[] = ['url' => $matches['url'], 'raw' => $ref];
            }
        }

        return $validRef;
    }

    /**
     * Whether the page fetched in the constructor has at least one revision.
     */
    private function pageExists(): bool
    {
        return !empty($this->page->getRevisions()->getLatest());
    }
}
||
| 362 |