This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
0 ignored issues
–
show
|
|||
2 | /** |
||
3 | * BackupDumper that postprocesses XML dumps from dumpBackup.php to add page text |
||
4 | * |
||
5 | * Copyright (C) 2005 Brion Vibber <[email protected]> |
||
6 | * https://www.mediawiki.org/ |
||
7 | * |
||
8 | * This program is free software; you can redistribute it and/or modify |
||
9 | * it under the terms of the GNU General Public License as published by |
||
10 | * the Free Software Foundation; either version 2 of the License, or |
||
11 | * (at your option) any later version. |
||
12 | * |
||
13 | * This program is distributed in the hope that it will be useful, |
||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
16 | * GNU General Public License for more details. |
||
17 | * |
||
18 | * You should have received a copy of the GNU General Public License along |
||
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
21 | * http://www.gnu.org/copyleft/gpl.html |
||
22 | * |
||
23 | * @file |
||
24 | * @ingroup Maintenance |
||
25 | */ |
||
26 | |||
27 | require_once __DIR__ . '/backup.inc'; |
||
28 | require_once __DIR__ . '/../includes/export/WikiExporter.php'; |
||
29 | |||
30 | /** |
||
31 | * @ingroup Maintenance |
||
32 | */ |
||
33 | class TextPassDumper extends BackupDumper { |
||
34 | public $prefetch = null; |
||
35 | |||
36 | // when we spend more than maxTimeAllowed seconds on this run, we continue |
||
37 | // processing until we write out the next complete page, then save output file(s), |
||
38 | // rename it/them and open new one(s) |
||
39 | public $maxTimeAllowed = 0; // 0 = no limit |
||
40 | |||
41 | protected $input = "php://stdin"; |
||
42 | protected $history = WikiExporter::FULL; |
||
43 | protected $fetchCount = 0; |
||
44 | protected $prefetchCount = 0; |
||
45 | protected $prefetchCountLast = 0; |
||
46 | protected $fetchCountLast = 0; |
||
47 | |||
48 | protected $maxFailures = 5; |
||
49 | protected $maxConsecutiveFailedTextRetrievals = 200; |
||
50 | protected $failureTimeout = 5; // Seconds to sleep after db failure |
||
51 | |||
52 | protected $bufferSize = 524288; // In bytes. Maximum size to read from the stub in on go. |
||
53 | |||
54 | protected $php = "php"; |
||
55 | protected $spawn = false; |
||
56 | |||
57 | /** |
||
58 | * @var bool|resource |
||
59 | */ |
||
60 | protected $spawnProc = false; |
||
61 | |||
62 | /** |
||
63 | * @var bool|resource |
||
64 | */ |
||
65 | protected $spawnWrite = false; |
||
66 | |||
67 | /** |
||
68 | * @var bool|resource |
||
69 | */ |
||
70 | protected $spawnRead = false; |
||
71 | |||
72 | /** |
||
73 | * @var bool|resource |
||
74 | */ |
||
75 | protected $spawnErr = false; |
||
76 | |||
77 | /** |
||
78 | * @var bool|XmlDumpWriter |
||
79 | */ |
||
80 | protected $xmlwriterobj = false; |
||
81 | |||
82 | protected $timeExceeded = false; |
||
83 | protected $firstPageWritten = false; |
||
84 | protected $lastPageWritten = false; |
||
85 | protected $checkpointJustWritten = false; |
||
86 | protected $checkpointFiles = []; |
||
87 | |||
88 | /** |
||
89 | * @var Database |
||
90 | */ |
||
91 | protected $db; |
||
92 | |||
/**
 * Set up the script description and register every command-line option.
 *
 * @param array|null $args For backward compatibility: when non-empty, the arguments
 *   are loaded with loadWithArgv() and the options are processed immediately.
 */
function __construct( $args = null ) {
	parent::__construct();

	$this->addDescription( <<<TEXT
This script postprocesses XML dumps from dumpBackup.php to add
page text which was stubbed out (using --stub).

XML input is accepted on stdin.
XML output is sent to stdout; progress reports are sent to stderr.
TEXT
	);
	$this->stderr = fopen( "php://stderr", "wt" );

	// NOTE: each help text is built from concatenated literals; every fragment
	// that continues a sentence must end with (or start with) a space.
	$this->addOption( 'stub', 'To load a compressed stub dump instead of stdin. ' .
		'Specify as --stub=<type>:<file>.', false, true );
	$this->addOption( 'prefetch', 'Use a prior dump file as a text source, to save pressure on the ' .
		'database. (Requires the XMLReader extension). Specify as --prefetch=<type>:<file>',
		false, true );
	$this->addOption( 'maxtime', 'Write out checkpoint file after this many minutes (writing ' .
		'out complete page, closing xml file properly, and opening new one ' .
		'with header). This option requires the checkpointfile option.', false, true );
	$this->addOption( 'checkpointfile', 'Use this string for checkpoint filenames, substituting ' .
		'first pageid written for the first %s (required) and the last pageid written for the ' .
		'second %s if it exists.', false, true, false, true ); // This can be specified multiple times
	$this->addOption( 'quiet', 'Don\'t dump status reports to stderr.' );
	$this->addOption( 'current', 'Base ETA on number of pages in database instead of all revisions' );
	$this->addOption( 'spawn', 'Spawn a subprocess for loading text records' );
	$this->addOption( 'buffersize', 'Buffer size in bytes to use for reading the stub. ' .
		'(Default: 512KB, Minimum: 4KB)', false, true );

	if ( $args ) {
		$this->loadWithArgv( $args );
		$this->processOptions();
	}
}
||
131 | |||
/**
 * Process the command-line options, then run the dump.
 */
function execute() {
	$this->processOptions();
	$this->dump( true );
}
||
136 | |||
/**
 * Copy the parsed command-line options into this object's settings.
 *
 * The parent's option processing runs first; afterwards buffersize, prefetch,
 * stub, maxtime, checkpointfile, current, full and spawn are handled in that order.
 */
function processOptions() {
	global $IP;

	parent::processOptions();

	if ( $this->hasOption( 'buffersize' ) ) {
		// Requests below 4KB are raised to 4KB.
		$requested = intval( $this->getOption( 'buffersize' ) );
		$this->bufferSize = max( $requested, 4 * 1024 );
	}

	if ( $this->hasOption( 'prefetch' ) ) {
		require_once "$IP/maintenance/backupPrefetch.inc";
		$prefetchUrl = $this->processFileOpt( $this->getOption( 'prefetch' ) );
		$this->prefetch = new BaseDump( $prefetchUrl );
	}

	if ( $this->hasOption( 'stub' ) ) {
		$this->input = $this->processFileOpt( $this->getOption( 'stub' ) );
	}

	if ( $this->hasOption( 'maxtime' ) ) {
		// Given in minutes, stored in seconds.
		$minutes = intval( $this->getOption( 'maxtime' ) );
		$this->maxTimeAllowed = $minutes * 60;
	}

	if ( $this->hasOption( 'checkpointfile' ) ) {
		$this->checkpointFiles = $this->getOption( 'checkpointfile' );
	}

	// 'full' is evaluated after 'current', so it wins when both are present.
	if ( $this->hasOption( 'current' ) ) {
		$this->history = WikiExporter::CURRENT;
	}
	if ( $this->hasOption( 'full' ) ) {
		$this->history = WikiExporter::FULL;
	}

	if ( $this->hasOption( 'spawn' ) ) {
		$this->spawn = true;
		$phpBinary = $this->getOption( 'spawn' );
		// Any value other than integer 1 replaces the php command used for the
		// subprocess (presumably 1 is what a value-less flag yields; verify).
		if ( $phpBinary !== 1 ) {
			$this->php = $phpBinary;
		}
	}
}
||
180 | |||
/**
 * Drop the database connection $this->db and try to get a new one.
 *
 * This function tries to get a /different/ connection if this is
 * possible. Hence, (if this is possible) it switches to a different
 * failover upon each call.
 *
 * This function resets $this->lb and closes all connections on it.
 * When $this->forcedDb is set, it is used as the connection and no
 * load balancer is created.
 *
 * @throws MWException If the old connection is still open, or if a new load
 *   balancer or connection cannot be obtained. The underlying exception is
 *   attached as the previous exception.
 */
function rotateDb() {
	// Cleaning up old connections
	if ( isset( $this->lb ) ) {
		$this->lb->closeAll();
		unset( $this->lb );
	}

	if ( $this->forcedDb !== null ) {
		$this->db = $this->forcedDb;

		return;
	}

	if ( isset( $this->db ) && $this->db->isOpen() ) {
		throw new MWException( 'DB is set and has not been closed by the Load Balancer' );
	}

	unset( $this->db );

	// Trying to set up new connection.
	// We do /not/ retry upon failure, but delegate to encapsulating logic, to avoid
	// individually retrying at different layers of code.

	// 1. The LoadBalancer.
	try {
		$this->lb = wfGetLBFactory()->newMainLB();
	} catch ( Exception $e ) {
		// Keep the original exception chained so its trace is not lost.
		throw new MWException(
			__METHOD__ . " rotating DB failed to obtain new load balancer (" . $e->getMessage() . ")",
			0,
			$e
		);
	}

	// 2. The Connection, through the load balancer.
	try {
		$this->db = $this->lb->getConnection( DB_REPLICA, 'dump' );
	} catch ( Exception $e ) {
		throw new MWException(
			__METHOD__ . " rotating DB failed to obtain new database (" . $e->getMessage() . ")",
			0,
			$e
		);
	}
}
||
231 | |||
/**
 * Initialise progress tracking and start the checkpoint timer.
 *
 * @param int $history History mode, forwarded to the parent implementation.
 *   dump() passes $this->history here; previously the value was dropped and
 *   the parent always ran with its own default.
 */
function initProgress( $history = WikiExporter::FULL ) {
	parent::initProgress( $history );
	$this->timeOfCheckpoint = $this->startTime;
}
||
236 | |||
/**
 * Run the text pass: read the stub XML from $this->input and push it through
 * the export progress filter.
 *
 * @param mixed $history Not read by this method; $this->history is used instead
 * @param mixed $text Not read by this method
 * @throws MWException If the option check fails, the input cannot be opened,
 *   or reading the dump fails
 */
function dump( $history, $text = WikiExporter::TEXT ) {
	// Notice messages will foul up your XML output even if they're
	// relatively harmless.
	if ( ini_get( 'display_errors' ) ) {
		ini_set( 'display_errors', 'stderr' );
	}

	$this->initProgress( $this->history );

	// We are trying to get an initial database connection to avoid that the
	// first try of this request's first call to getText fails. However, if
	// obtaining a good DB connection fails it's not a serious issue, as
	// getText does retry upon failure and can start without having a working
	// DB connection.
	try {
		$this->rotateDb();
	} catch ( Exception $e ) {
		// We do not even count this as failure. Just let eventual
		// watchdogs know.
		$this->progress( "Getting initial DB connection failed (" .
			$e->getMessage() . ")" );
	}

	$this->egress = new ExportProgressFilter( $this->sink, $this );

	// it would be nice to do it in the constructor, oh well. need egress set
	$this->finalOptionCheck();

	// we only want this so we know how to close a stream :-P
	$this->xmlwriterobj = new XmlDumpWriter();

	$input = fopen( $this->input, "rt" );
	if ( $input === false ) {
		// Without this check readDump() would be handed false instead of a stream.
		throw new MWException( "Unable to open input " . $this->input . " for reading" );
	}

	// Release the input handle whether or not reading succeeds.
	try {
		$this->readDump( $input );
	} catch ( Exception $e ) {
		fclose( $input );
		throw $e;
	}
	fclose( $input );

	if ( $this->spawnProc ) {
		$this->closeSpawn();
	}

	$this->report( true );
}
||
277 | |||
/**
 * Turn a "<type>:<file>[;<file>...]" option value into stream URI(s).
 *
 * Recognised types are file, gzip, bzip2 and 7zip; any other type leaves the
 * file names unprefixed. When the value contains no ':', the file part is
 * empty and so is the result.
 *
 * @param string $opt Option value
 * @return string The resulting URIs joined with ';'
 */
function processFileOpt( $opt ) {
	// Stream wrapper prefix for each recognised type.
	$wrappers = [
		'file' => '',
		'gzip' => 'compress.zlib://',
		'bzip2' => 'compress.bzip2://',
		'7zip' => 'mediawiki.compress.7z://',
	];

	$parts = explode( ':', $opt, 2 );
	$type = $parts[0];
	$fileList = count( $parts ) === 2 ? $parts[1] : '';
	$prefix = isset( $wrappers[$type] ) ? $wrappers[$type] : '';

	$converted = [];
	foreach ( explode( ';', $fileList ) as $uri ) {
		$converted[] = $prefix . $uri;
	}

	return implode( ';', $converted );
}
||
309 | |||
/**
 * Overridden to include prefetch ratio if enabled.
 *
 * Without a prefetch source the parent's report is used unchanged. Otherwise,
 * when reporting is on, one progress line is written with overall and
 * since-last-report page/revision rates, the prefetch percentages and an ETA.
 * Values that cannot be computed yet are shown as '-' (which the %0.1f
 * placeholders render as 0.0).
 */
function showReport() {
	if ( !$this->prefetch ) {
		parent::showReport();

		return;
	}

	if ( !$this->reporting ) {
		return;
	}

	$now = wfTimestamp( TS_DB );
	$nowts = microtime( true );
	$deltaAll = $nowts - $this->startTime;
	$deltaPart = $nowts - $this->lastTime;
	$this->pageCountPart = $this->pageCount - $this->pageCountLast;
	$this->revCountPart = $this->revCount - $this->revCountLast;

	$pageRate = '-';
	$revRate = '-';
	$etats = '-';
	$fetchRate = '-';
	if ( $deltaAll ) {
		// The ETA extrapolates from the fraction of revisions done so far; with
		// no revisions processed or no known maximum it would divide by zero.
		if ( $this->revCount && $this->maxCount ) {
			$portion = $this->revCount / $this->maxCount;
			$eta = $this->startTime + $deltaAll / $portion;
			$etats = wfTimestamp( TS_DB, intval( $eta ) );
		}
		if ( $this->fetchCount ) {
			$fetchRate = 100.0 * $this->prefetchCount / $this->fetchCount;
		}
		$pageRate = $this->pageCount / $deltaAll;
		$revRate = $this->revCount / $deltaAll;
	}

	$fetchRatePart = '-';
	$pageRatePart = '-';
	$revRatePart = '-';
	if ( $deltaPart ) {
		if ( $this->fetchCountLast ) {
			$fetchRatePart = 100.0 * $this->prefetchCountLast / $this->fetchCountLast;
		}
		$pageRatePart = $this->pageCountPart / $deltaPart;
		$revRatePart = $this->revCountPart / $deltaPart;
	}

	$this->progress( sprintf(
		"%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
			. "%d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% "
			. "prefetched (all|curr), ETA %s [max %d]",
		$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
		$pageRatePart, $this->revCount, $revRate, $revRatePart,
		$fetchRate, $fetchRatePart, $etats, $this->maxCount
	) );

	// Remember the current totals as the baseline for the next report.
	$this->lastTime = $nowts;
	$this->revCountLast = $this->revCount;
	$this->prefetchCountLast = $this->prefetchCount;
	$this->fetchCountLast = $this->fetchCount;
}
||
372 | |||
/**
 * Flag that the time allowed for the current run has been used up.
 */
function setTimeExceeded() {
	$this->timeExceeded = true;
}
||
376 | |||
/**
 * Tell whether more than maxTimeAllowed seconds lie between the last
 * checkpoint and the last recorded report time.
 *
 * @return bool Always false when maxTimeAllowed is 0 (no limit)
 */
function checkIfTimeExceeded() {
	if ( !$this->maxTimeAllowed ) {
		return false;
	}

	$elapsed = $this->lastTime - $this->timeOfCheckpoint;

	return $elapsed > $this->maxTimeAllowed;
}
||
386 | |||
/**
 * Validate the combination of the checkpointfile and maxtime options.
 *
 * Both must be given together, every checkpoint pattern must contain exactly
 * two '%s', and there must be as many patterns as output files.
 *
 * @throws MWException When any of these conditions is violated
 */
function finalOptionCheck() {
	$haveCheckpoints = (bool)$this->checkpointFiles;
	$haveMaxTime = (bool)$this->maxTimeAllowed;

	// One without the other is an error.
	if ( $haveCheckpoints !== $haveMaxTime ) {
		throw new MWException( "Options checkpointfile and maxtime must be specified together.\n" );
	}

	foreach ( $this->checkpointFiles as $pattern ) {
		$placeholders = substr_count( $pattern, "%s" );
		if ( $placeholders !== 2 ) {
			throw new MWException( "Option checkpointfile must contain two '%s' "
				. "for substitution of first and last pageids, count is {$placeholders} instead, "
				. "file is {$pattern}.\n" );
		}
	}

	if ( !$haveCheckpoints ) {
		return;
	}

	$outputFiles = (array)$this->egress->getFilenames();
	if ( count( $outputFiles ) != count( $this->checkpointFiles ) ) {
		throw new MWException( "One checkpointfile must be specified "
			. "for each output option, if maxtime is used.\n" );
	}
}
||
410 | |||
/**
 * Feed the stub XML from $input to an XML parser, chunk by chunk.
 *
 * The parser calls back into startElement(), endElement() and characterData()
 * of this object. When maxTimeAllowed is set and the first output file exists,
 * the output is closed and renamed to the filled-in checkpoint names afterwards.
 *
 * @throws MWException Failure to parse XML input
 * @param resource $input Handle that is read with fread() and tested with feof()
 * @return bool Always true
 */
function readDump( $input ) {
	// Reset the per-run parsing state.
	$this->buffer = "";
	$this->openElement = false;
	$this->atStart = true;
	$this->state = "";
	$this->lastName = "";
	$this->thisPage = 0;
	$this->thisRev = 0;
	$this->thisRevModel = null;
	$this->thisRevFormat = null;

	$parser = xml_parser_create( "UTF-8" );
	xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
	xml_set_element_handler( $parser, [ $this, 'startElement' ], [ $this, 'endElement' ] );
	xml_set_character_data_handler( $parser, [ $this, 'characterData' ] );

	// Bytes consumed before the current chunk; used to locate the failing
	// position inside the chunk for the error message.
	$offset = 0;
	do {
		if ( $this->checkIfTimeExceeded() ) {
			$this->setTimeExceeded();
		}

		$chunk = fread( $input, $this->bufferSize );
		$parsedOk = xml_parse( $parser, $chunk, feof( $input ) );
		if ( !$parsedOk ) {
			wfDebug( "TextDumpPass::readDump encountered XML parsing error\n" );

			$byte = xml_get_current_byte_index( $parser );
			if ( is_null( $chunk ) ) {
				$position = $byte . null;
			} else {
				$position = $byte . ( '; "' . substr( $chunk, $byte - $offset, 16 ) . '"' );
			}
			$msg = wfMessage( 'xml-error-string',
				'XML import parse failure',
				xml_get_current_line_number( $parser ),
				xml_get_current_column_number( $parser ),
				$position,
				xml_error_string( xml_get_error_code( $parser ) ) )->escaped();

			xml_parser_free( $parser );

			throw new MWException( $msg );
		}
		$offset += strlen( $chunk );
	} while ( $chunk !== false && !feof( $input ) );

	if ( $this->maxTimeAllowed ) {
		$filenameList = (array)$this->egress->getFilenames();
		// we wrote some stuff after last checkpoint that needs renamed
		if ( file_exists( $filenameList[0] ) ) {
			# we might have just written the header and footer and had no
			# pages or revisions written... perhaps they were all deleted
			# there's no pageID 0 so we use that. the caller is responsible
			# for deciding what to do with a file containing only the
			# siteinfo information and the mw tags.
			$anyPageWritten = (bool)$this->firstPageWritten;
			$firstPageID = str_pad( $anyPageWritten ? $this->firstPageWritten : 0, 9, "0", STR_PAD_LEFT );
			$lastPageID = str_pad( $anyPageWritten ? $this->lastPageWritten : 0, 9, "0", STR_PAD_LEFT );

			$newFilenames = [];
			$filenameCount = count( $filenameList );
			for ( $i = 0; $i < $filenameCount; $i++ ) {
				$checkpointName = sprintf( $this->checkpointFiles[$i], $firstPageID, $lastPageID );
				$fileinfo = pathinfo( $filenameList[$i] );
				// The renamed file stays in the directory of the current output file.
				$newFilenames[] = $fileinfo['dirname'] . '/' . $checkpointName;
			}
			$this->egress->closeAndRename( $newFilenames );
		}
	}
	xml_parser_free( $parser );

	return true;
}
||
491 | |||
/**
 * Run $text through the export transformation of its content model's handler.
 *
 * If an MWException is raised while doing so, the problem is reported through
 * progress() and the text is returned untouched.
 *
 * @param string $text
 * @param string $model
 * @param string|null $format
 *
 * @return string
 */
private function exportTransform( $text, $model, $format = null ) {
	try {
		return ContentHandler::getForModelID( $model )->exportTransform( $text, $format );
	} catch ( MWException $ex ) {
		$this->progress(
			"Unable to apply export transformation for content model '$model': " .
			$ex->getMessage()
		);

		return $text;
	}
}
||
515 | |||
/**
 * Tries to get the text for the revision currently being processed.
 *
 * A configured prefetch source is consulted once per call; otherwise, or when
 * it yields nothing, the text is loaded through the spawned subprocess or the
 * database. Export transformations are applied if the content model is given
 * or can be read from the revision table.
 *
 * Upon errors, retries (up to $this->maxFailures tries each call). If no good
 * text could be found even after retrying, "" is returned. If that happens for
 * more than $this->maxConsecutiveFailedTextRetrievals consecutive calls,
 * an MWException is thrown.
 *
 * @param string $id Id handed to getTextSpawned() / getTextDb() to load the text
 * @param string|bool|null $model The content model used to determine
 *   applicable export transformations.
 *   If $model is null, it will be determined from the database.
 * @param string|null $format The content format used when applying export transformations.
 *
 * @throws MWException
 * @return string The text for $id, or ""
 */
function getText( $id, $model = null, $format = null ) {
	global $wgContentHandlerUseDB;

	// Number of getText() calls in a row that ended without a good text.
	static $failStreak = 0;

	$prefetchPending = true; // Prefetch is attempted at most once per call.
	$text = false; // Current candidate text; false while there is none.
	$failedTries = 0; // Failed attempts within this call.

	$this->fetchCount++;

	// Assume this call will succeed so that the success paths can simply return.
	// The earlier streak is kept and restored (plus one) if the call gives up.
	$priorStreak = $failStreak;
	$failStreak = 0;

	if ( $model === null && $wgContentHandlerUseDB ) {
		$row = $this->db->selectRow(
			'revision',
			[ 'rev_content_model', 'rev_content_format' ],
			[ 'rev_id' => $this->thisRev ],
			__METHOD__
		);

		if ( $row ) {
			$model = $row->rev_content_model;
			$format = $row->rev_content_format;
		}
	}

	// From here on, false means "no model known, skip export transformations".
	if ( $model === null || $model === '' ) {
		$model = false;
	}

	while ( $failedTries < $this->maxFailures ) {
		// A good text is returned from inside the try block right away, so
		// getting past the try/catch always means this attempt failed.
		try {
			// Step 1: Get some text (or reuse the candidate from the previous
			// iteration if it could not be checked for plausibility).

			if ( $text === false && isset( $this->prefetch ) && $prefetchPending ) {
				$prefetchPending = false;
				$tryIsPrefetch = true;
				$text = $this->prefetch->prefetch(
					intval( $this->thisPage ),
					intval( $this->thisRev )
				);

				if ( $text === null ) {
					$text = false;
				}

				if ( is_string( $text ) && $model !== false ) {
					// Apply export transformation to text coming from an old dump.
					// The purpose of this transformation is to convert up from legacy
					// formats, which may still be used in the older dump that is used
					// for pre-fetching. Applying the transformation again should not
					// interfere with content that is already in the correct form.
					$text = $this->exportTransform( $text, $model, $format );
				}
			}

			if ( $text === false ) {
				// Fallback to asking the database
				$tryIsPrefetch = false;
				$text = $this->spawn
					? $this->getTextSpawned( $id )
					: $this->getTextDb( $id );

				if ( $text !== false && $model !== false ) {
					// Apply export transformation to text coming from the database.
					$text = $this->exportTransform( $text, $model, $format );
				}

				// No further checks for texts from the DB for now: anything that
				// is not false is treated as good text.
				if ( $text !== false ) {
					return $text;
				}
			}

			if ( $text === false ) {
				throw new MWException( "Generic error while obtaining text for id " . $id );
			}

			// Step 2: Check the candidate for plausibility and return it if it passes.
			$revID = intval( $this->thisRev );
			if ( !isset( $this->db ) ) {
				throw new MWException( "No database available" );
			}

			// Only wikitext is compared against the length stored in the revision
			// table; for other models the comparison below is trivially true.
			if ( $model !== CONTENT_MODEL_WIKITEXT ) {
				$revLength = strlen( $text );
			} else {
				$revLength = $this->db->selectField( 'revision', 'rev_len', [ 'rev_id' => $revID ] );
			}

			if ( strlen( $text ) == $revLength ) {
				if ( $tryIsPrefetch ) {
					$this->prefetchCount++;
				}

				return $text;
			}

			// Discard the implausible candidate so that the next attempt fetches afresh.
			$text = false;
			throw new MWException( "Received text is unplausible for id " . $id );
		} catch ( Exception $e ) {
			$msg = "getting/checking text " . $id . " failed (" . $e->getMessage() . ")";
			if ( $failedTries + 1 < $this->maxFailures ) {
				$msg .= " (Will retry " . ( $this->maxFailures - $failedTries - 1 ) . " more times)";
			}
			$this->progress( $msg );
		}

		// Something went wrong; we did not get a text that was plausible.
		$failedTries++;

		// A failure in a prefetch hit does not warrant resetting db connection etc.
		if ( !$tryIsPrefetch ) {
			// After backing off for some time, reboot as much of the fetching
			// infrastructure as possible so failures are not carried over.
			sleep( $this->failureTimeout );
			try {
				$this->rotateDb();
				if ( $this->spawn ) {
					$this->closeSpawn();
					$this->openSpawn();
				}
			} catch ( Exception $e ) {
				$this->progress( "Rebooting getText infrastructure failed (" . $e->getMessage() . ")" .
					" Trying to continue anyways" );
			}
		}
	}

	// Retrieving a good text for $id failed maxFailures times; give up on this id.
	// Restore the streak of failed calls, and abort if the dump is too broken.
	$failStreak = $priorStreak + 1;
	if ( $failStreak > $this->maxConsecutiveFailedTextRetrievals ) {
		throw new MWException( "Graceful storage failure" );
	}

	return "";
}
||
697 | |||
/**
 * Load a text from the text table through $this->db.
 *
 * Carriage returns are removed and the result is normalized with the content
 * language. May throw a database error if, say, the server dies during query.
 *
 * @param int $id Value matched against old_id
 * @return bool|string False when Revision::getRevisionText() yields no text
 * @throws MWException When no database connection is set
 */
private function getTextDb( $id ) {
	global $wgContLang;

	if ( !isset( $this->db ) ) {
		// Separator added: the method name used to run straight into the text.
		throw new MWException( __METHOD__ . ": No database available" );
	}

	$row = $this->db->selectRow(
		'text',
		[ 'old_text', 'old_flags' ],
		[ 'old_id' => $id ],
		__METHOD__
	);
	$text = Revision::getRevisionText( $row );
	if ( $text === false ) {
		return false;
	}

	$stripped = str_replace( "\r", "", $text );

	return $wgContLang->normalize( $stripped );
}
||
722 | |||
/**
 * Load a text through the fetchText subprocess, starting it on first use.
 *
 * Warnings are suppressed for the duration of the call.
 *
 * @param string $id Id written to the subprocess
 * @return bool|string The text, or false when the subprocess could not be
 *   started or did not deliver a usable answer
 */
private function getTextSpawned( $id ) {
	MediaWiki\suppressWarnings();

	$text = false;
	// Only talk to the subprocess when there is one: after a failed
	// openSpawn() there are no pipes to write to or read from.
	if ( $this->spawnProc || $this->openSpawn() ) {
		$text = $this->getTextSpawnedOnce( $id );
	}

	MediaWiki\restoreWarnings();

	return $text;
}
||
734 | |||
/**
 * Start the fetchText.php subprocess and keep its stdin/stdout pipes.
 *
 * fetchText.php is run through multiversion/MWScript.php when that file
 * exists next to $IP, otherwise directly from the maintenance directory.
 * The subprocess's stderr is appended to /dev/null.
 *
 * @return bool True if the subprocess was started, false otherwise
 */
function openSpawn() {
	global $IP;

	$multiversionScript = "$IP/../multiversion/MWScript.php";
	if ( file_exists( $multiversionScript ) ) {
		$argv = [
			$this->php,
			$multiversionScript,
			"fetchText.php",
			'--wiki', wfWikiID() ];
	} else {
		$argv = [
			$this->php,
			"$IP/maintenance/fetchText.php",
			'--wiki', wfWikiID() ];
	}
	$cmd = implode( " ", array_map( 'wfEscapeShellArg', $argv ) );

	$spec = [
		0 => [ "pipe", "r" ],
		1 => [ "pipe", "w" ],
		2 => [ "file", "/dev/null", "a" ] ];
	$pipes = [];

	$this->progress( "Spawning database subprocess: $cmd" );
	$this->spawnProc = proc_open( $cmd, $spec, $pipes );
	if ( !$this->spawnProc ) {
		$this->progress( "Subprocess spawn failed." );

		return false;
	}

	$this->spawnWrite = $pipes[0]; // -> stdin
	$this->spawnRead = $pipes[1]; // <- stdout

	return true;
}
||
774 | |||
/**
 * Close the pipes to the subprocess, then the subprocess itself, and reset
 * all four handles to false.
 *
 * Warnings are suppressed for the duration of the call.
 */
private function closeSpawn() {
	MediaWiki\suppressWarnings();

	// Pipes first, process handle last.
	foreach ( [ 'spawnRead', 'spawnWrite', 'spawnErr' ] as $pipe ) {
		if ( $this->$pipe ) {
			fclose( $this->$pipe );
		}
		$this->$pipe = false;
	}

	if ( $this->spawnProc ) {
		// The handle comes from proc_open(), so it must be closed with
		// proc_close(); pclose() is only for popen() handles.
		proc_close( $this->spawnProc );
	}
	$this->spawnProc = false;

	MediaWiki\restoreWarnings();
}
||
795 | |||
/**
 * Perform one request/response exchange with the subprocess.
 *
 * Writes "$id\n" to the subprocess, then reads back an id line, a length line
 * and that many bytes of text. Carriage returns are removed from the text and
 * it is normalized with the content language.
 *
 * @param string $id Id to request
 * @return bool|string The normalized text, or false if writing or reading
 *   fails, the echoed id differs, the length is negative, or fewer bytes than
 *   announced arrive
 */
private function getTextSpawnedOnce( $id ) {
	global $wgContLang;

	$ok = fwrite( $this->spawnWrite, "$id\n" );
	if ( !$ok ) {
		return false;
	}

	$ok = fflush( $this->spawnWrite );
	if ( !$ok ) {
		return false;
	}

	// check that the text id they are sending is the one we asked for
	// this avoids out of sync revision text errors we have encountered in the past
	$newId = fgets( $this->spawnRead );
	if ( $newId === false ) {
		return false;
	}
	if ( $id != intval( $newId ) ) {
		return false;
	}

	$len = fgets( $this->spawnRead );
	if ( $len === false ) {
		return false;
	}

	$nbytes = intval( $len );
	// actual error, not zero-length text
	if ( $nbytes < 0 ) {
		return false;
	}

	$text = "";

	// Subprocess may not send everything at once, we have to loop.
	while ( $nbytes > strlen( $text ) ) {
		$buffer = fread( $this->spawnRead, $nbytes - strlen( $text ) );
		// fread() signals end of file with "" rather than false; without the
		// second test a subprocess that died mid-answer would keep this loop
		// spinning forever.
		if ( $buffer === false || ( $buffer === '' && feof( $this->spawnRead ) ) ) {
			break;
		}
		$text .= $buffer;
	}

	$gotbytes = strlen( $text );
	if ( $gotbytes != $nbytes ) {
		$this->progress( "Expected $nbytes bytes from database subprocess, got $gotbytes " );

		return false;
	}

	// Do normalization in the dump thread...
	$stripped = str_replace( "\r", "", $text );

	return $wgContLang->normalize( $stripped );
}
||
857 | |||
/**
 * Handler for an opening XML tag.
 *
 * Flushes any pending open element into the buffer, records the tag name,
 * and hands buffered output to the egress when a revision starts or when
 * the first page of the stream starts. A <text> tag carrying an "id"
 * attribute is replaced by a <text xml:space="preserve"> element whose
 * content is fetched through getText(); any other tag is remembered as the
 * pending open element with its attributes unchanged.
 *
 * @param resource $parser XML parser handle, passed on to characterData()
 * @param string $name Tag name
 * @param array $attribs Tag attributes
 */
function startElement( $parser, $name, $attribs ) {
    $this->checkpointJustWritten = false;

    $this->clearOpenElement( null );
    $this->lastName = $name;

    switch ( $name ) {
        case 'revision':
            $this->state = $name;
            $this->egress->writeOpenPage( null, $this->buffer );
            $this->buffer = "";
            break;
        case 'page':
            $this->state = $name;
            if ( $this->atStart ) {
                $this->egress->writeOpenStream( $this->buffer );
                $this->buffer = "";
                $this->atStart = false;
            }
            break;
    }

    $isTextStub = ( $name == "text" && isset( $attribs['id'] ) );
    if ( !$isTextStub ) {
        $this->openElement = [ $name, $attribs ];

        return;
    }

    $textId = $attribs['id'];
    $revModel = trim( $this->thisRevModel );
    $revFormat = trim( $this->thisRevFormat );

    // An empty model or format is passed on as null.
    if ( $revModel === '' ) {
        $revModel = null;
    }
    if ( $revFormat === '' ) {
        $revFormat = null;
    }

    $content = $this->getText( $textId, $revModel, $revFormat );
    $this->openElement = [ $name, [ 'xml:space' => 'preserve' ] ];
    if ( strlen( $content ) > 0 ) {
        $this->characterData( $parser, $content );
    }
}
||
894 | |||
/**
 * Handler for a closing XML tag.
 *
 * Emits the pending open element as an empty element if there is one,
 * otherwise appends the closing tag to the buffer. The buffer is then
 * handed to the egress at the end of a revision, a page, or the whole
 * <mediawiki> stream. When timeExceeded is set at the end of a page, the
 * current output is closed, renamed after the first and last page ids
 * written, and a new stream is opened.
 *
 * @param resource $parser XML parser handle (unused here)
 * @param string $name Tag name
 */
function endElement( $parser, $name ) {
    $this->checkpointJustWritten = false;

    if ( !$this->openElement ) {
        $this->buffer .= "</$name>";
    } else {
        $this->clearOpenElement( "" );
    }

    if ( $name == 'revision' ) {
        $this->egress->writeRevision( null, $this->buffer );
        $this->buffer = "";
        $this->thisRev = "";
        $this->thisRevModel = null;
        $this->thisRevFormat = null;

        return;
    }

    if ( $name == 'mediawiki' ) {
        $this->egress->writeCloseStream( $this->buffer );
        $this->buffer = "";

        return;
    }

    if ( $name != 'page' ) {
        return;
    }

    $currentPageId = trim( $this->thisPage );
    if ( !$this->firstPageWritten ) {
        $this->firstPageWritten = $currentPageId;
    }
    $this->lastPageWritten = $currentPageId;

    // Read the flag before touching the egress, as the original branch did.
    $checkpointDue = $this->timeExceeded;
    $this->egress->writeClosePage( $this->buffer );

    if ( !$checkpointDue ) {
        $this->buffer = "";
        $this->thisPage = "";

        return;
    }

    // nasty hack, we can't just write the chardata after the
    // page tag, it will include leading blanks from the next line
    $this->egress->sink->write( "\n" );

    $this->buffer = $this->xmlwriterobj->closeStream();
    $this->egress->writeCloseStream( $this->buffer );

    $this->buffer = "";
    $this->thisPage = "";

    // this could be more than one file if we had more than one output arg
    $currentFilenames = (array)$this->egress->getFilenames();
    $renamedFilenames = [];
    $firstPageID = str_pad( $this->firstPageWritten, 9, "0", STR_PAD_LEFT );
    $lastPageID = str_pad( $this->lastPageWritten, 9, "0", STR_PAD_LEFT );
    for ( $idx = 0, $total = count( $currentFilenames ); $idx < $total; $idx++ ) {
        $checkpointName = sprintf( $this->checkpointFiles[$idx], $firstPageID, $lastPageID );
        $pathParts = pathinfo( $currentFilenames[$idx] );
        $renamedFilenames[] = $pathParts['dirname'] . '/' . $checkpointName;
    }
    $this->egress->closeRenameAndReopen( $renamedFilenames );

    // Begin the next output stream and reset the checkpoint bookkeeping.
    $this->buffer = $this->xmlwriterobj->openStream();
    $this->timeExceeded = false;
    $this->timeOfCheckpoint = $this->lastTime;
    $this->firstPageWritten = false;
    $this->checkpointJustWritten = true;
}
||
954 | |||
/**
 * Handler for XML character data.
 *
 * Flushes any pending open element, accumulates the text of <id>, <model>
 * and <format> elements into the matching properties, and appends the
 * escaped data to the output buffer.
 *
 * @param resource $parser XML parser handle (unused here)
 * @param string $data Character data
 */
function characterData( $parser, $data ) {
    $this->clearOpenElement( null );
    if ( $this->lastName == "id" ) {
        if ( $this->state == "revision" ) {
            $this->thisRev .= $data;
        } elseif ( $this->state == "page" ) {
            $this->thisPage .= $data;
        }
    } elseif ( $this->lastName == "model" ) {
        $this->thisRevModel .= $data;
    } elseif ( $this->lastName == "format" ) {
        $this->thisRevFormat .= $data;
    }

    // have to skip the newline left over from closepagetag line of
    // end of checkpoint files. nasty hack!!
    if ( $this->checkpointJustWritten ) {
        // isset() guards against an empty string, where reading offset 0
        // would raise an "uninitialized string offset" error.
        if ( isset( $data[0] ) && $data[0] == "\n" ) {
            $data = substr( $data, 1 );
        }
        $this->checkpointJustWritten = false;
    }

    // Pin the flags: the default changed in PHP 8.1 to also escape single
    // quotes, which would make the dump output depend on the PHP version.
    $this->buffer .= htmlspecialchars( $data, ENT_COMPAT );
}
||
979 | |||
/**
 * Write out the pending open element, if there is one, and forget it.
 *
 * @param string|null $style Passed straight through as the third argument
 *   of Xml::element(); callers in this class use null and ""
 */
function clearOpenElement( $style ) {
    if ( !$this->openElement ) {
        return;
    }

    $tagName = $this->openElement[0];
    $tagAttribs = $this->openElement[1];
    $this->buffer .= Xml::element( $tagName, $tagAttribs, $style );
    $this->openElement = false;
}
||
986 | } |
||
987 | |||
// Store the name of the class defined above in $maintClass, then include the
// file that the RUN_MAINTENANCE_IF_MAIN constant points to.
// NOTE(review): presumably that file reads $maintClass and runs the class when
// this script is the entry point — confirm against the maintenance framework.
$maintClass = 'TextPassDumper';
require_once RUN_MAINTENANCE_IF_MAIN;
||
990 |
The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.