This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
0 ignored issues
–
show
|
|||
2 | /** |
||
3 | * Moves blobs indexed by trackBlobs.php to a specified list of destination |
||
4 | * clusters, and recompresses them in the process. |
||
5 | * |
||
6 | * This program is free software; you can redistribute it and/or modify |
||
7 | * it under the terms of the GNU General Public License as published by |
||
8 | * the Free Software Foundation; either version 2 of the License, or |
||
9 | * (at your option) any later version. |
||
10 | * |
||
11 | * This program is distributed in the hope that it will be useful, |
||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
14 | * GNU General Public License for more details. |
||
15 | * |
||
16 | * You should have received a copy of the GNU General Public License along |
||
17 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
19 | * http://www.gnu.org/copyleft/gpl.html |
||
20 | * |
||
21 | * @file |
||
22 | * @ingroup Maintenance ExternalStorage |
||
23 | */ |
||
24 | |||
25 | use MediaWiki\Logger\LegacyLogger; |
||
26 | use MediaWiki\MediaWikiServices; |
||
27 | |||
28 | $optionsWithArgs = RecompressTracked::getOptionsWithArgs(); |
||
29 | require __DIR__ . '/../commandLine.inc'; |
||
30 | |||
31 | if ( count( $args ) < 1 ) { |
||
32 | echo "Usage: php recompressTracked.php [options] <cluster> [... <cluster>...] |
||
33 | Moves blobs indexed by trackBlobs.php to a specified list of destination clusters, |
||
34 | and recompresses them in the process. Restartable. |
||
35 | |||
36 | Options: |
||
37 | --procs <procs> Set the number of child processes (default 1) |
||
38 | --copy-only Copy only, do not update the text table. Restart |
||
39 | without this option to complete. |
||
40 | --debug-log <file> Log debugging data to the specified file |
||
41 | --info-log <file> Log progress messages to the specified file |
||
42 | --critical-log <file> Log error messages to the specified file |
||
43 | "; |
||
44 | exit( 1 ); |
||
45 | } |
||
46 | |||
47 | $job = RecompressTracked::newFromCommandLine( $args, $options ); |
||
48 | $job->execute(); |
||
49 | |||
50 | /** |
||
51 | * Maintenance script that moves blobs indexed by trackBlobs.php to a specified |
||
52 | * list of destination clusters, and recompresses them in the process. |
||
53 | * |
||
54 | * @ingroup Maintenance ExternalStorage |
||
55 | */ |
||
56 | class RecompressTracked { |
||
57 | public $destClusters; |
||
58 | public $batchSize = 1000; |
||
59 | public $orphanBatchSize = 1000; |
||
60 | public $reportingInterval = 10; |
||
61 | public $numProcs = 1; |
||
62 | public $numBatches = 0; |
||
63 | public $pageBlobClass, $orphanBlobClass; |
||
0 ignored issues
–
show
|
|||
64 | public $replicaPipes, $replicaProcs, $prevReplicaId; |
||
0 ignored issues
–
show
|
|||
65 | public $copyOnly = false; |
||
66 | public $isChild = false; |
||
67 | public $replicaId = false; |
||
68 | public $noCount = false; |
||
69 | public $debugLog, $infoLog, $criticalLog; |
||
0 ignored issues
–
show
|
|||
70 | public $store; |
||
71 | |||
72 | private static $optionsWithArgs = [ |
||
73 | 'procs', |
||
74 | 'replica-id', |
||
75 | 'debug-log', |
||
76 | 'info-log', |
||
77 | 'critical-log' |
||
78 | ]; |
||
79 | |||
80 | private static $cmdLineOptionMap = [ |
||
81 | 'no-count' => 'noCount', |
||
82 | 'procs' => 'numProcs', |
||
83 | 'copy-only' => 'copyOnly', |
||
84 | 'child' => 'isChild', |
||
85 | 'replica-id' => 'replicaId', |
||
86 | 'debug-log' => 'debugLog', |
||
87 | 'info-log' => 'infoLog', |
||
88 | 'critical-log' => 'criticalLog', |
||
89 | ]; |
||
90 | |||
91 | static function getOptionsWithArgs() { |
||
92 | return self::$optionsWithArgs; |
||
93 | } |
||
94 | |||
95 | static function newFromCommandLine( $args, $options ) { |
||
96 | $jobOptions = [ 'destClusters' => $args ]; |
||
97 | foreach ( self::$cmdLineOptionMap as $cmdOption => $classOption ) { |
||
98 | if ( isset( $options[$cmdOption] ) ) { |
||
99 | $jobOptions[$classOption] = $options[$cmdOption]; |
||
100 | } |
||
101 | } |
||
102 | |||
103 | return new self( $jobOptions ); |
||
104 | } |
||
105 | |||
106 | function __construct( $options ) { |
||
0 ignored issues
–
show
__construct uses the super-global variable $GLOBALS which is generally not recommended.
Instead of super-globals, we recommend to explicitly inject the dependencies of your class. This makes your code less dependent on global state and it becomes generally more testable: // Bad
class Router
{
public function generate($path)
{
return $_SERVER['HOST'].$path;
}
}
// Better
class Router
{
private $host;
public function __construct($host)
{
$this->host = $host;
}
public function generate($path)
{
return $this->host.$path;
}
}
class Controller
{
public function myAction(Request $request)
{
// Instead of
$page = isset($_GET['page']) ? intval($_GET['page']) : 1;
// Better (assuming you use the Symfony2 request)
$page = $request->query->get('page', 1);
}
}
![]() |
|||
107 | foreach ( $options as $name => $value ) { |
||
108 | $this->$name = $value; |
||
109 | } |
||
110 | $this->store = new ExternalStoreDB; |
||
111 | if ( !$this->isChild ) { |
||
112 | $GLOBALS['wgDebugLogPrefix'] = "RCT M: "; |
||
113 | } elseif ( $this->replicaId !== false ) { |
||
114 | $GLOBALS['wgDebugLogPrefix'] = "RCT {$this->replicaId}: "; |
||
115 | } |
||
116 | $this->pageBlobClass = function_exists( 'xdiff_string_bdiff' ) ? |
||
117 | 'DiffHistoryBlob' : 'ConcatenatedGzipHistoryBlob'; |
||
118 | $this->orphanBlobClass = 'ConcatenatedGzipHistoryBlob'; |
||
119 | } |
||
120 | |||
121 | function debug( $msg ) { |
||
122 | wfDebug( "$msg\n" ); |
||
123 | if ( $this->debugLog ) { |
||
124 | $this->logToFile( $msg, $this->debugLog ); |
||
125 | } |
||
126 | } |
||
127 | |||
128 | function info( $msg ) { |
||
129 | echo "$msg\n"; |
||
130 | if ( $this->infoLog ) { |
||
131 | $this->logToFile( $msg, $this->infoLog ); |
||
132 | } |
||
133 | } |
||
134 | |||
135 | function critical( $msg ) { |
||
136 | echo "$msg\n"; |
||
137 | if ( $this->criticalLog ) { |
||
138 | $this->logToFile( $msg, $this->criticalLog ); |
||
139 | } |
||
140 | } |
||
141 | |||
142 | function logToFile( $msg, $file ) { |
||
143 | $header = '[' . date( 'd\TH:i:s' ) . '] ' . wfHostname() . ' ' . posix_getpid(); |
||
144 | if ( $this->replicaId !== false ) { |
||
145 | $header .= "({$this->replicaId})"; |
||
146 | } |
||
147 | $header .= ' ' . wfWikiID(); |
||
148 | LegacyLogger::emit( sprintf( "%-50s %s\n", $header, $msg ), $file ); |
||
149 | } |
||
150 | |||
151 | /** |
||
152 | * Wait until the selected replica DB has caught up to the master. |
||
153 | * This allows us to use the replica DB for things that were committed in a |
||
154 | * previous part of this batch process. |
||
155 | */ |
||
156 | function syncDBs() { |
||
157 | $dbw = wfGetDB( DB_MASTER ); |
||
158 | $dbr = wfGetDB( DB_REPLICA ); |
||
159 | $pos = $dbw->getMasterPos(); |
||
160 | $dbr->masterPosWait( $pos, 100000 ); |
||
161 | } |
||
162 | |||
163 | /** |
||
164 | * Execute parent or child depending on the isChild option |
||
165 | */ |
||
166 | function execute() { |
||
167 | if ( $this->isChild ) { |
||
168 | $this->executeChild(); |
||
169 | } else { |
||
170 | $this->executeParent(); |
||
171 | } |
||
172 | } |
||
173 | |||
174 | /** |
||
175 | * Execute the parent process |
||
176 | */ |
||
177 | function executeParent() { |
||
178 | if ( !$this->checkTrackingTable() ) { |
||
179 | return; |
||
180 | } |
||
181 | |||
182 | $this->syncDBs(); |
||
183 | $this->startReplicaProcs(); |
||
184 | $this->doAllPages(); |
||
185 | $this->doAllOrphans(); |
||
186 | $this->killReplicaProcs(); |
||
187 | } |
||
188 | |||
189 | /** |
||
190 | * Make sure the tracking table exists and isn't empty |
||
191 | * @return bool |
||
192 | */ |
||
193 | function checkTrackingTable() { |
||
194 | $dbr = wfGetDB( DB_REPLICA ); |
||
195 | if ( !$dbr->tableExists( 'blob_tracking' ) ) { |
||
196 | $this->critical( "Error: blob_tracking table does not exist" ); |
||
197 | |||
198 | return false; |
||
199 | } |
||
200 | $row = $dbr->selectRow( 'blob_tracking', '*', '', __METHOD__ ); |
||
201 | if ( !$row ) { |
||
202 | $this->info( "Warning: blob_tracking table contains no rows, skipping this wiki." ); |
||
203 | |||
204 | return false; |
||
205 | } |
||
206 | |||
207 | return true; |
||
208 | } |
||
209 | |||
210 | /** |
||
211 | * Start the worker processes. |
||
212 | * These processes will listen on stdin for commands. |
||
213 | * This necessary because text recompression is slow: loading, compressing and |
||
214 | * writing are all slow. |
||
215 | */ |
||
216 | function startReplicaProcs() { |
||
217 | $cmd = 'php ' . wfEscapeShellArg( __FILE__ ); |
||
218 | foreach ( self::$cmdLineOptionMap as $cmdOption => $classOption ) { |
||
219 | if ( $cmdOption == 'replica-id' ) { |
||
220 | continue; |
||
221 | } elseif ( in_array( $cmdOption, self::$optionsWithArgs ) && isset( $this->$classOption ) ) { |
||
222 | $cmd .= " --$cmdOption " . wfEscapeShellArg( $this->$classOption ); |
||
223 | } elseif ( $this->$classOption ) { |
||
224 | $cmd .= " --$cmdOption"; |
||
225 | } |
||
226 | } |
||
227 | $cmd .= ' --child' . |
||
228 | ' --wiki ' . wfEscapeShellArg( wfWikiID() ) . |
||
229 | ' ' . call_user_func_array( 'wfEscapeShellArg', $this->destClusters ); |
||
230 | |||
231 | $this->replicaPipes = $this->replicaProcs = []; |
||
232 | for ( $i = 0; $i < $this->numProcs; $i++ ) { |
||
233 | $pipes = []; |
||
234 | $spec = [ |
||
235 | [ 'pipe', 'r' ], |
||
236 | [ 'file', 'php://stdout', 'w' ], |
||
237 | [ 'file', 'php://stderr', 'w' ] |
||
238 | ]; |
||
239 | MediaWiki\suppressWarnings(); |
||
240 | $proc = proc_open( "$cmd --replica-id $i", $spec, $pipes ); |
||
241 | MediaWiki\restoreWarnings(); |
||
242 | if ( !$proc ) { |
||
243 | $this->critical( "Error opening replica DB process: $cmd" ); |
||
244 | exit( 1 ); |
||
0 ignored issues
–
show
The method
startReplicaProcs() contains an exit expression.
An exit expression should only be used in rare cases. For example, if you write a short command line script. In most cases however, using an ![]() |
|||
245 | } |
||
246 | $this->replicaProcs[$i] = $proc; |
||
247 | $this->replicaPipes[$i] = $pipes[0]; |
||
248 | } |
||
249 | $this->prevReplicaId = -1; |
||
250 | } |
||
251 | |||
252 | /** |
||
253 | * Gracefully terminate the child processes |
||
254 | */ |
||
255 | function killReplicaProcs() { |
||
256 | $this->info( "Waiting for replica DB processes to finish..." ); |
||
257 | for ( $i = 0; $i < $this->numProcs; $i++ ) { |
||
258 | $this->dispatchToReplica( $i, 'quit' ); |
||
259 | } |
||
260 | for ( $i = 0; $i < $this->numProcs; $i++ ) { |
||
261 | $status = proc_close( $this->replicaProcs[$i] ); |
||
262 | if ( $status ) { |
||
263 | $this->critical( "Warning: child #$i exited with status $status" ); |
||
264 | } |
||
265 | } |
||
266 | $this->info( "Done." ); |
||
267 | } |
||
268 | |||
269 | /** |
||
270 | * Dispatch a command to the next available replica DB. |
||
271 | * This may block until a replica DB finishes its work and becomes available. |
||
272 | */ |
||
273 | function dispatch( /*...*/ ) { |
||
274 | $args = func_get_args(); |
||
275 | $pipes = $this->replicaPipes; |
||
276 | $numPipes = stream_select( $x = [], $pipes, $y = [], 3600 ); |
||
0 ignored issues
–
show
|
|||
277 | if ( !$numPipes ) { |
||
278 | $this->critical( "Error waiting to write to replica DBs. Aborting" ); |
||
279 | exit( 1 ); |
||
0 ignored issues
–
show
The method
dispatch() contains an exit expression.
An exit expression should only be used in rare cases. For example, if you write a short command line script. In most cases however, using an ![]() |
|||
280 | } |
||
281 | for ( $i = 0; $i < $this->numProcs; $i++ ) { |
||
282 | $replicaId = ( $i + $this->prevReplicaId + 1 ) % $this->numProcs; |
||
283 | if ( isset( $pipes[$replicaId] ) ) { |
||
284 | $this->prevReplicaId = $replicaId; |
||
285 | $this->dispatchToReplica( $replicaId, $args ); |
||
286 | |||
287 | return; |
||
288 | } |
||
289 | } |
||
290 | $this->critical( "Unreachable" ); |
||
291 | exit( 1 ); |
||
0 ignored issues
–
show
The method
dispatch() contains an exit expression.
An exit expression should only be used in rare cases. For example, if you write a short command line script. In most cases however, using an ![]() |
|||
292 | } |
||
293 | |||
294 | /** |
||
295 | * Dispatch a command to a specified replica DB |
||
296 | * @param int $replicaId |
||
297 | * @param array|string $args |
||
298 | */ |
||
299 | function dispatchToReplica( $replicaId, $args ) { |
||
300 | $args = (array)$args; |
||
301 | $cmd = implode( ' ', $args ); |
||
302 | fwrite( $this->replicaPipes[$replicaId], "$cmd\n" ); |
||
303 | } |
||
304 | |||
305 | /** |
||
306 | * Move all tracked pages to the new clusters |
||
307 | */ |
||
308 | function doAllPages() { |
||
309 | $dbr = wfGetDB( DB_REPLICA ); |
||
310 | $i = 0; |
||
311 | $startId = 0; |
||
312 | View Code Duplication | if ( $this->noCount ) { |
|
313 | $numPages = '[unknown]'; |
||
314 | } else { |
||
315 | $numPages = $dbr->selectField( 'blob_tracking', |
||
316 | 'COUNT(DISTINCT bt_page)', |
||
317 | # A condition is required so that this query uses the index |
||
318 | [ 'bt_moved' => 0 ], |
||
319 | __METHOD__ |
||
320 | ); |
||
321 | } |
||
322 | if ( $this->copyOnly ) { |
||
323 | $this->info( "Copying pages..." ); |
||
324 | } else { |
||
325 | $this->info( "Moving pages..." ); |
||
326 | } |
||
327 | while ( true ) { |
||
328 | $res = $dbr->select( 'blob_tracking', |
||
329 | [ 'bt_page' ], |
||
330 | [ |
||
331 | 'bt_moved' => 0, |
||
332 | 'bt_page > ' . $dbr->addQuotes( $startId ) |
||
333 | ], |
||
334 | __METHOD__, |
||
335 | [ |
||
336 | 'DISTINCT', |
||
337 | 'ORDER BY' => 'bt_page', |
||
338 | 'LIMIT' => $this->batchSize, |
||
339 | ] |
||
340 | ); |
||
341 | if ( !$res->numRows() ) { |
||
342 | break; |
||
343 | } |
||
344 | foreach ( $res as $row ) { |
||
345 | $startId = $row->bt_page; |
||
346 | $this->dispatch( 'doPage', $row->bt_page ); |
||
347 | $i++; |
||
348 | } |
||
349 | $this->report( 'pages', $i, $numPages ); |
||
350 | } |
||
351 | $this->report( 'pages', $i, $numPages ); |
||
352 | if ( $this->copyOnly ) { |
||
353 | $this->info( "All page copies queued." ); |
||
354 | } else { |
||
355 | $this->info( "All page moves queued." ); |
||
356 | } |
||
357 | } |
||
358 | |||
359 | /** |
||
360 | * Display a progress report |
||
361 | * @param string $label |
||
362 | * @param int $current |
||
363 | * @param int $end |
||
364 | */ |
||
365 | function report( $label, $current, $end ) { |
||
366 | $this->numBatches++; |
||
367 | if ( $current == $end || $this->numBatches >= $this->reportingInterval ) { |
||
368 | $this->numBatches = 0; |
||
369 | $this->info( "$label: $current / $end" ); |
||
370 | MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication(); |
||
371 | } |
||
372 | } |
||
373 | |||
374 | /** |
||
375 | * Move all orphan text to the new clusters |
||
376 | */ |
||
377 | function doAllOrphans() { |
||
378 | $dbr = wfGetDB( DB_REPLICA ); |
||
379 | $startId = 0; |
||
380 | $i = 0; |
||
381 | View Code Duplication | if ( $this->noCount ) { |
|
382 | $numOrphans = '[unknown]'; |
||
383 | } else { |
||
384 | $numOrphans = $dbr->selectField( 'blob_tracking', |
||
385 | 'COUNT(DISTINCT bt_text_id)', |
||
386 | [ 'bt_moved' => 0, 'bt_page' => 0 ], |
||
387 | __METHOD__ ); |
||
388 | if ( !$numOrphans ) { |
||
389 | return; |
||
390 | } |
||
391 | } |
||
392 | if ( $this->copyOnly ) { |
||
393 | $this->info( "Copying orphans..." ); |
||
394 | } else { |
||
395 | $this->info( "Moving orphans..." ); |
||
396 | } |
||
397 | |||
398 | while ( true ) { |
||
399 | $res = $dbr->select( 'blob_tracking', |
||
400 | [ 'bt_text_id' ], |
||
401 | [ |
||
402 | 'bt_moved' => 0, |
||
403 | 'bt_page' => 0, |
||
404 | 'bt_text_id > ' . $dbr->addQuotes( $startId ) |
||
405 | ], |
||
406 | __METHOD__, |
||
407 | [ |
||
408 | 'DISTINCT', |
||
409 | 'ORDER BY' => 'bt_text_id', |
||
410 | 'LIMIT' => $this->batchSize |
||
411 | ] |
||
412 | ); |
||
413 | if ( !$res->numRows() ) { |
||
414 | break; |
||
415 | } |
||
416 | $ids = []; |
||
417 | foreach ( $res as $row ) { |
||
418 | $startId = $row->bt_text_id; |
||
419 | $ids[] = $row->bt_text_id; |
||
420 | $i++; |
||
421 | } |
||
422 | // Need to send enough orphan IDs to the child at a time to fill a blob, |
||
423 | // so orphanBatchSize needs to be at least ~100. |
||
424 | // batchSize can be smaller or larger. |
||
425 | while ( count( $ids ) > $this->orphanBatchSize ) { |
||
426 | $args = array_slice( $ids, 0, $this->orphanBatchSize ); |
||
427 | $ids = array_slice( $ids, $this->orphanBatchSize ); |
||
428 | array_unshift( $args, 'doOrphanList' ); |
||
429 | call_user_func_array( [ $this, 'dispatch' ], $args ); |
||
430 | } |
||
431 | if ( count( $ids ) ) { |
||
432 | $args = $ids; |
||
433 | array_unshift( $args, 'doOrphanList' ); |
||
434 | call_user_func_array( [ $this, 'dispatch' ], $args ); |
||
435 | } |
||
436 | |||
437 | $this->report( 'orphans', $i, $numOrphans ); |
||
438 | } |
||
439 | $this->report( 'orphans', $i, $numOrphans ); |
||
440 | $this->info( "All orphans queued." ); |
||
441 | } |
||
442 | |||
443 | /** |
||
444 | * Main entry point for worker processes |
||
445 | */ |
||
446 | function executeChild() { |
||
447 | $this->debug( 'starting' ); |
||
448 | $this->syncDBs(); |
||
449 | |||
450 | while ( !feof( STDIN ) ) { |
||
451 | $line = rtrim( fgets( STDIN ) ); |
||
452 | if ( $line == '' ) { |
||
453 | continue; |
||
454 | } |
||
455 | $this->debug( $line ); |
||
456 | $args = explode( ' ', $line ); |
||
457 | $cmd = array_shift( $args ); |
||
458 | switch ( $cmd ) { |
||
459 | case 'doPage': |
||
460 | $this->doPage( intval( $args[0] ) ); |
||
461 | break; |
||
462 | case 'doOrphanList': |
||
463 | $this->doOrphanList( array_map( 'intval', $args ) ); |
||
464 | break; |
||
465 | case 'quit': |
||
466 | return; |
||
467 | } |
||
468 | MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication(); |
||
469 | } |
||
470 | } |
||
471 | |||
472 | /** |
||
473 | * Move tracked text in a given page |
||
474 | * |
||
475 | * @param int $pageId |
||
476 | */ |
||
477 | function doPage( $pageId ) { |
||
478 | $title = Title::newFromID( $pageId ); |
||
479 | if ( $title ) { |
||
480 | $titleText = $title->getPrefixedText(); |
||
481 | } else { |
||
482 | $titleText = '[deleted]'; |
||
483 | } |
||
484 | $dbr = wfGetDB( DB_REPLICA ); |
||
485 | |||
486 | // Finish any incomplete transactions |
||
487 | if ( !$this->copyOnly ) { |
||
488 | $this->finishIncompleteMoves( [ 'bt_page' => $pageId ] ); |
||
489 | $this->syncDBs(); |
||
490 | } |
||
491 | |||
492 | $startId = 0; |
||
493 | $trx = new CgzCopyTransaction( $this, $this->pageBlobClass ); |
||
494 | |||
495 | $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); |
||
496 | while ( true ) { |
||
497 | $res = $dbr->select( |
||
498 | [ 'blob_tracking', 'text' ], |
||
499 | '*', |
||
500 | [ |
||
501 | 'bt_page' => $pageId, |
||
502 | 'bt_text_id > ' . $dbr->addQuotes( $startId ), |
||
503 | 'bt_moved' => 0, |
||
504 | 'bt_new_url IS NULL', |
||
505 | 'bt_text_id=old_id', |
||
506 | ], |
||
507 | __METHOD__, |
||
508 | [ |
||
509 | 'ORDER BY' => 'bt_text_id', |
||
510 | 'LIMIT' => $this->batchSize |
||
511 | ] |
||
512 | ); |
||
513 | if ( !$res->numRows() ) { |
||
514 | break; |
||
515 | } |
||
516 | |||
517 | $lastTextId = 0; |
||
518 | foreach ( $res as $row ) { |
||
519 | $startId = $row->bt_text_id; |
||
520 | if ( $lastTextId == $row->bt_text_id ) { |
||
521 | // Duplicate (null edit) |
||
522 | continue; |
||
523 | } |
||
524 | $lastTextId = $row->bt_text_id; |
||
525 | // Load the text |
||
526 | $text = Revision::getRevisionText( $row ); |
||
527 | if ( $text === false ) { |
||
528 | $this->critical( "Error loading {$row->bt_rev_id}/{$row->bt_text_id}" ); |
||
529 | continue; |
||
530 | } |
||
531 | |||
532 | // Queue it |
||
533 | View Code Duplication | if ( !$trx->addItem( $text, $row->bt_text_id ) ) { |
|
534 | $this->debug( "$titleText: committing blob with " . $trx->getSize() . " items" ); |
||
535 | $trx->commit(); |
||
536 | $trx = new CgzCopyTransaction( $this, $this->pageBlobClass ); |
||
537 | $lbFactory->waitForReplication(); |
||
538 | } |
||
539 | } |
||
540 | } |
||
541 | |||
542 | $this->debug( "$titleText: committing blob with " . $trx->getSize() . " items" ); |
||
543 | $trx->commit(); |
||
544 | } |
||
545 | |||
546 | /** |
||
547 | * Atomic move operation. |
||
548 | * |
||
549 | * Write the new URL to the text table and set the bt_moved flag. |
||
550 | * |
||
551 | * This is done in a single transaction to provide restartable behavior |
||
552 | * without data loss. |
||
553 | * |
||
554 | * The transaction is kept short to reduce locking. |
||
555 | * |
||
556 | * @param int $textId |
||
557 | * @param string $url |
||
558 | */ |
||
559 | function moveTextRow( $textId, $url ) { |
||
560 | if ( $this->copyOnly ) { |
||
561 | $this->critical( "Internal error: can't call moveTextRow() in --copy-only mode" ); |
||
562 | exit( 1 ); |
||
0 ignored issues
–
show
The method
moveTextRow() contains an exit expression.
An exit expression should only be used in rare cases. For example, if you write a short command line script. In most cases however, using an ![]() |
|||
563 | } |
||
564 | $dbw = wfGetDB( DB_MASTER ); |
||
565 | $dbw->begin( __METHOD__ ); |
||
566 | $dbw->update( 'text', |
||
567 | [ // set |
||
568 | 'old_text' => $url, |
||
569 | 'old_flags' => 'external,utf-8', |
||
570 | ], |
||
571 | [ // where |
||
572 | 'old_id' => $textId |
||
573 | ], |
||
574 | __METHOD__ |
||
575 | ); |
||
576 | $dbw->update( 'blob_tracking', |
||
577 | [ 'bt_moved' => 1 ], |
||
578 | [ 'bt_text_id' => $textId ], |
||
579 | __METHOD__ |
||
580 | ); |
||
581 | $dbw->commit( __METHOD__ ); |
||
582 | } |
||
583 | |||
584 | /** |
||
585 | * Moves are done in two phases: bt_new_url and then bt_moved. |
||
586 | * - bt_new_url indicates that the text has been copied to the new cluster. |
||
587 | * - bt_moved indicates that the text table has been updated. |
||
588 | * |
||
589 | * This function completes any moves that only have done bt_new_url. This |
||
590 | * can happen when the script is interrupted, or when --copy-only is used. |
||
591 | * |
||
592 | * @param array $conds |
||
593 | */ |
||
594 | function finishIncompleteMoves( $conds ) { |
||
595 | $dbr = wfGetDB( DB_REPLICA ); |
||
596 | $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); |
||
597 | |||
598 | $startId = 0; |
||
599 | $conds = array_merge( $conds, [ |
||
600 | 'bt_moved' => 0, |
||
601 | 'bt_new_url IS NOT NULL' |
||
602 | ] ); |
||
603 | while ( true ) { |
||
604 | $res = $dbr->select( 'blob_tracking', |
||
605 | '*', |
||
606 | array_merge( $conds, [ 'bt_text_id > ' . $dbr->addQuotes( $startId ) ] ), |
||
607 | __METHOD__, |
||
608 | [ |
||
609 | 'ORDER BY' => 'bt_text_id', |
||
610 | 'LIMIT' => $this->batchSize, |
||
611 | ] |
||
612 | ); |
||
613 | if ( !$res->numRows() ) { |
||
614 | break; |
||
615 | } |
||
616 | $this->debug( 'Incomplete: ' . $res->numRows() . ' rows' ); |
||
617 | foreach ( $res as $row ) { |
||
618 | $startId = $row->bt_text_id; |
||
619 | $this->moveTextRow( $row->bt_text_id, $row->bt_new_url ); |
||
620 | if ( $row->bt_text_id % 10 == 0 ) { |
||
621 | $lbFactory->waitForReplication(); |
||
622 | } |
||
623 | } |
||
624 | } |
||
625 | } |
||
626 | |||
627 | /** |
||
628 | * Returns the name of the next target cluster |
||
629 | * @return string |
||
630 | */ |
||
631 | function getTargetCluster() { |
||
632 | $cluster = next( $this->destClusters ); |
||
633 | if ( $cluster === false ) { |
||
634 | $cluster = reset( $this->destClusters ); |
||
635 | } |
||
636 | |||
637 | return $cluster; |
||
638 | } |
||
639 | |||
640 | /** |
||
641 | * Gets a DB master connection for the given external cluster name |
||
642 | * @param string $cluster |
||
643 | * @return Database |
||
644 | */ |
||
645 | function getExtDB( $cluster ) { |
||
646 | $lb = wfGetLBFactory()->getExternalLB( $cluster ); |
||
0 ignored issues
–
show
The function
wfGetLBFactory() has been deprecated with message: since 1.27, use MediaWikiServices::getDBLoadBalancerFactory() instead.
This function has been deprecated. The supplier of the file has supplied an explanatory message. The explanatory message should give you some clue as to whether and when the function will be removed from the class and what other function to use instead. ![]() |
|||
647 | |||
648 | return $lb->getConnection( DB_MASTER ); |
||
649 | } |
||
650 | |||
651 | /** |
||
652 | * Move an orphan text_id to the new cluster |
||
653 | * |
||
654 | * @param array $textIds |
||
655 | */ |
||
656 | function doOrphanList( $textIds ) { |
||
657 | // Finish incomplete moves |
||
658 | if ( !$this->copyOnly ) { |
||
659 | $this->finishIncompleteMoves( [ 'bt_text_id' => $textIds ] ); |
||
660 | $this->syncDBs(); |
||
661 | } |
||
662 | |||
663 | $trx = new CgzCopyTransaction( $this, $this->orphanBlobClass ); |
||
664 | |||
665 | $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); |
||
666 | $res = wfGetDB( DB_REPLICA )->select( |
||
667 | [ 'text', 'blob_tracking' ], |
||
668 | [ 'old_id', 'old_text', 'old_flags' ], |
||
669 | [ |
||
670 | 'old_id' => $textIds, |
||
671 | 'bt_text_id=old_id', |
||
672 | 'bt_moved' => 0, |
||
673 | ], |
||
674 | __METHOD__, |
||
675 | [ 'DISTINCT' ] |
||
676 | ); |
||
677 | |||
678 | foreach ( $res as $row ) { |
||
679 | $text = Revision::getRevisionText( $row ); |
||
680 | if ( $text === false ) { |
||
681 | $this->critical( "Error: cannot load revision text for old_id={$row->old_id}" ); |
||
682 | continue; |
||
683 | } |
||
684 | |||
685 | View Code Duplication | if ( !$trx->addItem( $text, $row->old_id ) ) { |
|
686 | $this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" ); |
||
687 | $trx->commit(); |
||
688 | $trx = new CgzCopyTransaction( $this, $this->orphanBlobClass ); |
||
689 | $lbFactory->waitForReplication(); |
||
690 | } |
||
691 | } |
||
692 | $this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" ); |
||
693 | $trx->commit(); |
||
694 | } |
||
695 | } |
||
696 | |||
697 | /** |
||
698 | * Class to represent a recompression operation for a single CGZ blob |
||
699 | */ |
||
700 | class CgzCopyTransaction { |
||
701 | /** @var RecompressTracked */ |
||
702 | public $parent; |
||
703 | public $blobClass; |
||
704 | /** @var ConcatenatedGzipHistoryBlob */ |
||
705 | public $cgz; |
||
706 | public $referrers; |
||
707 | |||
708 | /** |
||
709 | * Create a transaction from a RecompressTracked object |
||
710 | * @param RecompressTracked $parent |
||
711 | * @param string $blobClass |
||
712 | */ |
||
713 | function __construct( $parent, $blobClass ) { |
||
714 | $this->blobClass = $blobClass; |
||
715 | $this->cgz = false; |
||
0 ignored issues
–
show
It seems like
false of type false is incompatible with the declared type object<ConcatenatedGzipHistoryBlob> of property $cgz .
Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property. Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.. ![]() |
|||
716 | $this->texts = []; |
||
0 ignored issues
–
show
The property
texts does not exist. Did you maybe forget to declare it?
In PHP it is possible to write to properties without declaring them. For example, the following is perfectly valid PHP code: class MyClass { }
$x = new MyClass();
$x->foo = true;
Generally, it is a good practice to explictly declare properties to avoid accidental typos and provide IDE auto-completion: class MyClass {
public $foo;
}
$x = new MyClass();
$x->foo = true;
![]() |
|||
717 | $this->parent = $parent; |
||
718 | } |
||
719 | |||
720 | /** |
||
721 | * Add text. |
||
722 | * Returns false if it's ready to commit. |
||
723 | * @param string $text |
||
724 | * @param int $textId |
||
725 | * @return bool |
||
726 | */ |
||
727 | View Code Duplication | function addItem( $text, $textId ) { |
|
728 | if ( !$this->cgz ) { |
||
729 | $class = $this->blobClass; |
||
730 | $this->cgz = new $class; |
||
731 | } |
||
732 | $hash = $this->cgz->addItem( $text ); |
||
733 | $this->referrers[$textId] = $hash; |
||
734 | $this->texts[$textId] = $text; |
||
735 | |||
736 | return $this->cgz->isHappy(); |
||
737 | } |
||
738 | |||
739 | function getSize() { |
||
740 | return count( $this->texts ); |
||
741 | } |
||
742 | |||
743 | /** |
||
744 | * Recompress text after some aberrant modification |
||
745 | */ |
||
746 | View Code Duplication | function recompress() { |
|
747 | $class = $this->blobClass; |
||
748 | $this->cgz = new $class; |
||
749 | $this->referrers = []; |
||
750 | foreach ( $this->texts as $textId => $text ) { |
||
751 | $hash = $this->cgz->addItem( $text ); |
||
752 | $this->referrers[$textId] = $hash; |
||
753 | } |
||
754 | } |
||
755 | |||
756 | /** |
||
757 | * Commit the blob. |
||
758 | * Does nothing if no text items have been added. |
||
759 | * May skip the move if --copy-only is set. |
||
760 | */ |
||
761 | function commit() { |
||
762 | $originalCount = count( $this->texts ); |
||
763 | if ( !$originalCount ) { |
||
764 | return; |
||
765 | } |
||
766 | |||
767 | /* Check to see if the target text_ids have been moved already. |
||
768 | * |
||
769 | * We originally read from the replica DB, so this can happen when a single |
||
770 | * text_id is shared between multiple pages. It's rare, but possible |
||
771 | * if a delete/move/undelete cycle splits up a null edit. |
||
772 | * |
||
773 | * We do a locking read to prevent closer-run race conditions. |
||
774 | */ |
||
775 | $dbw = wfGetDB( DB_MASTER ); |
||
776 | $dbw->begin( __METHOD__ ); |
||
777 | $res = $dbw->select( 'blob_tracking', |
||
778 | [ 'bt_text_id', 'bt_moved' ], |
||
779 | [ 'bt_text_id' => array_keys( $this->referrers ) ], |
||
780 | __METHOD__, [ 'FOR UPDATE' ] ); |
||
781 | $dirty = false; |
||
782 | foreach ( $res as $row ) { |
||
783 | if ( $row->bt_moved ) { |
||
784 | # This row has already been moved, remove it |
||
785 | $this->parent->debug( "TRX: conflict detected in old_id={$row->bt_text_id}" ); |
||
786 | unset( $this->texts[$row->bt_text_id] ); |
||
787 | $dirty = true; |
||
788 | } |
||
789 | } |
||
790 | |||
791 | // Recompress the blob if necessary |
||
792 | if ( $dirty ) { |
||
793 | if ( !count( $this->texts ) ) { |
||
794 | // All have been moved already |
||
795 | if ( $originalCount > 1 ) { |
||
796 | // This is suspcious, make noise |
||
797 | $this->parent->critical( |
||
798 | "Warning: concurrent operation detected, are there two conflicting " . |
||
799 | "processes running, doing the same job?" ); |
||
800 | } |
||
801 | |||
802 | return; |
||
803 | } |
||
804 | $this->recompress(); |
||
805 | } |
||
806 | |||
807 | // Insert the data into the destination cluster |
||
808 | $targetCluster = $this->parent->getTargetCluster(); |
||
809 | $store = $this->parent->store; |
||
810 | $targetDB = $store->getMaster( $targetCluster ); |
||
811 | $targetDB->clearFlag( DBO_TRX ); // we manage the transactions |
||
812 | $targetDB->begin( __METHOD__ ); |
||
813 | $baseUrl = $this->parent->store->store( $targetCluster, serialize( $this->cgz ) ); |
||
814 | |||
815 | // Write the new URLs to the blob_tracking table |
||
816 | foreach ( $this->referrers as $textId => $hash ) { |
||
817 | $url = $baseUrl . '/' . $hash; |
||
818 | $dbw->update( 'blob_tracking', |
||
819 | [ 'bt_new_url' => $url ], |
||
820 | [ |
||
821 | 'bt_text_id' => $textId, |
||
822 | 'bt_moved' => 0, # Check for concurrent conflicting update |
||
823 | ], |
||
824 | __METHOD__ |
||
825 | ); |
||
826 | } |
||
827 | |||
828 | $targetDB->commit( __METHOD__ ); |
||
829 | // Critical section here: interruption at this point causes blob duplication |
||
830 | // Reversing the order of the commits would cause data loss instead |
||
831 | $dbw->commit( __METHOD__ ); |
||
832 | |||
833 | // Write the new URLs to the text table and set the moved flag |
||
834 | if ( !$this->parent->copyOnly ) { |
||
835 | foreach ( $this->referrers as $textId => $hash ) { |
||
836 | $url = $baseUrl . '/' . $hash; |
||
837 | $this->parent->moveTextRow( $textId, $url ); |
||
838 | } |
||
839 | } |
||
840 | } |
||
841 | } |
||
842 |
The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.