Completed
Branch master (4b8315)
by
unknown
17:52
created

maintenance/storage/checkStorage.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
0 ignored issues
show
Coding Style Compatibility introduced by
For compatibility and reusability of your code, PSR1 recommends that a file should introduce either new symbols (like classes, functions, etc.) or have side-effects (like outputting something, or including other files), but not both at the same time. The first symbol is defined on line 46 and the first side effect is on line 25.

The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.

The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.

To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.

Loading history...
2
/**
3
 * Fsck for MediaWiki
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along
16
 * with this program; if not, write to the Free Software Foundation, Inc.,
17
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
 * http://www.gnu.org/copyleft/gpl.html
19
 *
20
 * @file
21
 * @ingroup Maintenance ExternalStorage
22
 */
23
24
if ( !defined( 'MEDIAWIKI' ) ) {
	// Not loaded from within MediaWiki: behave as a standalone command-line
	// script. $optionsWithoutArgs is set before commandLine.inc is loaded
	// (presumably so that --fix is parsed as a flag that takes no value).
	$optionsWithoutArgs = [ 'fix' ];
	require_once __DIR__ . '/../commandLine.inc';

	// The first positional argument, when given, is handed to check() as the
	// XML backup to restore damaged text from; false means "no backup".
	$xml = isset( $args[0] ) ? $args[0] : false;
	$fix = isset( $options['fix'] );

	$cs = new CheckStorage;
	$cs->check( $fix, $xml );
}
37
38
// ----------------------------------------------------------------------------------
39
40
/**
 * Maintenance script to do various checks on external storage.
 *
 * check() walks the revision table in chunks of rev_id and, for the text rows
 * those revisions point at, verifies the old_flags value, the external
 * storage URLs, the headers of locally stored serialized objects and the
 * targets of history blob stubs. Problems are printed as they are found and
 * collected in $errors, keyed by error type and then by revision ID.
 *
 * @fixme this should extend the base Maintenance class
 * @ingroup Maintenance ExternalStorage
 */
class CheckStorage {
	const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';

	/** @var array Map of rev_id => rev_text_id for the chunk currently being checked */
	public $oldIdMap = [];

	/** @var array[] Map of error type => array keyed by the affected rev_id */
	public $errors = [];

	/** @var ExternalStoreDB|null Created on first use */
	public $dbStore = null;

	public $errorDescriptions = [
		'restore text' => 'Damaged text, need to be restored from a backup',
		'restore revision' => 'Damaged revision row, need to be restored from a backup',
		'unfixable' => 'Unexpected errors with no automated fixing method',
		'fixed' => 'Errors already fixed',
		'fixable' => 'Errors which would already be fixed if --fix was specified',
	];

	/**
	 * Run all checks over every revision and print a report.
	 *
	 * @param bool $fix Whether to repair what can be repaired automatically
	 * @param string|bool $xml XML dump to restore damaged text from; an empty
	 *   string or false means no backup is available
	 */
	public function check( $fix = false, $xml = '' ) {
		$dbr = wfGetDB( DB_REPLICA );
		if ( $fix ) {
			print "Checking, will fix errors if possible...\n";
		} else {
			print "Checking...\n";
		}
		$maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
		$chunkSize = 1000;
		$flagStats = [];
		$objectStats = [];
		$knownFlags = [ 'external', 'gzip', 'object', 'utf-8' ];
		$this->errors = [
			'restore text' => [],
			'restore revision' => [],
			'unfixable' => [],
			'fixed' => [],
			'fixable' => [],
		];

		// "<=" rather than "<": otherwise the newest revision is skipped
		// whenever MAX(rev_id) happens to be the first ID of a chunk.
		for ( $chunkStart = 1; $chunkStart <= $maxRevId; $chunkStart += $chunkSize ) {
			$chunkEnd = $chunkStart + $chunkSize - 1;

			// Fetch revision rows
			$this->oldIdMap = [];
			$dbr->ping();
			$res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ],
				[ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ );
			foreach ( $res as $row ) {
				$this->oldIdMap[$row->rev_id] = $row->rev_text_id;
			}
			$dbr->freeResult( $res );

			if ( !count( $this->oldIdMap ) ) {
				continue;
			}

			// Fetch old_flags
			$missingTextRows = array_flip( $this->oldIdMap );
			$externalRevs = [];
			$objectRevs = [];
			$res = $dbr->select( 'text', [ 'old_id', 'old_flags' ],
				[ 'old_id' => array_values( array_unique( $this->oldIdMap ) ) ], __METHOD__ );
			foreach ( $res as $row ) {
				/**
				 * @var string $flags Comma-separated list of flags
				 */
				$flags = $row->old_flags;
				$id = $row->old_id;

				// Count how often each distinct flags value occurs
				if ( !isset( $flagStats[$flags] ) ) {
					$flagStats[$flags] = 0;
				}
				$flagStats[$flags]++;

				// Not missing
				unset( $missingTextRows[$id] );

				// Check for external or object
				$flagArray = ( $flags == '' ) ? [] : explode( ',', $flags );
				if ( in_array( 'external', $flagArray, true ) ) {
					$externalRevs[] = $id;
				} elseif ( in_array( 'object', $flagArray, true ) ) {
					$objectRevs[] = $id;
				}

				// Check for unrecognised flags
				if ( $flags == '0' ) {
					// This is a known bug from 2004
					// It's safe to just erase the old_flags field
					if ( $fix ) {
						$this->error( 'fixed', "Warning: old_flags set to 0", $id );
						$dbw = wfGetDB( DB_MASTER );
						$dbw->ping();
						$dbw->update( 'text', [ 'old_flags' => '' ],
							[ 'old_id' => $id ], __METHOD__ );
						echo "Fixed\n";
					} else {
						$this->error( 'fixable', "Warning: old_flags set to 0", $id );
					}
				} elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
					$this->error( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
				}
			}
			$dbr->freeResult( $res );

			// Output errors for any missing text rows
			foreach ( $missingTextRows as $oldId => $revId ) {
				$this->error( 'restore revision', "Error: missing text row", $oldId );
			}

			// Verify external revisions. Both maps have the shape
			// cluster => blob ID => list of old_ids pointing at that blob.
			$externalConcatBlobs = [];
			$externalNormalBlobs = [];
			if ( count( $externalRevs ) ) {
				$res = $dbr->select( 'text', [ 'old_id', 'old_flags', 'old_text' ],
					[ 'old_id' => $externalRevs ], __METHOD__ );
				foreach ( $res as $row ) {
					$urlParts = explode( '://', $row->old_text, 2 );
					if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
						$this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
						continue;
					}
					$proto = $urlParts[0];
					if ( $proto != 'DB' ) {
						$this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id );
						continue;
					}
					// "DB://cluster/id[/item]" splits into [ 'DB:', '', cluster, id, item? ]
					$path = explode( '/', $row->old_text );
					if ( !isset( $path[3] ) || $path[2] === '' || $path[3] === '' ) {
						// Cluster or blob ID is missing, so there is nothing to look up
						$this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
						continue;
					}
					$cluster = $path[2];
					$id = $path[3];
					if ( isset( $path[4] ) ) {
						$externalConcatBlobs[$cluster][$id][] = $row->old_id;
					} else {
						$externalNormalBlobs[$cluster][$id][] = $row->old_id;
					}
				}
				$dbr->freeResult( $res );
			}

			// Check external concat blobs for the right header
			$this->checkExternalConcatBlobs( $externalConcatBlobs );

			// Check external normal blobs for existence
			$this->checkExternalNormalBlobs( $externalNormalBlobs );

			// Check local objects
			$dbr->ping();
			$concatBlobs = [];
			if ( count( $objectRevs ) ) {
				$headerLength = 300;
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
					[ 'old_id' => $objectRevs ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$oldId = $row->old_id;
					$matches = [];
					if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
						$this->error( 'restore text', "Error: invalid object header", $oldId );
						continue;
					}

					$className = strtolower( $matches[2] );
					if ( strlen( $className ) != $matches[1] ) {
						$this->error(
							'restore text',
							"Error: invalid object header, wrong class name length",
							$oldId
						);
						continue;
					}

					if ( !isset( $objectStats[$className] ) ) {
						$objectStats[$className] = 0;
					}
					$objectStats[$className]++;

					switch ( $className ) {
						case 'concatenatedgziphistoryblob':
							// Good
							break;
						case 'historyblobstub':
						case 'historyblobcurstub':
							// "continue 2" moves on to the next row; a bare "continue"
							// inside a switch only acts like "break" and raises a
							// warning on PHP 7.3+.
							if ( strlen( $row->header ) == $headerLength ) {
								$this->error( 'unfixable', "Error: overlong stub header", $oldId );
								continue 2;
							}
							// NOTE(review): unserialize() on stored text can instantiate
							// arbitrary classes; only acceptable while the text table is
							// trusted content.
							$stubObj = unserialize( $row->header );
							if ( !is_object( $stubObj ) ) {
								$this->error( 'restore text', "Error: unable to unserialize stub object", $oldId );
								continue 2;
							}
							if ( $className == 'historyblobstub' ) {
								$concatBlobs[$stubObj->mOldId][] = $oldId;
							}
							// Targets of historyblobcurstub objects are not verified.
							break;
						default:
							$this->error( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
					}
				}
				$dbr->freeResult( $res );
			}

			// Check local concat blob validity
			$externalConcatBlobs = [];
			if ( count( $concatBlobs ) ) {
				$headerLength = 300;
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
					[ 'old_id' => array_keys( $concatBlobs ) ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$flags = explode( ',', $row->old_flags );
					if ( in_array( 'external', $flags, true ) ) {
						// Concat blob is in external storage?
						if ( in_array( 'object', $flags, true ) ) {
							$urlParts = explode( '/', $row->header );
							if ( $urlParts[0] != 'DB:' ) {
								$this->error(
									'unfixable',
									"Error: unrecognised external storage type \"{$urlParts[0]}\"",
									$row->old_id
								);
							} elseif ( !isset( $urlParts[3] ) || $urlParts[2] === '' || $urlParts[3] === '' ) {
								// Cluster or blob ID is missing, so there is nothing to look up
								$this->error(
									'unfixable',
									"Error: malformed external storage URL on concat bulk row {$row->old_id}",
									$concatBlobs[$row->old_id]
								);
							} else {
								$cluster = $urlParts[2];
								$id = $urlParts[3];
								if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
									$externalConcatBlobs[$cluster][$id] = [];
								}
								$externalConcatBlobs[$cluster][$id] = array_merge(
									$externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
								);
							}
						} else {
							$this->error(
								'unfixable',
								"Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
								$concatBlobs[$row->old_id] );
						}
					} elseif ( strcasecmp(
						substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ),
						self::CONCAT_HEADER
					) ) {
						$this->error(
							'restore text',
							"Error: Incorrect object header for concat bulk row {$row->old_id}",
							$concatBlobs[$row->old_id]
						);
					} # else good

					// NOTE(review): stubs whose target row was not returned stay in
					// $concatBlobs and are currently not reported.
					unset( $concatBlobs[$row->old_id] );
				}
				$dbr->freeResult( $res );
			}

			// Check targets of unresolved stubs
			$this->checkExternalConcatBlobs( $externalConcatBlobs );
			// next chunk
		}

		print "\n\nErrors:\n";
		foreach ( $this->errors as $name => $revIds ) {
			if ( count( $revIds ) ) {
				$description = isset( $this->errorDescriptions[$name] )
					? $this->errorDescriptions[$name]
					: $name;
				echo "$description: " . implode( ',', array_keys( $revIds ) ) . "\n";
			}
		}

		if ( count( $this->errors['restore text'] ) && $fix ) {
			if ( (string)$xml !== '' ) {
				$this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
			} else {
				echo "Can't fix text, no XML backup specified\n";
			}
		}

		print "\nFlag statistics:\n";
		$this->printStats( $flagStats );
		print "\nLocal object statistics:\n";
		$this->printStats( $objectStats );
	}

	/**
	 * Print one line per entry with its count and its share of the total.
	 *
	 * @param int[] $stats Map of label => count
	 */
	private function printStats( array $stats ) {
		$total = array_sum( $stats );
		foreach ( $stats as $label => $count ) {
			printf( "%-30s %10d %5.2f%%\n", $label, $count, $count / $total * 100 );
		}
	}

	/**
	 * Print an error and record the affected revisions under the given type.
	 *
	 * The text row IDs are translated to revision IDs through $oldIdMap, so
	 * only revisions of the chunk currently being checked are recorded.
	 *
	 * @param string $type Error type, a key of $errors
	 * @param string $msg Message to print
	 * @param int|string|array $ids One text row ID (old_id) or a list of them
	 */
	public function error( $type, $msg, $ids ) {
		if ( is_array( $ids ) && count( $ids ) == 1 ) {
			$ids = reset( $ids );
		}
		if ( is_array( $ids ) ) {
			$revIds = [];
			foreach ( $ids as $id ) {
				$revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
			}
			print "$msg in text rows " . implode( ', ', $ids ) .
				", revisions " . implode( ', ', $revIds ) . "\n";
		} else {
			$id = $ids;
			$revIds = array_keys( $this->oldIdMap, $id );
			if ( count( $revIds ) == 1 ) {
				print "$msg in old_id $id, rev_id {$revIds[0]}\n";
			} else {
				print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
			}
		}
		// Tolerate a type that check() did not pre-register instead of
		// failing on "null + array".
		if ( !isset( $this->errors[$type] ) ) {
			$this->errors[$type] = [];
		}
		$this->errors[$type] += array_flip( $revIds );
	}

	/**
	 * Get the external store object, creating it on first use.
	 *
	 * @return ExternalStoreDB
	 */
	private function getDbStore() {
		if ( $this->dbStore === null ) {
			$this->dbStore = new ExternalStoreDB;
		}

		return $this->dbStore;
	}

	/**
	 * Check that the targets of two-part external storage URLs exist and
	 * start with the concatenated blob header.
	 *
	 * @param array $externalConcatBlobs Map of cluster => blob ID => list of old_ids
	 */
	public function checkExternalConcatBlobs( $externalConcatBlobs ) {
		if ( !count( $externalConcatBlobs ) ) {
			return;
		}

		$dbStore = $this->getDbStore();
		$headerLength = strlen( self::CONCAT_HEADER );

		foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
			$blobIds = array_keys( $oldIds );
			$extDb = $dbStore->getSlave( $cluster );
			$blobsTable = $dbStore->getTable( $extDb );
			// Blob IDs were parsed out of stored text, so they are passed as
			// an array condition and quoted by the database layer.
			$res = $extDb->select( $blobsTable,
				[ 'blob_id', "LEFT(blob_text, $headerLength) AS header" ],
				[ 'blob_id' => $blobIds ], __METHOD__ );
			foreach ( $res as $row ) {
				if ( strcasecmp( $row->header, self::CONCAT_HEADER ) ) {
					$this->error(
						'restore text',
						"Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
						$oldIds[$row->blob_id]
					);
				}
				unset( $oldIds[$row->blob_id] );
			}
			$extDb->freeResult( $res );

			// Print errors for missing blobs rows
			foreach ( $oldIds as $blobId => $oldIds2 ) {
				$this->error(
					'restore text',
					"Error: missing target $cluster/$blobId for two-part ES URL",
					$oldIds2
				);
			}
		}
	}

	/**
	 * Check that the targets of one-part external storage URLs exist.
	 *
	 * @param array $externalNormalBlobs Map of cluster => blob ID => list of old_ids
	 */
	private function checkExternalNormalBlobs( array $externalNormalBlobs ) {
		if ( !count( $externalNormalBlobs ) ) {
			return;
		}

		$dbStore = $this->getDbStore();

		foreach ( $externalNormalBlobs as $cluster => $xBlobIds ) {
			$blobIds = array_keys( $xBlobIds );
			$extDb = $dbStore->getSlave( $cluster );
			$blobsTable = $dbStore->getTable( $extDb );
			$res = $extDb->select( $blobsTable,
				[ 'blob_id' ],
				[ 'blob_id' => $blobIds ], __METHOD__ );
			foreach ( $res as $row ) {
				unset( $xBlobIds[$row->blob_id] );
			}
			$extDb->freeResult( $res );

			// Print errors for missing blobs rows
			foreach ( $xBlobIds as $blobId => $oldIds ) {
				$this->error( 'restore text', "Error: missing target $blobId for one-part ES URL", $oldIds );
			}
		}
	}

	/**
	 * Restore the text of the given revisions from an XML dump.
	 *
	 * Writes the revision list to a temporary file, runs mwdumper to filter
	 * the dump down to those revisions and imports the result, passing each
	 * revision to importRevision().
	 *
	 * @param array $revIds Revision IDs whose text needs to be restored
	 * @param string $xml XML dump, passed to mwdumper as its input argument
	 */
	public function restoreText( $revIds, $xml ) {
		global $wgDBname;
		$tmpDir = wfTempDir();

		if ( !count( $revIds ) ) {
			return;
		}

		print "Restoring text from XML backup...\n";

		$revFileName = "$tmpDir/broken-revlist-$wgDBname";
		$filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";

		// Write revision list
		if ( file_put_contents( $revFileName, implode( "\n", $revIds ) ) === false ) {
			echo "Error writing revision list, can't restore text\n";

			return;
		}

		// Run mwdumper
		echo "Filtering XML dump...\n";
		$exitStatus = 0;
		passthru( 'mwdumper ' .
			wfEscapeShellArg(
				"--output=file:$filteredXmlFileName",
				"--filter=revlist:$revFileName",
				$xml
			), $exitStatus
		);

		// Anything other than a clean zero exit counts as failure
		if ( $exitStatus !== 0 ) {
			echo "mwdumper died with exit status $exitStatus\n";

			return;
		}

		$file = fopen( $filteredXmlFileName, 'r' );
		if ( !$file ) {
			echo "Unable to open filtered XML file\n";

			return;
		}

		// mwdumper may have taken a while; check the connections again
		$dbr = wfGetDB( DB_REPLICA );
		$dbw = wfGetDB( DB_MASTER );
		$dbr->ping();
		$dbw->ping();

		$source = new ImportStreamSource( $file );
		$importer = new WikiImporter(
			$source,
			ConfigFactory::getDefaultInstance()->makeConfig( 'main' )
		);
		$importer->setRevisionCallback( [ $this, 'importRevision' ] );
		$importer->doImport();

		// Release the handle unless the importer already closed it
		if ( is_resource( $file ) ) {
			fclose( $file );
		}
	}

	/**
	 * Revision callback for the importer: overwrite the text row of the
	 * revision with the text found in the dump.
	 *
	 * The parameters are taken by value; neither is reassigned here, so the
	 * former by-reference declaration served no purpose.
	 *
	 * @param object $revision Revision from the dump; getID() and getContent() are used
	 * @param object $importer Unused
	 */
	public function importRevision( $revision, $importer ) {
		$id = $revision->getID();
		$content = $revision->getContent( Revision::RAW );
		$id = $id ? $id : '';

		if ( $content === null ) {
			echo "Revision $id is broken, we have no content available\n";

			return;
		}

		$text = $content->serialize();
		if ( $text === '' ) {
			// This is what happens if the revision was broken at the time the
			// dump was made. Unfortunately, it also happens if the revision was
			// legitimately blank, so there's no way to tell the difference. To
			// be safe, we'll skip it and leave it broken

			echo "Revision $id is blank in the dump, may have been broken before export\n";

			return;
		}

		if ( !$id ) {
			// No ID, can't import
			echo "No id tag in revision, can't import\n";

			return;
		}

		// Find text row again
		$dbr = wfGetDB( DB_REPLICA );
		$oldId = $dbr->selectField( 'revision', 'rev_text_id', [ 'rev_id' => $id ], __METHOD__ );
		if ( !$oldId ) {
			echo "Missing revision row for rev_id $id\n";

			return;
		}

		// Compress the text
		$flags = Revision::compressRevisionText( $text );

		// Update the text row
		$dbw = wfGetDB( DB_MASTER );
		$dbw->update( 'text',
			[ 'old_flags' => $flags, 'old_text' => $text ],
			[ 'old_id' => $oldId ],
			__METHOD__, [ 'LIMIT' => 1 ]
		);

		// Remove it from the unfixed list and add it to the fixed list
		unset( $this->errors['restore text'][$id] );
		$this->errors['fixed'][$id] = true;
	}
}
534