Completed
Branch master (939199)
by
unknown
39:35
created

maintenance/backup.inc (7 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * Base classes for database dumpers
4
 *
5
 * Copyright © 2005 Brion Vibber <[email protected]>
6
 * https://www.mediawiki.org/
7
 *
8
 * This program is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License along
19
 * with this program; if not, write to the Free Software Foundation, Inc.,
20
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21
 * http://www.gnu.org/copyleft/gpl.html
22
 *
23
 * @file
24
 * @ingroup Dump Maintenance
25
 */
26
27
require_once __DIR__ . '/Maintenance.php';
28
require_once __DIR__ . '/../includes/export/DumpFilter.php';
29
30
/**
31
 * @ingroup Dump Maintenance
32
 */
33
class BackupDumper extends Maintenance {
34
	/** @var bool When false, showReport() and progress() produce no output */
	public $reporting = true;
	/** @var mixed Handed to WikiExporter::pagesByName() by dump(); null means all pages */
	public $pages = null;
	/** @var bool Don't output <mediawiki> and <siteinfo> */
	public $skipHeader = false;
	/** @var bool Don't output </mediawiki> */
	public $skipFooter = false;
	/** @var int Start of the page/log ID range given to the exporter; 0 when unset */
	public $startId = 0;
	/** @var int End of the page/log ID range given to the exporter; 0 when unset */
	public $endId = 0;
	/** @var int Start of the revision ID range given to the exporter; 0 when unset */
	public $revStartId = 0;
	/** @var int End of the revision ID range given to the exporter; 0 when unset */
	public $revEndId = 0;
	/** @var bool Copied onto the WikiExporter by dump() */
	public $dumpUploads = false;
	/** @var bool Copied onto the WikiExporter by dump() */
	public $dumpUploadFileContents = false;
	/** @var bool Passed to WikiExporter::pagesByRange() by dump() */
	public $orderRevs = false;

	/** @var int report() fires whenever revCount is a multiple of this value */
	protected $reportingInterval = 100;
	/** @var int Incremented by reportPage() */
	protected $pageCount = 0;
	/** @var int Incremented by revCount() */
	protected $revCount = 0;
	/** @var string|null Value of --server; null means use default */
	protected $server = null;
	/** @var mixed Output filters */
	protected $sink = null;
	/** @var float microtime( true ) recorded by initProgress() / the last showReport() */
	protected $lastTime = 0.0;
	/** @var int Baseline subtracted from pageCount in showReport() */
	protected $pageCountLast = 0;
	/** @var int Baseline subtracted from revCount in showReport() */
	protected $revCountLast = 0;

	/** @var array Output plugin class names, keyed by type name; see registerOutput() */
	protected $outputTypes = [];
	/** @var array Filter plugin class names, keyed by type name; see registerFilter() */
	protected $filterTypes = [];

	/** @var int Process ID, set by initProgress() */
	protected $ID = 0;

	/**
	 * The dependency-injected database to use.
	 *
	 * @var DatabaseBase|null
	 *
	 * @see self::setDB
	 */
	protected $forcedDb = null;

	/** @var LoadBalancer */
	protected $lb;

	// @todo Unused?
	private $stubText = false; // include rev_text_id instead of text; for 2-pass dump

	// The properties below used to be created on the fly by assignment. They are
	// declared public so their visibility stays what it was as dynamic properties.

	/** @var resource|bool Handle opened on php://stderr by the constructor */
	public $stderr;
	/** @var mixed Result of the MAX() query issued by initProgress() */
	public $maxCount;
	/** @var float microtime( true ) recorded by initProgress() */
	public $startTime;
	/** @var int Pages counted since the pageCountLast baseline; set by showReport() */
	public $pageCountPart;
	/** @var int Revisions counted since the revCountLast baseline; set by showReport() */
	public $revCountPart;
74
75
	/**
	 * Register the built-in output and filter plugins and declare the command
	 * line options understood by the dumper.
	 *
	 * @param array $args For backward compatibility: when non-empty, the
	 *   arguments are loaded and processed immediately so that dump() can be
	 *   called directly instead of execute()
	 */
	public function __construct( $args = null ) {
		parent::__construct();
		$this->stderr = fopen( "php://stderr", "wt" );

		// Built-in output plugins, keyed by the <type> accepted by --output
		$builtinOutputs = [
			'file' => 'DumpFileOutput',
			'gzip' => 'DumpGZipOutput',
			'bzip2' => 'DumpBZip2Output',
			'dbzip2' => 'DumpDBZip2Output',
			'7zip' => 'Dump7ZipOutput',
		];
		foreach ( $builtinOutputs as $outputName => $outputClass ) {
			$this->registerOutput( $outputName, $outputClass );
		}

		// Built-in filter plugins, keyed by the <type> accepted by --filter
		$builtinFilters = [
			'latest' => 'DumpLatestFilter',
			'notalk' => 'DumpNotalkFilter',
			'namespace' => 'DumpNamespaceFilter',
		];
		foreach ( $builtinFilters as $filterName => $filterClass ) {
			$this->registerFilter( $filterName, $filterClass );
		}

		// These three can be specified multiple times
		$this->addOption( 'plugin', 'Load a dump plugin class. Specify as <class>[:<file>].',
			false, true, false, true );
		$this->addOption( 'output', 'Begin a filtered output stream; Specify as <type>:<file>. ' .
			'<type>s: file, gzip, bzip2, 7zip, dbzip2', false, true, false, true );
		$this->addOption( 'filter', 'Add a filter on an output branch. Specify as ' .
			'<type>[:<options>]. <types>s: latest, notalk, namespace', false, true, false, true );

		// Single-occurrence options
		$this->addOption( 'report', 'Report position and speed after every n pages processed. ' .
			'Default: 100.', false, true );
		$this->addOption( 'server', 'Force reading from MySQL server', false, true );
		$this->addOption( '7ziplevel', '7zip compression level for all 7zip outputs. Used for ' .
			'-mx option to 7za command.', false, true );

		if ( !$args ) {
			return;
		}

		// Args should be loaded and processed so that dump() can be called directly
		// instead of execute()
		$this->loadWithArgv( $args );
		$this->processOptions();
	}
113
114
	/**
	 * Make an output plugin available under the given type name.
	 * Registering an existing name replaces the previous class.
	 *
	 * @param string $name Type name, as looked up by processOptions() for --output
	 * @param string $class Name of output filter plugin class
	 */
	public function registerOutput( $name, $class ) {
		$this->outputTypes[$name] = $class;
	}
121
122
	/**
	 * Make a filter plugin available under the given type name.
	 * Registering an existing name replaces the previous class.
	 *
	 * @param string $name Type name, as looked up by processOptions() for --filter
	 * @param string $class Name of filter plugin class
	 */
	public function registerFilter( $name, $class ) {
		$this->filterTypes[$name] = $class;
	}
129
130
	/**
	 * Load a plugin and register it
	 *
	 * @param string $class Name of plugin class; must have a static 'register'
	 *   method that takes a BackupDumper as a parameter.
	 * @param string $file Full or relative path to the PHP file to load, or empty.
	 *   Defaults to empty so the method can be called with just a class name.
	 */
	public function loadPlugin( $class, $file = '' ) {
		// An empty path means there is no file to pull in first.
		if ( $file != '' ) {
			require_once $file;
		}
		$register = [ $class, 'register' ];
		call_user_func_array( $register, [ $this ] );
	}
144
145
	/**
	 * Placeholder entry point; this base class has nothing to run.
	 *
	 * @throws MWException Always
	 */
	public function execute() {
		$message = 'execute() must be overridden in subclasses';
		throw new MWException( $message );
	}
148
149
	/**
	 * Processes arguments and sets $this->sink accordingly.
	 *
	 * Walks $this->orderedOptions in the order given: each 'output' starts a
	 * new sink, each 'filter' wraps the sink currently being built, and each
	 * 'plugin' is loaded via loadPlugin(). Also picks up the 'report' and
	 * 'server' options. With more than one sink the result is a
	 * DumpMultiWriter; with none it is a plain DumpOutput.
	 */
	public function processOptions() {
		$sink = null;
		$sinks = [];

		foreach ( $this->orderedOptions as $arg ) {
			$opt = $arg[0];
			$param = $arg[1];

			switch ( $opt ) {
				case 'plugin':
					$val = explode( ':', $param );
					$partCount = count( $val );

					if ( $partCount === 1 ) {
						// Only a class name was given. Pass the empty path explicitly:
						// loadPlugin() treats '' as "no file to require".
						$this->loadPlugin( $val[0], '' );
					} elseif ( $partCount === 2 ) {
						$this->loadPlugin( $val[0], $val[1] );
					} else {
						$this->fatalError( 'Invalid plugin parameter' );
						return;
					}

					break;
				case 'output':
					// Limit to two parts so the file part may itself contain ':'
					$split = explode( ':', $param, 2 );
					if ( count( $split ) !== 2 ) {
						$this->fatalError( 'Invalid output parameter' );
					}
					list( $type, $file ) = $split;
					// A new output ends the previous branch; keep the finished one
					if ( $sink !== null ) {
						$sinks[] = $sink;
					}
					if ( !isset( $this->outputTypes[$type] ) ) {
						$this->fatalError( "Unrecognized output sink type '$type'" );
					}
					$class = $this->outputTypes[$type];
					if ( $type === "7zip" ) {
						$sink = new $class( $file, intval( $this->getOption( '7ziplevel' ) ) );
					} else {
						$sink = new $class( $file );
					}

					break;
				case 'filter':
					// A filter before any output wraps a default DumpOutput
					if ( $sink === null ) {
						$sink = new DumpOutput();
					}

					$split = explode( ':', $param );
					$key = $split[0];

					if ( !isset( $this->filterTypes[$key] ) ) {
						$this->fatalError( "Unrecognized filter type '$key'" );
					}

					$type = $this->filterTypes[$key];

					if ( count( $split ) === 1 ) {
						$filter = new $type( $sink );
					} elseif ( count( $split ) === 2 ) {
						$filter = new $type( $sink, $split[1] );
					} else {
						$this->fatalError( 'Invalid filter parameter' );
					}

					// references are lame in php...
					// A filter constructor may take $sink by reference (ExportProgressFilter
					// in this file does), so drop the variable before reusing the name.
					unset( $sink );
					$sink = $filter;

					break;
			}
		}

		if ( $this->hasOption( 'report' ) ) {
			$this->reportingInterval = intval( $this->getOption( 'report' ) );
		}

		if ( $this->hasOption( 'server' ) ) {
			$this->server = $this->getOption( 'server' );
		}

		if ( $sink === null ) {
			$sink = new DumpOutput();
		}
		$sinks[] = $sink;

		if ( count( $sinks ) > 1 ) {
			$this->sink = new DumpMultiWriter( $sinks );
		} else {
			$this->sink = $sink;
		}
	}
244
245
	/**
	 * Run the export: set up a WikiExporter streaming into $this->sink
	 * (wrapped in an ExportProgressFilter), select what to dump from the
	 * configured ranges/pages, and emit a final progress report.
	 *
	 * @param int $history Passed to WikiExporter; if the WikiExporter::LOGS bit
	 *   is set, log items are dumped instead of pages
	 * @param int $text Passed to WikiExporter
	 */
	public function dump( $history, $text = WikiExporter::TEXT ) {
		# Notice messages will foul up your XML output even if they're
		# relatively harmless.
		if ( ini_get( 'display_errors' ) ) {
			ini_set( 'display_errors', 'stderr' );
		}

		$this->initProgress( $history );

		$dumpDb = $this->backupDb();
		$exporter = new WikiExporter( $dumpDb, $history, WikiExporter::STREAM, $text );
		$exporter->dumpUploads = $this->dumpUploads;
		$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

		// Route output through the progress filter so pages/revisions get counted
		$progressSink = new ExportProgressFilter( $this->sink, $this );
		$exporter->setOutputSink( $progressSink );

		if ( !$this->skipHeader ) {
			$exporter->openStream();
		}

		$hasIdRange = $this->startId || $this->endId;
		if ( $history & WikiExporter::LOGS ) {
			# Log item dumps: all or by range
			if ( $hasIdRange ) {
				$exporter->logsByRange( $this->startId, $this->endId );
			} else {
				$exporter->allLogs();
			}
		} elseif ( $this->pages !== null ) {
			# Dump of specific pages
			$exporter->pagesByName( $this->pages );
		} elseif ( $hasIdRange ) {
			# Page dumps by page ID range
			$exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs );
		} elseif ( $this->revStartId || $this->revEndId ) {
			# Page dumps by revision ID range
			$exporter->revsByRange( $this->revStartId, $this->revEndId );
		} else {
			# Page dumps: everything
			$exporter->allPages();
		}

		if ( !$this->skipFooter ) {
			$exporter->closeStream();
		}

		$this->report( true );
	}
292
293
	/**
	 * Initialise starting time and maximum revision count.
	 * We'll make ETA calculations based on progress, assuming relatively
	 * constant per-revision rate.
	 *
	 * @param int $history WikiExporter::CURRENT or WikiExporter::FULL
	 */
	public function initProgress( $history = WikiExporter::FULL ) {
		// Current-only dumps are measured against the page table, everything
		// else against the revision table.
		if ( $history == WikiExporter::CURRENT ) {
			$table = 'page';
			$field = 'page_id';
		} else {
			$table = 'revision';
			$field = 'rev_id';
		}

		// Prefer the injected connection (see setDB()) over the global replica
		$dbr = ( $this->forcedDb === null ) ? wfGetDB( DB_REPLICA ) : $this->forcedDb;

		$this->maxCount = $dbr->selectField( $table, "MAX($field)", '', __METHOD__ );

		// Both timestamps are floats as returned by microtime( true )
		$started = microtime( true );
		$this->startTime = $started;
		$this->lastTime = $started;
		$this->ID = getmypid();
	}
312
313
	/**
	 * Get the connection the dump reads from: the injected one if setDB() was
	 * given a connection, otherwise a DB_REPLICA connection in the 'dump'
	 * group from a freshly created main load balancer (kept in $this->lb).
	 *
	 * @todo Fixme: the --server parameter is currently not respected, as it
	 * doesn't seem terribly easy to ask the load balancer for a particular
	 * connection by name.
	 * @return DatabaseBase
	 */
	public function backupDb() {
		if ( $this->forcedDb !== null ) {
			return $this->forcedDb;
		}

		$this->lb = wfGetLBFactory()->newMainLB();
		$conn = $this->lb->getConnection( DB_REPLICA, 'dump' );

		// Discourage the server from disconnecting us if it takes a long time
		// to read out the big ol' batch query.
		$oneDay = 3600 * 24;
		$conn->setSessionOptions( [ 'connTimeout' => $oneDay ] );

		return $conn;
	}
333
334
	/**
	 * Force the dump to use the provided database connection for database
	 * operations, wherever possible.
	 *
	 * The connection is handed to the parent class and also remembered in
	 * $this->forcedDb, which initProgress() and backupDb() consult.
	 *
	 * @param IDatabase|null $db (Optional) the database connection to use. If null, resort to
	 *   use the globally provided ways to get database connections.
	 */
	public function setDB( IDatabase $db = null ) {
		parent::setDB( $db );
		$this->forcedDb = $db;
	}
345
346
	/**
	 * Close all connections of the load balancer created by backupDb(), if any.
	 */
	public function __destruct() {
		if ( !isset( $this->lb ) ) {
			// backupDb() never created a load balancer; nothing to close
			return;
		}

		$this->lb->closeAll();
	}
351
352
	/**
	 * Name of the database server to use: the --server value if one was
	 * given (and is truthy), otherwise the global $wgDBserver.
	 *
	 * @return string
	 */
	public function backupServer() {
		global $wgDBserver;

		return $this->server ?: $wgDBserver;
	}
359
360
	/**
	 * Count one more page. Called by ExportProgressFilter::writeClosePage().
	 */
	public function reportPage() {
		++$this->pageCount;
	}
363
364
	/**
	 * Count one more revision and give report() the chance to print a
	 * progress line. Called by ExportProgressFilter::writeRevision().
	 */
	public function revCount() {
		++$this->revCount;
		$this->report();
	}
368
369
	/**
	 * Show a progress report when one is due.
	 *
	 * A regular call reports whenever the revision count is a multiple of
	 * the reporting interval. The final call reports only if the last
	 * regular call did not already do so (hence the xor).
	 *
	 * @param bool $final True for the closing report at the end of the dump
	 */
	public function report( $final = false ) {
		$interval = (int)$this->reportingInterval;
		// An interval of 0 (e.g. "--report 0" or a non-numeric value run through
		// intval()) would be a modulo by zero; treat it as "no periodic reports".
		$atInterval = $interval !== 0 && ( $this->revCount % $interval == 0 );

		if ( $final xor $atInterval ) {
			$this->showReport();
		}
	}
374
375
	/**
	 * Write one progress line through progress(): timestamp, wiki ID, process
	 * ID, page and revision counts with overall and since-last-report rates,
	 * and an ETA extrapolated from revCount / maxCount.
	 *
	 * Does nothing when $this->reporting is false. Relies on initProgress()
	 * having set startTime, lastTime and maxCount.
	 */
	public function showReport() {
		if ( !$this->reporting ) {
			return;
		}

		$now = wfTimestamp( TS_DB );
		$nowts = microtime( true );
		$deltaAll = $nowts - $this->startTime;
		$deltaPart = $nowts - $this->lastTime;
		// These stay object properties (rather than locals) because they were
		// always visible on the object after a report.
		$this->pageCountPart = $this->pageCount - $this->pageCountLast;
		$this->revCountPart = $this->revCount - $this->revCountLast;

		$pageRate = '-';
		$revRate = '-';
		$etats = '-';
		if ( $deltaAll ) {
			$pageRate = $this->pageCount / $deltaAll;
			$revRate = $this->revCount / $deltaAll;
			// The ETA divides by maxCount and by the fraction done so far, so it
			// can only be computed once both revCount and maxCount are non-zero.
			if ( $this->revCount && $this->maxCount ) {
				$portion = $this->revCount / $this->maxCount;
				$eta = $this->startTime + $deltaAll / $portion;
				$etats = wfTimestamp( TS_DB, intval( $eta ) );
			}
		}

		if ( $deltaPart ) {
			$pageRatePart = $this->pageCountPart / $deltaPart;
			$revRatePart = $this->revCountPart / $deltaPart;
		} else {
			$pageRatePart = '-';
			$revRatePart = '-';
		}

		$this->progress( sprintf(
			"%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
				. "%d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
			$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
			$pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
			$this->maxCount
		) );

		// Move all three baselines forward so the next "curr" figures cover only
		// the interval since this report. pageCountLast used to be left behind,
		// which made the current page rate use the total page count.
		$this->lastTime = $nowts;
		$this->pageCountLast = $this->pageCount;
		$this->revCountLast = $this->revCount;
	}
413
414
	/**
	 * Write a line to the stderr handle opened by the constructor, unless
	 * reporting is switched off.
	 *
	 * @param string $string Text to write; a newline is appended
	 */
	public function progress( $string ) {
		if ( !$this->reporting ) {
			return;
		}

		fwrite( $this->stderr, $string . "\n" );
	}
419
420
	/**
	 * Report an error through error(), passing 1 as its second argument.
	 *
	 * @param string $msg Message; a newline is appended
	 */
	public function fatalError( $msg ) {
		$line = "$msg\n";
		$this->error( $line, 1 );
	}
423
}
424
425
/**
 * Dump filter that delegates to the DumpFilter implementation and then tells
 * a progress tracker about every closed page and every written revision.
 */
class ExportProgressFilter extends DumpFilter {
	/**
	 * @param mixed &$sink Handed on to DumpFilter::__construct()
	 * @param mixed &$progress Object providing reportPage() and revCount();
	 *   BackupDumper::dump() passes the dumper itself
	 */
	public function __construct( &$sink, &$progress ) {
		parent::__construct( $sink );
		$this->progress = $progress;
	}

	/**
	 * Write the page close via the parent, then count the page.
	 *
	 * @param string $string
	 */
	public function writeClosePage( $string ) {
		parent::writeClosePage( $string );

		$tracker = $this->progress;
		$tracker->reportPage();
	}

	/**
	 * Write the revision via the parent, then count the revision.
	 *
	 * @param mixed $rev Passed through to DumpFilter::writeRevision()
	 * @param string $string
	 */
	public function writeRevision( $rev, $string ) {
		parent::writeRevision( $rev, $string );

		$tracker = $this->progress;
		$tracker->revCount();
	}
}
441