1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* Base classes for database dumpers |
4
|
|
|
* |
5
|
|
|
* Copyright © 2005 Brion Vibber <[email protected]> |
6
|
|
|
* https://www.mediawiki.org/ |
7
|
|
|
* |
8
|
|
|
* This program is free software; you can redistribute it and/or modify |
9
|
|
|
* it under the terms of the GNU General Public License as published by |
10
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
11
|
|
|
* (at your option) any later version. |
12
|
|
|
* |
13
|
|
|
* This program is distributed in the hope that it will be useful, |
14
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
15
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16
|
|
|
* GNU General Public License for more details. |
17
|
|
|
* |
18
|
|
|
* You should have received a copy of the GNU General Public License along |
19
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc., |
20
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21
|
|
|
* http://www.gnu.org/copyleft/gpl.html |
22
|
|
|
* |
23
|
|
|
* @file |
24
|
|
|
* @ingroup Dump Maintenance |
25
|
|
|
*/ |
26
|
|
|
|
27
|
|
|
require_once __DIR__ . '/Maintenance.php'; |
28
|
|
|
require_once __DIR__ . '/../includes/export/DumpFilter.php'; |
29
|
|
|
|
30
|
|
|
/** |
31
|
|
|
* @ingroup Dump Maintenance |
32
|
|
|
*/ |
33
|
|
|
class BackupDumper extends Maintenance { |
34
|
|
|
/** @var bool Whether progress output is produced; checked by showReport() and progress() */
public $reporting = true;
/** @var mixed|null Specific pages to dump, handed to the exporter's pagesByName(); null = all pages */
public $pages = null;
/** @var bool don't output <mediawiki> and <siteinfo> */
public $skipHeader = false;
/** @var bool don't output </mediawiki> */
public $skipFooter = false;
/** @var int Start of the ID range used by dump() for log and page dumps; 0 = unset */
public $startId = 0;
/** @var int End of the ID range used by dump() for log and page dumps; 0 = unset */
public $endId = 0;
/** @var int Start of the revision ID range used by dump(); 0 = unset */
public $revStartId = 0;
/** @var int End of the revision ID range used by dump(); 0 = unset */
public $revEndId = 0;
/** @var bool Copied onto the exporter's dumpUploads by dump() */
public $dumpUploads = false;
/** @var bool Copied onto the exporter's dumpUploadFileContents by dump() */
public $dumpUploadFileContents = false;
/** @var bool Passed as the third argument of the exporter's pagesByRange() */
public $orderRevs = false;

/** @var int Number of revisions between periodic progress reports (--report) */
protected $reportingInterval = 100;
/** @var int Pages counted so far, incremented by reportPage() */
protected $pageCount = 0;
/** @var int Revisions counted so far, incremented by revCount() */
protected $revCount = 0;
/** @var string|null Server given with --server; null = use default */
protected $server = null;
/** @var mixed|null Output filters; built by processOptions() */
protected $sink = null;
/** @var float|int microtime( true ) of the previous report (or of initProgress()) */
protected $lastTime = 0;
/** @var int Value of $pageCount at the previous report */
protected $pageCountLast = 0;
/** @var int Value of $revCount at the previous report */
protected $revCountLast = 0;

/** @var string[] Output sink class names keyed by type name; see registerOutput() */
protected $outputTypes = [];
/** @var string[] Filter class names keyed by type name; see registerFilter() */
protected $filterTypes = [];

/** @var int Process ID, set from getmypid() in initProgress() */
protected $ID = 0;

/*
 * The following properties used to be created implicitly on first
 * assignment. They are declared here so that typos are caught and tooling
 * can see them. They stay public because implicitly created properties are
 * public, so existing readers keep working.
 */

/** @var resource|null Stream opened on php://stderr by the constructor; progress() writes to it */
public $stderr;
/** @var mixed|null Result of the MAX(page_id) / MAX(rev_id) query run by initProgress() */
public $maxCount;
/** @var float|null microtime( true ) recorded by initProgress() */
public $startTime;
/** @var int|null Pages counted since the previous report; computed by showReport() */
public $pageCountPart;
/** @var int|null Revisions counted since the previous report; computed by showReport() */
public $revCountPart;

/**
 * The dependency-injected database to use.
 *
 * @var DatabaseBase|null
 *
 * @see self::setDB
 */
protected $forcedDb = null;

/** @var LoadBalancer */
protected $lb;

// @todo Unused?
private $stubText = false; // include rev_text_id instead of text; for 2-pass dump
74
|
|
|
|
75
|
|
|
/**
 * Open the stderr stream, register the built-in output and filter plugins
 * and declare the command-line options understood by dumpers.
 *
 * @param array|null $args For backward compatibility: when non-empty, the
 *   arguments are loaded and processed right away so that dump() can be
 *   called directly instead of execute().
 */
public function __construct( $args = null ) {
	parent::__construct();
	$this->stderr = fopen( "php://stderr", "wt" );

	// Built-in output plugins, keyed by the <type> used with --output
	$builtinOutputs = [
		'file' => 'DumpFileOutput',
		'gzip' => 'DumpGZipOutput',
		'bzip2' => 'DumpBZip2Output',
		'dbzip2' => 'DumpDBZip2Output',
		'7zip' => 'Dump7ZipOutput',
	];
	foreach ( $builtinOutputs as $outputName => $outputClass ) {
		$this->registerOutput( $outputName, $outputClass );
	}

	// Built-in filter plugins, keyed by the <type> used with --filter
	$builtinFilters = [
		'latest' => 'DumpLatestFilter',
		'notalk' => 'DumpNotalkFilter',
		'namespace' => 'DumpNamespaceFilter',
	];
	foreach ( $builtinFilters as $filterName => $filterClass ) {
		$this->registerFilter( $filterName, $filterClass );
	}

	// These three can be specified multiple times
	$pluginHelp = 'Load a dump plugin class. Specify as <class>[:<file>].';
	$outputHelp = 'Begin a filtered output stream; Specify as <type>:<file>. ' .
		'<type>s: file, gzip, bzip2, 7zip, dbzip2';
	$filterHelp = 'Add a filter on an output branch. Specify as ' .
		'<type>[:<options>]. <types>s: latest, notalk, namespace';
	$this->addOption( 'plugin', $pluginHelp, false, true, false, true );
	$this->addOption( 'output', $outputHelp, false, true, false, true );
	$this->addOption( 'filter', $filterHelp, false, true, false, true );

	// Single-valued options
	$reportHelp = 'Report position and speed after every n pages processed. ' .
		'Default: 100.';
	$sevenZipHelp = '7zip compression level for all 7zip outputs. Used for ' .
		'-mx option to 7za command.';
	$this->addOption( 'report', $reportHelp, false, true );
	$this->addOption( 'server', 'Force reading from MySQL server', false, true );
	$this->addOption( '7ziplevel', $sevenZipHelp, false, true );

	if ( !$args ) {
		return;
	}

	// Args should be loaded and processed so that dump() can be called directly
	// instead of execute()
	$this->loadWithArgv( $args );
	$this->processOptions();
}
113
|
|
|
|
114
|
|
|
/**
 * Make an output sink class available under a type name.
 *
 * processOptions() looks the name up when it handles an 'output' option.
 * Registering an existing name replaces the earlier class.
 *
 * @param string $name Type name
 * @param string $class Name of output filter plugin class
 */
public function registerOutput( $name, $class ) {
	$this->outputTypes[$name] = $class;
}
121
|
|
|
|
122
|
|
|
/**
 * Make a filter class available under a type name.
 *
 * processOptions() looks the name up when it handles a 'filter' option.
 * Registering an existing name replaces the earlier class.
 *
 * @param string $name Type name
 * @param string $class Name of filter plugin class
 */
public function registerFilter( $name, $class ) {
	$this->filterTypes[$name] = $class;
}
129
|
|
|
|
130
|
|
|
/**
 * Load a plugin and register it
 *
 * @param string $class Name of plugin class; must have a static 'register'
 *   method that takes a BackupDumper as a parameter.
 * @param string $file Full or relative path to the PHP file to load, or empty
 *   (the default) when no file has to be loaded first. The parameter is
 *   optional so that callers which only know the class name can omit it.
 */
public function loadPlugin( $class, $file = '' ) {
	// Loose comparison on purpose: null is treated like the empty string
	if ( $file != '' ) {
		require_once $file;
	}
	// Hand this dumper to the plugin's 'register' method
	$register = [ $class, 'register' ];
	call_user_func_array( $register, [ $this ] );
}
144
|
|
|
|
145
|
|
|
/**
 * Placeholder entry point: this base implementation only throws.
 *
 * @throws MWException Always
 */
public function execute() {
	$message = 'execute() must be overridden in subclasses';
	throw new MWException( $message );
}
148
|
|
|
|
149
|
|
|
/**
 * Processes arguments and sets $this->sink accordingly
 *
 * The options are walked in the order recorded in $this->orderedOptions:
 *  - 'plugin' loads and registers a plugin class;
 *  - 'output' starts a new sink, after storing the previous one (if any);
 *  - 'filter' wraps the current sink in a filter, starting from a plain
 *    DumpOutput when no sink exists yet.
 * Afterwards the 'report' and 'server' options are read. When more than one
 * sink was built they are combined in a DumpMultiWriter.
 */
public function processOptions() {
	$sink = null;
	$sinks = [];

	$options = $this->orderedOptions;
	foreach ( $options as $arg ) {
		$opt = $arg[0];
		$param = $arg[1];

		switch ( $opt ) {
			case 'plugin':
				$val = explode( ':', $param );

				if ( count( $val ) === 1 ) {
					// Only a class name was given: pass the empty path
					// explicitly, as loadPlugin() takes the file as its
					// second argument.
					$this->loadPlugin( $val[0], '' );
				} elseif ( count( $val ) === 2 ) {
					$this->loadPlugin( $val[0], $val[1] );
				} else {
					$this->fatalError( 'Invalid plugin parameter' );
					return;
				}

				break;
			case 'output':
				$split = explode( ':', $param, 2 );
				if ( count( $split ) !== 2 ) {
					$this->fatalError( 'Invalid output parameter' );
				}
				list( $type, $file ) = $split;
				// A new output starts a new branch; keep the finished one
				if ( !is_null( $sink ) ) {
					$sinks[] = $sink;
				}
				if ( !isset( $this->outputTypes[$type] ) ) {
					$this->fatalError( "Unrecognized output sink type '$type'" );
				}
				$class = $this->outputTypes[$type];
				if ( $type === "7zip" ) {
					// 7zip sinks additionally get the compression level
					$sink = new $class( $file, intval( $this->getOption( '7ziplevel' ) ) );
				} else {
					$sink = new $class( $file );
				}

				break;
			case 'filter':
				if ( is_null( $sink ) ) {
					$sink = new DumpOutput();
				}

				$split = explode( ':', $param );
				$key = $split[0];

				if ( !isset( $this->filterTypes[$key] ) ) {
					$this->fatalError( "Unrecognized filter type '$key'" );
				}

				$type = $this->filterTypes[$key];

				if ( count( $split ) === 1 ) {
					$filter = new $type( $sink );
				} elseif ( count( $split ) === 2 ) {
					$filter = new $type( $sink, $split[1] );
				} else {
					$this->fatalError( 'Invalid filter parameter' );
				}

				// references are lame in php...
				unset( $sink );
				$sink = $filter;

				break;
		}
	}

	if ( $this->hasOption( 'report' ) ) {
		$this->reportingInterval = intval( $this->getOption( 'report' ) );
	}

	if ( $this->hasOption( 'server' ) ) {
		$this->server = $this->getOption( 'server' );
	}

	// Without any output/filter option fall back to a plain DumpOutput
	if ( is_null( $sink ) ) {
		$sink = new DumpOutput();
	}
	$sinks[] = $sink;

	if ( count( $sinks ) > 1 ) {
		$this->sink = new DumpMultiWriter( $sinks );
	} else {
		$this->sink = $sink;
	}
}
244
|
|
|
|
245
|
|
|
/**
 * Run the dump: build a WikiExporter on the backup database, attach the
 * configured sink through a progress-reporting filter, and export logs,
 * a page range, a revision range, all pages or specific pages.
 *
 * @param int $history Passed to initProgress() and WikiExporter; when its
 *   WikiExporter::LOGS bit is set, log items are dumped instead of pages
 * @param int $text Passed through to WikiExporter
 */
public function dump( $history, $text = WikiExporter::TEXT ) {
	# Notice messages will foul up your XML output even if they're
	# relatively harmless.
	if ( ini_get( 'display_errors' ) ) {
		ini_set( 'display_errors', 'stderr' );
	}

	$this->initProgress( $history );

	$exporter = new WikiExporter( $this->backupDb(), $history, WikiExporter::STREAM, $text );
	$exporter->dumpUploads = $this->dumpUploads;
	$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

	// Everything the exporter writes passes through the progress filter
	$progressFilter = new ExportProgressFilter( $this->sink, $this );
	$exporter->setOutputSink( $progressFilter );

	if ( !$this->skipHeader ) {
		$exporter->openStream();
	}

	$hasIdRange = $this->startId || $this->endId;
	if ( $history & WikiExporter::LOGS ) {
		# Log item dumps: all or by range
		if ( $hasIdRange ) {
			$exporter->logsByRange( $this->startId, $this->endId );
		} else {
			$exporter->allLogs();
		}
	} elseif ( $this->pages !== null ) {
		# Dump of specific pages
		$exporter->pagesByName( $this->pages );
	} elseif ( $hasIdRange ) {
		# Page dumps by page ID range
		$exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs );
	} elseif ( $this->revStartId || $this->revEndId ) {
		$exporter->revsByRange( $this->revStartId, $this->revEndId );
	} else {
		$exporter->allPages();
	}

	if ( !$this->skipFooter ) {
		$exporter->closeStream();
	}

	$this->report( true );
}
292
|
|
|
|
293
|
|
|
/**
 * Initialise starting time and maximum revision count.
 * We'll make ETA calculations based on progress, assuming relatively
 * constant per-revision rate.
 *
 * The maximum is MAX(page_id) of the page table for WikiExporter::CURRENT
 * and MAX(rev_id) of the revision table otherwise. It is read from the
 * forced database when one was set, else from a replica connection.
 *
 * @param int $history WikiExporter::CURRENT or WikiExporter::FULL
 */
public function initProgress( $history = WikiExporter::FULL ) {
	if ( $history == WikiExporter::CURRENT ) {
		$table = 'page';
		$field = 'page_id';
	} else {
		$table = 'revision';
		$field = 'rev_id';
	}

	$dbr = ( $this->forcedDb === null ) ? wfGetDB( DB_REPLICA ) : $this->forcedDb;
	$this->maxCount = $dbr->selectField( $table, "MAX($field)", '', __METHOD__ );

	// Start time and "time of last report" begin at the same instant
	$started = microtime( true );
	$this->startTime = $started;
	$this->lastTime = $started;
	$this->ID = getmypid();
}
312
|
|
|
|
313
|
|
|
/**
 * Get the database connection to dump from.
 *
 * Returns the connection injected through setDB() when there is one.
 * Otherwise a new main load balancer is created (and remembered in
 * $this->lb) and a replica connection in the 'dump' group is taken from it.
 *
 * @todo Fixme: the --server parameter is currently not respected, as it
 * doesn't seem terribly easy to ask the load balancer for a particular
 * connection by name.
 * @return DatabaseBase
 */
public function backupDb() {
	$injected = $this->forcedDb;
	if ( $injected !== null ) {
		return $injected;
	}

	$this->lb = wfGetLBFactory()->newMainLB();
	$connection = $this->lb->getConnection( DB_REPLICA, 'dump' );

	// Discourage the server from disconnecting us if it takes a long time
	// to read out the big ol' batch query.
	$sessionOptions = [ 'connTimeout' => 3600 * 24 ];
	$connection->setSessionOptions( $sessionOptions );

	return $connection;
}
333
|
|
|
|
334
|
|
|
/**
 * Force the dump to use the provided database connection for database
 * operations, wherever possible.
 *
 * The connection is handed to the parent implementation and also stored
 * in $this->forcedDb, which initProgress() and backupDb() consult.
 *
 * @param IDatabase|null $db (Optional) the database connection to use. If null, resort to
 *   use the globally provided ways to get database connections.
 */
public function setDB( IDatabase $db = null ) {
	parent::setDB( $db );

	$this->forcedDb = $db;
}
345
|
|
|
|
346
|
|
|
/**
 * Close all connections of the load balancer created by backupDb(),
 * if one was created.
 */
public function __destruct() {
	if ( !isset( $this->lb ) ) {
		return;
	}

	$this->lb->closeAll();
}
351
|
|
|
|
352
|
|
|
/**
 * Get the name of the server to read from.
 *
 * @return string|null The server set via --server when it is non-empty,
 *   otherwise the value of the global $wgDBserver
 */
public function backupServer() {
	global $wgDBserver;

	if ( $this->server ) {
		return $this->server;
	}

	return $wgDBserver;
}
359
|
|
|
|
360
|
|
|
/**
 * Count one more page. ExportProgressFilter calls this after each page
 * has been closed.
 */
public function reportPage() {
	++$this->pageCount;
}
363
|
|
|
|
364
|
|
|
/**
 * Count one more revision and give report() the chance to print a
 * periodic progress line. ExportProgressFilter calls this after each
 * revision has been written.
 */
public function revCount() {
	++$this->revCount;
	$this->report();
}
368
|
|
|
|
369
|
|
|
/**
 * Show a progress report when one is due.
 *
 * A periodic (non-final) call reports whenever the revision count is a
 * multiple of the reporting interval. The final call reports only when
 * the count is NOT such a multiple, because otherwise the last periodic
 * call already printed the same numbers.
 *
 * @param bool $final Whether this is the closing report of the dump
 */
public function report( $final = false ) {
	$interval = $this->reportingInterval;

	// The interval is the intval() of a user-supplied option and may be 0;
	// a modulo by zero must be avoided. With a zero interval there are no
	// periodic reports and only the final one is shown.
	$onInterval = $interval != 0 && ( $this->revCount % $interval == 0 );

	if ( $final xor $onInterval ) {
		$this->showReport();
	}
}
374
|
|
|
|
375
|
|
|
/**
 * Write one progress line with page and revision counts, overall and
 * since-last-report rates, and an estimated completion time.
 *
 * Does nothing when reporting is disabled. Values that cannot be computed
 * (no time elapsed, nothing processed yet, unknown maximum) are passed to
 * the format string as '-'.
 */
public function showReport() {
	if ( !$this->reporting ) {
		return;
	}

	$now = wfTimestamp( TS_DB );
	$nowts = microtime( true );
	$deltaAll = $nowts - $this->startTime;
	$deltaPart = $nowts - $this->lastTime;
	$this->pageCountPart = $this->pageCount - $this->pageCountLast;
	$this->revCountPart = $this->revCount - $this->revCountLast;

	// Overall rates since initProgress()
	if ( $deltaAll ) {
		$pageRate = $this->pageCount / $deltaAll;
		$revRate = $this->revCount / $deltaAll;
	} else {
		$pageRate = '-';
		$revRate = '-';
	}

	// The ETA extrapolates from the share of revisions done so far. It can
	// only be computed when both the count and the maximum are non-zero,
	// otherwise one of the divisions below would divide by zero.
	if ( $deltaAll && $this->revCount && $this->maxCount ) {
		$portion = $this->revCount / $this->maxCount;
		$eta = $this->startTime + $deltaAll / $portion;
		$etats = wfTimestamp( TS_DB, intval( $eta ) );
	} else {
		$etats = '-';
	}

	// Rates since the previous report
	if ( $deltaPart ) {
		$pageRatePart = $this->pageCountPart / $deltaPart;
		$revRatePart = $this->revCountPart / $deltaPart;
	} else {
		$pageRatePart = '-';
		$revRatePart = '-';
	}

	$this->progress( sprintf(
		"%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
			. "%d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
		$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
		$pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
		$this->maxCount
	) );

	// Remember where this report ended. The page counter has to be stored
	// together with the revision counter, or the next "curr" page rate
	// would be computed from the cumulative page count.
	$this->lastTime = $nowts;
	$this->pageCountLast = $this->pageCount;
	$this->revCountLast = $this->revCount;
}
413
|
|
|
|
414
|
|
|
/**
 * Write a progress message, followed by a newline, to the stderr stream
 * opened in the constructor. Does nothing when reporting is disabled.
 *
 * @param string $string Message to write
 */
public function progress( $string ) {
	if ( !$this->reporting ) {
		return;
	}

	fwrite( $this->stderr, $string . "\n" );
}
419
|
|
|
|
420
|
|
|
/**
 * Report an error through error(), appending a newline to the message and
 * passing 1 as the second argument.
 * NOTE(review): presumably this ends the script; confirm against
 * Maintenance::error().
 *
 * @param string $msg Error message
 */
public function fatalError( $msg ) {
	$line = "$msg\n";
	$this->error( $line, 1 );
}
423
|
|
|
} |
424
|
|
|
|
425
|
|
|
/**
 * Dump filter that hands page-close and revision writes to its parent
 * implementation and then notifies a progress reporter, so that the
 * reporter can count pages and revisions as they are written.
 */
class ExportProgressFilter extends DumpFilter {
	/**
	 * The object notified about progress; dump() passes the BackupDumper
	 * itself. It must offer reportPage() and revCount().
	 *
	 * Declared explicitly instead of being created on first assignment.
	 * It is public because the implicitly created property was public too.
	 *
	 * @var BackupDumper
	 */
	public $progress;

	/**
	 * @param mixed &$sink Sink handed on to the parent filter
	 * @param BackupDumper &$progress Object to notify about written pages and revisions
	 */
	public function __construct( &$sink, &$progress ) {
		parent::__construct( $sink );
		$this->progress = $progress;
	}

	/**
	 * Pass the page close on, then count the page.
	 *
	 * @param string $string Passed unchanged to the parent filter
	 */
	public function writeClosePage( $string ) {
		parent::writeClosePage( $string );
		$this->progress->reportPage();
	}

	/**
	 * Pass the revision on, then count the revision.
	 *
	 * @param mixed $rev Passed unchanged to the parent filter
	 * @param string $string Passed unchanged to the parent filter
	 */
	public function writeRevision( $rev, $string ) {
		parent::writeRevision( $rev, $string );
		$this->progress->revCount();
	}
}
441
|
|
|
|
In PHP it is possible to write to properties without declaring them. For example, the following is perfectly valid PHP code:
Generally, it is a good practice to explicitly declare properties to avoid accidental typos and provide IDE auto-completion: