These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | /** |
||
3 | * Base classes for database dumpers |
||
4 | * |
||
5 | * Copyright © 2005 Brion Vibber <[email protected]> |
||
6 | * https://www.mediawiki.org/ |
||
7 | * |
||
8 | * This program is free software; you can redistribute it and/or modify |
||
9 | * it under the terms of the GNU General Public License as published by |
||
10 | * the Free Software Foundation; either version 2 of the License, or |
||
11 | * (at your option) any later version. |
||
12 | * |
||
13 | * This program is distributed in the hope that it will be useful, |
||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
16 | * GNU General Public License for more details. |
||
17 | * |
||
18 | * You should have received a copy of the GNU General Public License along |
||
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
21 | * http://www.gnu.org/copyleft/gpl.html |
||
22 | * |
||
23 | * @file |
||
24 | * @ingroup Dump Maintenance |
||
25 | */ |
||
26 | |||
27 | require_once __DIR__ . '/Maintenance.php'; |
||
28 | require_once __DIR__ . '/../includes/export/DumpFilter.php'; |
||
29 | |||
30 | /** |
||
31 | * @ingroup Dump Maintenance |
||
32 | */ |
||
33 | class BackupDumper extends Maintenance { |
||
34 | public $reporting = true; |
||
35 | public $pages = null; // all pages |
||
36 | public $skipHeader = false; // don't output <mediawiki> and <siteinfo> |
||
37 | public $skipFooter = false; // don't output </mediawiki> |
||
38 | public $startId = 0; |
||
39 | public $endId = 0; |
||
40 | public $revStartId = 0; |
||
41 | public $revEndId = 0; |
||
42 | public $dumpUploads = false; |
||
43 | public $dumpUploadFileContents = false; |
||
44 | public $orderRevs = false; |
||
45 | |||
46 | protected $reportingInterval = 100; |
||
47 | protected $pageCount = 0; |
||
48 | protected $revCount = 0; |
||
49 | protected $server = null; // use default |
||
50 | protected $sink = null; // Output filters |
||
51 | protected $lastTime = 0; |
||
52 | protected $pageCountLast = 0; |
||
53 | protected $revCountLast = 0; |
||
54 | |||
55 | protected $outputTypes = []; |
||
56 | protected $filterTypes = []; |
||
57 | |||
58 | protected $ID = 0; |
||
59 | |||
60 | /** |
||
61 | * The dependency-injected database to use. |
||
62 | * |
||
63 | * @var DatabaseBase|null |
||
64 | * |
||
65 | * @see self::setDB |
||
66 | */ |
||
67 | protected $forcedDb = null; |
||
68 | |||
69 | /** @var LoadBalancer */ |
||
70 | protected $lb; |
||
71 | |||
72 | // @todo Unused? |
||
73 | private $stubText = false; // include rev_text_id instead of text; for 2-pass dump |
||
74 | |||
/**
 * Set up the error stream, the built-in output/filter plugins and the
 * command line options understood by all dumpers.
 *
 * @param array|null $args For backward compatibility. When non-empty, the
 *  arguments are loaded and processed immediately so that dump() can be
 *  called directly instead of execute().
 */
public function __construct( $args = null ) {
	parent::__construct();
	$this->stderr = fopen( "php://stderr", "wt" );

	// Built-in output plugins: <type> name => implementing class
	$builtinOutputs = [
		'file' => 'DumpFileOutput',
		'gzip' => 'DumpGZipOutput',
		'bzip2' => 'DumpBZip2Output',
		'dbzip2' => 'DumpDBZip2Output',
		'7zip' => 'Dump7ZipOutput',
	];
	foreach ( $builtinOutputs as $outputName => $outputClass ) {
		$this->registerOutput( $outputName, $outputClass );
	}

	// Built-in filter plugins: <type> name => implementing class
	$builtinFilters = [
		'latest' => 'DumpLatestFilter',
		'notalk' => 'DumpNotalkFilter',
		'namespace' => 'DumpNamespaceFilter',
	];
	foreach ( $builtinFilters as $filterName => $filterClass ) {
		$this->registerFilter( $filterName, $filterClass );
	}

	// These three can be specified multiple times
	$repeatableOptions = [
		'plugin' => 'Load a dump plugin class. Specify as <class>[:<file>].',
		'output' => 'Begin a filtered output stream; Specify as <type>:<file>. ' .
			'<type>s: file, gzip, bzip2, 7zip, dbzip2',
		'filter' => 'Add a filter on an output branch. Specify as ' .
			'<type>[:<options>]. <types>s: latest, notalk, namespace',
	];
	foreach ( $repeatableOptions as $optionName => $description ) {
		$this->addOption( $optionName, $description, false, true, false, true );
	}

	// The remaining options take a single value
	$singleOptions = [
		'report' => 'Report position and speed after every n pages processed. ' .
			'Default: 100.',
		'server' => 'Force reading from MySQL server',
		'7ziplevel' => '7zip compression level for all 7zip outputs. Used for ' .
			'-mx option to 7za command.',
	];
	foreach ( $singleOptions as $optionName => $description ) {
		$this->addOption( $optionName, $description, false, true );
	}

	if ( $args ) {
		// Args should be loaded and processed so that dump() can be called
		// directly instead of execute()
		$this->loadWithArgv( $args );
		$this->processOptions();
	}
}
||
113 | |||
/**
 * Make an output sink class selectable by name.
 *
 * A later registration under the same name replaces the earlier one.
 *
 * @param string $name Key under which the class is stored in $outputTypes
 * @param string $class Name of output filter plugin class
 */
public function registerOutput( $name, $class ) {
	$this->outputTypes[$name] = $class;
}
||
121 | |||
/**
 * Make a filter class selectable by name.
 *
 * A later registration under the same name replaces the earlier one.
 *
 * @param string $name Key under which the class is stored in $filterTypes
 * @param string $class Name of filter plugin class
 */
public function registerFilter( $name, $class ) {
	$this->filterTypes[$name] = $class;
}
||
129 | |||
/**
 * Load a plugin and register it
 *
 * The plugin's static 'register' method is invoked with this dumper as its
 * only argument, which gives the plugin the chance to call registerOutput()
 * and/or registerFilter().
 *
 * @param string $class Name of plugin class; must have a static 'register'
 *   method that takes a BackupDumper as a parameter.
 * @param string $file Full or relative path to the PHP file to load, or empty
 *   (the default) when the class needs no file to be loaded here.
 */
public function loadPlugin( $class, $file = '' ) {
	// $file is optional: callers that only know the class name may omit it.
	// Without the default, a one-argument call is an ArgumentCountError on
	// PHP >= 7.1.
	if ( $file != '' ) {
		require_once $file;
	}
	$register = [ $class, 'register' ];
	call_user_func_array( $register, [ $this ] );
}
||
144 | |||
/**
 * Placeholder entry point: this base class cannot be run on its own.
 *
 * @throws MWException Always
 */
public function execute() {
	$message = 'execute() must be overridden in subclasses';
	throw new MWException( $message );
}
||
148 | |||
/**
 * Processes arguments and sets $this->sink accordingly
 *
 * The options are walked in the order they were given, because that order
 * defines the output tree: every 'output' starts a new branch and every
 * following 'filter' wraps the branch that is currently being built.
 * 'plugin' options load extra output/filter types on the way.
 *
 * Also picks up the 'report' and 'server' options.
 */
public function processOptions() {
	$sink = null;
	$sinks = [];

	$options = $this->orderedOptions;
	foreach ( $options as $arg ) {
		$opt = $arg[0];
		$param = $arg[1];

		switch ( $opt ) {
			case 'plugin':
				$val = explode( ':', $param );

				if ( count( $val ) === 1 ) {
					// Only a class name was given. Pass the empty file path
					// explicitly: loadPlugin() declares two parameters.
					$this->loadPlugin( $val[0], '' );
				} elseif ( count( $val ) === 2 ) {
					$this->loadPlugin( $val[0], $val[1] );
				} else {
					$this->fatalError( 'Invalid plugin parameter' );
					return;
				}

				break;
			case 'output':
				$split = explode( ':', $param, 2 );
				if ( count( $split ) !== 2 ) {
					$this->fatalError( 'Invalid output parameter' );
					// Bail out like the plugin branch does, so $type/$file
					// are never read while undefined.
					return;
				}
				list( $type, $file ) = $split;
				if ( !is_null( $sink ) ) {
					// A new output begins: the branch built so far is complete
					$sinks[] = $sink;
				}
				if ( !isset( $this->outputTypes[$type] ) ) {
					$this->fatalError( "Unrecognized output sink type '$type'" );
					return;
				}
				$class = $this->outputTypes[$type];
				if ( $type === "7zip" ) {
					// Only the 7zip sink receives the compression level
					$sink = new $class( $file, intval( $this->getOption( '7ziplevel' ) ) );
				} else {
					$sink = new $class( $file );
				}

				break;
			case 'filter':
				if ( is_null( $sink ) ) {
					// A filter given before any output wraps the default output
					$sink = new DumpOutput();
				}

				$split = explode( ':', $param );
				$key = $split[0];

				if ( !isset( $this->filterTypes[$key] ) ) {
					$this->fatalError( "Unrecognized filter type '$key'" );
					return;
				}

				$type = $this->filterTypes[$key];

				if ( count( $split ) === 1 ) {
					$filter = new $type( $sink );
				} elseif ( count( $split ) === 2 ) {
					$filter = new $type( $sink, $split[1] );
				} else {
					$this->fatalError( 'Invalid filter parameter' );
					// Without this, $filter would be used while undefined
					return;
				}

				// references are lame in php...
				unset( $sink );
				$sink = $filter;

				break;
		}
	}

	if ( $this->hasOption( 'report' ) ) {
		$this->reportingInterval = intval( $this->getOption( 'report' ) );
	}

	if ( $this->hasOption( 'server' ) ) {
		$this->server = $this->getOption( 'server' );
	}

	if ( is_null( $sink ) ) {
		$sink = new DumpOutput();
	}
	$sinks[] = $sink;

	// Several branches are fanned out through a multi writer
	if ( count( $sinks ) > 1 ) {
		$this->sink = new DumpMultiWriter( $sinks );
	} else {
		$this->sink = $sink;
	}
}
||
244 | |||
/**
 * Run the export and stream it to the configured sink.
 *
 * What gets exported depends on $history and on the public range/page
 * properties of this object: log items (all or by ID range), specific
 * pages by name, pages by page ID range, revisions by revision ID range,
 * or simply all pages.
 *
 * @param int $history Passed on to WikiExporter; tested against
 *   WikiExporter::LOGS to select a log dump
 * @param int $text Passed on to WikiExporter (defaults to WikiExporter::TEXT)
 */
public function dump( $history, $text = WikiExporter::TEXT ) {
	# Notice messages will foul up your XML output even if they're
	# relatively harmless.
	if ( ini_get( 'display_errors' ) ) {
		ini_set( 'display_errors', 'stderr' );
	}

	$this->initProgress( $history );

	$db = $this->backupDb();
	$exporter = new WikiExporter( $db, $history, WikiExporter::STREAM, $text );
	$exporter->dumpUploads = $this->dumpUploads;
	$exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

	// Route the output through a progress filter that counts pages and
	// revisions on this object
	$wrapper = new ExportProgressFilter( $this->sink, $this );
	$exporter->setOutputSink( $wrapper );

	if ( !$this->skipHeader ) {
		$exporter->openStream();
	}

	$hasIdRange = ( $this->startId || $this->endId );

	if ( $history & WikiExporter::LOGS ) {
		# Log item dumps: all or by range
		if ( $hasIdRange ) {
			$exporter->logsByRange( $this->startId, $this->endId );
		} else {
			$exporter->allLogs();
		}
	} elseif ( !is_null( $this->pages ) ) {
		# Dump of specific pages
		$exporter->pagesByName( $this->pages );
	} elseif ( $hasIdRange ) {
		# Page dumps by page ID range
		$exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs );
	} elseif ( $this->revStartId || $this->revEndId ) {
		# Page dumps by revision ID range
		$exporter->revsByRange( $this->revStartId, $this->revEndId );
	} else {
		$exporter->allPages();
	}

	if ( !$this->skipFooter ) {
		$exporter->closeStream();
	}

	$this->report( true );
}
||
292 | |||
/**
 * Initialise starting time and maximum revision count.
 * We'll make ETA calculations based on progress, assuming relatively
 * constant per-revision rate.
 *
 * The maximum is MAX(page_id) of the page table for a
 * WikiExporter::CURRENT dump and MAX(rev_id) of the revision table
 * otherwise. It is read from the injected database if one was set,
 * else from a DB_REPLICA connection.
 *
 * @param int $history WikiExporter::CURRENT or WikiExporter::FULL
 */
public function initProgress( $history = WikiExporter::FULL ) {
	if ( $history == WikiExporter::CURRENT ) {
		$table = 'page';
		$field = 'page_id';
	} else {
		$table = 'revision';
		$field = 'rev_id';
	}

	$dbr = ( $this->forcedDb === null ) ? wfGetDB( DB_REPLICA ) : $this->forcedDb;
	$this->maxCount = $dbr->selectField( $table, "MAX($field)", '', __METHOD__ );

	// Both clocks start at the same instant
	$startedAt = microtime( true );
	$this->startTime = $startedAt;
	$this->lastTime = $startedAt;

	// Process ID, shown in the progress reports
	$this->ID = getmypid();
}
||
312 | |||
/**
 * Get the database connection to read the dump from.
 *
 * Returns the injected connection if setDB() provided one; otherwise a
 * fresh main load balancer is created (and remembered in $this->lb) and
 * a DB_REPLICA connection from the 'dump' group is taken from it.
 *
 * @todo Fixme: the --server parameter is currently not respected, as it
 * doesn't seem terribly easy to ask the load balancer for a particular
 * connection by name.
 * @return DatabaseBase
 */
public function backupDb() {
	if ( $this->forcedDb === null ) {
		$this->lb = wfGetLBFactory()->newMainLB();
		$connection = $this->lb->getConnection( DB_REPLICA, 'dump' );

		// Discourage the server from disconnecting us if it takes a long time
		// to read out the big ol' batch query.
		$oneDayInSeconds = 3600 * 24;
		$connection->setSessionOptions( [ 'connTimeout' => $oneDayInSeconds ] );

		return $connection;
	}

	return $this->forcedDb;
}
||
333 | |||
/**
 * Force the dump to use the provided database connection for database
 * operations, wherever possible.
 *
 * The connection is handed to the parent class and also remembered in
 * $this->forcedDb, which initProgress() and backupDb() consult.
 *
 * @param IDatabase|null $db (Optional) the database connection to use. If null, resort to
 *   use the globally provided ways to get database connections.
 */
public function setDB( IDatabase $db = null ) {
	parent::setDB( $db );
	$this->forcedDb = $db;
}
||
345 | |||
/**
 * Close all connections of the load balancer created by backupDb(), if any.
 */
public function __destruct() {
	if ( !isset( $this->lb ) ) {
		// backupDb() never created a load balancer; nothing to close
		return;
	}

	$this->lb->closeAll();
}
||
351 | |||
/**
 * Get the name of the database server to dump from.
 *
 * @return string The server set via the 'server' option if it is non-empty,
 *   else the global $wgDBserver
 */
public function backupServer() {
	global $wgDBserver;

	if ( $this->server ) {
		return $this->server;
	}

	return $wgDBserver;
}
||
359 | |||
/**
 * Count one more page as written. Does not trigger a progress report.
 */
public function reportPage() {
	++$this->pageCount;
}
||
363 | |||
/**
 * Count one more revision as written and give report() the chance to
 * print a progress line.
 */
public function revCount() {
	++$this->revCount;
	$this->report();
}
||
368 | |||
/**
 * Print a progress report if one is due.
 *
 * A periodic report is due whenever the revision count is a multiple of
 * the reporting interval. The final report is skipped when a periodic
 * report has just been printed for the same count (hence the xor).
 *
 * A reporting interval of 0 (e.g. from `--report=0` or a non-numeric
 * value) disables periodic reports instead of triggering a modulo by zero;
 * the final report is still printed.
 *
 * @param bool $final Whether this is the report at the end of the dump
 */
public function report( $final = false ) {
	// The modulo operator works on integers, so test the integer value
	$interval = (int)$this->reportingInterval;
	$atInterval = ( $interval !== 0 ) && ( $this->revCount % $interval == 0 );

	if ( $final xor $atInterval ) {
		$this->showReport();
	}
}
||
374 | |||
/**
 * Write one progress line (counts, overall and current rates, ETA) via
 * progress(), unless reporting is switched off.
 *
 * "all" rates cover the time since initProgress(); "curr" rates cover the
 * time since the previous report. The ETA extrapolates from the share of
 * $this->maxCount covered by the revision count so far.
 */
public function showReport() {
	if ( !$this->reporting ) {
		return;
	}

	$now = wfTimestamp( TS_DB );
	$nowts = microtime( true );
	$deltaAll = $nowts - $this->startTime;
	$deltaPart = $nowts - $this->lastTime;
	// Kept as object properties for backward compatibility with code that
	// may read them; they are not declared on this class.
	$this->pageCountPart = $this->pageCount - $this->pageCountLast;
	$this->revCountPart = $this->revCount - $this->revCountLast;

	$pageRate = '-';
	$revRate = '-';
	$etats = '-';
	if ( $deltaAll ) {
		$pageRate = $this->pageCount / $deltaAll;
		$revRate = $this->revCount / $deltaAll;

		// An ETA only exists once something was processed and a maximum is
		// known; otherwise either division below would be by zero.
		if ( $this->maxCount && $this->revCount ) {
			$portion = $this->revCount / $this->maxCount;
			$eta = $this->startTime + $deltaAll / $portion;
			$etats = wfTimestamp( TS_DB, intval( $eta ) );
		}
	}

	if ( $deltaPart ) {
		$pageRatePart = $this->pageCountPart / $deltaPart;
		$revRatePart = $this->revCountPart / $deltaPart;
	} else {
		$pageRatePart = '-';
		$revRatePart = '-';
	}

	$this->progress( sprintf(
		"%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
			. "%d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
		$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
		$pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
		$this->maxCount
	) );

	// Start a new "current" interval. The page baseline must advance along
	// with the revision baseline, or the current page rate would be computed
	// from the total page count.
	$this->lastTime = $nowts;
	$this->pageCountLast = $this->pageCount;
	$this->revCountLast = $this->revCount;
}
||
413 | |||
/**
 * Write a line to the stderr stream opened in the constructor, unless
 * reporting is switched off.
 *
 * @param string $string Text to write; a newline is appended
 */
public function progress( $string ) {
	if ( !$this->reporting ) {
		return;
	}

	fwrite( $this->stderr, $string . "\n" );
}
||
419 | |||
/**
 * Report an error through error(), passing 1 as its second argument.
 *
 * @param string $msg Error message; a newline is appended
 */
public function fatalError( $msg ) {
	$this->error( $msg . "\n", 1 );
}
||
423 | } |
||
424 | |||
/**
 * Pass-through filter that notifies a progress tracker about every page
 * and revision written to the wrapped sink.
 */
class ExportProgressFilter extends DumpFilter {
	/**
	 * @param object &$sink Sink to wrap; handed to the parent constructor
	 * @param object &$progress Tracker offering reportPage() and revCount(),
	 *   e.g. a BackupDumper
	 */
	public function __construct( &$sink, &$progress ) {
		parent::__construct( $sink );
		// NOTE(review): 'progress' is not declared in this class; confirm
		// whether DumpFilter declares it or it is created dynamically here.
		$this->progress = $progress;
	}

	/**
	 * Forward the end of a page to the parent, then count the page.
	 *
	 * @param string $string
	 */
	public function writeClosePage( $string ) {
		parent::writeClosePage( $string );
		$this->progress->reportPage();
	}

	/**
	 * Forward a revision to the parent, then count the revision.
	 *
	 * @param object $rev
	 * @param string $string
	 */
	public function writeRevision( $rev, $string ) {
		parent::writeRevision( $rev, $string );
		$this->progress->revCount();
	}
}
||
441 |
This check looks for assignments to scalar types that may be of the wrong type.
To ensure the code behaves as expected, it may be a good idea to add an explicit type cast.