|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* MediaWiki page data importer. |
|
4
|
|
|
* |
|
5
|
|
|
* Copyright © 2003,2005 Brion Vibber <[email protected]> |
|
6
|
|
|
* https://www.mediawiki.org/ |
|
7
|
|
|
* |
|
8
|
|
|
* This program is free software; you can redistribute it and/or modify |
|
9
|
|
|
* it under the terms of the GNU General Public License as published by |
|
10
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
|
11
|
|
|
* (at your option) any later version. |
|
12
|
|
|
* |
|
13
|
|
|
* This program is distributed in the hope that it will be useful, |
|
14
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
15
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
16
|
|
|
* GNU General Public License for more details. |
|
17
|
|
|
* |
|
18
|
|
|
* You should have received a copy of the GNU General Public License along |
|
19
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc., |
|
20
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
21
|
|
|
* http://www.gnu.org/copyleft/gpl.html |
|
22
|
|
|
* |
|
23
|
|
|
* @file |
|
24
|
|
|
* @ingroup SpecialPage |
|
25
|
|
|
*/ |
|
26
|
|
|
|
|
27
|
|
|
/** |
|
28
|
|
|
* XML file reader for the page data importer. |
|
29
|
|
|
* |
|
30
|
|
|
* implements Special:Import |
|
31
|
|
|
* @ingroup SpecialPage |
|
32
|
|
|
*/ |
|
33
|
|
|
class WikiImporter { |
|
34
|
|
|
private $reader = null; |
|
35
|
|
|
private $foreignNamespaces = null; |
|
36
|
|
|
private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback; |
|
|
|
|
|
|
37
|
|
|
private $mSiteInfoCallback, $mPageOutCallback; |
|
|
|
|
|
|
38
|
|
|
private $mNoticeCallback, $mDebug; |
|
|
|
|
|
|
39
|
|
|
private $mImportUploads, $mImageBasePath; |
|
|
|
|
|
|
40
|
|
|
private $mNoUpdates = false; |
|
41
|
|
|
/** @var Config */ |
|
42
|
|
|
private $config; |
|
43
|
|
|
/** @var ImportTitleFactory */ |
|
44
|
|
|
private $importTitleFactory; |
|
45
|
|
|
/** @var array */ |
|
46
|
|
|
private $countableCache = []; |
|
47
|
|
|
|
|
48
|
|
|
/** |
|
49
|
|
|
* Creates a WikiImporter drawing from the source provided |
|
50
|
|
|
* @param ImportSource $source |
|
51
|
|
|
* @param Config $config |
|
52
|
|
|
* @throws Exception |
|
53
|
|
|
*/ |
|
54
|
|
|
function __construct( ImportSource $source, Config $config = null ) {
	// XMLReader is the only parser used below; fail early when it is unavailable.
	if ( !class_exists( 'XMLReader' ) ) {
		throw new Exception( 'Import requires PHP to have been compiled with libxml support' );
	}

	$this->reader = new XMLReader();
	if ( !$config ) {
		// Omitting the Config is deprecated; fall back to the 'main' config instance.
		wfDeprecated( __METHOD__ . ' without a Config instance', '1.25' );
		$config = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
	}
	$this->config = $config;

	// The source is handed to XMLReader through the uploadsource:// stream wrapper,
	// which must be registered exactly once per process.
	if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
		stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
	}
	$id = UploadSourceAdapter::registerSource( $source );

	// Enable the entity loader, as it is needed for loading external URLs via
	// XMLReader::open (T86036)
	$oldDisable = libxml_disable_entity_loader( false );
	if ( defined( 'LIBXML_PARSEHUGE' ) ) {
		$status = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
	} else {
		$status = $this->reader->open( "uploadsource://$id" );
	}
	if ( !$status ) {
		$error = libxml_get_last_error();
		libxml_disable_entity_loader( $oldDisable );
		// libxml_get_last_error() returns false when libxml recorded nothing, so
		// the message must not be read off it unconditionally.
		$detail = $error ? $error->message : 'unknown error';
		throw new MWException( 'Encountered an internal error while initializing WikiImporter object: ' .
			$detail );
	}
	libxml_disable_entity_loader( $oldDisable );

	// Default callbacks
	$this->setPageCallback( [ $this, 'beforeImportPage' ] );
	$this->setRevisionCallback( [ $this, "importRevision" ] );
	$this->setUploadCallback( [ $this, 'importUpload' ] );
	$this->setLogItemCallback( [ $this, 'importLogItem' ] );
	$this->setPageOutCallback( [ $this, 'finishImportPage' ] );

	$this->importTitleFactory = new NaiveImportTitleFactory();
}
|
96
|
|
|
|
|
97
|
|
|
/** |
|
98
|
|
|
* @return null|XMLReader |
|
99
|
|
|
*/ |
|
100
|
|
|
public function getReader() {
	// The property starts out null and is assigned an XMLReader by the constructor.
	$xmlReader = $this->reader;

	return $xmlReader;
}
|
103
|
|
|
|
|
104
|
|
|
/**
 * Log an XML error.
 *
 * Despite the name, nothing is thrown here: the text goes to debug()
 * and then to wfDebug().
 *
 * @param string $err Error text, appended to the log lines
 */
public function throwXmlError( $err ) {
	$this->debug( 'FAILURE: ' . $err );
	wfDebug( 'WikiImporter XML error: ' . $err . "\n" );
}
|
108
|
|
|
|
|
109
|
|
|
/**
 * Send a line to wfDebug(), but only while debug mode is switched on.
 *
 * @param string $data Text to log
 */
public function debug( $data ) {
	if ( !$this->mDebug ) {
		return;
	}

	wfDebug( 'IMPORT: ' . $data . "\n" );
}
|
114
|
|
|
|
|
115
|
|
|
/**
 * Send a warning line to wfDebug(). Unlike debug(), this is not gated on
 * the debug flag.
 *
 * @param string $data Text to log
 */
public function warn( $data ) {
	$line = 'IMPORT: ' . $data . "\n";
	wfDebug( $line );
}
|
118
|
|
|
|
|
119
|
|
|
/**
 * Emit a notice identified by a message key.
 *
 * Any arguments after $msg are collected into an array of message
 * parameters. If a callable notice callback is set it receives
 * ( $msg, $params ); otherwise the message text is echoed.
 *
 * @param string $msg Message key
 */
public function notice( $msg /*, $param, ...*/ ) {
	// Drop the message key; what remains are the message parameters.
	$params = array_slice( func_get_args(), 1 );

	if ( !is_callable( $this->mNoticeCallback ) ) {
		# No ImportReporter -> CLI
		echo wfMessage( $msg, $params )->text() . "\n";
		return;
	}

	call_user_func( $this->mNoticeCallback, $msg, $params );
}
|
129
|
|
|
|
|
130
|
|
|
/** |
|
131
|
|
|
* Set debug mode... |
|
132
|
|
|
* @param bool $debug |
|
133
|
|
|
*/ |
|
134
|
|
|
function setDebug( $debug ) {
	// debug() consults this flag before writing anything.
	$this->mDebug = $debug;
}
|
137
|
|
|
|
|
138
|
|
|
/** |
|
139
|
|
|
* Set 'no updates' mode. In this mode, the link tables will not be updated by the importer |
|
140
|
|
|
* @param bool $noupdates |
|
141
|
|
|
*/ |
|
142
|
|
|
function setNoUpdates( $noupdates ) {
	// Stored here and forwarded to each WikiRevision via setNoUpdates().
	$this->mNoUpdates = $noupdates;
}
|
145
|
|
|
|
|
146
|
|
|
/** |
|
147
|
|
|
* Set a callback that displays notice messages |
|
148
|
|
|
* |
|
149
|
|
|
* @param callable $callback |
|
150
|
|
|
* @return callable |
|
151
|
|
|
*/ |
|
152
|
|
|
public function setNoticeCallback( $callback ) {
	// wfSetVar() performs the assignment and its result is handed back to the caller.
	$previous = wfSetVar( $this->mNoticeCallback, $callback );

	return $previous;
}
|
155
|
|
|
|
|
156
|
|
|
/** |
|
157
|
|
|
* Sets the action to perform as each new page in the stream is reached. |
|
158
|
|
|
* @param callable $callback |
|
159
|
|
|
* @return callable |
|
160
|
|
|
*/ |
|
161
|
|
|
public function setPageCallback( $callback ) {
	// Swap in the new callback and return the one it replaces.
	$old = $this->mPageCallback;
	$this->mPageCallback = $callback;

	return $old;
}
|
166
|
|
|
|
|
167
|
|
|
/** |
|
168
|
|
|
* Sets the action to perform as each page in the stream is completed. |
|
169
|
|
|
* Callback accepts the page title (as a Title object), a second object |
|
170
|
|
|
* with the original title form (in case it's been overridden into a |
|
171
|
|
|
* local namespace), and a count of revisions. |
|
172
|
|
|
* |
|
173
|
|
|
* @param callable $callback |
|
174
|
|
|
* @return callable |
|
175
|
|
|
*/ |
|
176
|
|
|
public function setPageOutCallback( $callback ) {
	// Swap in the new callback and return the one it replaces.
	$old = $this->mPageOutCallback;
	$this->mPageOutCallback = $callback;

	return $old;
}
|
181
|
|
|
|
|
182
|
|
|
/** |
|
183
|
|
|
* Sets the action to perform as each page revision is reached. |
|
184
|
|
|
* @param callable $callback |
|
185
|
|
|
* @return callable |
|
186
|
|
|
*/ |
|
187
|
|
|
public function setRevisionCallback( $callback ) {
	// Swap in the new callback and return the one it replaces.
	$old = $this->mRevisionCallback;
	$this->mRevisionCallback = $callback;

	return $old;
}
|
192
|
|
|
|
|
193
|
|
|
/** |
|
194
|
|
|
* Sets the action to perform as each file upload version is reached. |
|
195
|
|
|
* @param callable $callback |
|
196
|
|
|
* @return callable |
|
197
|
|
|
*/ |
|
198
|
|
|
public function setUploadCallback( $callback ) {
	// Swap in the new callback and return the one it replaces.
	$old = $this->mUploadCallback;
	$this->mUploadCallback = $callback;

	return $old;
}
|
203
|
|
|
|
|
204
|
|
|
/** |
|
205
|
|
|
* Sets the action to perform as each log item is reached. |
|
206
|
|
|
* @param callable $callback |
|
207
|
|
|
* @return callable |
|
208
|
|
|
*/ |
|
209
|
|
|
public function setLogItemCallback( $callback ) {
	// Swap in the new callback and return the one it replaces.
	$old = $this->mLogItemCallback;
	$this->mLogItemCallback = $callback;

	return $old;
}
|
214
|
|
|
|
|
215
|
|
|
/** |
|
216
|
|
|
* Sets the action to perform when site info is encountered |
|
217
|
|
|
* @param callable $callback |
|
218
|
|
|
* @return callable |
|
219
|
|
|
*/ |
|
220
|
|
|
public function setSiteInfoCallback( $callback ) {
	// Swap in the new callback and return the one it replaces.
	$old = $this->mSiteInfoCallback;
	$this->mSiteInfoCallback = $callback;

	return $old;
}
|
225
|
|
|
|
|
226
|
|
|
/** |
|
227
|
|
|
* Sets the factory object to use to convert ForeignTitle objects into local |
|
228
|
|
|
* Title objects |
|
229
|
|
|
* @param ImportTitleFactory $factory |
|
230
|
|
|
*/ |
|
231
|
|
|
public function setImportTitleFactory( $factory ) {
	// Replaces whichever factory was installed before (the constructor installs a naive one).
	$this->importTitleFactory = $factory;
}
|
234
|
|
|
|
|
235
|
|
|
/** |
|
236
|
|
|
* Set a target namespace to override the defaults |
|
237
|
|
|
* @param null|int $namespace |
|
238
|
|
|
* @return bool |
|
239
|
|
|
*/ |
|
240
|
|
|
public function setTargetNamespace( $namespace ) {
	if ( is_null( $namespace ) ) {
		// Don't override namespaces
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
		return true;
	}

	// Refuse anything that is not >= 0 or that MWNamespace::exists() does not accept.
	if ( !( $namespace >= 0 ) || !MWNamespace::exists( intval( $namespace ) ) ) {
		return false;
	}

	$nsId = intval( $namespace );
	$this->setImportTitleFactory( new NamespaceImportTitleFactory( $nsId ) );

	return true;
}
|
256
|
|
|
|
|
257
|
|
|
/** |
|
258
|
|
|
* Set a target root page under which all pages are imported |
|
259
|
|
|
* @param null|string $rootpage |
|
260
|
|
|
* @return Status |
|
261
|
|
|
*/ |
|
262
|
|
|
public function setTargetRootPage( $rootpage ) {
	$status = Status::newGood();

	if ( is_null( $rootpage ) ) {
		// No rootpage
		$this->setImportTitleFactory( new NaiveImportTitleFactory() );
		return $status;
	}

	if ( $rootpage === '' ) {
		// An empty string changes nothing: the current title factory stays in place.
		return $status;
	}

	$rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
	$title = Title::newFromText( $rootpage );

	if ( !$title || $title->isExternal() ) {
		$status->fatal( 'import-rootpage-invalid' );
		return $status;
	}

	if ( MWNamespace::hasSubpages( $title->getNamespace() ) ) {
		// set namespace to 'all', so the namespace check in processTitle() can pass
		$this->setTargetNamespace( null );
		$this->setImportTitleFactory( new SubpageImportTitleFactory( $title ) );
		return $status;
	}

	// The root page's namespace has no subpages: report it, naming the namespace.
	global $wgContLang;

	if ( $title->getNamespace() == NS_MAIN ) {
		$displayNSText = wfMessage( 'blanknamespace' )->text();
	} else {
		$displayNSText = $wgContLang->getNsText( $title->getNamespace() );
	}
	$status->fatal( 'import-rootpage-nosubpage', $displayNSText );

	return $status;
}
|
290
|
|
|
|
|
291
|
|
|
/** |
|
292
|
|
|
* @param string $dir |
|
293
|
|
|
*/ |
|
294
|
|
|
public function setImageBasePath( $dir ) {
	// Only stored here; the consumer of this value is outside this part of the file.
	$this->mImageBasePath = $dir;
}
|
297
|
|
|
|
|
298
|
|
|
/** |
|
299
|
|
|
* @param bool $import |
|
300
|
|
|
*/ |
|
301
|
|
|
public function setImportUploads( $import ) {
	// Only stored here; the consumer of this flag is outside this part of the file.
	$this->mImportUploads = $import;
}
|
304
|
|
|
|
|
305
|
|
|
/** |
|
306
|
|
|
* Default per-page callback. Sets up some things related to site statistics |
|
307
|
|
|
* @param array $titleAndForeignTitle Two-element array, with Title object at |
|
308
|
|
|
* index 0 and ForeignTitle object at index 1 |
|
309
|
|
|
* @return bool |
|
310
|
|
|
*/ |
|
311
|
|
|
public function beforeImportPage( $titleAndForeignTitle ) {
	// Only the local Title (index 0) is used here.
	$localTitle = $titleAndForeignTitle[0];
	$wikiPage = WikiPage::factory( $localTitle );

	// Record the page's countable state before any revision is imported;
	// finishImportPage() compares against this entry afterwards.
	$cacheKey = 'title_' . $localTitle->getPrefixedText();
	$this->countableCache[$cacheKey] = $wikiPage->isCountable();

	return true;
}
|
317
|
|
|
|
|
318
|
|
|
/** |
|
319
|
|
|
* Default per-revision callback, performs the import. |
|
320
|
|
|
* @param WikiRevision $revision |
|
321
|
|
|
* @return bool |
|
322
|
|
|
*/ |
|
323
|
|
|
public function importRevision( $revision ) {
	// Pick the failure message first; a successful import returns straight away.
	if ( $revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
		try {
			return $revision->importOldRevision();
		} catch ( MWContentSerializationException $ex ) {
			$errorKey = 'import-error-unserialize';
		}
	} else {
		$errorKey = 'import-error-bad-location';
	}

	// Both failure modes report the same four details about the revision.
	$this->notice(
		$errorKey,
		$revision->getTitle()->getPrefixedText(),
		$revision->getID(),
		$revision->getModel(),
		$revision->getFormat()
	);

	return false;
}
|
346
|
|
|
|
|
347
|
|
|
/** |
|
348
|
|
|
* Default per-log-item callback, performs the import. |
|
349
|
|
|
* @param WikiRevision $revision |
|
350
|
|
|
* @return bool |
|
351
|
|
|
*/ |
|
352
|
|
|
public function importLogItem( $revision ) {
	// Delegates entirely to the revision object and passes its result through.
	$outcome = $revision->importLogItem();

	return $outcome;
}
|
355
|
|
|
|
|
356
|
|
|
/** |
|
357
|
|
|
* Default per-upload callback, delegates to the revision object's importUpload(). |
|
358
|
|
|
* @param WikiRevision $revision |
|
359
|
|
|
* @return bool |
|
360
|
|
|
*/ |
|
361
|
|
|
public function importUpload( $revision ) {
	// Delegates entirely to the revision object and passes its result through.
	$outcome = $revision->importUpload();

	return $outcome;
}
|
364
|
|
|
|
|
365
|
|
|
/** |
|
366
|
|
|
* Mostly for hook use |
|
367
|
|
|
* @param Title $title |
|
368
|
|
|
* @param ForeignTitle $foreignTitle |
|
369
|
|
|
* @param int $revCount |
|
370
|
|
|
* @param int $sRevCount |
|
371
|
|
|
* @param array $pageInfo |
|
372
|
|
|
* @return bool |
|
373
|
|
|
*/ |
|
374
|
|
|
public function finishImportPage( $title, $foreignTitle, $revCount,
	$sRevCount, $pageInfo ) {
	// Update article count statistics (T42009)
	// The normal counting logic in WikiPage->doEditUpdates() is designed for
	// one-revision-at-a-time editing, not bulk imports. In this situation it
	// suffers from issues of replica DB lag. We let WikiPage handle the total page
	// and revision count, and we implement our own custom logic for the
	// article (content page) count.
	$page = WikiPage::factory( $title );
	$page->loadPageData( 'fromdbmaster' );
	$content = $page->getContent();

	if ( $content !== null ) {
		$editInfo = $page->prepareContentForEdit( $content );
		$countKey = 'title_' . $title->getPrefixedText();
		$countable = $page->isCountable( $editInfo );

		// Adjust the counter only when beforeImportPage() recorded a state for
		// this title and that state differs from the current one.
		$hasBaseline = array_key_exists( $countKey, $this->countableCache );
		if ( $hasBaseline && $countable != $this->countableCache[$countKey] ) {
			$delta = (int)$countable - (int)$this->countableCache[$countKey];
			DeferredUpdates::addUpdate( SiteStatsUpdate::factory( [
				'articles' => $delta
			] ) );
		}
	} else {
		wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
			' because WikiPage::getContent() returned null' );
	}

	// Hook handlers receive exactly the arguments this method was called with.
	return Hooks::run( 'AfterImportPage', func_get_args() );
}
|
404
|
|
|
|
|
405
|
|
|
/** |
|
406
|
|
|
* Alternate per-revision callback, for debugging. |
|
407
|
|
|
* @param WikiRevision $revision |
|
408
|
|
|
*/ |
|
409
|
|
|
public function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	// The title is only printed when it is an object.
	$titleText = is_object( $revision->title )
		? $revision->title->getPrefixedText()
		: '<invalid>';
	$this->debug( "-- Title: " . $titleText );

	// Remaining fields are dumped as-is, in this order.
	$fields = [
		'User' => 'user_text',
		'Timestamp' => 'timestamp',
		'Comment' => 'comment',
		'Text' => 'text',
	];
	foreach ( $fields as $label => $property ) {
		$this->debug( "-- " . $label . ": " . $revision->$property );
	}
}
|
421
|
|
|
|
|
422
|
|
|
/** |
|
423
|
|
|
* Notify the callback function of site info |
|
424
|
|
|
* @param array $siteInfo |
|
425
|
|
|
* @return bool|mixed |
|
426
|
|
|
*/ |
|
427
|
|
|
private function siteInfoCallback( $siteInfo ) {
	// Without a registered callback there is nothing to notify.
	if ( !isset( $this->mSiteInfoCallback ) ) {
		return false;
	}

	// The callback receives the site info and this importer.
	return call_user_func_array( $this->mSiteInfoCallback, [ $siteInfo, $this ] );
}
|
435
|
|
|
|
|
436
|
|
|
/** |
|
437
|
|
|
* Notify the callback function when a new "<page>" is reached. |
|
438
|
|
|
* @param array $title Two-element array, with Title object at index 0 and ForeignTitle object at index 1 |
|
439
|
|
|
*/ |
|
440
|
|
|
function pageCallback( $title ) {
	// Without a registered callback there is nothing to notify.
	if ( !isset( $this->mPageCallback ) ) {
		return;
	}

	call_user_func( $this->mPageCallback, $title );
}
|
445
|
|
|
|
|
446
|
|
|
/** |
|
447
|
|
|
* Notify the callback function when a "</page>" is closed. |
|
448
|
|
|
* @param Title $title |
|
449
|
|
|
* @param ForeignTitle $foreignTitle |
|
450
|
|
|
* @param int $revCount |
|
451
|
|
|
* @param int $sucCount Number of revisions for which callback returned true |
|
452
|
|
|
* @param array $pageInfo Associative array of page information |
|
453
|
|
|
*/ |
|
454
|
|
|
private function pageOutCallback( $title, $foreignTitle, $revCount,
	$sucCount, $pageInfo ) {
	// Without a registered callback there is nothing to notify.
	if ( !isset( $this->mPageOutCallback ) ) {
		return;
	}

	// Forward the full argument list unchanged.
	call_user_func_array( $this->mPageOutCallback, func_get_args() );
}
|
461
|
|
|
|
|
462
|
|
|
/** |
|
463
|
|
|
* Notify the callback function of a revision |
|
464
|
|
|
* @param WikiRevision $revision |
|
465
|
|
|
* @return bool|mixed |
|
466
|
|
|
*/ |
|
467
|
|
|
private function revisionCallback( $revision ) {
	// Without a registered callback there is nothing to notify.
	if ( !isset( $this->mRevisionCallback ) ) {
		return false;
	}

	// The callback receives the revision and this importer.
	return call_user_func_array( $this->mRevisionCallback, [ $revision, $this ] );
}
|
475
|
|
|
|
|
476
|
|
|
/** |
|
477
|
|
|
* Notify the callback function of a new log item |
|
478
|
|
|
* @param WikiRevision $revision |
|
479
|
|
|
* @return bool|mixed |
|
480
|
|
|
*/ |
|
481
|
|
|
private function logItemCallback( $revision ) {
	// Without a registered callback there is nothing to notify.
	if ( !isset( $this->mLogItemCallback ) ) {
		return false;
	}

	// The callback receives the log item (as a revision object) and this importer.
	return call_user_func_array( $this->mLogItemCallback, [ $revision, $this ] );
}
|
489
|
|
|
|
|
490
|
|
|
/** |
|
491
|
|
|
* Retrieves the contents of the named attribute of the current element. |
|
492
|
|
|
* @param string $attr The name of the attribute |
|
493
|
|
|
* @return string The value of the attribute or an empty string if it is not set in the current |
|
494
|
|
|
* element. |
|
495
|
|
|
*/ |
|
496
|
|
|
public function nodeAttribute( $attr ) {
	$value = $this->reader->getAttribute( $attr );

	// XMLReader::getAttribute() yields null when the attribute is absent;
	// normalise that to the empty string this method documents as its result.
	return $value === null ? '' : $value;
}
|
499
|
|
|
|
|
500
|
|
|
/** |
|
501
|
|
|
* Shouldn't something like this be built-in to XMLReader? |
|
502
|
|
|
* Fetches text contents of the current element, assuming |
|
503
|
|
|
* no sub-elements or such scary things. |
|
504
|
|
|
* @return string |
|
505
|
|
|
* @access private |
|
506
|
|
|
*/ |
|
507
|
|
View Code Duplication |
public function nodeContents() {
	// A self-closing element has no content to collect.
	if ( $this->reader->isEmptyElement ) {
		return "";
	}

	// Node types whose value contributes to the text content.
	$textTypes = [
		XMLReader::TEXT,
		XMLReader::CDATA,
		XMLReader::SIGNIFICANT_WHITESPACE,
	];

	$collected = "";
	while ( $this->reader->read() ) {
		$nodeType = $this->reader->nodeType;

		// The first end tag encountered finishes the element.
		if ( $nodeType == XMLReader::END_ELEMENT ) {
			return $collected;
		}
		if ( in_array( $nodeType, $textTypes ) ) {
			$collected .= $this->reader->value;
		}
	}

	// Input ran out before any end tag was seen: close the reader and
	// discard whatever was gathered so far.
	$this->reader->close();
	return '';
}
|
527
|
|
|
|
|
528
|
|
|
/** |
|
529
|
|
|
* Primary entry point |
|
530
|
|
|
* @throws MWException |
|
531
|
|
|
* @return bool |
|
532
|
|
|
*/ |
|
533
|
|
|
public function doImport() {
	// Calls to reader->read need to be wrapped in calls to
	// libxml_disable_entity_loader() to avoid local file
	// inclusion attacks (bug 46932).
	$oldDisable = libxml_disable_entity_loader( true );
	$this->reader->read();

	if ( $this->reader->localName != 'mediawiki' ) {
		libxml_disable_entity_loader( $oldDisable );
		throw new MWException( "Expected <mediawiki> tag, got " .
			$this->reader->localName );
	}
	$this->debug( "<mediawiki> tag is correct." );

	$this->debug( "Starting primary dump processing loop." );

	$keepReading = $this->reader->read();
	$skip = false;
	try {
		while ( $keepReading ) {
			$tag = $this->reader->localName;
			$type = $this->reader->nodeType;

			if ( !Hooks::run( 'ImportHandleToplevelXMLTag', [ $this ] ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// next() steps over the unhandled element's whole subtree.
				$keepReading = $this->reader->next();
				$skip = false;
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}
	} finally {
		// Runs for every exit path, including PHP 7 Error throwables that a
		// catch ( Exception ) block would miss: restore the entity loader
		// setting and release the reader.
		libxml_disable_entity_loader( $oldDisable );
		$this->reader->close();
	}

	return true;
}
|
595
|
|
|
|
|
596
|
|
|
/**
 * Read the children of <siteinfo> up to its end tag and pass the result
 * to siteInfoCallback().
 *
 * <namespace> entries are stored in $this->foreignNamespaces, keyed by
 * their "key" attribute; the collected map is also handed to the callback
 * under '_namespaces'.
 */
private function handleSiteInfo() {
	$this->debug( "Enter site info handler." );
	$siteInfo = [];

	// Fields that can just be stuffed in the siteInfo object
	$normalFields = [ 'sitename', 'base', 'generator', 'case' ];

	while ( $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'siteinfo' ) {
			break;
		}

		if ( $tag == 'namespace' ) {
			// Read the attribute before nodeContents() moves the reader forward.
			$nsKey = $this->nodeAttribute( 'key' );
			$this->foreignNamespaces[$nsKey] = $this->nodeContents();
		} elseif ( in_array( $tag, $normalFields ) ) {
			$siteInfo[$tag] = $this->nodeContents();
		}
	}

	$siteInfo['_namespaces'] = $this->foreignNamespaces;
	$this->siteInfoCallback( $siteInfo );
}
|
622
|
|
|
|
|
623
|
|
|
/**
 * Read the children of <logitem> up to its end tag, collect the known
 * fields and hand them to processLogItem().
 */
private function handleLogItem() {
	$this->debug( "Enter log item handler." );
	$logInfo = [];

	// Fields that can just be stuffed in the logInfo array
	$normalFields = [ 'id', 'comment', 'type', 'action', 'timestamp',
		'logtitle', 'params' ];

	while ( $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'logitem' ) {
			break;
		}

		if ( !Hooks::run( 'ImportHandleLogItemXMLTag', [ $this, $logInfo ] ) ) {
			// A hook handler returned false: do nothing for this tag
			continue;
		}

		if ( in_array( $tag, $normalFields ) ) {
			$logInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$logInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled log-item XML tag $tag" );
		}
	}

	$this->processLogItem( $logInfo );
}
|
654
|
|
|
|
|
655
|
|
|
/** |
|
656
|
|
|
* @param array $logInfo |
|
657
|
|
|
* @return bool|mixed |
|
658
|
|
|
*/ |
|
659
|
|
|
private function processLogItem( $logInfo ) {
	// Build a WikiRevision carrying the log item's fields, then pass it to
	// the log item callback.
	$revision = new WikiRevision( $this->config );

	if ( isset( $logInfo['id'] ) ) {
		$revision->setID( $logInfo['id'] );
	}
	// <type> and <action> may be missing from the dump; pass null explicitly
	// instead of reading an undefined array index.
	$revision->setType( isset( $logInfo['type'] ) ? $logInfo['type'] : null );
	$revision->setAction( isset( $logInfo['action'] ) ? $logInfo['action'] : null );
	if ( isset( $logInfo['timestamp'] ) ) {
		$revision->setTimestamp( $logInfo['timestamp'] );
	}
	if ( isset( $logInfo['params'] ) ) {
		$revision->setParams( $logInfo['params'] );
	}
	if ( isset( $logInfo['logtitle'] ) ) {
		// @todo Using Title for non-local titles is a recipe for disaster.
		// We should use ForeignTitle here instead.
		$revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
	}

	$revision->setNoUpdates( $this->mNoUpdates );

	if ( isset( $logInfo['comment'] ) ) {
		$revision->setComment( $logInfo['comment'] );
	}

	if ( isset( $logInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $logInfo['contributor']['ip'] );
	}

	// A username is always set; a placeholder is used when the dump has none.
	if ( !isset( $logInfo['contributor']['username'] ) ) {
		$revision->setUsername( 'Unknown user' );
	} else {
		$revision->setUsername( $logInfo['contributor']['username'] );
	}

	return $this->logItemCallback( $revision );
}
|
698
|
|
|
|
|
699
|
|
|
/**
 * Read the children of <page> up to its end tag.
 *
 * Simple fields are collected into $pageInfo. On the first <revision> or
 * <upload> the title is resolved through processTitle() and the page
 * callback is fired; each revision/upload is then passed to its handler.
 * If the title was resolved, the page-out callback is fired at the end.
 */
private function handlePage() {
	// Handle page data.
	$this->debug( "Enter page handler." );
	$pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];

	// Fields that can just be stuffed in the pageInfo object
	$normalFields = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];

	$skip = false;
	$badTitle = false;

	// When $skip is set, next() steps over the current element's subtree
	// instead of descending into it.
	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'page' ) {
			break;
		}

		$skip = false;

		if ( $badTitle ) {
			// The title is invalid, bail out of this page
			$skip = true;
			continue;
		}

		if ( !Hooks::run( 'ImportHandlePageXMLTag', [ $this, &$pageInfo ] ) ) {
			// A hook handler returned false: do nothing for this tag
			continue;
		}

		if ( in_array( $tag, $normalFields ) ) {
			// An XML snippet:
			// <page>
			//     <id>123</id>
			//     <title>Page</title>
			//     <redirect title="NewTitle"/>
			//     ...
			// Because the redirect tag is built differently, we need special handling for that case.
			if ( $tag == 'redirect' ) {
				$pageInfo[$tag] = $this->nodeAttribute( 'title' );
			} else {
				$pageInfo[$tag] = $this->nodeContents();
			}
			continue;
		}

		if ( $tag == 'revision' || $tag == 'upload' ) {
			if ( !isset( $title ) ) {
				$title = $this->processTitle( $pageInfo['title'],
					isset( $pageInfo['ns'] ) ? $pageInfo['ns'] : null );

				// $title is either an array of two titles or false.
				if ( is_array( $title ) ) {
					$this->pageCallback( $title );
					list( $pageInfo['_title'], $foreignTitle ) = $title;
				} else {
					$badTitle = true;
					$skip = true;
				}
			}

			if ( $title ) {
				if ( $tag == 'revision' ) {
					$this->handleRevision( $pageInfo );
				} else {
					$this->handleUpload( $pageInfo );
				}
			}
			continue;
		}

		if ( $tag != '#text' ) {
			$this->warn( "Unhandled page XML tag $tag" );
			$skip = true;
		}
	}

	// @note $pageInfo is only set if a valid $title is processed above with
	//       no error. If we have a valid $title, then pageCallback is called
	//       above, $pageInfo['title'] is set and we do pageOutCallback here.
	//       If $pageInfo['_title'] is not set, then $foreignTitle is also not
	//       set since they both come from $title above.
	if ( array_key_exists( '_title', $pageInfo ) ) {
		$this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}
}
|
779
|
|
|
|
|
780
|
|
|
/** |
|
781
|
|
|
* @param array $pageInfo |
|
782
|
|
|
*/ |
|
783
|
|
|
private function handleRevision( &$pageInfo ) {
	$this->debug( "Enter revision handler" );
	$revisionInfo = [];

	// Tags whose text content is copied straight into $revisionInfo.
	$normalFields = [ 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' ];

	$skip = false;

	// When $skip is set, next() steps over the current element's subtree.
	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $tag == 'revision' ) {
			break;
		}

		if ( !Hooks::run( 'ImportHandleRevisionXMLTag', [ $this, $pageInfo, $revisionInfo ] ) ) {
			// A hook handler returned false: do nothing for this tag
			continue;
		}

		if ( in_array( $tag, $normalFields ) ) {
			$revisionInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$revisionInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled revision XML tag $tag" );
			$skip = true;
		}
	}

	// Every revision is counted; only those processRevision() accepts count
	// as successful. $pageInfo is a reference, so the caller sees both totals.
	$pageInfo['revisionCount']++;
	if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
		$pageInfo['successfulRevisionCount']++;
	}
}
|
818
|
|
|
|
|
819
|
|
|
/**
 * Build a WikiRevision from the fields parsed out of a <revision> element
 * and pass it to the revision callback.
 *
 * @param array $pageInfo Page-level data; $pageInfo['_title'] becomes the revision title
 * @param array $revisionInfo Fields collected by handleRevision(); every key is optional
 * @return bool|mixed Whatever revisionCallback() returns
 * @throws MWException If the text of a size-checked content model is larger
 *  than $wgMaxArticleSize kilobytes
 */
private function processRevision( $pageInfo, $revisionInfo ) {
	global $wgMaxArticleSize;

	// Make sure revisions won't violate $wgMaxArticleSize, which could lead to
	// database errors and instability. Testing for revisions with only listed
	// content models, as other content models might use serialization formats
	// which aren't checked against $wgMaxArticleSize.
	// The text is optional (see the isset() further down), so only measure it
	// when it is actually present; otherwise there is nothing to check.
	if ( isset( $revisionInfo['text'] ) &&
		( !isset( $revisionInfo['model'] ) ||
		in_array( $revisionInfo['model'], [
			'wikitext',
			'css',
			'json',
			'javascript',
			'text',
			''
		] ) ) &&
		strlen( $revisionInfo['text'] ) > $wgMaxArticleSize * 1024
	) {
		throw new MWException( 'The text of ' .
			( isset( $revisionInfo['id'] ) ?
				"the revision with ID $revisionInfo[id]" :
				'a revision'
			) . " exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
	}

	$revision = new WikiRevision( $this->config );

	if ( isset( $revisionInfo['id'] ) ) {
		$revision->setID( $revisionInfo['id'] );
	}
	if ( isset( $revisionInfo['model'] ) ) {
		$revision->setModel( $revisionInfo['model'] );
	}
	if ( isset( $revisionInfo['format'] ) ) {
		$revision->setFormat( $revisionInfo['format'] );
	}
	$revision->setTitle( $pageInfo['_title'] );

	if ( isset( $revisionInfo['text'] ) ) {
		// Let the content handler transform the raw text before it is stored.
		$handler = $revision->getContentHandler();
		$text = $handler->importTransform(
			$revisionInfo['text'],
			$revision->getFormat() );

		$revision->setText( $text );
	}

	// A revision without a timestamp is stamped with the current time.
	if ( isset( $revisionInfo['timestamp'] ) ) {
		$revision->setTimestamp( $revisionInfo['timestamp'] );
	} else {
		$revision->setTimestamp( wfTimestampNow() );
	}

	if ( isset( $revisionInfo['comment'] ) ) {
		$revision->setComment( $revisionInfo['comment'] );
	}

	// Presence of the 'minor' key is what marks the edit as minor.
	if ( isset( $revisionInfo['minor'] ) ) {
		$revision->setMinor( true );
	}

	// Prefer the IP, then the username, then a fixed placeholder name.
	if ( isset( $revisionInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $revisionInfo['contributor']['ip'] );
	} elseif ( isset( $revisionInfo['contributor']['username'] ) ) {
		$revision->setUsername( $revisionInfo['contributor']['username'] );
	} else {
		$revision->setUsername( 'Unknown user' );
	}
	$revision->setNoUpdates( $this->mNoUpdates );

	return $this->revisionCallback( $revision );
}
|
894
|
|
|
|
|
895
|
|
|
/**
 * Parse one <upload> element from the XML reader and, when upload import
 * is enabled, hand the collected data to processUpload().
 *
 * @param array &$pageInfo Page-level data, passed on to hooks and processUpload()
 * @return mixed Result of processUpload(), or null when $this->mImportUploads is falsy
 */
private function handleUpload( &$pageInfo ) {
	$this->debug( "Enter upload handler" );

	$uploadInfo = [];
	// Children whose node contents are stored as-is under the tag name.
	$simpleTags = [ 'timestamp', 'comment', 'filename', 'text',
		'src', 'size', 'sha1base36', 'archivename', 'rel' ];
	// Once set, the reader advances with next() instead of read().
	// The flag is never cleared again inside this loop.
	$skipSubtree = false;

	while ( true ) {
		$advanced = $skipSubtree ? $this->reader->next() : $this->reader->read();
		if ( !$advanced ) {
			break;
		}

		$name = $this->reader->localName;
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $name == 'upload' ) {
			// Closing </upload>: everything has been collected.
			break;
		}

		if ( !Hooks::run( 'ImportHandleUploadXMLTag', [
			$this, $pageInfo
		] ) ) {
			// A hook handler returned false; nothing more to do for this node.
			continue;
		}

		if ( in_array( $name, $simpleTags ) ) {
			$uploadInfo[$name] = $this->nodeContents();
		} elseif ( $name == 'contributor' ) {
			$uploadInfo['contributor'] = $this->handleContributor();
		} elseif ( $name == 'contents' ) {
			// Inline file data: only base64-encoded contents are used, and
			// they are written out to a temporary file.
			$raw = $this->nodeContents();
			$encoding = $this->reader->getAttribute( 'encoding' );
			if ( $encoding === 'base64' ) {
				$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $raw ) );
				$uploadInfo['isTempSrc'] = true;
			}
		} elseif ( $name != '#text' ) {
			$this->warn( "Unhandled upload XML tag $name" );
			$skipSubtree = true;
		}
	}

	// A file found under the image base path replaces any inline source.
	// NOTE(review): 'rel' is taken from the XML and joined to the base path
	// without normalisation here - confirm it is validated elsewhere.
	if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
		$localPath = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
		if ( file_exists( $localPath ) ) {
			$uploadInfo['fileSrc'] = $localPath;
			$uploadInfo['isTempSrc'] = false;
		}
	}

	if ( !$this->mImportUploads ) {
		return null;
	}

	return $this->processUpload( $pageInfo, $uploadInfo );
}
|
949
|
|
|
|
|
950
|
|
|
/**
 * Write the given data to a newly created temporary file.
 *
 * The file is created in wfTempDir() with the prefix 'importupload'.
 * This method does not delete the file afterwards.
 *
 * @param string $contents Data to write
 * @return string Path of the temporary file, as returned by tempnam()
 */
private function dumpTemp( $contents ) {
	$tempPath = tempnam( wfTempDir(), 'importupload' );
	file_put_contents( $tempPath, $contents );

	return $tempPath;
}
|
959
|
|
|
|
|
960
|
|
|
/**
 * Build a WikiRevision describing an uploaded file and pass it to the
 * upload callback.
 *
 * @param array $pageInfo Page-level data; '_title' and 'id' are read
 * @param array $uploadInfo Fields collected by handleUpload(); every key is optional
 * @return mixed Whatever the upload callback returns
 */
private function processUpload( $pageInfo, $uploadInfo ) {
	// None of these fields is guaranteed to be present in the dump. Supply
	// explicit nulls so the setters below receive the same value as before
	// without raising undefined-index notices. $uploadInfo is a by-value
	// copy, so the caller's array is not touched.
	$uploadInfo += [
		'timestamp' => null,
		'filename' => null,
		'src' => null,
		'size' => null,
		'comment' => null,
	];
	$pageId = isset( $pageInfo['id'] ) ? $pageInfo['id'] : null;

	$revision = new WikiRevision( $this->config );
	$text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';

	$revision->setTitle( $pageInfo['_title'] );
	$revision->setID( $pageId );
	$revision->setTimestamp( $uploadInfo['timestamp'] );
	$revision->setText( $text );
	$revision->setFilename( $uploadInfo['filename'] );
	if ( isset( $uploadInfo['archivename'] ) ) {
		$revision->setArchiveName( $uploadInfo['archivename'] );
	}
	$revision->setSrc( $uploadInfo['src'] );
	if ( isset( $uploadInfo['fileSrc'] ) ) {
		// The second argument tells the revision whether the source is a temporary file.
		$revision->setFileSrc( $uploadInfo['fileSrc'],
			!empty( $uploadInfo['isTempSrc'] ) );
	}
	if ( isset( $uploadInfo['sha1base36'] ) ) {
		$revision->setSha1Base36( $uploadInfo['sha1base36'] );
	}
	$revision->setSize( intval( $uploadInfo['size'] ) );
	$revision->setComment( $uploadInfo['comment'] );

	// Both are applied independently when both are present.
	if ( isset( $uploadInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $uploadInfo['contributor']['ip'] );
	}
	if ( isset( $uploadInfo['contributor']['username'] ) ) {
		$revision->setUsername( $uploadInfo['contributor']['username'] );
	}
	$revision->setNoUpdates( $this->mNoUpdates );

	return call_user_func( $this->mUploadCallback, $revision );
}
|
998
|
|
|
|
|
999
|
|
|
/**
 * Parse a <contributor> element from the XML reader.
 *
 * @return array Node contents keyed by tag name for any of 'id', 'ip'
 *  and 'username' that were found; empty for a self-closing element
 */
private function handleContributor() {
	$contributor = [];

	// A self-closing <contributor/> has no children to read.
	if ( !$this->reader->isEmptyElement ) {
		$wanted = [ 'id', 'ip', 'username' ];

		while ( $this->reader->read() ) {
			$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->localName == 'contributor';
			if ( $atClosingTag ) {
				break;
			}

			// Other child nodes are ignored.
			$name = $this->reader->localName;
			if ( in_array( $name, $wanted ) ) {
				$contributor[$name] = $this->nodeContents();
			}
		}
	}

	return $contributor;
}
|
1024
|
|
|
|
|
1025
|
|
|
/**
 * Turn a title string from the dump into a local title and check that a
 * page may be imported under it.
 *
 * On rejection a notice is emitted and false is returned.
 *
 * @param string $text Title text from the dump
 * @param string|null $ns Namespace from the dump; passed through intval()
 * @return array|bool [ local title, foreign title ] on success, false otherwise
 */
private function processTitle( $text, $ns = null ) {
	// Without foreign namespace data, use the naive factory.
	$foreignTitleFactory = is_null( $this->foreignNamespaces )
		? new NaiveForeignTitleFactory()
		: new NamespaceAwareForeignTitleFactory( $this->foreignNamespaces );

	$foreignTitle = $foreignTitleFactory->createForeignTitle( $text, intval( $ns ) );
	$title = $this->importTitleFactory->createTitleFromForeignTitle( $foreignTitle );

	$commandLineMode = $this->config->get( 'CommandLineMode' );

	if ( is_null( $title ) ) {
		# Invalid page title? Ignore the page
		$this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
		return false;
	}

	// Pick the message for the first failing check, if any.
	$errorKey = null;
	if ( $title->isExternal() ) {
		$errorKey = 'import-error-interwiki';
	} elseif ( !$title->canExist() ) {
		$errorKey = 'import-error-special';
	} elseif ( !$title->userCan( 'edit' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot edit this page
		$errorKey = 'import-error-edit';
	} elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$commandLineMode ) {
		# Do not import if the importing wiki user cannot create this page
		$errorKey = 'import-error-create';
	}

	if ( $errorKey !== null ) {
		$this->notice( $errorKey, $title->getPrefixedText() );
		return false;
	}

	return [ $title, $foreignTitle ];
}
|
1067
|
|
|
} |
|
1068
|
|
|
|
Declaring only one property per statement makes it easier to add doc comments later on.
It is also recommended by PSR-2, so it is a common style that many people expect.