Completed
Branch master (939199)
by
unknown
39:35
created

includes/import/WikiRevision.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * MediaWiki page data importer.
4
 *
5
 * Copyright © 2003,2005 Brion Vibber <[email protected]>
6
 * https://www.mediawiki.org/
7
 *
8
 * This program is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License along
19
 * with this program; if not, write to the Free Software Foundation, Inc.,
20
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21
 * http://www.gnu.org/copyleft/gpl.html
22
 *
23
 * @file
24
 * @ingroup SpecialPage
25
 */
26
27
/**
28
 * Represents a revision, log entry or upload during the import process.
29
 * This class sticks closely to the structure of the XML dump.
30
 *
31
 * @ingroup SpecialPage
32
 */
33
class WikiRevision {
	/** @todo Unused? */
	public $importer = null;

	/** @var Title Page the imported item belongs to; set through setTitle() */
	public $title = null;

	/** @var int */
	public $id = 0;

	/** @var string Timestamp in TS_MW format (see setTimestamp()) */
	public $timestamp = "20010115000000";

	/**
	 * @var int
	 * @todo Can't find any uses. Public, because that's suspicious. Get clarity. */
	public $user = 0;

	/** @var string User name or IP address (both setUsername() and setUserIP() write here) */
	public $user_text = "";

	/** @var User */
	public $userObj = null;

	/** @var string Content model ID; resolved lazily from the title by getModel() */
	public $model = null;

	/** @var string Serialization format; resolved lazily from the content handler by getFormat() */
	public $format = null;

	/** @var string */
	public $text = "";

	/** @var int */
	protected $size;

	/** @var Content Lazily unserialized from $text by getContent() */
	public $content = null;

	/** @var ContentHandler Lazily looked up by getContentHandler() */
	protected $contentHandler = null;

	/** @var string */
	public $comment = "";

	/** @var bool */
	public $minor = false;

	/** @var string */
	public $type = "";

	/** @var string */
	public $action = "";

	/** @var string */
	public $params = "";

	/** @var string */
	public $fileSrc = '';

	/** @var bool|string */
	public $sha1base36 = false;

	/**
	 * @var bool Value returned by isTempSrc(); kept in sync with $fileIsTemp by setFileSrc()
	 */
	public $isTemp = false;

	/** @var string */
	public $archiveName = '';

	/** @var string */
	protected $filename;

	/** @var mixed */
	protected $src;

	/** @todo Unused? */
	public $fileIsTemp;

	/** @var bool When true, importOldRevision() does not run the edit updates */
	private $mNoUpdates = false;

	/** @var Config $config */
	private $config;

	/**
	 * @param Config $config Configuration; downloadSource() reads 'EnableUploads' from it
	 */
	public function __construct( Config $config ) {
		$this->config = $config;
	}

	/**
	 * Set the title this item belongs to.
	 *
	 * @param Title $title
	 * @throws MWException If $title is null or otherwise not an object
	 */
	public function setTitle( $title ) {
		if ( is_object( $title ) ) {
			$this->title = $title;
		} elseif ( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. "
				. "You may need to adjust \$wgLegalTitleChars." );
		} else {
			throw new MWException( "WikiRevision given non-object title in import." );
		}
	}

	/**
	 * @param int $id
	 */
	public function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * Store the timestamp, converted to TS_MW format via wfTimestamp().
	 *
	 * @param string $ts For example "2003-08-05T18:30:02Z"
	 */
	public function setTimestamp( $ts ) {
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	/**
	 * @param string $user
	 */
	public function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * @param User $user
	 */
	public function setUserObj( $user ) {
		$this->userObj = $user;
	}

	/**
	 * Store an IP address as the user text (same field as setUsername()).
	 *
	 * @param string $ip
	 */
	public function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/**
	 * @param string $model
	 */
	public function setModel( $model ) {
		$this->model = $model;
	}

	/**
	 * @param string $format
	 */
	public function setFormat( $format ) {
		$this->format = $format;
	}

	/**
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->text = $text;
	}

	/**
	 * @param string $text
	 */
	public function setComment( $text ) {
		$this->comment = $text;
	}

	/**
	 * @param bool $minor Cast to bool before being stored
	 */
	public function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/**
	 * Set the remote source that downloadSource() fetches.
	 *
	 * @param mixed $src
	 */
	public function setSrc( $src ) {
		$this->src = $src;
	}

	/**
	 * Set the local source file used by importUpload().
	 *
	 * @param string $src
	 * @param bool $isTemp Whether importUpload() should auto-collect the file
	 */
	public function setFileSrc( $src, $isTemp ) {
		$this->fileSrc = $src;
		$this->fileIsTemp = $isTemp;
		// isTempSrc() reads $isTemp; without this assignment it would always
		// report false and temporary sources would never be cleaned up.
		$this->isTemp = $isTemp;
	}

	/**
	 * @param string $sha1base36
	 */
	public function setSha1Base36( $sha1base36 ) {
		$this->sha1base36 = $sha1base36;
	}

	/**
	 * @param string $filename
	 */
	public function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/**
	 * @param string $archiveName
	 */
	public function setArchiveName( $archiveName ) {
		$this->archiveName = $archiveName;
	}

	/**
	 * @param int $size Normalised with intval() before being stored
	 */
	public function setSize( $size ) {
		$this->size = intval( $size );
	}

	/**
	 * @param string $type
	 */
	public function setType( $type ) {
		$this->type = $type;
	}

	/**
	 * @param string $action
	 */
	public function setAction( $action ) {
		$this->action = $action;
	}

	/**
	 * @param string $params Stored as-is and written to log_params by importLogItem()
	 */
	public function setParams( $params ) {
		$this->params = $params;
	}

	/**
	 * @param bool $noupdates
	 */
	public function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * @return Title
	 */
	public function getTitle() {
		return $this->title;
	}

	/**
	 * @return int
	 */
	public function getID() {
		return $this->id;
	}

	/**
	 * @return string
	 */
	public function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return string The user text (name or IP address), not a User object
	 */
	public function getUser() {
		return $this->user_text;
	}

	/**
	 * @return User
	 */
	public function getUserObj() {
		return $this->userObj;
	}

	/**
	 * @return string
	 *
	 * @deprecated Since 1.21, use getContent() instead.
	 */
	public function getText() {
		wfDeprecated( __METHOD__, '1.21' );

		return $this->text;
	}

	/**
	 * Get the content handler for this item's model, looking it up on first use.
	 *
	 * @return ContentHandler
	 */
	public function getContentHandler() {
		if ( is_null( $this->contentHandler ) ) {
			$this->contentHandler = ContentHandler::getForModelID( $this->getModel() );
		}

		return $this->contentHandler;
	}

	/**
	 * Get the content object, unserializing the stored text on first use.
	 *
	 * @return Content
	 */
	public function getContent() {
		if ( is_null( $this->content ) ) {
			$handler = $this->getContentHandler();
			$this->content = $handler->unserializeContent( $this->text, $this->getFormat() );
		}

		return $this->content;
	}

	/**
	 * Get the content model, falling back to the title's content model when
	 * none was set explicitly.
	 *
	 * @return string
	 */
	public function getModel() {
		if ( is_null( $this->model ) ) {
			$this->model = $this->getTitle()->getContentModel();
		}

		return $this->model;
	}

	/**
	 * Get the serialization format, falling back to the content handler's
	 * default format when none was set explicitly.
	 *
	 * @return string
	 */
	public function getFormat() {
		if ( is_null( $this->format ) ) {
			$this->format = $this->getContentHandler()->getDefaultFormat();
		}

		return $this->format;
	}

	/**
	 * @return string
	 */
	public function getComment() {
		return $this->comment;
	}

	/**
	 * @return bool
	 */
	public function getMinor() {
		return $this->minor;
	}

	/**
	 * @return mixed
	 */
	public function getSrc() {
		return $this->src;
	}

	/**
	 * Get the expected SHA-1 of the upload, converted from base 36 to base 16.
	 *
	 * @return bool|string False when no base-36 hash has been set
	 */
	public function getSha1() {
		if ( $this->sha1base36 ) {
			return Wikimedia\base_convert( $this->sha1base36, 36, 16 );
		}
		return false;
	}

	/**
	 * @return string
	 */
	public function getFileSrc() {
		return $this->fileSrc;
	}

	/**
	 * @return bool Whether the source set through setFileSrc() is temporary
	 */
	public function isTempSrc() {
		return $this->isTemp;
	}

	/**
	 * @return mixed
	 */
	public function getFilename() {
		return $this->filename;
	}

	/**
	 * @return string
	 */
	public function getArchiveName() {
		return $this->archiveName;
	}

	/**
	 * @return mixed
	 */
	public function getSize() {
		return $this->size;
	}

	/**
	 * @return string
	 */
	public function getType() {
		return $this->type;
	}

	/**
	 * @return string
	 */
	public function getAction() {
		return $this->action;
	}

	/**
	 * @return string
	 */
	public function getParams() {
		return $this->params;
	}

	/**
	 * Resolve the acting user for an import.
	 *
	 * Uses the user object if one was set, otherwise tries to build one from
	 * the user text. When neither yields a user, the ID is 0 and the raw user
	 * text is used as the name.
	 *
	 * @return array [ User|null|bool $user, int $userId, string $userText ];
	 *  $user is falsy when no user could be resolved
	 */
	private function resolveImportUser() {
		$user = $this->getUserObj() ?: User::newFromName( $this->getUser() );
		if ( $user ) {
			return [ $user, intval( $user->getId() ), $user->getName() ];
		}

		return [ $user, 0, $this->getUser() ];
	}

	/**
	 * Insert this item as a revision of its page, creating the page if needed.
	 *
	 * @return bool False if a matching revision already exists or no page ID
	 *  could be obtained; true once the revision has been inserted
	 */
	public function importOldRevision() {
		$dbw = wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		list( $user, $userId, $userText ) = $this->resolveImportUser();
		if ( !$user ) {
			// doEditUpdates() below still needs a User object
			$user = new User;
		}

		// avoid memory leak...?
		Title::clearCaches();

		$page = WikiPage::factory( $this->title );
		$page->loadPageData( 'fromdbmaster' );
		if ( !$page->exists() ) {
			// must create the page...
			$pageId = $page->insertOn( $dbw );
			$created = true;
		} else {
			$pageId = $page->getId();
			$created = false;

			$prior = $dbw->selectField( 'revision', '1',
				[ 'rev_page' => $pageId,
					'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
					'rev_user_text' => $userText,
					'rev_comment' => $this->getComment() ],
				__METHOD__
			);
			if ( $prior ) {
				// @todo FIXME: This could fail slightly for multiple matches :P
				wfDebug( __METHOD__ . ": skipping existing revision for [[" .
					$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
				return false;
			}
		}

		if ( !$pageId ) {
			// This seems to happen if two clients simultaneously try to import the
			// same page
			wfDebug( __METHOD__ . ': got invalid $pageId when importing revision of [[' .
				$this->title->getPrefixedText() . ']], timestamp ' . $this->timestamp . "\n" );
			return false;
		}

		// Select previous version to make size diffs correct
		// @todo This assumes that multiple revisions of the same page are imported
		// in order from oldest to newest.
		$prevId = $dbw->selectField( 'revision', 'rev_id',
			[
				'rev_page' => $pageId,
				'rev_timestamp <= ' . $dbw->addQuotes( $dbw->timestamp( $this->timestamp ) ),
			],
			__METHOD__,
			[ 'ORDER BY' => [
					'rev_timestamp DESC',
					'rev_id DESC', // timestamp is not unique per page
				]
			]
		);

		# @todo FIXME: Use original rev_id optionally (better for backups)
		# Insert the row
		$revision = new Revision( [
			'title' => $this->title,
			'page' => $pageId,
			'content_model' => $this->getModel(),
			'content_format' => $this->getFormat(),
			// XXX: just set 'content' => $this->getContent()?
			'text' => $this->getContent()->serialize( $this->getFormat() ),
			'comment' => $this->getComment(),
			'user' => $userId,
			'user_text' => $userText,
			'timestamp' => $this->timestamp,
			'minor_edit' => $this->minor,
			'parent_id' => $prevId,
			] );
		$revision->insertOn( $dbw );
		$changed = $page->updateIfNewerOn( $dbw, $revision );

		if ( $changed !== false && !$this->mNoUpdates ) {
			wfDebug( __METHOD__ . ": running updates\n" );
			// countable/oldcountable stuff is handled in WikiImporter::finishImportPage
			$page->doEditUpdates(
				$revision,
				$user,
				[ 'created' => $created, 'oldcountable' => 'no-change' ]
			);
		}

		return true;
	}

	/**
	 * Insert this item as a row of the logging table.
	 *
	 * @return bool False if no title is set or a matching log row already
	 *  exists; true once the row has been inserted
	 */
	public function importLogItem() {
		$dbw = wfGetDB( DB_MASTER );

		list( , $userId, $userText ) = $this->resolveImportUser();

		# @todo FIXME: This will not record autoblocks
		if ( !$this->getTitle() ) {
			wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
				$this->timestamp . "\n" );
			return false;
		}
		# Check if it exists already
		// @todo FIXME: Use original log ID (better for backups)
		$prior = $dbw->selectField( 'logging', '1',
			[ 'log_type' => $this->getType(),
				'log_action' => $this->getAction(),
				'log_timestamp' => $dbw->timestamp( $this->timestamp ),
				'log_namespace' => $this->getTitle()->getNamespace(),
				'log_title' => $this->getTitle()->getDBkey(),
				'log_comment' => $this->getComment(),
				# 'log_user_text' => $this->user_text,
				'log_params' => $this->params ],
			__METHOD__
		);
		// @todo FIXME: This could fail slightly for multiple matches :P
		if ( $prior ) {
			wfDebug( __METHOD__
				. ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp "
				. $this->timestamp . "\n" );
			return false;
		}
		$log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
		$data = [
			'log_id' => $log_id,
			'log_type' => $this->type,
			'log_action' => $this->action,
			'log_timestamp' => $dbw->timestamp( $this->timestamp ),
			'log_user' => $userId,
			'log_user_text' => $userText,
			'log_namespace' => $this->getTitle()->getNamespace(),
			'log_title' => $this->getTitle()->getDBkey(),
			'log_comment' => $this->getComment(),
			'log_params' => $this->params
		];
		$dbw->insert( 'logging', $data, __METHOD__ );

		return true;
	}

	/**
	 * Import this item as a file upload, either as the current version or as
	 * an archived version.
	 *
	 * @return bool True if the upload status is good, false on any failure
	 */
	public function importUpload() {
		# Construct a file
		$archiveName = $this->getArchiveName();
		if ( $archiveName ) {
			wfDebug( __METHOD__ . ": Importing archived file as $archiveName\n" );
			$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
				RepoGroup::singleton()->getLocalRepo(), $archiveName );
		} else {
			$file = wfLocalFile( $this->getTitle() );
			$file->load( File::READ_LATEST );
			wfDebug( __METHOD__ . ': Importing new file as ' . $file->getName() . "\n" );
			// An existing file newer than this one means this one goes to the archive
			if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
				$archiveName = $file->getTimestamp() . '!' . $file->getName();
				$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
					RepoGroup::singleton()->getLocalRepo(), $archiveName );
				wfDebug( __METHOD__ . ": File already exists; importing as $archiveName\n" );
			}
		}
		if ( !$file ) {
			wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
			return false;
		}

		# Get the file source or download if necessary
		$source = $this->getFileSrc();
		$autoDeleteSource = $this->isTempSrc();
		if ( !strlen( $source ) ) {
			$source = $this->downloadSource();
			$autoDeleteSource = true;
		}
		// downloadSource() returns false on failure
		if ( $source === false || !strlen( $source ) ) {
			wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
			return false;
		}

		$tmpFile = new TempFSFile( $source );
		if ( $autoDeleteSource ) {
			$tmpFile->autocollect();
		}

		// Compare strictly against false: getSha1() returns false|string, and a
		// loose truthiness test is ambiguous for strings.
		$sha1 = $this->getSha1();
		if ( $sha1 !== false ) {
			$sha1File = sha1_file( $source );
			// Leading zeros are stripped because the converted expected hash
			// is compared without them.
			if ( $sha1File === false || $sha1 !== ltrim( $sha1File, '0' ) ) {
				wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
				return false;
			}
		}

		$user = $this->getUserObj() ?: User::newFromName( $this->getUser() );

		# Do the actual upload
		if ( $archiveName ) {
			$status = $file->uploadOld( $source, $archiveName,
				$this->getTimestamp(), $this->getComment(), $user );
		} else {
			$flags = 0;
			$status = $file->upload( $source, $this->getComment(), $this->getComment(),
				$flags, false, $this->getTimestamp(), $user );
		}

		if ( $status->isGood() ) {
			wfDebug( __METHOD__ . ": Successful\n" );
			return true;
		} else {
			wfDebug( __METHOD__ . ': failed: ' . $status->getHTML() . "\n" );
			return false;
		}
	}

	/**
	 * Fetch the remote source (see setSrc()) into a new temporary file.
	 *
	 * @return bool|string Path of the temporary file, or false when uploads
	 *  are disabled, the temporary file cannot be created or written, or the
	 *  fetch yields no data
	 */
	public function downloadSource() {
		if ( !$this->config->get( 'EnableUploads' ) ) {
			return false;
		}

		$tempo = tempnam( wfTempDir(), 'download' );
		if ( $tempo === false ) {
			wfDebug( "IMPORT: couldn't create a temp file\n" );
			return false;
		}

		$f = fopen( $tempo, 'wb' );
		if ( !$f ) {
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			// tempnam() already created the file; don't leave it behind
			if ( file_exists( $tempo ) ) {
				unlink( $tempo );
			}
			return false;
		}

		// @todo FIXME!
		$src = $this->getSrc();
		$data = Http::get( $src, [], __METHOD__ );
		if ( !$data ) {
			wfDebug( "IMPORT: couldn't fetch source $src\n" );
			fclose( $f );
			unlink( $tempo );
			return false;
		}

		$written = fwrite( $f, $data );
		fclose( $f );
		// A failed or short write would hand a truncated file to the caller
		if ( $written !== strlen( $data ) ) {
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			unlink( $tempo );
			return false;
		}

		return $tempo;
	}

}
720