Completed
Branch master (939199)
by
unknown
39:35
created

maintenance/dumpIterator.php (1 issue)

all properties have been explicitly declared.

Bug Major

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * Take page text out of an XML dump file and perform some operation on it.
4
 * Used as a base class for CompareParsers and PreprocessDump.
5
 * We implement below the simple task of searching inside a dump.
6
 *
7
 * Copyright © 2011 Platonides
8
 * https://www.mediawiki.org/
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License along
21
 * with this program; if not, write to the Free Software Foundation, Inc.,
22
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23
 * http://www.gnu.org/copyleft/gpl.html
24
 *
25
 * @file
26
 * @ingroup Maintenance
27
 */
28
29
require_once __DIR__ . '/Maintenance.php';
30
31
/**
32
 * Base class for iterating over a dump.
33
 *
34
 * @ingroup Maintenance
35
 */
36
abstract class DumpIterator extends Maintenance {

	/** @var int Number of revisions handled so far; restarted at 1 once the --from page is reached */
	private $count = 0;

	/** @var float|null Value of microtime( true ) taken just before the dump import starts */
	private $startTime;

	/**
	 * Value of the --from option while pages are still being skipped,
	 * null when the option was not given or once the starting page was reached.
	 *
	 * This used to be created dynamically in execute(). It is protected
	 * rather than private so that subclasses which touched the formerly
	 * dynamic property keep working.
	 *
	 * @var string|null
	 */
	protected $from = null;

	/**
	 * Register the description and the options shared by all dump iterators.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Does something with a dump' );
		$this->addOption( 'file', 'File with text to run.', false, true );
		$this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
		$this->addOption( 'from', 'Article from XML dump to start from.', false, true );
	}

	/**
	 * Entry point: process either a single text file (--file) or every
	 * revision of an XML dump read from stdin (--dump=-), then report
	 * timing and memory statistics through error().
	 */
	public function execute() {
		// Exactly one of --file and --dump has to be given.
		if ( !( $this->hasOption( 'file' ) ^ $this->hasOption( 'dump' ) ) ) {
			$this->error( "You must provide a file or dump", true );
		}

		$this->checkOptions();

		if ( $this->hasOption( 'file' ) ) {
			$file = $this->getOption( 'file' );
			$revision = new WikiRevision( $this->getConfig() );

			$text = file_get_contents( $file );
			if ( $text === false ) {
				// Do not feed boolean false to the revision as if it were page text.
				$this->error( "Unable to read file $file", true );

				return;
			}

			$revision->setText( $text );
			// The title is derived from the file name: strip ".txt" and URL-decode it.
			$revision->setTitle( Title::newFromText(
				rawurldecode( basename( $file, '.txt' ) )
			) );
			$this->handleRevision( $revision );

			return;
		}

		$this->startTime = microtime( true );

		if ( $this->getOption( 'dump' ) !== '-' ) {
			$this->error( "Sorry, I don't support dump filenames yet. "
				. "Use - and provide it on stdin on the meantime.", true );

			// Never reach the importer without a source, even if error() returns.
			return;
		}
		$source = new ImportStreamSource( $this->getStdin() );
		$importer = new WikiImporter( $source, $this->getConfig() );

		$importer->setRevisionCallback(
			[ $this, 'handleRevision' ] );

		$this->from = $this->getOption( 'from', null );
		$this->count = 0;
		$importer->doImport();

		$this->conclusions();

		$delta = microtime( true ) - $this->startTime;
		$this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}

		# Perform the memory_get_peak_usage() when all the other data has been
		# output so there's no damage if it dies.
		$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
	}

	/**
	 * When the script runs without a database (DB_NONE), switch off the
	 * database-backed message lookup, use a null localisation cache store
	 * and register disableInterwikis() on the InterwikiLoadPrefix hook.
	 */
	public function finalSetup() {
		parent::finalSetup();

		if ( $this->getDbType() == Maintenance::DB_NONE ) {
			global $wgUseDatabaseMessages, $wgLocalisationCacheConf, $wgHooks;
			$wgUseDatabaseMessages = false;
			$wgLocalisationCacheConf['storeClass'] = 'LCStoreNull';
			$wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';
		}
	}

	/**
	 * InterwikiLoadPrefix hook handler registered by finalSetup().
	 *
	 * @param string $prefix Unused
	 * @param mixed &$data Unused
	 * @return bool Always false
	 */
	public static function disableInterwikis( $prefix, &$data ) {
		# Title::newFromText will check on each namespaced article if it's an interwiki.
		# We always answer that it is not.

		return false;
	}

	/**
	 * Callback function for each revision, child classes should override
	 * processRevision instead.
	 *
	 * Revisions without a title are reported and dropped. While --from is
	 * pending, revisions are counted but skipped until the title matches.
	 *
	 * @param WikiRevision $rev
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );

			return;
		}

		$this->count++;
		if ( isset( $this->from ) ) {
			// Compare as strings, strictly: a loose comparison would treat
			// numeric-looking titles such as "10" and "1e1" as equal.
			// Relies on the title object being convertible to a string,
			// which the original loose comparison needed as well.
			if ( $this->from !== (string)$title ) {
				return;
			}
			$this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );

			// Start counting again from the first page actually processed.
			$this->count = 1;
			$this->from = null;
		}

		$this->processRevision( $rev );
	}

	/**
	 * Stub function for processing additional options.
	 * Called by execute() before any revision is handled.
	 */
	public function checkOptions() {
		return;
	}

	/**
	 * Stub function for giving data about what was computed.
	 * Called by execute() after the dump import has finished.
	 */
	public function conclusions() {
		return;
	}

	/**
	 * Core function which does whatever the maintenance script is designed to do.
	 *
	 * @param WikiRevision $rev Revision handed over by handleRevision()
	 */
	abstract public function processRevision( $rev );
}
157
158
/**
159
 * Maintenance script that runs a regex in the revisions from a dump.
160
 *
161
 * @ingroup Maintenance
162
 */
163
class SearchDump extends DumpIterator {

	/**
	 * Extend the inherited options with the description of this script
	 * and its 'regex' option.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Runs a regex in the revisions from a dump' );
		$this->addOption( 'regex', 'Searching regex', true, true );
	}

	/**
	 * This script declares that it needs no database.
	 *
	 * @return int Maintenance::DB_NONE
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Print one line for the revision if its search-index text matches
	 * the pattern given in the 'regex' option; stay silent otherwise.
	 *
	 * @param Revision $rev Revision-like object offering getContent(),
	 *   getTitle() and getTimestamp()
	 */
	public function processRevision( $rev ) {
		$pattern = $this->getOption( 'regex' );
		$haystack = $rev->getContent()->getTextForSearchIndex();

		if ( !preg_match( $pattern, $haystack ) ) {
			return;
		}

		$line = $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n";
		$this->output( $line );
	}
}
184
185
// Name of the maintenance class defined in this file that should be run.
$maintClass = 'SearchDump';

// Include the file named by the RUN_MAINTENANCE_IF_MAIN constant.
require_once RUN_MAINTENANCE_IF_MAIN;
187