These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | /** |
||
3 | * Take page text out of an XML dump file and perform some operation on it. |
||
4 | * Used as a base class for CompareParsers and PreprocessDump. |
||
5 | * We implement below the simple task of searching inside a dump. |
||
6 | * |
||
7 | * Copyright © 2011 Platonides |
||
8 | * https://www.mediawiki.org/ |
||
9 | * |
||
10 | * This program is free software; you can redistribute it and/or modify |
||
11 | * it under the terms of the GNU General Public License as published by |
||
12 | * the Free Software Foundation; either version 2 of the License, or |
||
13 | * (at your option) any later version. |
||
14 | * |
||
15 | * This program is distributed in the hope that it will be useful, |
||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
18 | * GNU General Public License for more details. |
||
19 | * |
||
20 | * You should have received a copy of the GNU General Public License along |
||
21 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
22 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
23 | * http://www.gnu.org/copyleft/gpl.html |
||
24 | * |
||
25 | * @file |
||
26 | * @ingroup Maintenance |
||
27 | */ |
||
28 | |||
29 | require_once __DIR__ . '/Maintenance.php'; |
||
30 | |||
31 | /** |
||
32 | * Base class for iterating over a dump. |
||
33 | * |
||
34 | * @ingroup Maintenance |
||
35 | */ |
||
abstract class DumpIterator extends Maintenance {

	/** @var int Revisions counted so far; reset to 1 when the --from title is reached */
	private $count = 0;

	/** @var float|null Value of microtime( true ) taken right before the import starts */
	private $startTime;

	/**
	 * Value of the 'from' option while revisions are still being skipped,
	 * null once the starting title has been reached (or when no start was requested).
	 *
	 * Declared explicitly instead of being created dynamically in execute().
	 * Kept protected (not private) because the former dynamic property was
	 * reachable from subclasses.
	 *
	 * @var string|null
	 */
	protected $from = null;

	/**
	 * Registers the description and the 'file', 'dump' and 'from' options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Does something with a dump' );
		$this->addOption( 'file', 'File with text to run.', false, true );
		$this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
		$this->addOption( 'from', 'Article from XML dump to start from.', false, true );
	}

	/**
	 * Entry point: feeds either a single text file or every revision of an
	 * XML dump read from stdin to handleRevision(), then reports statistics.
	 */
	public function execute() {
		// Exactly one of --file / --dump must be given (boolean XOR).
		if ( !( $this->hasOption( 'file' ) ^ $this->hasOption( 'dump' ) ) ) {
			$this->error( "You must provide a file or dump", true );
		}

		$this->checkOptions();

		if ( $this->hasOption( 'file' ) ) {
			$path = $this->getOption( 'file' );
			$text = file_get_contents( $path );
			if ( $text === false ) {
				// Do not hand `false` to setText() when the file is unreadable.
				$this->error( "Unable to read file $path", true );

				return;
			}

			$revision = new WikiRevision( $this->getConfig() );

			$revision->setText( $text );
			// The title is derived from the file name: strip '.txt' and URL-decode it.
			$revision->setTitle( Title::newFromText(
				rawurldecode( basename( $path, '.txt' ) )
			) );
			$this->handleRevision( $revision );

			return;
		}

		$this->startTime = microtime( true );

		if ( $this->getOption( 'dump' ) !== '-' ) {
			$this->error( "Sorry, I don't support dump filenames yet. "
				. "Use - and provide it on stdin on the meantime.", true );

			// Only reached if error() did not terminate the script; without a
			// source there is nothing to import.
			return;
		}

		$source = new ImportStreamSource( $this->getStdin() );
		$importer = new WikiImporter( $source, $this->getConfig() );

		$importer->setRevisionCallback(
			[ $this, 'handleRevision' ] );

		$this->from = $this->getOption( 'from', null );
		$this->count = 0;
		$importer->doImport();

		$this->conclusions();

		$delta = microtime( true ) - $this->startTime;
		$this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}

		# Perform the memory_get_peak_usage() when all the other data has been
		# output so there's no damage if it dies. It is only available since
		# 5.2.0 (since 5.2.1 if you haven't compiled with --enable-memory-limit)
		$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
	}

	/**
	 * When the script declares it needs no database (DB_NONE), switches off
	 * database messages, uses the null localisation cache store and registers
	 * disableInterwikis() for the InterwikiLoadPrefix hook.
	 */
	public function finalSetup() {
		parent::finalSetup();

		if ( $this->getDbType() == Maintenance::DB_NONE ) {
			global $wgUseDatabaseMessages, $wgLocalisationCacheConf, $wgHooks;
			$wgUseDatabaseMessages = false;
			$wgLocalisationCacheConf['storeClass'] = 'LCStoreNull';
			$wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';
		}
	}

	/**
	 * InterwikiLoadPrefix hook handler registered by finalSetup().
	 *
	 * @param string $prefix Unused
	 * @param mixed &$data Unused
	 * @return bool Always false
	 */
	public static function disableInterwikis( $prefix, &$data ) {
		# Title::newFromText will check on each namespaced article if it's an interwiki.
		# We always answer that it is not.

		return false;
	}

	/**
	 * Callback function for each revision, child classes should override
	 * processRevision instead.
	 *
	 * Counts the revision, skips everything before the 'from' title (if one
	 * was requested) and forwards the rest to processRevision().
	 *
	 * @param WikiRevision $rev As built in execute(); presumably also what the
	 *  importer passes to the revision callback.
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );

			return;
		}

		$this->count++;
		if ( isset( $this->from ) ) {
			// Compare as strings with strict equality so that numeric-looking
			// titles (e.g. "1e3" vs "1000") are not treated as equal.
			if ( (string)$this->from !== (string)$title ) {
				return;
			}
			$this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );

			// Restart the statistics at the first processed revision and stop skipping.
			$this->count = 1;
			$this->from = null;
		}

		$this->processRevision( $rev );
	}

	/**
	 * Stub function for processing additional options.
	 */
	public function checkOptions() {
		return;
	}

	/**
	 * Stub function for giving data about what was computed.
	 */
	public function conclusions() {
		return;
	}

	/**
	 * Core function which does whatever the maintenance script is designed to do.
	 *
	 * @param WikiRevision $rev Revision forwarded by handleRevision()
	 */
	abstract public function processRevision( $rev );
}
||
157 | |||
158 | /** |
||
159 | * Maintenance script that runs a regex in the revisions from a dump. |
||
160 | * |
||
161 | * @ingroup Maintenance |
||
162 | */ |
||
class SearchDump extends DumpIterator {

	/**
	 * Adds the 'regex' option on top of the options registered by the parent.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Runs a regex in the revisions from a dump' );
		$this->addOption( 'regex', 'Searching regex', true, true );
	}

	/**
	 * @return int Always Maintenance::DB_NONE
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Prints the title and timestamp of the revision when its search-index
	 * text matches the 'regex' option; prints nothing otherwise.
	 *
	 * @param WikiRevision $rev Presumably a WikiRevision, as constructed by
	 *  DumpIterator::execute(); must provide getContent(), getTitle() and
	 *  getTimestamp().
	 */
	public function processRevision( $rev ) {
		$pattern = $this->getOption( 'regex' );
		$searchText = $rev->getContent()->getTextForSearchIndex();

		// preg_match() yields 0 (no match) or false (error); both mean "nothing to report".
		if ( !preg_match( $pattern, $searchText ) ) {
			return;
		}

		$this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );
	}
}
||
184 | |||
// Name of the maintenance class defined in this file; presumably read by the
// runner script included on the next line (TODO confirm against Maintenance.php).
$maintClass = 'SearchDump';
require_once RUN_MAINTENANCE_IF_MAIN;
||
187 |
In PHP it is possible to write to properties without declaring them. For example, the following is perfectly valid PHP code:
Generally, it is a good practice to explicitly declare properties to avoid accidental typos and provide IDE auto-completion: