These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
0 ignored issues
–
show
|
|||
2 | /** |
||
3 | * Take page text out of an XML dump file and perform some operation on it. |
||
4 | * Used as a base class for CompareParsers and PreprocessDump. |
||
5 | * We implement below the simple task of searching inside a dump. |
||
6 | * |
||
7 | * Copyright © 2011 Platonides |
||
8 | * https://www.mediawiki.org/ |
||
9 | * |
||
10 | * This program is free software; you can redistribute it and/or modify |
||
11 | * it under the terms of the GNU General Public License as published by |
||
12 | * the Free Software Foundation; either version 2 of the License, or |
||
13 | * (at your option) any later version. |
||
14 | * |
||
15 | * This program is distributed in the hope that it will be useful, |
||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
18 | * GNU General Public License for more details. |
||
19 | * |
||
20 | * You should have received a copy of the GNU General Public License along |
||
21 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
22 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
23 | * http://www.gnu.org/copyleft/gpl.html |
||
24 | * |
||
25 | * @file |
||
26 | * @ingroup Maintenance |
||
27 | */ |
||
28 | |||
29 | require_once __DIR__ . '/Maintenance.php'; |
||
30 | |||
/**
 * Base class for iterating over a dump.
 *
 * Feeds every revision of an XML dump read from stdin (--dump=-), or the
 * contents of a single text file (--file), to processRevision(), which
 * child classes must implement.
 *
 * @ingroup Maintenance
 */
abstract class DumpIterator extends Maintenance {

	/** @var int Number of revisions counted so far; restarted at 1 once --from is reached */
	private $count = 0;

	/** @var float|null Value of microtime( true ) taken just before the dump is read */
	private $startTime;

	/**
	 * @var string|null Value of the --from option while revisions are still being
	 *  skipped; null when nothing has to be skipped (any more).
	 */
	protected $from = null;

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Does something with a dump' );
		$this->addOption( 'file', 'File with text to run.', false, true );
		$this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
		$this->addOption( 'from', 'Article from XML dump to start from.', false, true );
	}

	/**
	 * Entry point: process either the single file given with --file or the
	 * dump given with --dump, then print timing and memory statistics.
	 */
	public function execute() {
		// Exactly one of --file and --dump has to be present (exclusive or).
		if ( !( $this->hasOption( 'file' ) ^ $this->hasOption( 'dump' ) ) ) {
			$this->error( "You must provide a file or dump", true );
		}

		$this->checkOptions();

		if ( $this->hasOption( 'file' ) ) {
			$file = $this->getOption( 'file' );
			$text = file_get_contents( $file );
			if ( $text === false ) {
				// Do not hand a boolean to the revision when the file cannot be read.
				$this->error( "Unable to read file $file", true );

				return;
			}

			$revision = new WikiRevision( $this->getConfig() );
			$revision->setText( $text );
			// The title is derived from the file name: strip ".txt" and URL-decode it.
			$revision->setTitle( Title::newFromText(
				rawurldecode( basename( $file, '.txt' ) )
			) );
			$this->handleRevision( $revision );

			return;
		}

		$this->startTime = microtime( true );

		if ( $this->getOption( 'dump' ) == '-' ) {
			$source = new ImportStreamSource( $this->getStdin() );
		} else {
			$this->error( "Sorry, I don't support dump filenames yet. "
				. "Use - and provide it on stdin on the meantime.", true );

			// Never continue without an import source, even if error() returns.
			return;
		}
		$importer = new WikiImporter( $source, $this->getConfig() );

		$importer->setRevisionCallback(
			[ $this, 'handleRevision' ] );

		$this->from = $this->getOption( 'from', null );
		$this->count = 0;
		$importer->doImport();

		$this->conclusions();

		$delta = microtime( true ) - $this->startTime;
		$this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
		// Guard against a division by zero when no measurable time has elapsed.
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}

		# Perform the memory_get_peak_usage() when all the other data has been
		# output so there's no damage if it dies. It is only available since
		# 5.2.0 (since 5.2.1 if you haven't compiled with --enable-memory-limit)
		$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
	}

	/**
	 * When the script runs without a database (DB_NONE), switch off database
	 * messages, use a null localisation cache store and register the
	 * disableInterwikis() hook handler.
	 */
	public function finalSetup() {
		parent::finalSetup();

		if ( $this->getDbType() == Maintenance::DB_NONE ) {
			global $wgUseDatabaseMessages, $wgLocalisationCacheConf, $wgHooks;
			$wgUseDatabaseMessages = false;
			$wgLocalisationCacheConf['storeClass'] = 'LCStoreNull';
			$wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';
		}
	}

	/**
	 * Handler registered for the InterwikiLoadPrefix hook by finalSetup().
	 *
	 * @param string $prefix Unused
	 * @param mixed &$data Unused
	 * @return bool Always false
	 */
	public static function disableInterwikis( $prefix, &$data ) {
		# Title::newFromText will check on each namespaced article if it's an interwiki.
		# We always answer that it is not.

		return false;
	}

	/**
	 * Callback function for each revision, child classes should override
	 * processRevision instead.
	 *
	 * Counts the revision, skips it while the title given with --from has not
	 * been reached yet, and otherwise forwards it to processRevision().
	 *
	 * @param WikiRevision $rev
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );

			return;
		}

		$this->count++;
		if ( isset( $this->from ) ) {
			// Loose comparison on purpose: $title is an object that is compared
			// against the --from string.
			if ( $this->from != $title ) {
				return;
			}
			$this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );

			// Start counting again from the first processed revision.
			$this->count = 1;
			$this->from = null;
		}

		$this->processRevision( $rev );
	}

	/* Stub function for processing additional options */
	public function checkOptions() {
		return;
	}

	/* Stub function for giving data about what was computed */
	public function conclusions() {
		return;
	}

	/* Core function which does whatever the maintenance script is designed to do */
	abstract public function processRevision( $rev );
}
||
157 | |||
/**
 * Maintenance script that reports every dump revision whose text matches a
 * regular expression.
 *
 * @ingroup Maintenance
 */
class SearchDump extends DumpIterator {

	/**
	 * Register the mandatory --regex option on top of the options of the parent.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Runs a regex in the revisions from a dump' );
		$this->addOption( 'regex', 'Searching regex', true, true );
	}

	/**
	 * @return int Maintenance::DB_NONE
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Print the title and timestamp of the revision when its search-index text
	 * matches the pattern given with --regex.
	 *
	 * @param WikiRevision $rev Revision handed over by DumpIterator::handleRevision()
	 */
	public function processRevision( $rev ) {
		$pattern = $this->getOption( 'regex' );
		$haystack = $rev->getContent()->getTextForSearchIndex();

		// Nothing to report unless the pattern matched.
		if ( !preg_match( $pattern, $haystack ) ) {
			return;
		}

		$this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );
	}
}
||
184 | |||
// Name the maintenance class of this file, then include the runner.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is presumably defined by Maintenance.php — confirm.
$maintClass = 'SearchDump';
require_once RUN_MAINTENANCE_IF_MAIN;
||
187 |
The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.