This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
0 ignored issues
–
show
|
|||
2 | /** |
||
3 | * Adds blobs from a given external storage cluster to the blob_tracking table. |
||
4 | * |
||
5 | * This program is free software; you can redistribute it and/or modify |
||
6 | * it under the terms of the GNU General Public License as published by |
||
7 | * the Free Software Foundation; either version 2 of the License, or |
||
8 | * (at your option) any later version. |
||
9 | * |
||
10 | * This program is distributed in the hope that it will be useful, |
||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
13 | * GNU General Public License for more details. |
||
14 | * |
||
15 | * You should have received a copy of the GNU General Public License along |
||
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
18 | * http://www.gnu.org/copyleft/gpl.html |
||
19 | * |
||
20 | * @file |
||
21 | * @ingroup Maintenance |
||
22 | * @see wfWaitForSlaves() |
||
23 | */ |
||
24 | |||
25 | require __DIR__ . '/../commandLine.inc'; |
||
26 | |||
// Script entry point. $args is not defined in this file; presumably it is
// populated by the commandLine.inc bootstrap required above (TODO confirm).
// At least one cluster name is required, otherwise print usage and fail.
if ( count( $args ) === 0 ) {
	echo "Usage: php trackBlobs.php <cluster> [... <cluster>]\n" .
		"Adds blobs from a given ES cluster to the blob_tracking table\n" .
		"Automatically deletes the tracking table and starts from the start again when restarted.\n";

	exit( 1 );
}

// Run every tracking pass for the requested clusters.
$tracker = new TrackBlobs( $args );
$tracker->run();

echo "All done.\n";
||
37 | |||
/**
 * Populates the blob_tracking and blob_orphans tables for a set of external
 * storage clusters.
 *
 * The passes, in the order run() executes them:
 *  - checkIntegrity(): abort if the text/archive tables contain rows this
 *    script says it could damage.
 *  - initTrackingTable(): drop and recreate the tracking tables.
 *  - trackRevisions(): record blobs referenced via the revision table.
 *  - trackOrphanText(): record blobs referenced only by the text table.
 *  - findOrphanBlobs(): record blob rows nothing references (needs gmp).
 */
class TrackBlobs {
	/** @var string[] Cluster names to scan, as passed to the constructor */
	public $clusters;

	/** @var string|null Cached SQL condition built lazily by getTextClause() */
	public $textClause;

	/** @var bool True when the gmp extension is loaded and orphan blobs can be searched */
	public $doBlobOrphans = false;

	/** @var array Per-cluster GMP bitfields; bit N is set once blob ID N has been tracked */
	public $trackedBlobs = [];

	/** @var int Maximum number of rows selected per query */
	public $batchSize = 1000;

	/** @var int Number of batches between progress reports */
	public $reportingInterval = 10;

	/**
	 * @param string[] $clusters Names of the external storage clusters to scan
	 */
	public function __construct( $clusters ) {
		$this->clusters = $clusters;

		if ( !extension_loaded( 'gmp' ) ) {
			echo "Warning: the gmp extension is needed to find orphan blobs\n";

			return;
		}

		// One empty bitfield per cluster; bits are set as blobs are tracked.
		$this->doBlobOrphans = true;
		foreach ( $clusters as $cluster ) {
			$this->trackedBlobs[$cluster] = gmp_init( 0 );
		}
	}

	/**
	 * Run all passes in order. The orphan-blob pass is skipped when gmp is
	 * unavailable.
	 */
	public function run() {
		$this->checkIntegrity();
		$this->initTrackingTable();
		$this->trackRevisions();
		$this->trackOrphanText();
		if ( $this->doBlobOrphans ) {
			$this->findOrphanBlobs();
		}
	}

	/**
	 * Refuse to continue if the text or archive table contains
	 * HistoryBlobStub objects, or if the archive table contains external
	 * storage pointers. Prints the reason and terminates the process with
	 * exit status 1 on failure.
	 */
	public function checkIntegrity() {
		echo "Doing integrity check...\n";
		$dbr = wfGetDB( DB_REPLICA );

		// Scan for HistoryBlobStub objects in the text table (bug 20757)
		$exists = $dbr->selectField( 'text', 1,
			'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' .
			'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
			__METHOD__
		);

		if ( $exists ) {
			echo "Integrity check failed: found HistoryBlobStub objects in your text table.\n" .
				"This script could destroy these objects if it continued. Run resolveStubs.php\n" .
				"to fix this.\n";
			exit( 1 );
		}

		// Scan the archive table for HistoryBlobStub objects or external flags (bug 22624)
		$flags = $dbr->selectField( 'archive', 'ar_flags',
			'ar_flags LIKE \'%external%\' OR (' .
			'ar_flags LIKE \'%object%\' ' .
			'AND LOWER(CONVERT(LEFT(ar_text,22) USING latin1)) = \'o:15:"historyblobstub"\' )',
			__METHOD__
		);

		// Only inspect the flags when a value came back, so strpos() is never
		// handed a non-string. A falsy $flags takes neither branch, as before.
		if ( $flags !== false && $flags !== null && strpos( $flags, 'external' ) !== false ) {
			echo "Integrity check failed: found external storage pointers in your archive table.\n" .
				"Run normaliseArchiveTable.php to fix this.\n";
			exit( 1 );
		} elseif ( $flags ) {
			echo "Integrity check failed: found HistoryBlobStub objects in your archive table.\n" .
				"These objects are probably already broken, continuing would make them\n" .
				"unrecoverable. Run \"normaliseArchiveTable.php --fix-cgz-bug\" to fix this.\n";
			exit( 1 );
		}

		echo "Integrity check OK\n";
	}

	/**
	 * Drop any existing tracking tables and recreate them from
	 * blob_tracking.sql.
	 *
	 * Each table is checked separately, so a state where only one of the two
	 * tables exists no longer makes the DROP fail or leaves a stale table.
	 */
	public function initTrackingTable() {
		$dbw = wfGetDB( DB_MASTER );
		foreach ( [ 'blob_tracking', 'blob_orphans' ] as $table ) {
			if ( $dbw->tableExists( $table ) ) {
				$dbw->query( 'DROP TABLE ' . $dbw->tableName( $table ), __METHOD__ );
			}
		}
		$dbw->sourceFile( __DIR__ . '/blob_tracking.sql' );
	}

	/**
	 * Build (once) and return the SQL condition matching old_text values of
	 * the form "DB://<cluster>/..." for any of the configured clusters. The
	 * per-cluster conditions are joined with OR.
	 *
	 * @return string
	 */
	public function getTextClause() {
		if ( !$this->textClause ) {
			$dbr = wfGetDB( DB_REPLICA );
			$conds = [];
			foreach ( $this->clusters as $cluster ) {
				$conds[] = 'old_text' . $dbr->buildLike( "DB://$cluster/", $dbr->anyString() );
			}
			$this->textClause = implode( ' OR ', $conds );
		}

		return $this->textClause;
	}

	/**
	 * Parse an external storage pointer of the form
	 * "DB://<cluster>/<id>" or "DB://<cluster>/<id>/<hex hash>".
	 *
	 * @param string $text
	 * @return array|bool Array with keys 'cluster' (string), 'id' (int) and
	 *   'hash' (string, or null when the pointer has no hash part); false if
	 *   $text does not match the expected format.
	 */
	public function interpretPointer( $text ) {
		if ( !preg_match( '!^DB://(\w+)/(\d+)(?:/([0-9a-fA-F]+)|)$!', $text, $m ) ) {
			return false;
		}

		return [
			'cluster' => $m[1],
			'id' => intval( $m[2] ),
			// Group 3 is absent from $m when the pointer has no hash part.
			'hash' => isset( $m[3] ) ? $m[3] : null
		];
	}

	/**
	 * Scan the revision table for rows stored in the specified clusters
	 * and insert one blob_tracking row per valid pointer found.
	 */
	public function trackRevisions() {
		$dbw = wfGetDB( DB_MASTER );
		$dbr = wfGetDB( DB_REPLICA );

		$textClause = $this->getTextClause();
		$startId = 0;
		$endId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
		$batchesDone = 0;
		$rowsInserted = 0;

		echo "Finding revisions...\n";

		while ( true ) {
			$res = $dbr->select( [ 'revision', 'text' ],
				[ 'rev_id', 'rev_page', 'old_id', 'old_flags', 'old_text' ],
				[
					'rev_id > ' . $dbr->addQuotes( $startId ),
					'rev_text_id=old_id',
					$textClause,
					'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
				],
				__METHOD__,
				[
					'ORDER BY' => 'rev_id',
					'LIMIT' => $this->batchSize
				]
			);
			if ( !$res->numRows() ) {
				break;
			}

			$insertBatch = [];
			foreach ( $res as $row ) {
				// Advance the cursor even for rows that are skipped below.
				$startId = $row->rev_id;
				$info = $this->interpretPointer( $row->old_text );
				if ( !$info ) {
					echo "Invalid DB:// URL in rev_id {$row->rev_id}\n";
					continue;
				}
				// Strict comparison: cluster names are strings and must match exactly.
				if ( !in_array( $info['cluster'], $this->clusters, true ) ) {
					echo "Invalid cluster returned in SQL query: {$info['cluster']}\n";
					continue;
				}
				$insertBatch[] = [
					'bt_page' => $row->rev_page,
					'bt_rev_id' => $row->rev_id,
					'bt_text_id' => $row->old_id,
					'bt_cluster' => $info['cluster'],
					'bt_blob_id' => $info['id'],
					'bt_cgz_hash' => $info['hash']
				];
				if ( $this->doBlobOrphans ) {
					gmp_setbit( $this->trackedBlobs[$info['cluster']], $info['id'] );
				}
			}
			// Every row of a batch may have been rejected; skip the no-op insert.
			if ( $insertBatch ) {
				$dbw->insert( 'blob_tracking', $insertBatch, __METHOD__ );
				$rowsInserted += count( $insertBatch );
			}

			++$batchesDone;
			if ( $batchesDone >= $this->reportingInterval ) {
				$batchesDone = 0;
				echo "$startId / $endId\n";
				// NOTE(review): reported as deprecated since 1.27 in favour of
				// LBFactory::waitForReplication; kept to preserve behaviour.
				wfWaitForSlaves();
			}
		}
		echo "Found $rowsInserted revisions\n";
	}

	/**
	 * Scan the text table for orphan text
	 * Orphan text here does not imply DB corruption -- deleted text tracked by the
	 * archive table counts as orphan for our purposes.
	 *
	 * Rows are inserted into blob_tracking with bt_page and bt_rev_id set to 0.
	 */
	public function trackOrphanText() {
		# Wait until the blob_tracking table is available in the replica DB
		$dbw = wfGetDB( DB_MASTER );
		$dbr = wfGetDB( DB_REPLICA );
		$pos = $dbw->getMasterPos();
		$dbr->masterPosWait( $pos, 100000 );

		$textClause = $this->getTextClause();
		$startId = 0;
		$endId = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );
		$rowsInserted = 0;
		$batchesDone = 0;

		echo "Finding orphan text...\n";

		# Scan the text table for orphan text
		while ( true ) {
			// LEFT JOIN + "bt_text_id IS NULL" selects text rows that have no
			// blob_tracking row yet.
			$res = $dbr->select( [ 'text', 'blob_tracking' ],
				[ 'old_id', 'old_flags', 'old_text' ],
				[
					'old_id>' . $dbr->addQuotes( $startId ),
					$textClause,
					'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
					'bt_text_id IS NULL'
				],
				__METHOD__,
				[
					'ORDER BY' => 'old_id',
					'LIMIT' => $this->batchSize
				],
				[ 'blob_tracking' => [ 'LEFT JOIN', 'bt_text_id=old_id' ] ]
			);

			if ( !$res->numRows() ) {
				break;
			}

			$insertBatch = [];
			foreach ( $res as $row ) {
				// Advance the cursor even for rows that are skipped below.
				$startId = $row->old_id;
				$info = $this->interpretPointer( $row->old_text );
				if ( !$info ) {
					echo "Invalid DB:// URL in old_id {$row->old_id}\n";
					continue;
				}
				// Strict comparison: cluster names are strings and must match exactly.
				if ( !in_array( $info['cluster'], $this->clusters, true ) ) {
					echo "Invalid cluster returned in SQL query\n";
					continue;
				}

				$insertBatch[] = [
					'bt_page' => 0,
					'bt_rev_id' => 0,
					'bt_text_id' => $row->old_id,
					'bt_cluster' => $info['cluster'],
					'bt_blob_id' => $info['id'],
					'bt_cgz_hash' => $info['hash']
				];
				if ( $this->doBlobOrphans ) {
					gmp_setbit( $this->trackedBlobs[$info['cluster']], $info['id'] );
				}
			}
			// Every row of a batch may have been rejected; skip the no-op insert.
			if ( $insertBatch ) {
				$dbw->insert( 'blob_tracking', $insertBatch, __METHOD__ );
				$rowsInserted += count( $insertBatch );
			}

			++$batchesDone;
			if ( $batchesDone >= $this->reportingInterval ) {
				$batchesDone = 0;
				echo "$startId / $endId\n";
				// NOTE(review): reported as deprecated since 1.27 in favour of
				// LBFactory::waitForReplication; kept to preserve behaviour.
				wfWaitForSlaves();
			}
		}
		echo "Found $rowsInserted orphan text rows\n";
	}

	/**
	 * Scan the blobs table for rows not registered in blob_tracking (and thus not
	 * registered in the text table).
	 *
	 * Orphan blobs are indicative of DB corruption. They are inaccessible and
	 * should probably be deleted.
	 *
	 * For each cluster a bitfield of existing blob IDs is built, the IDs
	 * tracked by the earlier passes are masked out, and the remaining IDs are
	 * inserted into blob_orphans. Clusters that cannot be connected to, or
	 * that have no blobs table, are reported and skipped.
	 */
	public function findOrphanBlobs() {
		if ( !extension_loaded( 'gmp' ) ) {
			echo "Can't find orphan blobs, need bitfield support provided by GMP.\n";

			return;
		}

		$dbw = wfGetDB( DB_MASTER );

		foreach ( $this->clusters as $cluster ) {
			echo "Searching for orphan blobs in $cluster...\n";
			// NOTE(review): wfGetLBFactory() is reported as deprecated since 1.27
			// in favour of MediaWikiServices::getDBLoadBalancerFactory().
			$lb = wfGetLBFactory()->getExternalLB( $cluster );
			try {
				$extDB = $lb->getConnection( DB_REPLICA );
			} catch ( DBConnectionError $e ) {
				// Use the standard exception message; the "error" property read
				// previously is reported as not existing on DBConnectionError.
				// Presumably the message carries the server's error text -- confirm.
				$message = $e->getMessage();
				if ( strpos( $message, 'Unknown database' ) !== false ) {
					echo "No database on $cluster\n";
				} else {
					echo "Error on $cluster: " . $message . "\n";
				}
				continue;
			}
			$table = $extDB->getLBInfo( 'blobs table' );
			if ( is_null( $table ) ) {
				$table = 'blobs';
			}
			if ( !$extDB->tableExists( $table ) ) {
				echo "No blobs table on cluster $cluster\n";
				continue;
			}
			$startId = 0;
			$batchesDone = 0;
			$actualBlobs = gmp_init( 0 );
			$endId = $extDB->selectField( $table, 'MAX(blob_id)', false, __METHOD__ );

			// Build a bitmap of actual blob rows
			while ( true ) {
				$res = $extDB->select( $table,
					[ 'blob_id' ],
					[ 'blob_id > ' . $extDB->addQuotes( $startId ) ],
					__METHOD__,
					[ 'LIMIT' => $this->batchSize, 'ORDER BY' => 'blob_id' ]
				);

				if ( !$res->numRows() ) {
					break;
				}

				foreach ( $res as $row ) {
					gmp_setbit( $actualBlobs, (int)$row->blob_id );
					// Track the cursor inside the loop rather than relying on
					// $row still being defined after it.
					$startId = $row->blob_id;
				}

				++$batchesDone;
				if ( $batchesDone >= $this->reportingInterval ) {
					$batchesDone = 0;
					echo "$startId / $endId\n";
				}
			}

			// Find actual blobs that weren't tracked by the previous passes
			// This is a set-theoretic difference A \ B, or in bitwise terms, A & ~B
			$orphans = gmp_and( $actualBlobs, gmp_com( $this->trackedBlobs[$cluster] ) );

			// Traverse the orphan list
			$insertBatch = [];
			$id = 0;
			$numOrphans = 0;
			while ( true ) {
				// Next set bit at or after $id; -1 means there are none left.
				$id = gmp_scan1( $orphans, $id );
				if ( $id === -1 ) {
					break;
				}
				$insertBatch[] = [
					'bo_cluster' => $cluster,
					'bo_blob_id' => $id
				];
				// Flush at exactly batchSize rows (was batchSize + 1).
				if ( count( $insertBatch ) >= $this->batchSize ) {
					$dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
					$insertBatch = [];
				}

				++$id;
				++$numOrphans;
			}
			if ( $insertBatch ) {
				$dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
			}
			echo "Found $numOrphans orphan(s) in $cluster\n";
		}
	}
}
||
399 |
The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.