|
1
|
|
|
<?php |
|
|
|
|
|
|
2
|
|
|
/**
 * Script to fix bug 20757.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance ExternalStorage
 */
|
23
|
|
|
|
|
24
|
|
|
require_once __DIR__ . '/../Maintenance.php'; |
|
25
|
|
|
|
|
26
|
|
|
/**
 * Maintenance script to fix bug 20757.
 *
 * Walks the text table looking for rows that are flagged "object" (and not
 * "external") and whose content is a serialized HistoryBlobStub. For each such
 * row, the text row referenced by the stub is looked up in blob_tracking. When
 * the referenced blob can be fetched from external storage and its MD5 matches
 * the stub's hash, the row is rewritten to point directly at the external URL
 * and a matching blob_tracking row is added. Nothing is written with --dry-run.
 *
 * @ingroup Maintenance ExternalStorage
 */
class FixBug20757 extends Maintenance {
	/** @var int Maximum number of text rows fetched per batch query in execute() */
	public $batchSize = 10000;

	/** @var array[] Cache of rev_text_id => rev_id maps, keyed by page ID */
	public $mapCache = [];

	/** @var int Total number of entries held across all maps in $mapCache */
	public $mapCacheSize = 0;

	/** @var int Entry count above which the oldest cached maps are evicted */
	public $maxMapCacheSize = 1000000;

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Script to fix bug 20757 assuming that blob_tracking is intact' );
		$this->addOption( 'dry-run', 'Report only' );
		$this->addOption( 'start', 'old_id to start at', false, true );
	}

	/**
	 * Scan the text table in batches of $batchSize, classify every stub row as
	 * good, fixed or unrecoverable, repair the fixable ones (unless --dry-run
	 * was given) and print a summary.
	 */
	public function execute() {
		$dbr = $this->getDB( DB_REPLICA );
		$dbw = $this->getDB( DB_MASTER );

		$dryRun = $this->getOption( 'dry-run' );
		if ( $dryRun ) {
			print "Dry run only.\n";
		}

		$startId = $this->getOption( 'start', 0 );
		$numGood = 0;
		$numFixed = 0;
		$numBad = 0;

		// Only used for the progress display
		$totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );

		// In MySQL 4.1+, the binary field old_text has a non-working LOWER() function
		$lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))';

		while ( true ) {
			print "ID: $startId / $totalRevs\r";

			$res = $dbr->select(
				'text',
				[ 'old_id', 'old_flags', 'old_text' ],
				[
					'old_id > ' . intval( $startId ),
					'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
					"$lowerLeft = 'o:15:\"historyblobstub\"'",
				],
				__METHOD__,
				[
					'ORDER BY' => 'old_id',
					'LIMIT' => $this->batchSize,
				]
			);

			if ( !$res->numRows() ) {
				break;
			}

			$secondaryIds = [];
			$stubs = [];

			foreach ( $res as $row ) {
				// Remember how far we got so the next batch continues after this row
				$startId = $row->old_id;

				// Basic sanity checks.
				// NOTE(review): unserialize() instantiates whatever class the stored
				// data names; this relies on the text table content being trusted.
				$obj = unserialize( $row->old_text );
				if ( $obj === false ) {
					print "{$row->old_id}: unrecoverable: cannot unserialize\n";
					++$numBad;
					continue;
				}

				if ( !is_object( $obj ) ) {
					print "{$row->old_id}: unrecoverable: unserialized to type " .
						gettype( $obj ) . ", possible double-serialization\n";
					++$numBad;
					continue;
				}

				if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) {
					print "{$row->old_id}: unrecoverable: unexpected object class " .
						get_class( $obj ) . "\n";
					++$numBad;
					continue;
				}

				// Process flags: without an explicit UTF-8 flag the row is treated
				// as being in the legacy encoding
				$flags = explode( ',', $row->old_flags );
				$legacyEncoding = !in_array( 'utf-8', $flags, true )
					&& !in_array( 'utf8', $flags, true );

				// Queue the stub for future batch processing
				$id = intval( $obj->mOldId );
				$secondaryIds[] = $id;
				$stubs[$row->old_id] = [
					'legacyEncoding' => $legacyEncoding,
					'secondaryId' => $id,
					'hash' => $obj->mHash,
				];
			}

			$secondaryIds = array_unique( $secondaryIds );

			if ( !count( $secondaryIds ) ) {
				// Every row in this batch was rejected above; move on
				continue;
			}

			// Run the batch query on blob_tracking
			$trackRes = $dbr->select(
				'blob_tracking',
				'*',
				[
					'bt_text_id' => $secondaryIds,
				],
				__METHOD__
			);
			$trackedBlobs = [];
			foreach ( $trackRes as $btRow ) {
				$trackedBlobs[$btRow->bt_text_id] = $btRow;
			}

			// Process the stubs
			foreach ( $stubs as $primaryId => $stub ) {
				$secondaryId = $stub['secondaryId'];
				if ( !isset( $trackedBlobs[$secondaryId] ) ) {
					// No tracked blob. Work out what went wrong
					$secondaryRow = $dbr->selectRow(
						'text',
						[ 'old_flags', 'old_text' ],
						[ 'old_id' => $secondaryId ],
						__METHOD__
					);
					if ( !$secondaryRow ) {
						print "$primaryId: unrecoverable: secondary row is missing\n";
						++$numBad;
					} elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
						// Not broken yet, and not in the tracked clusters so it won't get
						// broken by the current RCT run.
						++$numGood;
					} elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
						print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
						++$numBad;
					} else {
						print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
						++$numBad;
					}
					continue;
				}
				$trackRow = $trackedBlobs[$secondaryId];

				// Check that the specified text really is available in the tracked source row
				$url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}";
				$text = ExternalStore::fetchFromURL( $url );
				if ( $text === false ) {
					print "$primaryId: unrecoverable: source text missing\n";
					++$numBad;
					continue;
				}
				if ( md5( $text ) !== $stub['hash'] ) {
					print "$primaryId: unrecoverable: content hashes do not match\n";
					++$numBad;
					continue;
				}

				// Find the page_id and rev_id
				// The page is probably the same as the page of the secondary row
				$pageId = intval( $trackRow->bt_page );
				$revId = $pageId ? $this->findTextIdInPage( $pageId, $primaryId ) : null;
				if ( !$revId ) {
					// No page recorded, or the text row is not used by any revision
					// of that page: actually an orphan
					$pageId = $revId = 0;
				}

				$newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';

				if ( !$dryRun ) {
					// Reset the text row to point to the original copy
					$this->beginTransaction( $dbw, __METHOD__ );
					$dbw->update(
						'text',
						// SET
						[
							'old_flags' => $newFlags,
							'old_text' => $url
						],
						// WHERE
						[ 'old_id' => $primaryId ],
						__METHOD__
					);

					// Add a blob_tracking row so that the new reference can be recompressed
					// without needing to run trackBlobs.php again
					$dbw->insert( 'blob_tracking',
						[
							'bt_page' => $pageId,
							'bt_rev_id' => $revId,
							'bt_text_id' => $primaryId,
							'bt_cluster' => $trackRow->bt_cluster,
							'bt_blob_id' => $trackRow->bt_blob_id,
							'bt_cgz_hash' => $stub['hash'],
							'bt_new_url' => null,
							'bt_moved' => 0,
						],
						__METHOD__
					);
					$this->commitTransaction( $dbw, __METHOD__ );
					$this->waitForSlaves();
				}

				print "$primaryId: resolved to $url\n";
				++$numFixed;
			}
		}

		print "\n";
		print "Fixed: $numFixed\n";
		print "Unrecoverable: $numBad\n";
		print "Good stubs: $numGood\n";
	}

	/**
	 * Throttle writes: wait for replication once every 50 calls.
	 *
	 * The counter is advanced exactly once per call and compared directly with
	 * the threshold. A test of the form `++$i > 50 == 0` is parsed by PHP as
	 * `( ++$i > 50 ) == 0`, which holds while the counter is at or below 50 and
	 * so would trigger a wait on every call.
	 */
	public function waitForSlaves() {
		static $iteration = 0;
		if ( ++$iteration >= 50 ) {
			wfWaitForSlaves();
			$iteration = 0;
		}
	}

	/**
	 * Find the revision of a page that uses a given text row.
	 *
	 * @param int $pageId
	 * @param int|string $textId old_id of the text row
	 * @return mixed The rev_id as returned by the database, or null if no
	 *   revision of the page refers to that text row
	 */
	public function findTextIdInPage( $pageId, $textId ) {
		$ids = $this->getRevTextMap( $pageId );

		return isset( $ids[$textId] ) ? $ids[$textId] : null;
	}

	/**
	 * Get the rev_text_id => rev_id map for all revisions of a page, loading it
	 * from the revision table on a cache miss.
	 *
	 * @param int $pageId
	 * @return array Map of rev_text_id to rev_id
	 */
	public function getRevTextMap( $pageId ) {
		if ( !isset( $this->mapCache[$pageId] ) ) {
			// Limit cache size: drop maps from the front of the cache (the
			// earliest inserted) until the entry count is back under the limit.
			// reset() makes sure key() never reads a stale array pointer, and
			// the emptiness check guarantees the loop terminates.
			while ( $this->mapCache && $this->mapCacheSize > $this->maxMapCacheSize ) {
				reset( $this->mapCache );
				$key = key( $this->mapCache );
				$this->mapCacheSize -= count( $this->mapCache[$key] );
				unset( $this->mapCache[$key] );
			}

			$dbr = $this->getDB( DB_REPLICA );
			$map = [];
			$res = $dbr->select( 'revision',
				[ 'rev_id', 'rev_text_id' ],
				[ 'rev_page' => $pageId ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				$map[$row->rev_text_id] = $row->rev_id;
			}
			$this->mapCache[$pageId] = $map;
			$this->mapCacheSize += count( $map );
		}

		return $this->mapCache[$pageId];
	}

	/**
	 * This is based on part of HistoryBlobStub::getText().
	 * Determine if the text can be retrieved from the row in the normal way.
	 *
	 * @param array $stub Stub description; only the 'hash' element is read here
	 * @param stdClass $secondaryRow Text row with old_flags and old_text fields
	 * @return bool True if the secondary row decodes to a blob object that
	 *   yields an item for the stub's hash
	 */
	public function isUnbrokenStub( $stub, $secondaryRow ) {
		$flags = explode( ',', $secondaryRow->old_flags );
		$text = $secondaryRow->old_text;

		if ( in_array( 'external', $flags, true ) ) {
			$url = $text;
			// A usable URL has a non-empty part after "://"
			$parts = explode( '://', $url, 2 );
			if ( !isset( $parts[1] ) || $parts[1] == "" ) {
				return false;
			}
			$text = ExternalStore::fetchFromURL( $url );
		}

		if ( !in_array( 'object', $flags, true ) ) {
			return false;
		}

		if ( !is_string( $text ) ) {
			// The external fetch failed, so there is nothing to decode
			return false;
		}

		if ( in_array( 'gzip', $flags, true ) ) {
			$text = gzinflate( $text );
			if ( $text === false ) {
				return false;
			}
		}
		$obj = unserialize( $text );

		if ( is_string( $obj ) ) {
			// Correct for old double-serialization bug.
			$obj = unserialize( $obj );
		}

		if ( !is_object( $obj ) ) {
			return false;
		}

		// An object that cannot be asked for items (for example an unknown class)
		// is not retrievable in the normal way; report it rather than die on an
		// undefined method call.
		if ( !method_exists( $obj, 'uncompress' ) || !method_exists( $obj, 'getItem' ) ) {
			return false;
		}

		$obj->uncompress();
		$text = $obj->getItem( $stub['hash'] );

		return $text !== false;
	}
}
|
347
|
|
|
|
|
348
|
|
|
// Tell the maintenance runner which class to instantiate, then hand over control.
$maintClass = FixBug20757::class;
require_once RUN_MAINTENANCE_IF_MAIN;
|
350
|
|
|
|
The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.