1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace SMW\Maintenance; |
4
|
|
|
|
5
|
|
|
use LinkCache; |
6
|
|
|
use Onoi\MessageReporter\MessageReporter; |
7
|
|
|
use Onoi\MessageReporter\MessageReporterFactory; |
8
|
|
|
use SMW\DIWikiPage; |
9
|
|
|
use SMW\MediaWiki\TitleCreator; |
10
|
|
|
use SMW\ApplicationFactory; |
11
|
|
|
use SMW\Options; |
12
|
|
|
use SMW\Store; |
13
|
|
|
use Title; |
14
|
|
|
|
15
|
|
|
/** |
16
|
|
|
* Is part of the `rebuildData.php` maintenance script to rebuild existing data |
17
|
|
|
* for the store |
18
|
|
|
* |
19
|
|
|
* @note This is an internal class and should not be used outside of smw-core |
20
|
|
|
* |
21
|
|
|
* @license GNU GPL v2+ |
22
|
|
|
* @since 1.9.2 |
23
|
|
|
* |
24
|
|
|
* @author mwjames |
25
|
|
|
*/ |
26
|
|
|
class DataRebuilder { |
27
|
|
|
|
28
|
|
|
/**
 * Store whose data is rebuilt.
 *
 * @var Store
 */
private $store;

/**
 * @var TitleCreator
 */
private $titleCreator;

/**
 * Options handed over through setOptions().
 *
 * @var Options
 */
private $options;

/**
 * Receiver of all progress output; a null reporter until one is injected.
 *
 * @var MessageReporter
 */
private $reporter;

/**
 * Delegate used when only selected entities are rebuilt.
 *
 * @var DistinctEntityDataRebuilder
 */
private $distinctEntityDataRebuilder;

/**
 * @var ExceptionFileLogger
 */
private $exceptionFileLogger;

/**
 * Exceptions caught for the ID currently being processed, keyed by ID.
 *
 * @var array
 */
private $exceptionLog = array();

/**
 * Number of IDs (or entities) handled by the last rebuild.
 *
 * @var integer
 */
private $rebuildCount = 0;

/**
 * Pause in microseconds after each dispatched ID, false for no pause.
 *
 * @var integer|boolean
 */
private $delay = false;

/**
 * Whether the last ID is written back to the start ID file.
 *
 * @var boolean
 */
private $canWriteToIdFile = false;

/**
 * First ID to process (never lower than 1).
 *
 * @var integer
 */
private $start = 1;

/**
 * Last ID to process, false when no upper bound was requested.
 *
 * @var integer|boolean
 */
private $end = false;

/**
 * Namespace constants selected by the `categories` and `p` options.
 *
 * @var int[]
 */
private $filters = array();

/**
 * Whether the `v` option was set.
 *
 * @var boolean
 */
private $verbose = false;

/**
 * Path given by the `startidfile` option, false when unused.
 *
 * @var string|boolean
 */
private $startIdFile = false;
79
|
|
|
|
80
|
|
|
/**
 * Wires the rebuilder to its store and title creator and sets up the
 * default collaborators: a null message reporter, the distinct entity
 * rebuilder and the exception file logger.
 *
 * @since 1.9.2
 *
 * @param Store $store
 * @param TitleCreator $titleCreator
 */
public function __construct( Store $store, TitleCreator $titleCreator ) {
	$this->store = $store;
	$this->titleCreator = $titleCreator;

	// Stay silent until a reporter is injected via setMessageReporter()
	$reporterFactory = MessageReporterFactory::getInstance();
	$this->reporter = $reporterFactory->newNullMessageReporter();

	$this->distinctEntityDataRebuilder = new DistinctEntityDataRebuilder(
		$store,
		$titleCreator
	);

	$this->exceptionFileLogger = new ExceptionFileLogger( 'rebuilddata' );
}
93
|
|
|
|
94
|
|
|
/**
 * Replaces the reporter that receives all progress and status messages.
 *
 * @since 2.1
 *
 * @param MessageReporter $reporter
 */
public function setMessageReporter( MessageReporter $reporter ) {
	$this->reporter = $reporter;
}
102
|
|
|
|
103
|
|
|
/**
 * Applies the command line options to this instance.
 *
 * Recognised keys: `server` (overrides $wgServer), `d` (delay in
 * milliseconds), `s` or `startidfile` (first ID), `e` or `n` (last ID),
 * `v` (verbose) as well as the filter options evaluated by
 * setFiltersFromOptions().
 *
 * @since 1.9.2
 *
 * @param Options $options
 */
public function setOptions( Options $options ) {

	$this->options = $options;

	if ( $options->has( 'server' ) ) {
		$GLOBALS['wgServer'] = $options->get( 'server' );
	}

	if ( $options->has( 'd' ) ) {
		// The option is given in milliseconds, usleep() expects microseconds
		$this->delay = (int)$options->get( 'd' ) * 1000;
	}

	// An explicit start ID takes precedence over the start ID file
	if ( $options->has( 's' ) ) {
		$this->start = max( 1, (int)$options->get( 's' ) );
	} elseif ( $options->has( 'startidfile' ) ) {
		$idFile = $options->get( 'startidfile' );

		// Terminates the script when the file cannot be written to
		$this->canWriteToIdFile = $this->idFileIsWritable( $idFile );
		$this->startIdFile = $idFile;

		if ( is_readable( $idFile ) ) {
			$this->start = max( 1, (int)file_get_contents( $idFile ) );
		}
	}

	// Note: this might reasonably be larger than the page count
	if ( $options->has( 'e' ) ) {
		$this->end = (int)$options->get( 'e' );
	} elseif ( $options->has( 'n' ) ) {
		$this->end = $this->start + (int)$options->get( 'n' );
	}

	$this->verbose = $options->has( 'v' );
	$this->exceptionFileLogger->setOptions( $options );

	$this->setFiltersFromOptions( $options );
}
143
|
|
|
|
144
|
|
|
/**
 * Runs the rebuild as configured by the options: an optional full delete
 * (`f`), followed by either a rebuild of selected entities (`page`,
 * `query`, `redirects` or any namespace filter) or a rebuild of all IDs.
 *
 * @since 1.9.2
 *
 * @return boolean
 */
public function rebuild() {

	// Only the short class name is reported, any namespace part is dropped
	$classSegments = explode( "\\", get_class( $this->store ) );
	$storeName = end( $classSegments );

	$this->reportMessage( "\nRunning for storage: " . $storeName . "\n\n" );

	if ( $this->options->has( 'f' ) ) {
		$this->performFullDelete();
	}

	$onlyDistinctEntities = $this->options->has( 'page' ) ||
		$this->options->has( 'query' ) ||
		$this->hasFilters() ||
		$this->options->has( 'redirects' );

	return $onlyDistinctEntities ? $this->doRebuildDistinctEntities() : $this->doRebuildAll();
}
170
|
|
|
|
171
|
7 |
|
/**
 * @return boolean true when at least one namespace filter was selected
 */
private function hasFilters() {
	return array() !== $this->filters;
}
174
|
|
|
|
175
|
|
|
/**
 * Returns the counter maintained by the last rebuild run.
 *
 * @since 1.9.2
 *
 * @return int
 */
public function getRebuildCount() {
	return $this->rebuildCount;
}
183
|
|
|
|
184
|
9 |
|
/**
 * Delegates the rebuild to the DistinctEntityDataRebuilder, takes over its
 * rebuild count and writes its exception log to the exception file.
 *
 * @return boolean always true
 */
private function doRebuildDistinctEntities() {

	$rebuilder = $this->distinctEntityDataRebuilder;
	$logger = $this->exceptionFileLogger;

	$rebuilder->setOptions( $this->options );
	$rebuilder->setMessageReporter( $this->reporter );
	$rebuilder->doRebuild();

	$this->rebuildCount = $rebuilder->getRebuildCount();

	$logger->doWriteExceptionLog( $rebuilder->getExceptionLog() );

	if ( !$this->options->has( 'ignore-exceptions' ) ) {
		return true;
	}

	// Tell the user about swallowed exceptions and where to find them
	$ignoredCount = $logger->getExceptionCounter();

	if ( $ignoredCount > 0 ) {
		$this->reportMessage(
			"\n" . $ignoredCount . " exceptions were ignored! (See " . $logger->getExceptionFile() . ").\n"
		);
	}

	return true;
}
211
|
|
|
|
212
|
5 |
|
/**
 * Rebuilds the data for all IDs between the configured start and end.
 *
 * Entities marked as outdated are disposed first, then (unless
 * `skip-properties` is set) properties are rebuilt through the distinct
 * entity rebuilder, and finally each ID is dispatched one at a time while
 * progress is reported.
 *
 * @return boolean always true
 */
private function doRebuildAll() {

	$dispatcher = $this->store->refreshData( $this->start, 1 );
	$dispatcher->setIterationLimit( 1 );

	$parseMode = $this->options->has( 'shallow-update' ) ? SMW_UJ_PM_CLASTMDATE : false;
	$dispatcher->setUpdateJobParseMode( $parseMode );
	$dispatcher->setUpdateJobToUseJobQueueScheduler( false );

	$this->doDisposeMarkedOutdatedEntities();

	if ( !$this->options->has( 'skip-properties' ) ) {
		$this->options->set( 'p', true );
		$this->doRebuildDistinctEntities();
		$this->reportMessage( "\n" );
	}

	$this->store->clear();

	$this->reportMessage(
		"Refreshing all semantic data in the database!\n---\n" .
		" Some versions of PHP suffer from memory leaks in long-running \n" .
		" scripts. If your machine gets very slow after many pages \n" .
		" (typically more than 1000) were refreshed, please abort with\n" .
		" CTRL-C and resume this script at the last processed page id\n" .
		" using the parameter -s (use -v to display page ids during \n" .
		" refresh). Continue this until all pages have been refreshed.\n---\n"
	);

	// A bounded range gives an exact total, otherwise the highest known
	// ID serves as an estimate
	$rangeSize = $this->end - $this->start;
	$hasBoundedRange = $rangeSize > 0;
	$total = ( $this->end && $hasBoundedRange ) ? $rangeSize : $dispatcher->getMaxId();

	$id = $this->start;

	$this->reportMessage(
		" The progress displayed is an estimation and is self-adjusting \n" .
		" during the update process.\n---\n"
	);

	$lastId = $this->end ? "$this->end" : $dispatcher->getMaxId();

	$this->reportMessage(
		"Processing all IDs from $this->start to " . $lastId . " ...\n"
	);

	$this->rebuildCount = 0;

	// $id is handed to doExecuteFor() by reference; the loop stops once it
	// is no longer positive or has passed the requested end
	while ( $id > 0 && ( !$this->end || $id <= $this->end ) ) {

		$progress = '';

		$this->rebuildCount++;
		$this->exceptionLog = array();

		$this->doExecuteFor( $dispatcher, $id );

		// The percentage is only printed at the end of a full row of dots
		if ( $this->rebuildCount % 60 === 0 ) {
			$ratio = $hasBoundedRange ? $this->rebuildCount / $total : $dispatcher->getEstimatedProgress();
			$progress = round( $ratio * 100 ) . "%";
		}

		foreach ( $dispatcher->getDispatchedEntities() as $entities ) {

			$label = $this->getHumanReadableTextFrom( $id, $entities );
			$position = '(' . $this->rebuildCount . '/' . $total . ')';

			$this->reportMessage(
				sprintf( "%-16s%s\n", $position, "Finished processing ID " . $label ),
				$this->options->has( 'v' )
			);

			if ( $this->options->has( 'ignore-exceptions' ) && isset( $this->exceptionLog[$id] ) ) {
				$this->exceptionFileLogger->doWriteExceptionLog(
					array( $id . ' ' . $label => $this->exceptionLog[$id] )
				);
			}
		}

		$this->doPrintDotProgressIndicator( $this->verbose, $this->rebuildCount, $progress );
	}

	$this->writeIdToFile( $id );
	$this->reportMessage( "\n\n$this->rebuildCount IDs refreshed.\n" );

	if ( $this->options->has( 'ignore-exceptions' ) && $this->exceptionFileLogger->getExceptionCounter() > 0 ) {
		$this->reportMessage(
			"\n" .
			$this->exceptionFileLogger->getExceptionCounter() . " exceptions were ignored! (See " .
			$this->exceptionFileLogger->getExceptionFile() . ").\n"
		);
	}

	return true;
}
303
|
|
|
|
304
|
5 |
|
/**
 * Dispatches the rebuild for a single ID.
 *
 * With `ignore-exceptions` set, an exception is recorded in the exception
 * log under the current ID instead of being propagated. Afterwards the
 * configured delay is applied and the link cache is cleared on every
 * 100th rebuild.
 *
 * @param mixed $byIdDataRebuildDispatcher dispatcher returned by Store::refreshData
 * @param integer &$id passed on by reference to the dispatcher
 */
private function doExecuteFor( $byIdDataRebuildDispatcher, &$id ) {

	if ( $this->options->has( 'ignore-exceptions' ) ) {
		try {
			$byIdDataRebuildDispatcher->dispatchRebuildFor( $id );
		} catch ( \Exception $exception ) {
			$this->exceptionLog[$id] = array(
				'msg'   => $exception->getMessage(),
				'trace' => $exception->getTraceAsString()
			);
		}
	} else {
		$byIdDataRebuildDispatcher->dispatchRebuildFor( $id );
	}

	if ( false !== $this->delay ) {
		usleep( $this->delay );
	}

	// Clearing only every 100 pages keeps the overhead low while still
	// avoiding memory leaks
	if ( $this->rebuildCount % 100 === 0 ) {
		LinkCache::singleton()->clear();
	}
}
328
|
|
|
|
329
|
2 |
|
/**
 * Builds the text shown for a processed ID in verbose mode.
 *
 * @param integer $id
 * @param array $entities the last element is used to describe the entity
 *
 * @return string empty unless the `v` option is set
 */
private function getHumanReadableTextFrom( $id, array $entities ) {

	if ( !$this->options->has( 'v' ) ) {
		return '';
	}

	// Indicates whether this is a MW page (*) or SMW's object table
	$marker = isset( $entities['t'] ) ? '*' : '';

	$entity = end( $entities );

	if ( $entity instanceof \Title ) {
		$description = $entity->getPrefixedDBKey();
	} elseif ( $entity instanceof DIWikiPage ) {
		$description = $entity->getHash();
	} elseif ( is_string( $entity ) && $entity !== '' ) {
		$description = $entity;
	} else {
		$description = 'N/A';
	}

	return $id . $marker . ' (' . $description .')';
}
350
|
|
|
|
351
|
2 |
|
/**
 * Drops and recreates the store after announcing it and giving the user
 * a countdown to abort.
 *
 * @return boolean always true
 */
private function performFullDelete() {

	$announcement = "Deleting all stored data completely and rebuilding it again later!\n---\n" .
		" Semantic data in the wiki might be incomplete for some time while this operation runs.\n\n" .
		" NOTE: It is usually necessary to run this script ONE MORE TIME after this operation,\n" .
		" since some properties' types are not stored yet in the first run.\n---\n";

	$this->reportMessage( $announcement );

	$hasRestrictedRange = $this->options->has( 's' ) || $this->options->has( 'e' );

	if ( $hasRestrictedRange ) {
		$warning = " WARNING: -s or -e are used, so some pages will not be refreshed at all!\n" .
			" Data for those pages will only be available again when they have been\n" .
			" refreshed as well!\n\n";

		$this->reportMessage( $warning );
	}

	// Remember the buffer nesting so that only buffers opened while the
	// store is dropped and set up again are flushed below
	$initialObLevel = ob_get_level();

	$this->reportMessage( ' Abort with control-c in the next five seconds ... ' );
	wfCountDown( 6 );

	$this->store->drop( $this->verbose );
	$this->store->setupStore( $this->verbose );

	// Be sure to have some buffer, otherwise some PHPs complain
	for ( $level = ob_get_level(); $level > $initialObLevel; $level = ob_get_level() ) {
		ob_end_flush();
	}

	$this->reportMessage( "\nAll storage structures have been deleted and recreated.\n\n" );

	return true;
}
383
|
|
|
|
384
|
5 |
|
/**
 * Removes the entities reported by the EntityIdDisposerJob's outdated
 * entities iterator, printing a progress indicator while doing so.
 * Returns silently when there is nothing to remove.
 */
private function doDisposeMarkedOutdatedEntities() {

	$jobFactory = ApplicationFactory::getInstance()->newJobFactory();
	$title = Title::newFromText( __METHOD__ );

	$disposerJob = $jobFactory->newEntityIdDisposerJob( $title );

	$outdatedEntities = $disposerJob->newOutdatedEntitiesResultIterator();
	$matchesCount = $outdatedEntities->count();

	if ( $matchesCount == 0 ) {
		return;
	}

	$this->reportMessage( "Removing table entries (marked for deletion).\n" );

	$processed = 0;

	foreach ( $outdatedEntities as $row ) {
		$processed++;
		$percentage = round( $processed / $matchesCount * 100 ) . ' %';

		$this->doPrintDotProgressIndicator( false, $processed, $percentage );
		$disposerJob->executeWith( $row );
	}

	$this->reportMessage( "\n\n{$matchesCount} IDs removed.\n\n" );
}
408
|
|
|
|
409
|
|
|
/**
 * Verifies that the start ID file can be written to. For a file that
 * does not exist yet, its directory is checked instead.
 *
 * Terminates the script when the check fails.
 *
 * @param string $startIdFile
 *
 * @return boolean always true
 */
private function idFileIsWritable( $startIdFile ) {

	$target = file_exists( $startIdFile ) ? $startIdFile : dirname( $startIdFile );

	if ( is_writable( $target ) ) {
		return true;
	}

	die( "Cannot use a startidfile that we can't write to.\n" );
}
417
|
|
|
|
418
|
5 |
|
/**
 * Stores the given ID in the start ID file, provided writing to that
 * file was enabled by setOptions().
 *
 * @param integer $id
 */
private function writeIdToFile( $id ) {

	if ( !$this->canWriteToIdFile ) {
		return;
	}

	file_put_contents( $this->startIdFile, "$id" );
}
423
|
|
|
|
424
|
|
|
/**
 * Resets the namespace filters and derives them from the options:
 * `categories` selects NS_CATEGORY and `p` selects SMW_NS_PROPERTY.
 *
 * @param Options $options
 */
private function setFiltersFromOptions( Options $options ) {

	$selected = array();

	if ( $options->has( 'categories' ) ) {
		$selected[] = NS_CATEGORY;
	}

	if ( $options->has( 'p' ) ) {
		$selected[] = SMW_NS_PROPERTY;
	}

	$this->filters = $selected;
}
438
|
|
|
|
439
|
9 |
|
/**
 * Forwards a message to the reporter unless output is suppressed.
 *
 * @param string $message
 * @param boolean $output false to drop the message
 */
private function reportMessage( $message, $output = true ) {

	if ( !$output ) {
		return;
	}

	$this->reporter->reportMessage( $message );
}
444
|
|
|
|
445
|
5 |
|
/**
 * Prints one dot per call in rows of 60 dots, each complete row being
 * followed by the progress text. Nothing is printed in verbose mode.
 *
 * @param boolean $verbose
 * @param integer $counter
 * @param string $progress
 */
private function doPrintDotProgressIndicator( $verbose, $counter, $progress ) {

	$showIndicator = !$verbose;

	// A new row starts with the first dot after every 60 dots
	if ( ( $counter - 1 ) % 60 === 0 ) {
		$this->reportMessage( "\n", $showIndicator );
	}

	$this->reportMessage( '.', $showIndicator );

	if ( $counter % 60 === 0 ) {
		$this->reportMessage( ' ' . $progress, $showIndicator );
	}
}
457
|
|
|
|
458
|
|
|
} |
459
|
|
|
|
Instead of relying on super-globals, we recommend explicitly injecting the dependencies of your class. This makes your code less dependent on global state and generally more testable: