These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | namespace SMW\Maintenance; |
||
4 | |||
5 | use LinkCache; |
||
6 | use Onoi\MessageReporter\MessageReporter; |
||
7 | use Onoi\MessageReporter\MessageReporterFactory; |
||
8 | use SMW\DIWikiPage; |
||
9 | use SMW\MediaWiki\TitleCreator; |
||
10 | use SMW\ApplicationFactory; |
||
11 | use SMW\Options; |
||
12 | use SMW\Store; |
||
13 | use Title; |
||
14 | |||
15 | /** |
||
16 | * Is part of the `rebuildData.php` maintenance script to rebuild existing data |
||
17 | * for the store |
||
18 | * |
||
19 | * @note This is an internal class and should not be used outside of smw-core |
||
20 | * |
||
21 | * @license GNU GPL v2+ |
||
22 | * @since 1.9.2 |
||
23 | * |
||
24 | * @author mwjames |
||
25 | */ |
||
26 | class DataRebuilder { |
||
27 | |||
28 | /** |
||
29 | * @var Store |
||
30 | */ |
||
31 | private $store; |
||
32 | |||
33 | /** |
||
34 | * @var TitleCreator |
||
35 | */ |
||
36 | private $titleCreator; |
||
37 | |||
38 | /** |
||
39 | * @var Options |
||
40 | */ |
||
41 | private $options; |
||
42 | |||
43 | /** |
||
44 | * @var MessageReporter |
||
45 | */ |
||
46 | private $reporter; |
||
47 | |||
48 | /** |
||
49 | * @var DistinctEntityDataRebuilder |
||
50 | */ |
||
51 | private $distinctEntityDataRebuilder; |
||
52 | |||
53 | /** |
||
54 | * @var ExceptionFileLogger |
||
55 | */ |
||
56 | private $exceptionFileLogger; |
||
57 | |||
58 | /** |
||
59 | * @var array |
||
60 | */ |
||
61 | private $exceptionLog = array(); |
||
62 | |||
63 | /** |
||
64 | * @var integer |
||
65 | */ |
||
66 | private $rebuildCount = 0; |
||
67 | |||
68 | private $delay = false; |
||
69 | private $canWriteToIdFile = false; |
||
70 | private $start = 1; |
||
71 | private $end = false; |
||
72 | |||
73 | /** |
||
74 | * @var int[] |
||
75 | */ |
||
76 | private $filters = array(); |
||
77 | private $verbose = false; |
||
78 | private $startIdFile = false; |
||
79 | |||
/**
 * Wires up the collaborators used by the rebuild process.
 *
 * The reporter defaults to a null reporter until setMessageReporter() is
 * called. The same store and title creator are handed on to the
 * DistinctEntityDataRebuilder; ignored exceptions are logged through an
 * ExceptionFileLogger created with the name 'rebuilddata'.
 *
 * @since 1.9.2
 *
 * @param Store $store
 * @param TitleCreator $titleCreator
 */
public function __construct( Store $store, TitleCreator $titleCreator ) {
	// Injected dependencies
	$this->store = $store;
	$this->titleCreator = $titleCreator;

	// Internally created helpers
	$this->reporter = MessageReporterFactory::getInstance()->newNullMessageReporter();
	$this->distinctEntityDataRebuilder = new DistinctEntityDataRebuilder(
		$store,
		$titleCreator
	);
	$this->exceptionFileLogger = new ExceptionFileLogger( 'rebuilddata' );
}
93 | |||
/**
 * Replaces the reporter that receives all progress and status messages.
 *
 * @since 2.1
 *
 * @param MessageReporter $reporter
 */
public function setMessageReporter( MessageReporter $reporter ) {
	$this->reporter = $reporter;
}
102 | |||
/**
 * Stores the options and derives the rebuild settings from them.
 *
 * Recognized keys as read by this method:
 * - 'server': overrides $GLOBALS['wgServer']
 * - 'd': delay between IDs, given in milliseconds
 * - 's': first ID to process (at least 1); takes precedence over 'startidfile'
 * - 'startidfile': file the start ID is read from (and later written to)
 * - 'e': last ID to process; takes precedence over 'n'
 * - 'n': number added to the start ID to compute the last ID
 * - 'v': verbose output
 *
 * The options are also forwarded to the exception file logger and used to
 * set up the namespace filters.
 *
 * @since 1.9.2
 *
 * @param Options $options
 */
public function setOptions( Options $options ) {
	$this->options = $options;

	if ( $options->has( 'server' ) ) {
		$GLOBALS['wgServer'] = $options->get( 'server' );
	}

	if ( $options->has( 'd' ) ) {
		// Convert milliseconds to microseconds. The value is clamped at 0
		// because usleep() rejects negative values (ValueError on PHP 8).
		$this->delay = max( 0, intval( $options->get( 'd' ) ) ) * 1000;
	}

	if ( $options->has( 's' ) ) {
		$this->start = max( 1, intval( $options->get( 's' ) ) );
	} elseif ( $options->has( 'startidfile' ) ) {
		$startIdFile = $options->get( 'startidfile' );

		$this->canWriteToIdFile = $this->idFileIsWritable( $startIdFile );
		$this->startIdFile = $startIdFile;

		// Only read from a regular file; a readable directory would make
		// file_get_contents() emit a warning
		if ( is_file( $startIdFile ) && is_readable( $startIdFile ) ) {
			$this->start = max( 1, intval( file_get_contents( $startIdFile ) ) );
		}
	}

	// Note: this might reasonably be larger than the page count
	if ( $options->has( 'e' ) ) {
		$this->end = intval( $options->get( 'e' ) );
	} elseif ( $options->has( 'n' ) ) {
		$this->end = $this->start + intval( $options->get( 'n' ) );
	}

	$this->verbose = $options->has( 'v' );
	$this->exceptionFileLogger->setOptions( $options );

	$this->setFiltersFromOptions( $options );
}
143 | |||
/**
 * Entry point of the rebuild process.
 *
 * Reports the (unqualified) class name of the store, optionally drops and
 * recreates the storage when 'f' is set, and then either rebuilds a
 * selection of entities ('page', 'query', 'redirects' or an active
 * namespace filter) or walks over all entity IDs.
 *
 * @since 1.9.2
 *
 * @return boolean
 */
public function rebuild() {

	$storeName = get_class( $this->store );

	// Strip the namespace so that only the short class name is reported
	$lastSeparator = strrpos( $storeName, "\\" );

	if ( $lastSeparator !== false ) {
		$storeName = substr( $storeName, $lastSeparator + 1 );
	}

	$this->reportMessage( "\nRunning for storage: " . $storeName . "\n\n" );

	if ( $this->options->has( 'f' ) ) {
		$this->performFullDelete();
	}

	$isSelectiveRebuild = $this->options->has( 'page' )
		|| $this->options->has( 'query' )
		|| $this->hasFilters()
		|| $this->options->has( 'redirects' );

	if ( !$isSelectiveRebuild ) {
		return $this->doRebuildAll();
	}

	return $this->doRebuildDistinctEntities();
}
170 | |||
/**
 * Whether at least one namespace filter has been registered.
 *
 * @return boolean
 */
private function hasFilters() {
	return count( $this->filters ) > 0;
}
174 | |||
/**
 * Returns the counter maintained by the most recent rebuild run.
 *
 * @since 1.9.2
 *
 * @return int
 */
public function getRebuildCount() {
	return $this->rebuildCount;
}
183 | |||
/**
 * Delegates the rebuild to the DistinctEntityDataRebuilder.
 *
 * Takes over its rebuild count, writes its exception log to the exception
 * file and, when 'ignore-exceptions' is set, reports how many exceptions
 * were ignored.
 *
 * @return boolean always true
 */
private function doRebuildDistinctEntities() {

	$rebuilder = $this->distinctEntityDataRebuilder;
	$logger = $this->exceptionFileLogger;

	$rebuilder->setOptions( $this->options );
	$rebuilder->setMessageReporter( $this->reporter );
	$rebuilder->doRebuild();

	$this->rebuildCount = $rebuilder->getRebuildCount();

	$logger->doWriteExceptionLog( $rebuilder->getExceptionLog() );

	if ( !$this->options->has( 'ignore-exceptions' ) ) {
		return true;
	}

	$ignoredCount = $logger->getExceptionCounter();

	if ( $ignoredCount > 0 ) {
		$this->reportMessage(
			"\n" . $ignoredCount . " exceptions were ignored! (See " . $logger->getExceptionFile() . ").\n"
		);
	}

	return true;
}
211 | |||
/**
 * Refreshes the data for every entity ID from the start ID up to the
 * (optional) end ID.
 *
 * Steps, in order: obtain the dispatcher from the store, dispose entities
 * marked as outdated, rebuild properties first (unless 'skip-properties'
 * is set), clear the store, then process ID by ID while reporting the
 * progress. Finally the current ID is written to the start ID file (when
 * one is in use) and a summary is reported.
 *
 * @return boolean always true
 */
private function doRebuildAll() {

	$dispatcher = $this->store->refreshData( $this->start, 1 );
	$dispatcher->setDispatchRangeLimit( 1 );

	$parseMode = false;

	if ( $this->options->has( 'shallow-update' ) ) {
		$parseMode = SMW_UJ_PM_CLASTMDATE;
	}

	$dispatcher->setUpdateJobParseMode( $parseMode );
	$dispatcher->useJobQueueScheduler( false );

	$this->doDisposeMarkedOutdatedEntities();

	// Properties are rebuilt up front via the distinct entity rebuilder
	if ( !$this->options->has( 'skip-properties' ) ) {
		$this->options->set( 'p', true );
		$this->doRebuildDistinctEntities();
		$this->reportMessage( "\n" );
	}

	$this->store->clear();

	$this->reportMessage( implode( '', array(
		"Refreshing all semantic data in the database!\n---\n",
		" Some versions of PHP suffer from memory leaks in long-running \n",
		" scripts. If your machine gets very slow after many pages \n",
		" (typically more than 1000) were refreshed, please abort with\n",
		" CTRL-C and resume this script at the last processed page id\n",
		" using the parameter -s (use -v to display page ids during \n",
		" refresh). Continue this until all pages have been refreshed.\n---\n"
	) ) );

	// True only when an end ID above the start ID was given; otherwise the
	// total and the progress are taken from the dispatcher
	$hasRange = $this->end && $this->end - $this->start > 0;

	if ( $hasRange ) {
		$total = $this->end - $this->start;
	} else {
		$total = $dispatcher->getMaxId();
	}

	$id = $this->start;

	$this->reportMessage(
		" The progress displayed is an estimation and is self-adjusting \n" .
		" during the update process.\n---\n"
	);

	$lastId = $this->end ? "$this->end" : $dispatcher->getMaxId();

	$this->reportMessage( "Processing all IDs from {$this->start} to " . $lastId . " ...\n" );

	$this->rebuildCount = 0;

	$isVerboseOutput = $this->options->has( 'v' );
	$ignoreExceptions = $this->options->has( 'ignore-exceptions' );

	// $id is handed on by reference (see doExecuteFor); the loop stops once
	// it is no longer positive or has passed the end ID
	while ( ( !$this->end || $id <= $this->end ) && $id > 0 ) {

		$progress = '';

		$this->rebuildCount++;
		$this->exceptionLog = array();

		$this->doExecuteFor( $dispatcher, $id );

		// A percentage is only computed when a line of 60 dots is complete
		if ( $this->rebuildCount % 60 === 0 ) {
			if ( $hasRange ) {
				$ratio = $this->rebuildCount / $total;
			} else {
				$ratio = $dispatcher->getEstimatedProgress();
			}

			$progress = round( $ratio * 100 ) . "%";
		}

		foreach ( $dispatcher->getDispatchedEntities() as $dispatched ) {

			$text = $this->getHumanReadableTextFrom( $id, $dispatched );

			$this->reportMessage(
				sprintf( "%-16s%s\n", "({$this->rebuildCount}/{$total})", "Finished processing ID " . $text ),
				$isVerboseOutput
			);

			if ( $ignoreExceptions && isset( $this->exceptionLog[$id] ) ) {
				$this->exceptionFileLogger->doWriteExceptionLog(
					array( $id . ' ' . $text => $this->exceptionLog[$id] )
				);
			}
		}

		$this->doPrintDotProgressIndicator( $this->verbose, $this->rebuildCount, $progress );
	}

	$this->writeIdToFile( $id );
	$this->reportMessage( "\n\n{$this->rebuildCount} IDs refreshed.\n" );

	if ( $this->options->has( 'ignore-exceptions' ) && $this->exceptionFileLogger->getExceptionCounter() > 0 ) {
		$this->reportMessage(
			"\n" . $this->exceptionFileLogger->getExceptionCounter() . " exceptions were ignored! (See " .
			$this->exceptionFileLogger->getExceptionFile() . ").\n"
		);
	}

	return true;
}
303 | |||
/**
 * Runs the dispatcher for a single ID.
 *
 * With 'ignore-exceptions' set, an exception raised by the dispatcher is
 * recorded in the exception log (message and trace) instead of being
 * propagated. Afterwards the configured delay is applied and the link
 * cache is cleared on every 100th processed ID.
 *
 * @param mixed $entityRebuildDispatcher dispatcher returned by Store::refreshData
 * @param integer &$id passed on to startRebuildWith(); presumably advanced
 *  by the dispatcher (TODO confirm against its implementation)
 */
private function doExecuteFor( $entityRebuildDispatcher, &$id ) {

	if ( $this->options->has( 'ignore-exceptions' ) ) {
		try {
			$entityRebuildDispatcher->startRebuildWith( $id );
		} catch ( \Exception $exception ) {
			$this->exceptionLog[$id] = array(
				'msg'   => $exception->getMessage(),
				'trace' => $exception->getTraceAsString()
			);
		}
	} else {
		$entityRebuildDispatcher->startRebuildWith( $id );
	}

	if ( $this->delay !== false ) {
		usleep( $this->delay );
	}

	// Avoid memory leaks, but only every 100 pages
	$isHundredthId = $this->rebuildCount % 100 === 0;

	if ( $isHundredthId ) {
		LinkCache::singleton()->clear();
	}
}
328 | |||
/**
 * Builds the text shown for a processed ID in verbose mode.
 *
 * Returns an empty string unless 'v' is set. Otherwise the ID (followed by
 * '*' when the entities contain a 't' key) is returned together with a
 * label derived from the last element of the entities: the prefixed DB key
 * of a Title, the hash of a DIWikiPage, a non-empty string as is, or 'N/A'.
 *
 * @param integer $id
 * @param array $entities
 *
 * @return string
 */
private function getHumanReadableTextFrom( $id, array $entities ) {

	if ( !$this->options->has( 'v' ) ) {
		return '';
	}

	// Indicates whether this is a MW page (*) or SMW's object table
	$text = (string)$id;

	if ( isset( $entities['t'] ) ) {
		$text .= '*';
	}

	$last = end( $entities );

	if ( $last instanceof \Title ) {
		$label = $last->getPrefixedDBKey();
	} elseif ( $last instanceof DIWikiPage ) {
		$label = $last->getHash();
	} elseif ( is_string( $last ) && $last !== '' ) {
		$label = $last;
	} else {
		$label = 'N/A';
	}

	return $text . ' (' . $label . ')';
}
350 | |||
/**
 * Drops the storage and sets it up again after a short countdown.
 *
 * An additional warning is reported when 's' or 'e' restrict the ID range.
 * Output buffers opened while the store is dropped and recreated are
 * flushed afterwards.
 *
 * @return boolean always true
 */
private function performFullDelete() {

	$notice = "Deleting all stored data completely and rebuilding it again later!\n---\n";
	$notice .= " Semantic data in the wiki might be incomplete for some time while this operation runs.\n\n";
	$notice .= " NOTE: It is usually necessary to run this script ONE MORE TIME after this operation,\n";
	$notice .= " since some properties' types are not stored yet in the first run.\n---\n";

	$this->reportMessage( $notice );

	$isRangeRestricted = $this->options->has( 's' ) || $this->options->has( 'e' );

	if ( $isRangeRestricted ) {
		$warning = " WARNING: -s or -e are used, so some pages will not be refreshed at all!\n";
		$warning .= " Data for those pages will only be available again when they have been\n";
		$warning .= " refreshed as well!\n\n";

		$this->reportMessage( $warning );
	}

	// Remember the buffer level so that only buffers opened below are flushed
	$initialBufferLevel = ob_get_level();

	$this->reportMessage( ' Abort with control-c in the next five seconds ... ' );
	wfCountDown( 6 );

	$this->store->drop( $this->verbose );
	$this->store->setupStore( $this->verbose );

	// Be sure to have some buffer, otherwise some PHPs complain
	while ( ob_get_level() > $initialBufferLevel ) {
		ob_end_flush();
	}

	$this->reportMessage( "\nAll storage structures have been deleted and recreated.\n\n" );

	return true;
}
383 | |||
/**
 * Removes the table entries that have been marked for deletion.
 *
 * Uses an EntityIdDisposerJob to iterate over the outdated entities and
 * to execute the disposal per row, while printing a dot progress
 * indicator. Nothing is reported when there are no matches.
 */
private function doDisposeMarkedOutdatedEntities() {

	$jobFactory = ApplicationFactory::getInstance()->newJobFactory();
	$disposerJob = $jobFactory->newEntityIdDisposerJob( Title::newFromText( __METHOD__ ) );

	$outdatedEntities = $disposerJob->newOutdatedEntitiesResultIterator();
	$matchesCount = $outdatedEntities->count();

	if ( $matchesCount == 0 ) {
		return;
	}

	$this->reportMessage( "Removing table entries (marked for deletion).\n" );

	$counter = 0;

	foreach ( $outdatedEntities as $row ) {
		$counter++;

		$percentage = round( $counter / $matchesCount * 100 ) . ' %';
		$this->doPrintDotProgressIndicator( false, $counter, $percentage );

		$disposerJob->executeWith( $row );
	}

	$this->reportMessage( "\n\n{$matchesCount} IDs removed.\n\n" );
}
408 | |||
/**
 * Ensures the start ID file can be written to.
 *
 * For an existing file the file itself is checked, otherwise its parent
 * directory. The script is terminated when the target is not writable.
 *
 * @param string $startIdFile
 *
 * @return boolean always true (the method does not return otherwise)
 */
private function idFileIsWritable( $startIdFile ) {

	$target = file_exists( $startIdFile ) ? $startIdFile : dirname( $startIdFile );

	if ( is_writable( $target ) ) {
		return true;
	}

	die( "Cannot use a startidfile that we can't write to.\n" );
}
417 | |||
/**
 * Persists the given ID in the start ID file, provided such a file was
 * configured and found to be writable.
 *
 * @param integer $id
 */
private function writeIdToFile( $id ) {

	if ( !$this->canWriteToIdFile ) {
		return;
	}

	file_put_contents( $this->startIdFile, (string)$id );
}
423 | |||
/**
 * Derives the namespace filters from the options: 'categories' adds
 * NS_CATEGORY and 'p' adds SMW_NS_PROPERTY. Previously set filters are
 * discarded.
 *
 * @param Options $options
 */
private function setFiltersFromOptions( Options $options ) {

	$namespaces = array();

	if ( $options->has( 'categories' ) ) {
		$namespaces[] = NS_CATEGORY;
	}

	if ( $options->has( 'p' ) ) {
		$namespaces[] = SMW_NS_PROPERTY;
	}

	$this->filters = $namespaces;
}
438 | |||
/**
 * Passes a message on to the reporter unless output is suppressed.
 *
 * @param string $message
 * @param boolean $output false to drop the message
 */
private function reportMessage( $message, $output = true ) {

	if ( !$output ) {
		return;
	}

	$this->reporter->reportMessage( $message );
}
444 | |||
/**
 * Prints one dot per call, arranged in lines of 60 dots with the progress
 * text appended to each completed line. Nothing is printed in verbose mode.
 *
 * @param boolean $verbose
 * @param integer $counter
 * @param string $progress
 */
private function doPrintDotProgressIndicator( $verbose, $counter, $progress ) {

	// Every message below is suppressed in verbose mode
	if ( $verbose ) {
		return;
	}

	$startsNewLine = ( $counter - 1 ) % 60 === 0;
	$completesLine = $counter % 60 === 0;

	if ( $startsNewLine ) {
		$this->reportMessage( "\n" );
	}

	$this->reportMessage( '.' );

	if ( $completesLine ) {
		$this->reportMessage( " $progress" );
	}
}
457 | |||
458 | } |
||
459 |
Methods can only be called on objects. This check looks for methods being called on variables that have been inferred to never be objects.