Completed
Push — master ( 314506...335380 )
by mw
100:54 queued 62:54
created

src/Maintenance/DataRebuilder.php (7 issues)

Labels
Severity

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
namespace SMW\Maintenance;
4
5
use LinkCache;
6
use Onoi\MessageReporter\MessageReporter;
7
use Onoi\MessageReporter\MessageReporterFactory;
8
use SMW\DIWikiPage;
9
use SMW\MediaWiki\TitleCreator;
10
use SMW\ApplicationFactory;
11
use SMW\Options;
12
use SMW\Store;
13
use Title;
14
15
/**
16
 * Is part of the `rebuildData.php` maintenance script to rebuild existing data
17
 * for the store
18
 *
19
 * @note This is an internal class and should not be used outside of smw-core
20
 *
21
 * @license GNU GPL v2+
22
 * @since 1.9.2
23
 *
24
 * @author mwjames
25
 */
26
class DataRebuilder {
27
28
	/**
29
	 * @var Store
30
	 */
31
	private $store;
32
33
	/**
34
	 * @var TitleCreator
35
	 */
36
	private $titleCreator;
37
38
	/**
39
	 * @var Options
40
	 */
41
	private $options;
42
43
	/**
44
	 * @var MessageReporter
45
	 */
46
	private $reporter;
47
48
	/**
49
	 * @var DistinctEntityDataRebuilder
50
	 */
51
	private $distinctEntityDataRebuilder;
52
53
	/**
54
	 * @var ExceptionFileLogger
55
	 */
56
	private $exceptionFileLogger;
57
58
	/**
59
	 * @var array
60
	 */
61
	private $exceptionLog = array();
62
63
	/**
64
	 * @var integer
65
	 */
66
	private $rebuildCount = 0;
67
68
	private $delay = false;
69
	private $canWriteToIdFile = false;
70
	private $start = 1;
71
	private $end = false;
72
73
	/**
74
	 * @var int[]
75
	 */
76
	private $filters = array();
77
	private $verbose = false;
78
	private $startIdFile = false;
79
80
	/**
	 * Stores the injected services and creates the collaborators used
	 * during a rebuild run.
	 *
	 * The reporter is initialised from
	 * MessageReporterFactory::newNullMessageReporter() and can be replaced
	 * through setMessageReporter().
	 *
	 * @since 1.9.2
	 *
	 * @param Store $store
	 * @param TitleCreator $titleCreator
	 */
	public function __construct( Store $store, TitleCreator $titleCreator ) {

		// Collaborators are created in the same order as before
		$nullReporter = MessageReporterFactory::getInstance()->newNullMessageReporter();
		$distinctRebuilder = new DistinctEntityDataRebuilder( $store, $titleCreator );
		$fileLogger = new ExceptionFileLogger( 'rebuilddata' );

		$this->store = $store;
		$this->titleCreator = $titleCreator;
		$this->reporter = $nullReporter;
		$this->distinctEntityDataRebuilder = $distinctRebuilder;
		$this->exceptionFileLogger = $fileLogger;
	}
93
94
	/**
	 * Replaces the reporter that receives all progress and status messages
	 * emitted through reportMessage().
	 *
	 * @since 2.1
	 *
	 * @param MessageReporter $reporter
	 */
	public function setMessageReporter( MessageReporter $reporter ) {
		$this->reporter = $reporter;
	}
102
103
	/**
	 * Applies the options that control a rebuild run.
	 *
	 * Options evaluated here:
	 * - `server`: overrides $GLOBALS['wgServer']
	 * - `d`: delay applied after each processed ID, given in milliseconds
	 * - `s`: ID to start with (takes precedence over `startidfile`)
	 * - `startidfile`: file the start ID is read from and the last ID is
	 *   written back to
	 * - `e`: ID to stop at (takes precedence over `n`)
	 * - `n`: value added to the start ID to compute the stop ID
	 * - `v`: verbose output
	 *
	 * The options are also forwarded to the exception file logger and used
	 * to derive the namespace filters.
	 *
	 * @since 1.9.2
	 *
	 * @param Options $options
	 */
	public function setOptions( Options $options ) {
		$this->options = $options;

		if ( $options->has( 'server' ) ) {
			$GLOBALS['wgServer'] = $options->get( 'server' );
		}

		if ( $options->has( 'd' ) ) {
			// Convert milliseconds to microseconds; a negative input is
			// clamped to 0 because usleep() rejects negative values
			$this->delay = max( 0, intval( $options->get( 'd' ) ) ) * 1000;
		}

		if ( $options->has( 's' ) ) {
			$this->start = max( 1, intval( $options->get( 's' ) ) );
		} elseif ( $options->has( 'startidfile' ) ) {

			// Read the option once instead of on every use
			$startIdFile = $options->get( 'startidfile' );

			// idFileIsWritable() terminates the script when the file cannot
			// be written, hence it only ever returns true
			$this->canWriteToIdFile = $this->idFileIsWritable( $startIdFile );
			$this->startIdFile = $startIdFile;

			if ( is_readable( $startIdFile ) ) {
				// Unreadable or non-numeric content ends up as 0 and is
				// raised to the minimum ID of 1
				$this->start = max( 1, intval( file_get_contents( $startIdFile ) ) );
			}
		}

		// Note: this might reasonably be larger than the page count
		if ( $options->has( 'e' ) ) {
			$this->end = intval( $options->get( 'e' ) );
		} elseif ( $options->has( 'n' ) ) {
			$this->end = $this->start + intval( $options->get( 'n' ) );
		}

		$this->verbose = $options->has( 'v' );
		$this->exceptionFileLogger->setOptions( $options );

		$this->setFiltersFromOptions( $options );
	}
143
144
	/**
	 * Entry point of the rebuild process.
	 *
	 * Reports the short class name of the store, performs a full delete
	 * when the `f` option is set and then runs either the distinct entity
	 * rebuild (for `page`, `query`, `redirects` or an active namespace
	 * filter) or the rebuild of the whole ID range.
	 *
	 * @since 1.9.2
	 *
	 * @return boolean
	 */
	public function rebuild() {

		$storeName = get_class( $this->store );

		// Keep only the part after the last namespace separator
		$separatorPosition = strrpos( $storeName, "\\" );

		if ( $separatorPosition !== false ) {
			$storeName = substr( $storeName, $separatorPosition + 1 );
		}

		$this->reportMessage( "\nRunning for storage: {$storeName}\n\n" );

		if ( $this->options->has( 'f' ) ) {
			$this->performFullDelete();
		}

		$isDistinctRebuild = $this->options->has( 'page' )
			|| $this->options->has( 'query' )
			|| $this->hasFilters()
			|| $this->options->has( 'redirects' );

		if ( !$isDistinctRebuild ) {
			return $this->doRebuildAll();
		}

		return $this->doRebuildDistinctEntities();
	}
170
171 7
	/**
	 * Whether at least one namespace filter was derived from the options.
	 *
	 * @return boolean
	 */
	private function hasFilters() {
		return count( $this->filters ) > 0;
	}
174
175
	/**
	 * Returns the counter maintained by the most recent rebuild run; it is
	 * 0 as long as no rebuild was executed.
	 *
	 * @since 1.9.2
	 *
	 * @return int
	 */
	public function getRebuildCount() {
		return $this->rebuildCount;
	}
183
184 9
	/**
	 * Delegates the rebuild to the DistinctEntityDataRebuilder, takes over
	 * its rebuild count and writes the exceptions it collected to the
	 * exception log file.
	 *
	 * @return boolean always true
	 */
	private function doRebuildDistinctEntities() {

		$rebuilder = $this->distinctEntityDataRebuilder;
		$fileLogger = $this->exceptionFileLogger;

		$rebuilder->setOptions( $this->options );
		$rebuilder->setMessageReporter( $this->reporter );
		$rebuilder->doRebuild();

		$this->rebuildCount = $rebuilder->getRebuildCount();

		$fileLogger->doWriteExceptionLog( $rebuilder->getExceptionLog() );

		// The summary is only of interest when exceptions were suppressed
		if ( $this->options->has( 'ignore-exceptions' ) && $fileLogger->getExceptionCounter() > 0 ) {
			$summary = "\n" . $fileLogger->getExceptionCounter() . " exceptions were ignored! (See ";
			$summary .= $fileLogger->getExceptionFile() . ").\n";

			$this->reportMessage( $summary );
		}

		return true;
	}
211
212 5
	/**
	 * Rebuilds the data for the configured ID range, one ID per dispatcher
	 * call.
	 *
	 * Sequence: request a dispatcher from the store, dispose entities that
	 * are marked as outdated, run the distinct entity rebuild with the `p`
	 * option enabled (unless `skip-properties` is set), call Store::clear()
	 * and finally iterate from `$this->start` until `$this->end` is passed
	 * (when set) or the ID is no longer positive.
	 *
	 * NOTE(review): the loop itself never changes `$id`; it is handed by
	 * reference to doExecuteFor() and is presumably advanced by the
	 * dispatcher's startRebuildWith() — confirm.
	 *
	 * @return boolean always true
	 * @throws \RuntimeException when the store does not return an object
	 */
	private function doRebuildAll() {

		$entityRebuildDispatcher = $this->store->refreshData(
			$this->start,
			1
		);

		// All calls below require an object; fail with an explicit message
		// instead of a fatal "call to a member function" error
		if ( !is_object( $entityRebuildDispatcher ) ) {
			throw new \RuntimeException(
				get_class( $this->store ) . '::refreshData was expected to return a dispatcher object, got ' .
				gettype( $entityRebuildDispatcher ) . '.'
			);
		}

		$entityRebuildDispatcher->setDispatchRangeLimit( 1 );

		$entityRebuildDispatcher->setUpdateJobParseMode(
			$this->options->has( 'shallow-update' ) ? SMW_UJ_PM_CLASTMDATE : false
		);

		$entityRebuildDispatcher->useJobQueueScheduler( false );
		$this->doDisposeMarkedOutdatedEntities();

		if ( !$this->options->has( 'skip-properties' ) ) {
			$this->options->set( 'p', true );
			$this->doRebuildDistinctEntities();
			$this->reportMessage( "\n" );
		}

		$this->store->clear();

		$this->reportMessage( "Refreshing all semantic data in the database!\n---\n" .
			" Some versions of PHP suffer from memory leaks in long-running \n" .
			" scripts. If your machine gets very slow after many pages \n" .
			" (typically more than 1000) were refreshed, please abort with\n" .
			" CTRL-C and resume this script at the last processed page id\n" .
			" using the parameter -s (use -v to display page ids during \n" .
			" refresh). Continue this until all pages have been refreshed.\n---\n"
		);

		// True only when an end ID beyond the start ID was given; neither
		// value changes while the loop runs, so it is evaluated once
		$hasRange = $this->end - $this->start > 0;

		// The maximum ID is only required without a usable range; query it once
		$maxId = $hasRange ? null : $entityRebuildDispatcher->getMaxId();

		$total = $hasRange ? $this->end - $this->start : $maxId;
		$id = $this->start;

		$this->reportMessage(
			" The progress displayed is an estimation and is self-adjusting \n" .
			" during the update process.\n---\n" );

		$this->reportMessage(
			"Processing all IDs from $this->start to " .
			( $this->end ? "$this->end" : $maxId ) . " ...\n"
		);

		$this->rebuildCount = 0;

		while ( ( !$this->end || $id <= $this->end ) && $id > 0 ) {

			$progress = '';

			$this->rebuildCount++;
			$this->exceptionLog = array();

			$this->doExecuteFor( $entityRebuildDispatcher, $id );

			// The percentage is only displayed after every 60th ID
			if ( $this->rebuildCount % 60 === 0 ) {
				$ratio = $hasRange ? $this->rebuildCount / $total : $entityRebuildDispatcher->getEstimatedProgress();
				$progress = round( $ratio * 100 ) . "%";
			}

			$hasDispatchedEntities = false;

			foreach ( $entityRebuildDispatcher->getDispatchedEntities() as $value ) {

				$hasDispatchedEntities = true;
				$text = $this->getHumanReadableTextFrom( $id, $value );

				$this->reportMessage(
					sprintf( "%-16s%s\n", "($this->rebuildCount/$total)", "Finished processing ID " . $text ),
					$this->options->has( 'v' )
				);

				if ( $this->options->has( 'ignore-exceptions' ) && isset( $this->exceptionLog[$id] ) ) {
					$this->exceptionFileLogger->doWriteExceptionLog(
						array( $id . ' ' . $text => $this->exceptionLog[$id] )
					);
				}
			}

			// An exception caught before any entity was dispatched would
			// otherwise be dropped without a trace in the log file; the key
			// mirrors the format used above with an empty description
			if ( !$hasDispatchedEntities && $this->options->has( 'ignore-exceptions' ) && isset( $this->exceptionLog[$id] ) ) {
				$this->exceptionFileLogger->doWriteExceptionLog(
					array( $id . ' ' => $this->exceptionLog[$id] )
				);
			}

			$this->doPrintDotProgressIndicator( $this->verbose, $this->rebuildCount, $progress );
		}

		$this->writeIdToFile( $id );
		$this->reportMessage( "\n\n$this->rebuildCount IDs refreshed.\n" );

		if ( $this->options->has( 'ignore-exceptions' ) && $this->exceptionFileLogger->getExceptionCounter() > 0 ) {
			$this->reportMessage( "\n" .
				$this->exceptionFileLogger->getExceptionCounter() . " exceptions were ignored! (See " .
				$this->exceptionFileLogger->getExceptionFile() . ").\n"
			);
		}

		return true;
	}
303
304 5
	/**
	 * Runs the dispatcher for a single ID and afterwards applies the
	 * configured delay and the periodic LinkCache reset.
	 *
	 * With the `ignore-exceptions` option an \Exception raised by the
	 * dispatcher is recorded in `$this->exceptionLog` (keyed by the ID)
	 * instead of being passed on to the caller.
	 *
	 * @param object $entityRebuildDispatcher object providing startRebuildWith()
	 * @param integer &$id handed on to startRebuildWith(); presumably
	 *  updated there to the next ID — confirm against the dispatcher
	 */
	private function doExecuteFor( $entityRebuildDispatcher, &$id ) {

		$ignoreExceptions = $this->options->has( 'ignore-exceptions' );

		try {
			$entityRebuildDispatcher->startRebuildWith( $id );
		} catch ( \Exception $e ) {

			// Without the option the failure is passed on unchanged
			if ( !$ignoreExceptions ) {
				throw $e;
			}

			$this->exceptionLog[$id] = array(
				'msg'   => $e->getMessage(),
				'trace' => $e->getTraceAsString()
			);
		}

		if ( $this->delay !== false ) {
			usleep( $this->delay );
		}

		// Avoid memory leaks, but only on every 100th page
		$isHundredthPage = $this->rebuildCount % 100 === 0;

		if ( $isHundredthPage ) {
			LinkCache::singleton()->clear();
		}
	}
328
329 2
	/**
	 * Builds the description of a processed ID for the verbose output.
	 *
	 * Returns an empty string unless the `v` option is set. Otherwise the
	 * result is the ID, a `*` marker when the entity list contains a `t`
	 * key, and a label derived from the last element of the list.
	 *
	 * @param integer $id
	 * @param array $entities
	 *
	 * @return string
	 */
	private function getHumanReadableTextFrom( $id, array $entities ) {

		if ( !$this->options->has( 'v' ) ) {
			return '';
		}

		// Indicates whether this is a MW page (*) or SMW's object table
		$marker = isset( $entities['t'] ) ? '*' : '';

		$lastEntity = end( $entities );

		if ( $lastEntity instanceof \Title ) {
			$label = $lastEntity->getPrefixedDBKey();
		} elseif ( $lastEntity instanceof DIWikiPage ) {
			$label = $lastEntity->getHash();
		} elseif ( is_string( $lastEntity ) && $lastEntity !== '' ) {
			$label = $lastEntity;
		} else {
			$label = 'N/A';
		}

		return $id . $marker . ' (' . $label . ')';
	}
350
351 2
	/**
	 * Announces the full delete, waits for the countdown to give the user
	 * a chance to abort, and then calls Store::drop() followed by
	 * Store::setupStore().
	 *
	 * @return boolean always true
	 */
	private function performFullDelete() {

		$announcement = "Deleting all stored data completely and rebuilding it again later!\n---\n";
		$announcement .= " Semantic data in the wiki might be incomplete for some time while this operation runs.\n\n";
		$announcement .= " NOTE: It is usually necessary to run this script ONE MORE TIME after this operation,\n";
		$announcement .= " since some properties' types are not stored yet in the first run.\n---\n";

		$this->reportMessage( $announcement );

		$isPartialRun = $this->options->has( 's' ) || $this->options->has( 'e' );

		if ( $isPartialRun ) {
			$warning = " WARNING: -s or -e are used, so some pages will not be refreshed at all!\n";
			$warning .= " Data for those pages will only be available again when they have been\n";
			$warning .= " refreshed as well!\n\n";

			$this->reportMessage( $warning );
		}

		// Remembered to unwind only the buffers opened from here on
		$initialBufferLevel = ob_get_level();

		$this->reportMessage( ' Abort with control-c in the next five seconds ...  ' );
		wfCountDown( 6 );

		$this->store->drop( $this->verbose );
		$this->store->setupStore( $this->verbose );

		// Be sure to have some buffer, otherwise some PHPs complain
		while ( ob_get_level() > $initialBufferLevel ) {
			ob_end_flush();
		}

		$this->reportMessage( "\nAll storage structures have been deleted and recreated.\n\n" );

		return true;
	}
383
384 5
	/**
	 * Hands every row of the outdated entities iterator to the
	 * EntityIdDisposerJob and reports the progress.
	 *
	 * Nothing is reported when the iterator contains no rows.
	 */
	private function doDisposeMarkedOutdatedEntities() {

		$disposerJob = ApplicationFactory::getInstance()->newJobFactory()->newEntityIdDisposerJob(
			Title::newFromText( __METHOD__ )
		);

		$outdatedEntities = $disposerJob->newOutdatedEntitiesResultIterator();
		$matches = $outdatedEntities->count();

		if ( $matches == 0 ) {
			return;
		}

		$this->reportMessage( "Removing table entries (marked for deletion).\n" );

		$processed = 0;

		foreach ( $outdatedEntities as $row ) {
			++$processed;

			$percentage = round( $processed / $matches * 100 ) . ' %';
			$this->doPrintDotProgressIndicator( false, $processed, $percentage );

			$disposerJob->executeWith( $row );
		}

		$this->reportMessage( "\n\n" . $matches . " IDs removed.\n\n" );
	}
408
409
	/**
	 * Ensures that the start ID file can be written.
	 *
	 * The file itself is checked when it exists, otherwise the directory
	 * it would be created in. The script is terminated when the check
	 * fails.
	 *
	 * @param string $startIdFile
	 *
	 * @return boolean always true
	 */
	private function idFileIsWritable( $startIdFile ) {

		if ( file_exists( $startIdFile ) ) {
			$target = $startIdFile;
		} else {
			$target = dirname( $startIdFile );
		}

		if ( is_writable( $target ) ) {
			return true;
		}

		die( "Cannot use a startidfile that we can't write to.\n" );
	}
417
418 5
	/**
	 * Writes the given ID to the start ID file, provided the file was
	 * found to be writable while the options were processed.
	 *
	 * @param integer $id
	 */
	private function writeIdToFile( $id ) {

		if ( !$this->canWriteToIdFile ) {
			return;
		}

		file_put_contents( $this->startIdFile, (string)$id );
	}
423
424
	/**
	 * Derives the namespace filters from the options: `categories` adds
	 * NS_CATEGORY and `p` adds SMW_NS_PROPERTY. Filters from a previous
	 * call are discarded.
	 *
	 * @param Options $options
	 */
	private function setFiltersFromOptions( Options $options ) {

		$namespaces = array();

		if ( $options->has( 'categories' ) ) {
			$namespaces[] = NS_CATEGORY;
		}

		if ( $options->has( 'p' ) ) {
			$namespaces[] = SMW_NS_PROPERTY;
		}

		$this->filters = $namespaces;
	}
438
439 9
	/**
	 * Passes a message to the reporter unless the output is switched off
	 * for this call.
	 *
	 * @param string $message
	 * @param boolean $output
	 */
	private function reportMessage( $message, $output = true ) {

		if ( !$output ) {
			return;
		}

		$this->reporter->reportMessage( $message );
	}
444
445 5
	/**
	 * Prints one dot per call, arranged in rows of 60 dots with the
	 * progress text appended to each completed row. Nothing is printed in
	 * verbose mode.
	 *
	 * @param boolean $verbose
	 * @param integer $counter
	 * @param string $progress
	 */
	private function doPrintDotProgressIndicator( $verbose, $counter, $progress ) {

		// Every message below is suppressed in verbose mode
		if ( $verbose ) {
			return;
		}

		$startsNewRow = ( $counter - 1 ) % 60 === 0;
		$completesRow = $counter % 60 === 0;

		if ( $startsNewRow ) {
			$this->reportMessage( "\n" );
		}

		$this->reportMessage( '.' );

		if ( $completesRow ) {
			$this->reportMessage( " $progress" );
		}
	}
457
458
}
459