Completed
Push — master ( 314506...335380 )
by mw
100:54 queued 62:54
created

src/SQLStore/EntityRebuildDispatcher.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
namespace SMW\SQLStore;
4
5
use Hooks;
6
use SMW\ApplicationFactory;
7
use SMW\DIProperty;
8
use SMW\DIWikiPage;
9
use SMW\SemanticData;
10
use SMW\Store;
11
use Title;
12
13
/**
14
 * @private
15
 *
16
 * @license GNU GPL v2+
17
 * @since 2.3
18
 *
19
 * @author Markus Krötzsch
20
 * @author Jeroen De Dauw
21
 * @author Nischay Nahata
22
 * @author mwjames
23
 */
24
class EntityRebuildDispatcher {
25
26
	/**
27
	 * @var SQLStore
28
	 */
29
	private $store;
30
31
	/**
32
	 * @var PropertyTableIdReferenceDisposer
33
	 */
34
	private $propertyTableIdReferenceDisposer;
35
36
	/**
37
	 * @var \SMW\MediaWiki\Jobs\JobFactory
38
	 */
39
	private $jobFactory;
40
41
	/**
42
	 * @var integer
43
	 */
44
	private $updateJobParseMode;
45
46
	/**
47
	 * @var boolean
48
	 */
49
	private $useJobQueueScheduler = true;
50
51
	/**
52
	 * @var array|false
53
	 */
54
	private $namespaces = false;
55
56
	/**
57
	 * @var integer
58
	 */
59
	private $iterationLimit = 1;
60
61
	/**
62
	 * @var integer|float
63
	 */
64
	private $progress = 1;
65
66
	/**
67
	 * @var array
68
	 */
69
	private $dispatchedEntities = array();
70
71
	/**
	 * Binds the dispatcher to a store and creates the collaborators it needs
	 * (reference disposer and job factory).
	 *
	 * @since 2.3
	 *
	 * @param SQLStore $store
	 */
	public function __construct( SQLStore $store ) {
		$this->store = $store;
		$this->propertyTableIdReferenceDisposer = new PropertyTableIdReferenceDisposer( $store );

		// Static analysis reports the factory returned here as being of type
		// \SMW\MediaWiki\Jobs\JobFactory
		$applicationFactory = ApplicationFactory::getInstance();
		$this->jobFactory = $applicationFactory->newJobFactory();
	}
81
82
	/**
	 * Sets the parse mode that is handed as 'pm' parameter to every update
	 * job created by this dispatcher.
	 *
	 * @since 2.3
	 *
	 * @param integer $updateJobParseMode
	 */
	public function setUpdateJobParseMode( $updateJobParseMode ) {
		$this->updateJobParseMode = $updateJobParseMode;
	}
90
91
	/**
	 * Selects how collected update jobs are processed: true hands them to the
	 * job factory as a batch insert, false runs each job directly.
	 *
	 * @since 2.3
	 *
	 * @param boolean $useJobQueueScheduler
	 */
	public function useJobQueueScheduler( $useJobQueueScheduler ) {
		$this->useJobQueueScheduler = $useJobQueueScheduler ? true : false;
	}
99
100
	/**
	 * Limits the creation of update jobs to entities of the listed
	 * namespaces; false (or any other empty value) means no restriction.
	 *
	 * @since 2.3
	 *
	 * @param array|false $namespaces
	 */
	public function setRestrictionToNamespaces( $namespaces ) {
		$this->namespaces = $namespaces;
	}
108
109
	/**
	 * Sets how many consecutive ids are inspected by a single call to
	 * startRebuildWith.
	 *
	 * @since 2.3
	 *
	 * @param integer $iterationLimit
	 */
	public function setDispatchRangeLimit( $iterationLimit ) {
		$this->iterationLimit = intval( $iterationLimit );
	}
117
118
	/**
	 * Returns the highest id in use, taken over both the MediaWiki page table
	 * and the SMW id table.
	 *
	 * @since 2.3
	 *
	 * @return integer
	 */
	public function getMaxId() {

		$connection = $this->store->getConnection( 'mw.db' );

		$highestPageId = (int)$connection->selectField( 'page', 'MAX(page_id)', '', __METHOD__ );
		$highestSmwId = (int)$connection->selectField( \SMWSql3SmwIds::TABLE_NAME, 'MAX(smw_id)', '', __METHOD__ );

		// Whichever pool reaches further determines the overall maximum
		return $highestPageId > $highestSmwId ? $highestPageId : $highestSmwId;
	}
143
144
	/**
	 * Indicates the overall progress of the rebuild process as computed by
	 * the most recent call to startRebuildWith (current id divided by the
	 * maximum id, or 1 when no further position is available). The initial
	 * value before any run is 1.
	 *
	 * @since 2.3
	 *
	 * @return integer|float
	 */
	public function getEstimatedProgress() {
		return $this->progress;
	}
155
156
	/**
	 * Returns the entities recorded during the most recent call to
	 * startRebuildWith. Each entry is a single-element array keyed by 't'
	 * (found via the page id range) or 's' (found via the SMW id range).
	 *
	 * @since 2.4
	 *
	 * @return array
	 */
	public function getDispatchedEntities() {
		return $this->dispatchedEntities;
	}
164
165
	/**
	 * Dispatching of a single or a chunk of ids in either online or batch mode
	 * using the JobQueueScheduler
	 *
	 * Update jobs are collected for the range starting at $id (length given by
	 * the dispatch range limit), first by MediaWiki page id and then by SMW
	 * id. After the hooks had a chance to modify the job list, the jobs are
	 * either batch inserted or run directly.
	 *
	 * @since 2.3
	 *
	 * @param integer &$id start of the range; on return it holds the next
	 *  position to continue with, or -1 if no next position is available
	 *
	 * @return integer|float estimated progress, see getEstimatedProgress
	 */
	public function startRebuildWith( &$id ) {

		$updateJobs = array();
		$this->dispatchedEntities = array();

		$this->createUpdateJobsForTitleIdRange( $id, $updateJobs );

		// Was nothing done in this run? The SMW id pass below clears the flag
		// as soon as it encounters a row, even if no job is created for it
		$emptyRange = $updateJobs === array();

		$this->createUpdateJobsForSmwIdRange( $id, $updateJobs, $emptyRange );

		// Deprecated since 2.3, use 'SMW::SQLStore::BeforeDataRebuildJobInsert'
		Hooks::run( 'smwRefreshDataJobs', array( &$updateJobs ) );

		Hooks::run( 'SMW::SQLStore::BeforeDataRebuildJobInsert', array( $this->store, &$updateJobs ) );

		if ( $this->useJobQueueScheduler ) {
			$this->jobFactory->batchInsert( $updateJobs );
		} else {
			foreach ( $updateJobs as $job ) {
				$job->run();
			}
		}

		// -1 means that no next position is available
		$this->findNextIdPosition( $id, $emptyRange );

		// The maximum is only looked up when there is a next position; a
		// maximum of 0 (both id pools empty) must not be used as divisor
		$maxId = $id > 0 ? $this->getMaxId() : 0;
		$this->progress = $maxId > 0 ? $id / $maxId : 1;

		return $this->progress;
	}
207
208
	/**
	 * Collects update jobs for all pages whose MediaWiki page id lies within
	 * [$id, $id + iterationLimit).
	 *
	 * Every title found is recorded as dispatched entity (key 't'), whereas a
	 * job is only created when the title passes the namespace restriction.
	 *
	 * @param integer $id
	 * @param UpdateJob[] &$updateJobs
	 */
	private function createUpdateJobsForTitleIdRange( $id, &$updateJobs ) {

		// Update by MediaWiki page id --> make sure we get all pages.
		$upperBound = $id + $this->iterationLimit;
		$pageIds = array();

		// Enumerate the ids of the range
		$pageId = $id;

		while ( $pageId < $upperBound ) {
			$pageIds[] = $pageId;
			$pageId++;
		}

		foreach ( Title::newFromIDs( $pageIds ) as $title ) {

			$isSelectedNamespace = !$this->namespaces || in_array( $title->getNamespace(), $this->namespaces );

			if ( $isSelectedNamespace ) {
				$updateJobs[] = $this->newUpdateJob( $title );
			}

			// Recorded independent of the namespace restriction
			$this->dispatchedEntities[] = array( 't' => $title->getPrefixedDBKey() );
		}
	}
232
233
	/**
	 * Inspects all rows of the SMW id table whose smw_id lies within
	 * [$id, $id + iterationLimit) and, depending on the kind of row, creates
	 * an update job, removes outdated references, or replaces the stored data
	 * with an empty container.
	 *
	 * @param integer $id
	 * @param UpdateJob[] &$updateJobs
	 * @param bool &$emptyRange set to false as soon as a row is found, even if
	 *  no job was created for it
	 */
	private function createUpdateJobsForSmwIdRange( $id, &$updateJobs, &$emptyRange ) {

		// update by internal SMW id --> make sure we get all objects in SMW
		$db = $this->store->getConnection( 'mw.db' );

		$res = $db->select(
			\SMWSql3SmwIds::TABLE_NAME,
			array( 'smw_id', 'smw_title', 'smw_namespace', 'smw_iw', 'smw_subobject', 'smw_sortkey', 'smw_proptable_hash' ),
			array(
				// The id is handed in by reference from the caller, ensure
				// that only an integer ends up in the condition
				"smw_id >= " . (int)$id,
				"smw_id < " . $db->addQuotes( $id + $this->iterationLimit )
			),
			__METHOD__
		);

		foreach ( $res as $row ) {
			$emptyRange = false; // note this even if no jobs were created

			if ( $this->namespaces && !in_array( $row->smw_namespace, $this->namespaces ) ) {
				continue;
			}

			// Row stands for a property itself (no interwiki marker, no subobject)
			$isProperty = $row->smw_namespace == SMW_NS_PROPERTY && $row->smw_iw == '' && $row->smw_subobject == '';

			// Find page to refresh, even for special properties:
			if ( $row->smw_title != '' && $row->smw_title[0] != '_' ) {
				$titleKey = $row->smw_title;
			} elseif ( $isProperty ) {
				$titleKey = str_replace( ' ', '_', DIProperty::findPropertyLabel( $row->smw_title ) );
			} else {
				$titleKey = '';
			}

			if ( $row->smw_subobject !== '' && $row->smw_iw !== SMW_SQL3_SMWDELETEIW ) {
				// leave subobjects alone; they ought to be changed with their pages
				$this->dispatchedEntities[] = array( 's' => $row->smw_title . '#' . $row->smw_namespace . '#' . $row->smw_subobject );
			} elseif ( $this->isPlainObjectValue( $row ) ) {
				$this->propertyTableIdReferenceDisposer->removeOutdatedEntityReferencesById( $row->smw_id );
			} elseif ( $row->smw_iw === '' && $titleKey != '' ) {
				// objects representing pages
				$title = Title::makeTitleSafe( $row->smw_namespace, $titleKey );

				if ( $title !== null ) {
					$this->dispatchedEntities[] = array( 's' => $title->getPrefixedDBKey() );
					$updateJobs[] = $this->newUpdateJob( $title );
				}

			} elseif ( $row->smw_iw == SMW_SQL3_SMWREDIIW && $titleKey != '' ) {
				// TODO: special treatment of redirects needed, since the store will
				// not act on redirects that did not change according to its records
				$title = Title::makeTitleSafe( $row->smw_namespace, $titleKey );

				if ( $title !== null && !$title->exists() ) {
					$this->dispatchedEntities[] = array( 's' => $title->getPrefixedDBKey() );
					$updateJobs[] = $this->newUpdateJob( $title );
				}
			} elseif ( $row->smw_iw == SMW_SQL3_SMWIW_OUTDATED || $row->smw_iw == SMW_SQL3_SMWDELETEIW ) { // remove outdated internal object references
				$this->propertyTableIdReferenceDisposer->cleanUpTableEntriesById( $row->smw_id );
			} elseif ( $titleKey != '' ) { // "normal" interwiki pages or outdated internal objects -- delete
				$diWikiPage = new DIWikiPage( $titleKey, $row->smw_namespace, $row->smw_iw );
				$emptySemanticData = new SemanticData( $diWikiPage );
				$this->store->updateData( $emptySemanticData );
				// NOTE(review): unlike the other entries this records the
				// DIWikiPage object and not a string -- confirm consumers cope
				$this->dispatchedEntities[] = array( 's' => $diWikiPage );
			}

			if ( $isProperty ) {
				$this->markPossibleDuplicateProperties( $row );
			}
		}

		$db->freeResult( $res );
	}
308
309
	/**
	 * Decides whether a row of the SMW id table is handled as plain object
	 * value, in which case the caller removes outdated references to its id.
	 *
	 * @param \stdClass $row needs to provide smw_title, smw_iw and
	 *  smw_proptable_hash
	 *
	 * @return boolean
	 */
	private function isPlainObjectValue( $row ) {

		// A rogue title should never happen
		if ( $row->smw_title === '' && $row->smw_proptable_hash === null ) {
			return true;
		}

		return $row->smw_iw != SMW_SQL3_SMWDELETEIW &&
			$row->smw_iw != SMW_SQL3_SMWREDIIW &&
			$row->smw_iw != SMW_SQL3_SMWIW_OUTDATED &&
			// Leave any pre-defined property (_...) untouched
			$row->smw_title != '' &&
			// Square brackets, the curly brace offset syntax is no longer
			// supported by current PHP versions
			$row->smw_title[0] != '_' &&
			// smw_proptable_hash === null means it is not a subject but an object value
			$row->smw_proptable_hash === null;
	}
325
326
	/**
	 * Looks for other rows that share sortkey, namespace and subobject with
	 * the given property row and marks those with an identical title for
	 * removal by assigning the SMW_SQL3_SMWDELETEIW interwiki marker.
	 *
	 * @param \stdClass $row needs to provide smw_id, smw_title, smw_sortkey,
	 *  smw_namespace and smw_subobject
	 */
	private function markPossibleDuplicateProperties( $row ) {

		$db = $this->store->getConnection( 'mw.db' );

		// Use the sortkey (comparing the label and not the "_..." key) in order
		// to match possible duplicate properties by label (not by key)
		$duplicates = $db->select(
			\SMWSql3SmwIds::TABLE_NAME,
			array( 'smw_id', 'smw_title' ),
			array(
				"smw_id !=" . $db->addQuotes( $row->smw_id ),
				"smw_sortkey =" . $db->addQuotes( $row->smw_sortkey ),
				// Quoted like the remaining conditions instead of being
				// concatenated as raw value
				"smw_namespace =" . $db->addQuotes( $row->smw_namespace ),
				"smw_subobject =" . $db->addQuotes( $row->smw_subobject )
			),
			__METHOD__,
			array( 'ORDER BY' => "smw_id ASC" )
		);

		if ( $duplicates === false ) {
			return;
		}

		// Instead of copying ID's across DB tables have the re-parse to ensure
		// that all property value ID's are reassigned together while the duplicate
		// is marked for removal until the next run
		foreach ( $duplicates as $duplicate ) {

			// If titles don't match then continue because it could be that
			// Property:Foo with displaytitle foobar -> sortkey ->foobar
			// Property:Bar with displaytitle foobar -> sortkey ->foobar
			if ( $row->smw_title !== $duplicate->smw_title ) {
				continue;
			}

			$this->store->getObjectIds()->updateInterwikiField(
				$duplicate->smw_id,
				new DIWikiPage( $row->smw_title, $row->smw_namespace, SMW_SQL3_SMWDELETEIW )
			);
		}
	}
367
368
	/**
	 * Advances $id to the position the next run should start with, or sets
	 * it to -1 when no such position exists.
	 *
	 * @param integer &$id
	 * @param boolean $emptyRange whether the range just processed held nothing
	 */
	private function findNextIdPosition( &$id, $emptyRange ) {

		$candidate = $id + $this->iterationLimit;
		$connection = $this->store->getConnection( 'mw.db' );

		// nothing found, check if there will be more pages later on
		if ( $emptyRange && $candidate > \SMWSql3SmwIds::FXD_PROP_BORDER_ID ) {

			$nextByPageId = (int)$connection->selectField(
				'page',
				'page_id',
				"page_id >= $candidate",
				__METHOD__,
				array( 'ORDER BY' => "page_id ASC" )
			);

			$nextBySmwId = (int)$connection->selectField(
				\SMWSql3SmwIds::TABLE_NAME,
				'smw_id',
				"smw_id >= $candidate",
				__METHOD__,
				array( 'ORDER BY' => "smw_id ASC" )
			);

			// Next position is determined by the pool with the maxId
			// NOTE(review): the larger of both candidates wins, hence ids
			// between the two are not visited -- confirm this is intended
			if ( $nextBySmwId != 0 && $nextBySmwId > $nextByPageId ) {
				$candidate = $nextBySmwId;
			} else {
				$candidate = $nextByPageId;
			}
		}

		if ( $candidate ) {
			$id = $candidate;
		} else {
			$id = -1;
		}
	}
398
399
	/**
	 * Creates an update job for the title, with the configured parse mode
	 * passed along as 'pm' parameter.
	 *
	 * @param Title $title
	 *
	 * @return mixed whatever the job factory returns for newUpdateJob
	 */
	private function newUpdateJob( $title ) {
		$parameters = array( 'pm' => $this->updateJobParseMode );

		return $this->jobFactory->newUpdateJob( $title, $parameters );
	}
402
403
}
404