Completed
Branch master (f93894)
by
unknown
27:35
created

RefreshLinksJob::waitForMasterPosition()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 8
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 3
nc 2
nop 0
dl 0
loc 8
rs 9.4285
c 0
b 0
f 0
1
<?php
2
/**
3
 * Job to update link tables for pages
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along
16
 * with this program; if not, write to the Free Software Foundation, Inc.,
17
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
 * http://www.gnu.org/copyleft/gpl.html
19
 *
20
 * @file
21
 * @ingroup JobQueue
22
 */
23
use MediaWiki\MediaWikiServices;
24
25
/**
26
 * Job to update link tables for pages
27
 *
28
 * This job comes in a few variants:
29
 *   - a) Recursive jobs to update links for backlink pages for a given title.
30
 *        These jobs have (recursive:true,table:<table>) set.
31
 *   - b) Jobs to update links for a set of pages (the job title is ignored).
32
 *        These jobs have (pages:(<page ID>:(<namespace>,<title>),...)) set.
33
 *   - c) Jobs to update links for a single page (the job title)
34
 *        These jobs need no extra fields set.
35
 *
36
 * @ingroup JobQueue
37
 */
38
class RefreshLinksJob extends Job {
39
	/** @var float Cache parser output when it takes this long to render */
40
	const PARSE_THRESHOLD_SEC = 1.0;
41
	/** @var int Lag safety margin when comparing root job times to last-refresh times */
42
	const CLOCK_FUDGE = 10;
43
	/** @var int How many seconds to wait for slaves to catch up */
44
	const LAG_WAIT_TIMEOUT = 15;
45
46
	/**
	 * Create a 'refreshLinks' job and decide whether de-duplication is worth attempting.
	 *
	 * $this->removeDuplicates is set to false when a 'range' parameter is present,
	 * or when 'pages' is present and does not hold exactly one entry; otherwise it
	 * is set to true. This avoids the overhead of de-duplication when it would be
	 * pointless.
	 *
	 * @param Title $title
	 * @param array $params
	 */
	public function __construct( Title $title, array $params ) {
		parent::__construct( 'refreshLinks', $title, $params );

		if ( isset( $params['range'] ) ) {
			// Ranges rarely will line up
			$this->removeDuplicates = false;
		} elseif ( isset( $params['pages'] ) && count( $params['pages'] ) !== 1 ) {
			// Multiple pages per job make matches unlikely
			$this->removeDuplicates = false;
		} else {
			$this->removeDuplicates = true;
		}
	}
56
57
	/**
	 * Build a RefreshLinksJob whose command is set to 'refreshLinksPrioritized'.
	 *
	 * @param Title $title
	 * @param array $params
	 * @return RefreshLinksJob
	 */
	public static function newPrioritized( Title $title, array $params ) {
		$prioritizedJob = new self( $title, $params );
		// Tag the job with the 'refreshLinksPrioritized' command name
		$prioritizedJob->command = 'refreshLinksPrioritized';

		return $prioritizedJob;
	}
68
69
	/**
	 * Build a RefreshLinksJob whose command is set to 'refreshLinksDynamic'.
	 *
	 * @param Title $title
	 * @param array $params
	 * @return RefreshLinksJob
	 */
	public static function newDynamic( Title $title, array $params ) {
		$dynamicJob = new self( $title, $params );
		// Tag the job with the 'refreshLinksDynamic' command name
		$dynamicJob->command = 'refreshLinksDynamic';

		return $dynamicJob;
	}
80
81
	function run() {
82
		global $wgUpdateRowsPerJob;
83
84
		// Job to update all (or a range of) backlink pages for a page
85
		if ( !empty( $this->params['recursive'] ) ) {
86
			// When the base job branches, wait for the slaves to catch up to the master.
87
			// From then on, we know that any template changes at the time the base job was
88
			// enqueued will be reflected in backlink page parses when the leaf jobs run.
89
			if ( !isset( $params['range'] ) ) {
0 ignored issues
show
Bug introduced by
The variable $params seems to never exist, and therefore isset should always return false. Did you maybe rename this variable?

This check looks for calls to isset(...) or empty() on variables that are not yet defined. These calls will always produce the same result and can be removed.

This is most likely caused by the renaming of a variable or the removal of a function/method parameter.

Loading history...
90
				try {
91
					wfGetLBFactory()->waitForReplication( [
0 ignored issues
show
Deprecated Code introduced by
The function wfGetLBFactory() has been deprecated with message: since 1.27, use MediaWikiServices::getDBLoadBalancerFactory() instead.

This function has been deprecated. The supplier of the file has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the function will be removed from the class and what other function to use instead.

Loading history...
92
						'wiki'    => wfWikiID(),
93
						'timeout' => self::LAG_WAIT_TIMEOUT
94
					] );
95
				} catch ( DBReplicationWaitError $e ) { // only try so hard
96
					$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
97
					$stats->increment( 'refreshlinks.lag_wait_failed' );
98
				}
99
			}
100
			// Carry over information for de-duplication
101
			$extraParams = $this->getRootJobParams();
102
			$extraParams['triggeredRecursive'] = true;
103
			// Convert this into no more than $wgUpdateRowsPerJob RefreshLinks per-title
104
			// jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
105
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
106
				$this,
107
				$wgUpdateRowsPerJob,
108
				1, // job-per-title
109
				[ 'params' => $extraParams ]
110
			);
111
			JobQueueGroup::singleton()->push( $jobs );
112
		// Job to update link tables for a set of titles
113
		} elseif ( isset( $this->params['pages'] ) ) {
114
			foreach ( $this->params['pages'] as $pageId => $nsAndKey ) {
115
				list( $ns, $dbKey ) = $nsAndKey;
116
				$this->runForTitle( Title::makeTitleSafe( $ns, $dbKey ) );
0 ignored issues
show
Bug introduced by
It seems like \Title::makeTitleSafe($ns, $dbKey) can be null; however, runForTitle() does not accept null. Maybe add an additional type check?

Unless you are absolutely sure that the expression can never be null because of other conditions, we strongly recommend adding an additional type check to your code:

/** @return stdClass|null */
function mayReturnNull() { }

function doesNotAcceptNull(stdClass $x) { }

// With potential error.
function withoutCheck() {
    $x = mayReturnNull();
    doesNotAcceptNull($x); // Potential error here.
}

// Safe - Alternative 1
function withCheck1() {
    $x = mayReturnNull();
    if ( ! $x instanceof stdClass) {
        throw new \LogicException('$x must be defined.');
    }
    doesNotAcceptNull($x);
}

// Safe - Alternative 2
function withCheck2() {
    $x = mayReturnNull();
    if ($x instanceof stdClass) {
        doesNotAcceptNull($x);
    }
}
Loading history...
117
			}
118
		// Job to update link tables for a given title
119
		} else {
120
			$this->runForTitle( $this->title );
121
		}
122
123
		return true;
124
	}
125
126
	/**
127
	 * @param Title $title
128
	 * @return bool
129
	 */
130
	protected function runForTitle( Title $title ) {
131
		$page = WikiPage::factory( $title );
132
		if ( !empty( $this->params['triggeringRevisionId'] ) ) {
133
			// Fetch the specified revision; lockAndGetLatest() below detects if the page
134
			// was edited since and aborts in order to avoid corrupting the link tables
135
			$revision = Revision::newFromId(
136
				$this->params['triggeringRevisionId'],
137
				Revision::READ_LATEST
138
			);
139
		} else {
140
			// Fetch current revision; READ_LATEST reduces lockAndGetLatest() check failures
141
			$revision = Revision::newFromTitle( $title, false, Revision::READ_LATEST );
142
		}
143
144
		$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
145
146
		if ( !$revision ) {
147
			$stats->increment( 'refreshlinks.rev_not_found' );
148
			$this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
149
			return false; // just deleted?
150
		} elseif ( !$revision->isCurrent() ) {
151
			// If the revision isn't current, there's no point in doing a bunch
152
			// of work just to fail at the lockAndGetLatest() check later.
153
			$stats->increment( 'refreshlinks.rev_not_current' );
154
			$this->setLastError( "Revision {$revision->getId()} is not current" );
155
			return false;
156
		}
157
158
		$content = $revision->getContent( Revision::RAW );
159
		if ( !$content ) {
160
			// If there is no content, pretend the content is empty
161
			$content = $revision->getContentHandler()->makeEmptyContent();
162
		}
163
164
		$parserOutput = false;
165
		$parserOptions = $page->makeParserOptions( 'canonical' );
166
		// If page_touched changed after this root job, then it is likely that
167
		// any views of the pages already resulted in re-parses which are now in
168
		// cache. The cache can be reused to avoid expensive parsing in some cases.
169
		if ( isset( $this->params['rootJobTimestamp'] ) ) {
170
			$opportunistic = !empty( $this->params['isOpportunistic'] );
171
172
			$skewedTimestamp = $this->params['rootJobTimestamp'];
173
			if ( $opportunistic ) {
0 ignored issues
show
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
174
				// Neither clock skew nor DB snapshot/slave lag matter much for such
175
				// updates; focus on reusing the (often recently updated) cache
176
			} else {
177
				// For transclusion updates, the template changes must be reflected
178
				$skewedTimestamp = wfTimestamp( TS_MW,
179
					wfTimestamp( TS_UNIX, $skewedTimestamp ) + self::CLOCK_FUDGE
180
				);
181
			}
182
183
			if ( $page->getLinksTimestamp() > $skewedTimestamp ) {
184
				// Something already updated the backlinks since this job was made
185
				$stats->increment( 'refreshlinks.update_skipped' );
186
				return true;
187
			}
188
189
			if ( $page->getTouched() >= $this->params['rootJobTimestamp'] || $opportunistic ) {
190
				// Cache is suspected to be up-to-date. As long as the cache rev ID matches
191
				// and it reflects the job's triggering change, then it is usable.
192
				$parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
193
				if ( !$parserOutput
194
					|| $parserOutput->getCacheRevisionId() != $revision->getId()
195
					|| $parserOutput->getCacheTime() < $skewedTimestamp
196
				) {
197
					$parserOutput = false; // too stale
198
				}
199
			}
200
		}
201
202
		// Fetch the current revision and parse it if necessary...
203
		if ( $parserOutput ) {
204
			$stats->increment( 'refreshlinks.parser_cached' );
205
		} else {
206
			$start = microtime( true );
207
			// Revision ID must be passed to the parser output to get revision variables correct
208
			$parserOutput = $content->getParserOutput(
209
				$title, $revision->getId(), $parserOptions, false );
210
			$elapsed = microtime( true ) - $start;
211
			// If it took a long time to render, then save this back to the cache to avoid
212
			// wasted CPU by other apaches or job runners. We don't want to always save to
213
			// cache as this can cause high cache I/O and LRU churn when a template changes.
214
			if ( $elapsed >= self::PARSE_THRESHOLD_SEC
215
				&& $page->shouldCheckParserCache( $parserOptions, $revision->getId() )
216
				&& $parserOutput->isCacheable()
217
			) {
218
				$ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
219
				ParserCache::singleton()->save(
220
					$parserOutput, $page, $parserOptions, $ctime, $revision->getId()
0 ignored issues
show
Security Bug introduced by
It seems like $ctime defined by wfTimestamp(TS_MW, (int) $start) on line 218 can also be of type false; however, ParserCache::save() only seems to accept string|null. Did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
221
				);
222
			}
223
			$stats->increment( 'refreshlinks.parser_uncached' );
224
		}
225
226
		$updates = $content->getSecondaryDataUpdates(
227
			$title,
228
			null,
229
			!empty( $this->params['useRecursiveLinksUpdate'] ),
230
			$parserOutput
231
		);
232
233
		foreach ( $updates as $key => $update ) {
234
			// FIXME: This code probably shouldn't be here?
235
			// Needed by things like Echo notifications which need
236
			// to know which user caused the links update
237
			if ( $update instanceof LinksUpdate ) {
238
				$update->setRevision( $revision );
239
				if ( !empty( $this->params['triggeringUser'] ) ) {
240
					$userInfo = $this->params['triggeringUser'];
241
					if ( $userInfo['userId'] ) {
242
						$user = User::newFromId( $userInfo['userId'] );
243
					} else {
244
						// Anonymous, use the username
245
						$user = User::newFromName( $userInfo['userName'], false );
246
					}
247
					$update->setTriggeringUser( $user );
0 ignored issues
show
Security Bug introduced by
It seems like $user defined by \User::newFromName($userInfo['userName'], false) on line 245 can also be of type false; however, LinksUpdate::setTriggeringUser() only seems to accept object<User>. Did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
248
				}
249
			}
250
		}
251
252
		$latestNow = $page->lockAndGetLatest();
253
		if ( !$latestNow || $revision->getId() != $latestNow ) {
254
			// Do not clobber over newer updates with older ones. If all jobs were FIFO and
255
			// serialized, it would be OK to update links based on older revisions since it
256
			// would eventually get to the latest. Since that is not the case (by design),
257
			// only update the link tables to a state matching the current revision's output.
258
			$stats->increment( 'refreshlinks.rev_cas_failure' );
259
			$this->setLastError( "page_latest changed from {$revision->getId()} to $latestNow" );
260
			return false;
261
		}
262
263
		DataUpdate::runUpdates( $updates );
264
265
		InfoAction::invalidateCache( $title );
266
267
		return true;
268
	}
269
270
	/**
	 * Get the de-duplication info for this job, dropping the job title for per-pages jobs.
	 *
	 * Starts from the parent implementation's info array. When the job parameters
	 * include 'pages', the 'namespace' and 'title' entries are removed from the result.
	 *
	 * @return array
	 */
	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();

		// 'pages' is a job parameter, so look for it under 'params' rather than at the
		// top level of $info.
		// NOTE(review): assumes the parent returns the job parameters under 'params',
		// as the is_array() guard implies - confirm against Job::getDeduplicationInfo().
		if ( is_array( $info['params'] ) && isset( $info['params']['pages'] ) ) {
			// For per-pages jobs, the job title is that of the template that changed
			// (or similar), so remove that since it ruins duplicate detection
			unset( $info['namespace'], $info['title'] );
		}

		return $info;
	}
283
284
	/**
	 * Count the work items in this job: the number of entries in the 'pages'
	 * parameter when it is set, otherwise 1.
	 *
	 * @return int
	 */
	public function workItemCount() {
		if ( !isset( $this->params['pages'] ) ) {
			return 1;
		}

		return count( $this->params['pages'] );
	}
287
}
288