This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | /** |
||
3 | * Job to update link tables for pages |
||
4 | * |
||
5 | * This program is free software; you can redistribute it and/or modify |
||
6 | * it under the terms of the GNU General Public License as published by |
||
7 | * the Free Software Foundation; either version 2 of the License, or |
||
8 | * (at your option) any later version. |
||
9 | * |
||
10 | * This program is distributed in the hope that it will be useful, |
||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
13 | * GNU General Public License for more details. |
||
14 | * |
||
15 | * You should have received a copy of the GNU General Public License along |
||
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
18 | * http://www.gnu.org/copyleft/gpl.html |
||
19 | * |
||
20 | * @file |
||
21 | * @ingroup JobQueue |
||
22 | */ |
||
23 | use MediaWiki\MediaWikiServices; |
||
24 | |||
25 | /** |
||
26 | * Job to update link tables for pages |
||
27 | * |
||
28 | * This job comes in a few variants: |
||
29 | * - a) Recursive jobs to update links for backlink pages for a given title. |
||
30 | * These jobs have (recursive:true,table:<table>) set. |
||
31 | * - b) Jobs to update links for a set of pages (the job title is ignored). |
||
32 | * These jobs have (pages:(<page ID>:(<namespace>,<title>),...) set. |
||
33 | * - c) Jobs to update links for a single page (the job title) |
||
34 | * These jobs need no extra fields set. |
||
35 | * |
||
36 | * @ingroup JobQueue |
||
37 | */ |
||
38 | class RefreshLinksJob extends Job { |
||
39 | /** @var float Cache parser output when it takes this long to render */ |
||
40 | const PARSE_THRESHOLD_SEC = 1.0; |
||
41 | /** @var integer Lag safety margin when comparing root job times to last-refresh times */ |
||
42 | const CLOCK_FUDGE = 10; |
||
43 | /** @var integer How many seconds to wait for replica DBs to catch up */ |
||
44 | const LAG_WAIT_TIMEOUT = 15; |
||
45 | |||
/**
 * Create a 'refreshLinks' job and decide whether de-duplication is worthwhile.
 *
 * @param Title $title
 * @param array $params Job parameters; 'range' and 'pages' are inspected here
 */
function __construct( Title $title, array $params ) {
	parent::__construct( 'refreshLinks', $title, $params );

	// Skip the overhead of de-duplication whenever a match would be unlikely anyway
	if ( isset( $params['range'] ) ) {
		// Ranges will rarely line up between jobs
		$dedupe = false;
	} elseif ( isset( $params['pages'] ) && count( $params['pages'] ) != 1 ) {
		// Jobs that do not cover exactly one page make matches unlikely
		$dedupe = false;
	} else {
		$dedupe = true;
	}

	$this->removeDuplicates = $dedupe;
}
||
56 | |||
/**
 * Build a job for the given title and parameters whose command is
 * 'refreshLinksPrioritized' instead of the default set by the constructor.
 *
 * @param Title $title
 * @param array $params
 * @return RefreshLinksJob
 */
public static function newPrioritized( Title $title, array $params ) {
	$prioritizedJob = new self( $title, $params );
	// Override the command assigned during construction
	$prioritizedJob->command = 'refreshLinksPrioritized';

	return $prioritizedJob;
}
||
68 | |||
/**
 * Build a job for the given title and parameters whose command is
 * 'refreshLinksDynamic' instead of the default set by the constructor.
 *
 * @param Title $title
 * @param array $params
 * @return RefreshLinksJob
 */
public static function newDynamic( Title $title, array $params ) {
	$dynamicJob = new self( $title, $params );
	// Override the command assigned during construction
	$dynamicJob->command = 'refreshLinksDynamic';

	return $dynamicJob;
}
||
80 | |||
/**
 * Execute the job in whichever variant its parameters select:
 *  - 'recursive' set: partition the backlinks into per-title jobs (plus possibly
 *    one more recursive job for the remainder) and push them to the queue;
 *  - 'pages' set: refresh the links of every listed page;
 *  - otherwise: refresh the links of the job title.
 *
 * @return bool Always true; per-page failures are only recorded via setLastError()
 */
function run() {
	global $wgUpdateRowsPerJob;

	if ( !empty( $this->params['recursive'] ) ) {
		// Job to update all (or a range of) backlink pages for a page.
		//
		// When the base job branches, wait for the replica DBs to catch up to the master.
		// From then on, we know that any template changes at the time the base job was
		// enqueued will be reflected in backlink page parses when the leaf jobs run.
		// The check must read the job's own parameters: a bare $params is undefined in
		// this scope, which made the condition always true.
		if ( !isset( $this->params['range'] ) ) {
			try {
				$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
				$lbFactory->waitForReplication( [
					'wiki' => wfWikiID(),
					'timeout' => self::LAG_WAIT_TIMEOUT
				] );
			} catch ( DBReplicationWaitError $e ) { // only try so hard
				$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
				$stats->increment( 'refreshlinks.lag_wait_failed' );
			}
		}
		// Carry over information for de-duplication
		$extraParams = $this->getRootJobParams();
		$extraParams['triggeredRecursive'] = true;
		// Convert this into no more than $wgUpdateRowsPerJob RefreshLinks per-title
		// jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
		$jobs = BacklinkJobUtils::partitionBacklinkJob(
			$this,
			$wgUpdateRowsPerJob,
			1, // job-per-title
			[ 'params' => $extraParams ]
		);
		JobQueueGroup::singleton()->push( $jobs );
	} elseif ( isset( $this->params['pages'] ) ) {
		// Job to update link tables for a set of titles (keys are page IDs, unused here)
		foreach ( $this->params['pages'] as $nsAndKey ) {
			list( $ns, $dbKey ) = $nsAndKey;
			$title = Title::makeTitleSafe( $ns, $dbKey );
			if ( $title ) {
				$this->runForTitle( $title );
			} else {
				// makeTitleSafe() yields null for an invalid namespace/key pair; record
				// that and continue rather than failing the type hint of runForTitle()
				$this->setLastError( "Invalid title ($ns,$dbKey)." );
			}
		}
	} else {
		// Job to update link tables for a given title
		$this->runForTitle( $this->title );
	}

	return true;
}
||
126 | |||
/**
 * Refresh the link tables of a single page from its current revision.
 *
 * Returns false, after recording a message with setLastError(), when no revision
 * can be loaded or when the loaded revision is not the latest one of the page.
 * Returns true when the update is skipped because the links were already refreshed
 * after the root job, or once all secondary data updates have been run.
 *
 * @param Title $title
 * @return bool
 */
protected function runForTitle( Title $title ) {
	$services = MediaWikiServices::getInstance();
	$stats = $services->getStatsdDataFactory();
	$lbFactory = $services->getDBLoadBalancerFactory();
	$ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );

	$page = WikiPage::factory( $title );
	$page->loadPageData( WikiPage::READ_LATEST );

	// Serialize links updates by page ID so they see each others' changes
	$dbw = $lbFactory->getMainLB()->getConnection( DB_MASTER );
	// NOTE(review): the lock object is never read again; presumably it must stay
	// referenced so the lock is held until this method returns - do not remove it.
	/** @noinspection PhpUnusedLocalVariableInspection */
	$scopedLock = LinksUpdate::acquirePageLock( $dbw, $page->getId(), 'job' );
	// Get the latest ID *after* acquirePageLock() flushed the transaction.
	// This is used to detect edits/moves after loadPageData() but before the scope lock.
	// This works around the chicken/egg problem of determining the scope lock key.
	$latest = $title->getLatestRevID( Title::GAID_FOR_UPDATE );

	if ( !empty( $this->params['triggeringRevisionId'] ) ) {
		// Fetch the specified revision; the latest-revision comparison below detects
		// if the page was edited since and aborts to avoid corrupting the link tables
		$revision = Revision::newFromId(
			$this->params['triggeringRevisionId'],
			Revision::READ_LATEST
		);
	} else {
		// Fetch current revision; READ_LATEST reduces failures of the comparison below
		$revision = Revision::newFromTitle( $title, false, Revision::READ_LATEST );
	}

	if ( !$revision ) {
		$stats->increment( 'refreshlinks.rev_not_found' );
		$this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
		return false; // just deleted?
	} elseif ( $revision->getId() != $latest || $revision->getPage() !== $page->getId() ) {
		// Do not clobber over newer updates with older ones. If all jobs were FIFO and
		// serialized, it would be OK to update links based on older revisions since it
		// would eventually get to the latest. Since that is not the case (by design),
		// only update the link tables to a state matching the current revision's output.
		$stats->increment( 'refreshlinks.rev_not_current' );
		$this->setLastError( "Revision {$revision->getId()} is not current" );
		return false;
	}

	$content = $revision->getContent( Revision::RAW );
	if ( !$content ) {
		// If there is no content, pretend the content is empty
		$content = $revision->getContentHandler()->makeEmptyContent();
	}

	$parserOutput = false;
	$parserOptions = $page->makeParserOptions( 'canonical' );
	// If page_touched changed after this root job, then it is likely that
	// any views of the pages already resulted in re-parses which are now in
	// cache. The cache can be reused to avoid expensive parsing in some cases.
	if ( isset( $this->params['rootJobTimestamp'] ) ) {
		$opportunistic = !empty( $this->params['isOpportunistic'] );

		$skewedTimestamp = $this->params['rootJobTimestamp'];
		if ( !$opportunistic ) {
			// For transclusion updates, the template changes must be reflected, so pad
			// the timestamp by the clock fudge. For opportunistic updates neither clock
			// skew nor DB snapshot/replica DB lag matter much; the timestamp is left
			// as-is to focus on reusing the (often recently updated) cache.
			$skewedTimestamp = wfTimestamp( TS_MW,
				wfTimestamp( TS_UNIX, $skewedTimestamp ) + self::CLOCK_FUDGE
			);
		}

		if ( $page->getLinksTimestamp() > $skewedTimestamp ) {
			// Something already updated the backlinks since this job was made
			$stats->increment( 'refreshlinks.update_skipped' );
			return true;
		}

		if ( $page->getTouched() >= $this->params['rootJobTimestamp'] || $opportunistic ) {
			// Cache is suspected to be up-to-date. As long as the cache rev ID matches
			// and it reflects the job's triggering change, then it is usable.
			$parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
			if ( !$parserOutput
				|| $parserOutput->getCacheRevisionId() != $revision->getId()
				|| $parserOutput->getCacheTime() < $skewedTimestamp
			) {
				$parserOutput = false; // too stale
			}
		}
	}

	// Use the cached output if one was accepted above; otherwise parse the revision
	if ( $parserOutput ) {
		$stats->increment( 'refreshlinks.parser_cached' );
	} else {
		$start = microtime( true );
		// Revision ID must be passed to the parser output to get revision variables correct
		$parserOutput = $content->getParserOutput(
			$title, $revision->getId(), $parserOptions, false );
		$elapsed = microtime( true ) - $start;
		// If it took a long time to render, then save this back to the cache to avoid
		// wasted CPU by other apaches or job runners. We don't want to always save to
		// cache as this can cause high cache I/O and LRU churn when a template changes.
		if ( $elapsed >= self::PARSE_THRESHOLD_SEC
			&& $page->shouldCheckParserCache( $parserOptions, $revision->getId() )
			&& $parserOutput->isCacheable()
		) {
			$ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
			ParserCache::singleton()->save(
				$parserOutput, $page, $parserOptions, $ctime, $revision->getId()
			);
		}
		$stats->increment( 'refreshlinks.parser_uncached' );
	}

	$updates = $content->getSecondaryDataUpdates(
		$title,
		null,
		!empty( $this->params['useRecursiveLinksUpdate'] ),
		$parserOutput
	);

	// First pass: configure every update before any of them is executed
	foreach ( $updates as $update ) {
		// FIXME: This code probably shouldn't be here?
		// Needed by things like Echo notifications which need
		// to know which user caused the links update
		if ( $update instanceof LinksUpdate ) {
			$update->setRevision( $revision );
			if ( !empty( $this->params['triggeringUser'] ) ) {
				$userInfo = $this->params['triggeringUser'];
				if ( $userInfo['userId'] ) {
					$user = User::newFromId( $userInfo['userId'] );
				} else {
					// Anonymous, use the username
					$user = User::newFromName( $userInfo['userName'], false );
				}
				// newFromName() can return false for an unusable name; in that case
				// leave the triggering user unset instead of passing a non-User value
				if ( $user instanceof User ) {
					$update->setTriggeringUser( $user );
				}
			}
		}
	}

	// Second pass: run the updates under the transaction ticket obtained at the start
	foreach ( $updates as $update ) {
		$update->setTransactionTicket( $ticket );
		$update->doUpdate();
	}

	InfoAction::invalidateCache( $title );

	return true;
}
||
278 | |||
/**
 * Get the de-duplication info of the parent class, dropping the job title for
 * jobs that carry an explicit 'pages' parameter.
 *
 * @return array
 */
public function getDeduplicationInfo() {
	$info = parent::getDeduplicationInfo();
	// For per-pages jobs, the job title is that of the template that changed
	// (or similar), so remove that since it ruins duplicate detection.
	// 'pages' is a job parameter, so it has to be looked up under 'params'; the
	// previous top-level check on $info['pages'] never stripped the title fields.
	// NOTE(review): assumes the parent nests job parameters under 'params', as the
	// is_array() guard implies - confirm against Job::getDeduplicationInfo().
	if ( is_array( $info['params'] ) && isset( $info['params']['pages'] ) ) {
		unset( $info['namespace'], $info['title'] );
	}

	return $info;
}
||
292 | |||
/**
 * Number of work items in this job: the number of entries in the 'pages'
 * parameter when it is set, otherwise one.
 *
 * @return int
 */
public function workItemCount() {
	if ( isset( $this->params['pages'] ) ) {
		return count( $this->params['pages'] );
	}

	return 1;
}
||
296 | } |
||
297 |
This check looks for calls to isset(...) or empty() on variables that are not yet defined. These calls will always produce the same result and can be removed. This is most likely caused by the renaming of a variable or the removal of a function/method parameter.