HTMLCacheUpdateJob::workItemCount()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 3
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 2
nc 2
nop 0
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * HTML cache invalidation of all pages linking to a given title.
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along
16
 * with this program; if not, write to the Free Software Foundation, Inc.,
17
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
 * http://www.gnu.org/copyleft/gpl.html
19
 *
20
 * @file
21
 * @ingroup JobQueue
22
 * @ingroup Cache
23
 */
24
25
/**
26
 * Job to purge the cache for all pages that link to or use another page or file
27
 *
28
 * This job comes in a few variants:
29
 *   - a) Recursive jobs to purge caches for backlink pages for a given title.
30
 *        These jobs have (recursive:true,table:<table>) set.
31
 *   - b) Jobs to purge caches for a set of titles (the job title is ignored).
32
 *        These jobs have (pages:(<page ID>:(<namespace>,<title>),...)) set.
33
 *
34
 * @ingroup JobQueue
35
 */
36
class HTMLCacheUpdateJob extends Job {
37
	/**
	 * @param Title $title Title the job is created for
	 * @param array $params Job parameters, passed through to the parent constructor
	 */
	public function __construct( Title $title, array $params ) {
		parent::__construct( 'htmlCacheUpdate', $title, $params );
		// Base backlink purge jobs can be de-duplicated; a job that carries a
		// 'range' or 'pages' parameter is not flagged for duplicate removal.
		$this->removeDuplicates = ( !isset( $params['range'] ) && !isset( $params['pages'] ) );
	}
42
43
	/**
	 * Build the recursive job that purges the backlink pages of a title.
	 *
	 * @param Title $title Title to purge backlink pages from
	 * @param string $table Backlink table name
	 * @return HTMLCacheUpdateJob
	 */
	public static function newForBacklinks( Title $title, $table ) {
		// "overall" refresh links job info
		$rootJobParams = Job::newRootJobParams(
			"htmlCacheUpdate:{$table}:{$title->getPrefixedText()}"
		);
		// Array union: the explicit 'table'/'recursive' entries take precedence
		$jobParams = [ 'table' => $table, 'recursive' => true ] + $rootJobParams;

		return new self( $title, $jobParams );
	}
59
60
	/**
	 * Run the job in one of three modes, chosen from the job parameters:
	 *   - 'recursive' set (or implied by 'table' without 'pages'): partition the
	 *     backlinks into further jobs and push those onto the job queue.
	 *   - 'pages' set: invalidate the listed pages.
	 *   - otherwise: invalidate the job's own title.
	 *
	 * @return bool Always true
	 */
	public function run() {
		global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

		if ( isset( $this->params['table'] ) && !isset( $this->params['pages'] ) ) {
			$this->params['recursive'] = true; // b/c; base job
		}

		// Job to purge all (or a range of) backlink pages for a page
		if ( !empty( $this->params['recursive'] ) ) {
			// Convert this into no more than $wgUpdateRowsPerJob HTMLCacheUpdateJob per-title
			// jobs and possibly a recursive HTMLCacheUpdateJob job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$wgUpdateRowsPerJob,
				$wgUpdateRowsPerQuery, // jobs-per-title
				// Carry over information for de-duplication
				[ 'params' => $this->getRootJobParams() ]
			);
			JobQueueGroup::singleton()->push( $jobs );
		// Job to purge pages for a set of titles
		} elseif ( isset( $this->params['pages'] ) ) {
			$this->invalidateTitles( $this->params['pages'] );
		// Job to update a single title
		} else {
			$t = $this->title;
			$this->invalidateTitles( [
				$t->getArticleID() => [ $t->getNamespace(), $t->getDBkey() ]
			] );
		}

		return true;
	}
92
93
	/**
	 * Bump page_touched for the given pages in batches, then purge the CDN and
	 * (when $wgUseFileCache is enabled) the HTML file cache for every page whose
	 * page_touched equals the new timestamp afterwards.
	 *
	 * @param array $pages Map of (page ID => (namespace, DB key)) entries
	 */
	protected function invalidateTitles( array $pages ) {
		global $wgUpdateRowsPerQuery, $wgUseFileCache;

		// Get all page IDs in this query into an array
		$pageIds = array_keys( $pages );
		if ( !$pageIds ) {
			return;
		}

		// Bump page_touched to the current timestamp. This used to use the root job timestamp
		// (e.g. template/file edit time), which was a bit more efficient when template edits are
		// rare and don't affect the same pages much. However, this way allows for better
		// de-duplication, which is much more useful for wikis with high edit rates. Note that
		// RefreshLinksJob, which is enqueued alongside HTMLCacheUpdateJob, saves the parser output
		// since it has to parse anyway. We assume that vast majority of the cache jobs finish
		// before the link jobs, so using the current timestamp instead of the root timestamp is
		// not expected to invalidate these cache entries too often.
		$touchTimestamp = wfTimestampNow();

		$dbw = wfGetDB( DB_MASTER );
		// wfGetLBFactory() is deprecated since 1.27; fetch the factory from the service container
		$factory = \MediaWiki\MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
		$ticket = $factory->getEmptyTransactionTicket( __METHOD__ );
		// The DB-formatted timestamp is identical for every batch and for the
		// follow-up select, so format it once up front.
		$dbTimestamp = $dbw->timestamp( $touchTimestamp );

		// Update page_touched (skipping pages already touched since the root job).
		// Check $wgUpdateRowsPerQuery for sanity; batch jobs are sized by that already.
		foreach ( array_chunk( $pageIds, $wgUpdateRowsPerQuery ) as $batch ) {
			$factory->commitAndWaitForReplication( __METHOD__, $ticket );

			$dbw->update( 'page',
				[ 'page_touched' => $dbTimestamp ],
				[ 'page_id' => $batch,
					// don't invalidate pages that were already invalidated
					"page_touched < " . $dbw->addQuotes( $dbTimestamp )
				],
				__METHOD__
			);
		}

		// Get the list of affected pages (races only mean something else did the purge)
		$titleArray = TitleArray::newFromResult( $dbw->select(
			'page',
			[ 'page_namespace', 'page_title' ],
			[ 'page_id' => $pageIds, 'page_touched' => $dbTimestamp ],
			__METHOD__
		) );

		// TitleArray::newFromResult() may return null, which is neither accepted by
		// CdnCacheUpdate::newFromTitles() nor traversable; nothing to purge in that case.
		if ( $titleArray === null ) {
			return;
		}

		// Update CDN
		$u = CdnCacheUpdate::newFromTitles( $titleArray );
		$u->doUpdate();

		// Update file cache
		if ( $wgUseFileCache ) {
			foreach ( $titleArray as $title ) {
				HTMLFileCache::clearFileCache( $title );
			}
		}
	}
151
152
	/**
	 * @return int Number of entries in the 'pages' parameter, or 1 when that
	 *  parameter is not set
	 */
	public function workItemCount() {
		if ( isset( $this->params['pages'] ) ) {
			return count( $this->params['pages'] );
		}

		return 1;
	}
155
}
156