<?php
/**
 * HTML cache invalidation of all pages linking to a given title.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup JobQueue
 * @ingroup Cache
 */

/**
 * Job to purge the cache for all pages that link to or use another page or file
 *
 * This job comes in a few variants:
 *   - a) Recursive jobs to purge caches for backlink pages for a given title.
 *        These jobs have (recursive:true,table:<table>) set.
 *   - b) Jobs to purge caches for a set of titles (the job title is ignored).
 *        These jobs have (pages:(<page ID>:(<namespace>,<title>),...)) set.
 *   - c) Jobs with neither of the above set, which purge only the job title itself.
 *
 * @ingroup JobQueue
 */
class HTMLCacheUpdateJob extends Job {
	/**
	 * @param Title $title Title the job applies to
	 * @param array $params Job parameters; the keys read by this class are
	 *   'table', 'recursive', 'pages' and 'range'
	 */
	public function __construct( Title $title, array $params ) {
		parent::__construct( 'htmlCacheUpdate', $title, $params );
		// Base backlink purge jobs can be de-duplicated; jobs that carry a
		// 'range' or an explicit 'pages' list are not.
		$this->removeDuplicates = ( !isset( $params['range'] ) && !isset( $params['pages'] ) );
	}

	/**
	 * Create a recursive job that purges every page linking to $title via $table.
	 *
	 * @param Title $title Title to purge backlink pages from
	 * @param string $table Backlink table name
	 * @return HTMLCacheUpdateJob
	 */
	public static function newForBacklinks( Title $title, $table ) {
		return new self(
			$title,
			[
				'table' => $table,
				'recursive' => true
			] + Job::newRootJobParams( // "overall" refresh links job info
				"htmlCacheUpdate:{$table}:{$title->getPrefixedText()}"
			)
		);
	}

	/**
	 * Run the job: either split a recursive backlink job into smaller jobs and
	 * enqueue them, or invalidate the caches of the targeted page(s) directly.
	 *
	 * @return bool Always true
	 */
	public function run() {
		global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

		if ( isset( $this->params['table'] ) && !isset( $this->params['pages'] ) ) {
			$this->params['recursive'] = true; // b/c; base job
		}

		// Job to purge all (or a range of) backlink pages for a page
		if ( !empty( $this->params['recursive'] ) ) {
			// Convert this into no more than $wgUpdateRowsPerJob HTMLCacheUpdateJob per-title
			// jobs and possibly a recursive HTMLCacheUpdateJob job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$wgUpdateRowsPerJob,
				$wgUpdateRowsPerQuery, // jobs-per-title
				// Carry over information for de-duplication
				[ 'params' => $this->getRootJobParams() ]
			);
			JobQueueGroup::singleton()->push( $jobs );
		// Job to purge pages for a set of titles
		} elseif ( isset( $this->params['pages'] ) ) {
			$this->invalidateTitles( $this->params['pages'] );
		// Job to update a single title
		} else {
			$t = $this->title;
			$this->invalidateTitles( [
				$t->getArticleID() => [ $t->getNamespace(), $t->getDBkey() ]
			] );
		}

		return true;
	}

	/**
	 * Bump page_touched for the given pages, then purge the CDN and (if enabled)
	 * the file cache for every page whose page_touched now equals the new timestamp.
	 *
	 * @param array $pages Map of (page ID => (namespace, DB key)) entries
	 */
	protected function invalidateTitles( array $pages ) {
		global $wgUpdateRowsPerQuery, $wgUseFileCache;

		// Get all page IDs in this query into an array
		$pageIds = array_keys( $pages );
		if ( !$pageIds ) {
			return;
		}

		// Bump page_touched to the current timestamp. This used to use the root job timestamp
		// (e.g. template/file edit time), which was a bit more efficient when template edits are
		// rare and don't affect the same pages much. However, this way allows for better
		// de-duplication, which is much more useful for wikis with high edit rates. Note that
		// RefreshLinksJob, which is enqueued alongside HTMLCacheUpdateJob, saves the parser output
		// since it has to parse anyway. We assume that vast majority of the cache jobs finish
		// before the link jobs, so using the current timestamp instead of the root timestamp is
		// not expected to invalidate these cache entries too often.
		$touchTimestamp = wfTimestampNow();

		$dbw = wfGetDB( DB_MASTER );
		// NOTE(review): wfGetLBFactory() may be deprecated in newer MediaWiki releases;
		// confirm against the target version before migrating.
		$factory = wfGetLBFactory();
		$ticket = $factory->getEmptyTransactionTicket( __METHOD__ );

		// Batch jobs are sized by $wgUpdateRowsPerQuery already, but array_chunk() rejects
		// a size below 1, so clamp a missing or misconfigured value to a sane minimum.
		$batchSize = max( 1, (int)$wgUpdateRowsPerQuery );

		// Update page_touched, skipping pages whose page_touched is already at or
		// past the new timestamp.
		foreach ( array_chunk( $pageIds, $batchSize ) as $batch ) {
			// Runs before each batch's write, so the final batch is not committed here
			$factory->commitAndWaitForReplication( __METHOD__, $ticket );

			$dbw->update( 'page',
				[ 'page_touched' => $dbw->timestamp( $touchTimestamp ) ],
				[ 'page_id' => $batch,
					// don't invalidate pages that were already invalidated
					"page_touched < " . $dbw->addQuotes( $dbw->timestamp( $touchTimestamp ) )
				],
				__METHOD__
			);
		}

		// Get the list of affected pages (races only mean something else did the purge)
		$titleArray = TitleArray::newFromResult( $dbw->select(
			'page',
			[ 'page_namespace', 'page_title' ],
			[ 'page_id' => $pageIds, 'page_touched' => $dbw->timestamp( $touchTimestamp ) ],
			__METHOD__
		) );

		// Update CDN
		$u = CdnCacheUpdate::newFromTitles( $titleArray );
		$u->doUpdate();

		// Update file cache
		if ( $wgUseFileCache ) {
			foreach ( $titleArray as $title ) {
				HTMLFileCache::clearFileCache( $title );
			}
		}
	}

	/**
	 * @return int Number of entries in the 'pages' parameter when it is set, otherwise 1
	 */
	public function workItemCount() {
		return isset( $this->params['pages'] ) ? count( $this->params['pages'] ) : 1;
	}
}

// Code-viewer annotation: a function called in this file has been deprecated. The supplier
// of the file has supplied an explanatory message, which should give some clue as to whether
// and when the function will be removed from the class and what other function to use instead.