RecentChangesUpdateJob::run()   A
last analyzed

Complexity

Conditions 3
Paths 3

Size

Total Lines 12
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 9
nc 3
nop 0
dl 0
loc 12
rs 9.4285
c 0
b 0
f 0
1
<?php
2
/**
3
 * This program is free software; you can redistribute it and/or modify
4
 * it under the terms of the GNU General Public License as published by
5
 * the Free Software Foundation; either version 2 of the License, or
6
 * (at your option) any later version.
7
 *
8
 * This program is distributed in the hope that it will be useful,
9
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
 * GNU General Public License for more details.
12
 *
13
 * You should have received a copy of the GNU General Public License along
14
 * with this program; if not, write to the Free Software Foundation, Inc.,
15
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
 * http://www.gnu.org/copyleft/gpl.html
17
 *
18
 * @file
19
 * @author Aaron Schulz
20
 * @ingroup JobQueue
21
 */
22
use MediaWiki\MediaWikiServices;
23
24
/**
25
 * Job for pruning expired recent changes rows and updating the active users cache
26
 *
27
 * @ingroup JobQueue
28
 * @since 1.25
29
 */
30
class RecentChangesUpdateJob extends Job {
31 View Code Duplication
	/**
	 * @param Title $title Title passed through to the parent Job constructor
	 * @param array $params Job parameters; must contain a 'type' entry
	 * @throws InvalidArgumentException If $params has no 'type' entry
	 */
	public function __construct( Title $title, array $params ) {
		parent::__construct( 'recentChangesUpdate', $title, $params );

		// Fail early here; run() dispatches on this value.
		if ( !isset( $params['type'] ) ) {
			throw new InvalidArgumentException( "Missing 'type' parameter." );
		}

		// NOTE(review): presumably tells the queue to drop duplicate jobs — confirm against Job
		$this->removeDuplicates = true;
	}
40
41
	/**
	 * Build a job of type 'purge', attached to the 'Recentchanges' special page title.
	 *
	 * @return RecentChangesUpdateJob
	 */
	final public static function newPurgeJob() {
		$title = SpecialPage::getTitleFor( 'Recentchanges' );
		$params = [ 'type' => 'purge' ];

		return new self( $title, $params );
	}
49
50
	/**
	 * Build a job of type 'cacheUpdate', attached to the 'Recentchanges' special page title.
	 *
	 * @return RecentChangesUpdateJob
	 * @since 1.26
	 */
	final public static function newCacheUpdateJob() {
		$title = SpecialPage::getTitleFor( 'Recentchanges' );
		$params = [ 'type' => 'cacheUpdate' ];

		return new self( $title, $params );
	}
59
60
	/**
	 * Dispatch to the handler matching the job's 'type' parameter.
	 *
	 * @return bool Always true when the type is recognised
	 * @throws InvalidArgumentException If 'type' is neither 'purge' nor 'cacheUpdate'
	 */
	public function run() {
		$type = $this->params['type'];

		if ( $type === 'purge' ) {
			$this->purgeExpiredRows();

			return true;
		}

		if ( $type === 'cacheUpdate' ) {
			$this->updateActiveUsers();

			return true;
		}

		throw new InvalidArgumentException(
			"Invalid 'type' parameter '{$type}'." );
	}
72
73
	/**
	 * Delete recentchanges rows older than $wgRCMaxAge seconds.
	 *
	 * Rows are removed in batches of at most $wgUpdateRowsPerQuery, committing and
	 * waiting for replication after each batch. A named lock keeps concurrent runs
	 * from overlapping; if it cannot be obtained the method returns without doing
	 * anything. The lock is released even when a query throws.
	 */
	protected function purgeExpiredRows() {
		global $wgRCMaxAge, $wgUpdateRowsPerQuery;

		$lockKey = wfWikiID() . ':recentchanges-prune';

		$dbw = wfGetDB( DB_MASTER );
		if ( !$dbw->lockIsFree( $lockKey, __METHOD__ )
			|| !$dbw->lock( $lockKey, __METHOD__, 1 )
		) {
			return; // already in progress
		}

		try {
			$factory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
			$ticket = $factory->getEmptyTransactionTicket( __METHOD__ );
			$cutoff = $dbw->timestamp( time() - $wgRCMaxAge );
			do {
				$rcIds = $dbw->selectFieldValues( 'recentchanges',
					'rc_id',
					[ 'rc_timestamp < ' . $dbw->addQuotes( $cutoff ) ],
					__METHOD__,
					[ 'LIMIT' => $wgUpdateRowsPerQuery ]
				);
				if ( $rcIds ) {
					$dbw->delete( 'recentchanges', [ 'rc_id' => $rcIds ], __METHOD__ );
					// There might be more, so try waiting for replica DBs
					try {
						$factory->commitAndWaitForReplication(
							__METHOD__, $ticket, [ 'timeout' => 3 ]
						);
					} catch ( DBReplicationWaitError $e ) {
						// Another job will continue anyway
						break;
					}
				}
			} while ( $rcIds );
		} finally {
			// Always release the lock: leaving it held after an exception would make
			// later runs on this connection's lifetime bail out as "already in progress".
			$dbw->unlock( $lockKey, __METHOD__ );
		}
	}
111
112
	/**
	 * Refresh the 'activeusers' entries in querycachetwo from recentchanges.
	 *
	 * The work is registered with onTransactionIdle() on the master connection and
	 * guarded by a named lock; if the lock cannot be obtained the callback returns
	 * without changes. The callback:
	 *  - collects registered users with recentchanges rows in the time range,
	 *  - deletes 'activeusers' rows older than $wgActiveUserDays days,
	 *  - inserts rows for users not yet listed, in batches of 500,
	 *  - updates the 'activeusers' timestamp in querycache_info.
	 * The lock is released even when a query throws.
	 */
	protected function updateActiveUsers() {
		global $wgActiveUserDays;

		// Users whose latest edit is within this many days count as "active"
		$days = $wgActiveUserDays;
		// Pull in the full window of active users in this update
		$window = $wgActiveUserDays * 86400;

		$dbw = wfGetDB( DB_MASTER );
		// JobRunner uses DBO_TRX, but doesn't call begin/commit itself;
		// onTransactionIdle() will run immediately since there is no trx.
		$dbw->onTransactionIdle(
			function () use ( $dbw, $days, $window ) {
				$factory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
				$ticket = $factory->getEmptyTransactionTicket( __METHOD__ );
				// Avoid disconnect/ping() cycle that makes locks fall off
				$dbw->setSessionOptions( [ 'connTimeout' => 900 ] );

				$lockKey = wfWikiID() . '-activeusers';
				if ( !$dbw->lock( $lockKey, __METHOD__, 1 ) ) {
					return; // exclusive update (avoids duplicate entries)
				}

				try {
					$nowUnix = time();
					// Get the last-updated timestamp for the cache
					$cTime = $dbw->selectField( 'querycache_info',
						'qci_timestamp',
						[ 'qci_type' => 'activeusers' ],
						__METHOD__
					);
					$cTimeUnix = $cTime ? wfTimestamp( TS_UNIX, $cTime ) : 1;

					// Pick the date range to fetch from. This is normally from the last
					// update until the present time, but has a limited window for sanity.
					// If the window is limited, multiple runs are needed to fully populate it.
					$sTimestamp = max( $cTimeUnix, $nowUnix - $days * 86400 );
					$eTimestamp = min( $sTimestamp + $window, $nowUnix );

					// Get all the users active since the last update
					$res = $dbw->select(
						[ 'recentchanges' ],
						[ 'rc_user_text', 'lastedittime' => 'MAX(rc_timestamp)' ],
						[
							'rc_user > 0', // actual accounts
							'rc_type != ' . $dbw->addQuotes( RC_EXTERNAL ), // no wikidata
							'rc_log_type IS NULL OR rc_log_type != ' . $dbw->addQuotes( 'newusers' ),
							'rc_timestamp >= ' . $dbw->addQuotes( $dbw->timestamp( $sTimestamp ) ),
							'rc_timestamp <= ' . $dbw->addQuotes( $dbw->timestamp( $eTimestamp ) )
						],
						__METHOD__,
						[
							'GROUP BY' => [ 'rc_user_text' ],
							'ORDER BY' => 'NULL' // avoid filesort
						]
					);
					// Map of user name => timestamp of that user's latest change in range
					$names = [];
					foreach ( $res as $row ) {
						$names[$row->rc_user_text] = $row->lastedittime;
					}

					// Rotate out users that have not edited in too long (according to old data set)
					$dbw->delete( 'querycachetwo',
						[
							'qcc_type' => 'activeusers',
							'qcc_value < ' . $dbw->addQuotes( $nowUnix - $days * 86400 ) // TS_UNIX
						],
						__METHOD__
					);

					// Find which of the recently active users are already accounted for
					if ( count( $names ) ) {
						$res = $dbw->select( 'querycachetwo',
							[ 'user_name' => 'qcc_title' ],
							[
								'qcc_type' => 'activeusers',
								'qcc_namespace' => NS_USER,
								'qcc_title' => array_keys( $names ) ],
							__METHOD__
						);
						foreach ( $res as $row ) {
							unset( $names[$row->user_name] );
						}
					}

					// Insert the users that need to be added to the list
					if ( count( $names ) ) {
						$newRows = [];
						foreach ( $names as $name => $lastEditTime ) {
							$newRows[] = [
								'qcc_type' => 'activeusers',
								'qcc_namespace' => NS_USER,
								'qcc_title' => $name,
								'qcc_value' => wfTimestamp( TS_UNIX, $lastEditTime ),
								'qcc_namespacetwo' => 0, // unused
								'qcc_titletwo' => '' // unused
							];
						}
						foreach ( array_chunk( $newRows, 500 ) as $rowBatch ) {
							$dbw->insert( 'querycachetwo', $rowBatch, __METHOD__ );
							$factory->commitAndWaitForReplication( __METHOD__, $ticket );
						}
					}

					// If a transaction was already started, it might have an old
					// snapshot, so kludge the timestamp range back as needed.
					$asOfTimestamp = min( $eTimestamp, (int)$dbw->trxTimestamp() );

					// Touch the data freshness timestamp
					$dbw->replace( 'querycache_info',
						[ 'qci_type' ],
						[ 'qci_type' => 'activeusers',
							'qci_timestamp' => $dbw->timestamp( $asOfTimestamp ) ], // not always $now
						__METHOD__
					);
				} finally {
					// Always release the lock: if it stayed held after an exception,
					// later updates would fail to acquire it and silently skip.
					$dbw->unlock( $lockKey, __METHOD__ );
				}
			},
			__METHOD__
		);
	}
231
}
232