PurgeChangedPages::execute()   D
last analyzed

Complexity

Conditions 13
Paths 20

Size

Total Lines 100
Code Lines 52

Duplication

Lines 15
Ratio 15 %

Importance

Changes 0
Metric Value
cc 13
eloc 52
nc 20
nop 0
dl 15
loc 100
rs 4.9922
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
0 ignored issues
show
Coding Style Compatibility introduced by
For compatibility and reusability of your code, PSR1 recommends that a file should introduce either new symbols (like classes, functions, etc.) or have side-effects (like outputting something, or including other files), but not both at the same time. The first symbol is defined on line 35 and the first side effect is on line 24.

The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.

The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.

To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.

Loading history...
2
/**
3
 * Send purge requests for pages edited in date range to squid/varnish.
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along
16
 * with this program; if not, write to the Free Software Foundation, Inc.,
17
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
 * http://www.gnu.org/copyleft/gpl.html
19
 *
20
 * @file
21
 * @ingroup Maintenance
22
 */
23
24
require_once __DIR__ . '/Maintenance.php';
25
26
/**
27
 * Maintenance script that sends purge requests for pages edited in a date
28
 * range to squid/varnish.
29
 *
30
 * Can be used to recover from an HTCP message partition or other major cache
31
 * layer interruption.
32
 *
33
 * @ingroup Maintenance
34
 */
35
class PurgeChangedPages extends Maintenance {

	/**
	 * Register the script description, its command line options and the
	 * default batch size of 100 rows.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Send purge requests for edits in date range to squid/varnish' );
		$this->addOption( 'starttime', 'Starting timestamp', true, true );
		$this->addOption( 'endtime', 'Ending timestamp', true, true );
		$this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true );
		$this->addOption( 'sleep-per-batch', 'Milliseconds to sleep between batches', false, true );
		$this->addOption( 'dry-run', 'Do not send purge requests' );
		$this->addOption( 'verbose', 'Show more output', false, false, 'v' );
		$this->setBatchSize( 100 );
	}

	/**
	 * Walk through all pages whose latest revision falls inside the
	 * (starttime, endtime] window, in batches ordered by revision timestamp,
	 * and send a purge request for the URL of each of them.
	 *
	 * With --dry-run the URLs are only printed; with --verbose they are printed
	 * and purged. With --sleep-per-batch the script pauses after each batch
	 * that was actually purged.
	 */
	public function execute() {
		$this->routeHtcpToOption();

		$dbr = $this->getDB( DB_REPLICA );
		$minTime = $dbr->timestamp( $this->getOption( 'starttime' ) );
		$maxTime = $dbr->timestamp( $this->getOption( 'endtime' ) );

		if ( $maxTime < $minTime ) {
			$this->error( "\nERROR: starttime after endtime\n" );
			// NOTE(review): presumably this prints the help and terminates
			// the script - confirm against Maintenance::maybeHelp().
			$this->maybeHelp( true );
		}

		// The command line options cannot change while the loop is running,
		// so look them up once instead of once per batch.
		$dryRun = $this->hasOption( 'dry-run' );
		$verbose = $this->hasOption( 'verbose' );
		$sleepMicroseconds = null;
		if ( $this->hasOption( 'sleep-per-batch' ) ) {
			// sleep-per-batch is milliseconds, usleep wants microseconds.
			$sleepMicroseconds = 1000 * (int)$this->getOption( 'sleep-per-batch' );
		}

		$stuckCount = 0; // loop breaker
		while ( true ) {
			// Adjust batch size if we are stuck in a second that had many changes
			$bSize = $this->mBatchSize + ( $stuckCount * $this->mBatchSize );

			$res = $this->selectLatestRevisionsInRange( $dbr, $minTime, $maxTime, $bSize );
			if ( !$res->numRows() ) {
				// nothing more found so we are done
				break;
			}

			// Kludge to not get stuck in loops for batches with the same timestamp
			list( $rows, $lastTime ) = $this->pageableSortedRows( $res, 'rev_timestamp', $bSize );
			if ( !count( $rows ) ) {
				// Every row shared one timestamp: retry with a larger LIMIT
				++$stuckCount;
				continue;
			}
			// Reset stuck counter
			$stuckCount = 0;

			$this->output( "Processing changes from {$minTime} to {$lastTime}.\n" );

			// Advance past the last row next time
			$minTime = $lastTime;

			$urls = $this->urlsFromRows( $rows );

			if ( $dryRun || $verbose ) {
				$this->output( implode( "\n", $urls ) . "\n" );
			}
			if ( $dryRun ) {
				// Neither purge nor sleep on a dry run
				continue;
			}

			$this->sendPurges( $urls );

			if ( $sleepMicroseconds !== null ) {
				usleep( $sleepMicroseconds );
			}
		}

		$this->output( "Done!\n" );
	}

	/**
	 * If --htcp-dest was given, overwrite the global $wgHTCPRouting so that
	 * all HTCP messages are routed to the provided host:port.
	 *
	 * A destination without a ":port" part gets the default HTCP port 4827.
	 */
	private function routeHtcpToOption() {
		global $wgHTCPRouting;

		if ( !$this->hasOption( 'htcp-dest' ) ) {
			return;
		}

		$parts = explode( ':', $this->getOption( 'htcp-dest' ) );
		if ( count( $parts ) < 2 ) {
			// Add default htcp port
			$parts[] = '4827';
		}
		$host = $parts[0];
		$port = $parts[1];

		// The empty-string key is kept from the original code; presumably it
		// acts as a catch-all pattern - confirm against the $wgHTCPRouting docs.
		$wgHTCPRouting = [
			'' => [ 'host' => $host, 'port' => $port ],
		];
		if ( $this->hasOption( 'verbose' ) ) {
			$this->output( "HTCP broadcasts to {$host}:{$port}\n" );
		}
	}

	/**
	 * Query one batch of pages whose latest revision has a timestamp in
	 * ($minTime, $maxTime], ordered by that timestamp.
	 *
	 * @param object $dbr Database handle as returned by getDB( DB_REPLICA )
	 * @param string $minTime Exclusive lower bound, already in DB timestamp format
	 * @param string $maxTime Inclusive upper bound, already in DB timestamp format
	 * @param int $limit Maximum number of rows to fetch
	 * @return ResultWrapper Rows with rev_timestamp, page_namespace and page_title
	 */
	private function selectLatestRevisionsInRange( $dbr, $minTime, $maxTime, $limit ) {
		return $dbr->select(
			[ 'page', 'revision' ],
			[
				'rev_timestamp',
				'page_namespace',
				'page_title',
			],
			[
				"rev_timestamp > " . $dbr->addQuotes( $minTime ),
				"rev_timestamp <= " . $dbr->addQuotes( $maxTime ),
				// Only get rows where the revision is the latest for the page.
				// Other revisions would be duplicate and we don't need to purge if
				// there has been an edit after the interesting time window.
				"page_latest = rev_id",
			],
			// Kept as the literal the original passed via __METHOD__ inside
			// execute(), so the query is still attributed to the entry point.
			'PurgeChangedPages::execute',
			[ 'ORDER BY' => 'rev_timestamp', 'LIMIT' => $limit ],
			[
				'page' => [ 'INNER JOIN', 'rev_page=page_id' ],
			]
		);
	}

	/**
	 * Create the list of URLs to purge from page_namespace + page_title.
	 *
	 * @param array $rows Row objects with page_namespace and page_title fields
	 * @return string[] One internal URL per row, in row order
	 */
	private function urlsFromRows( array $rows ) {
		$urls = [];
		foreach ( $rows as $row ) {
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			$urls[] = $title->getInternalURL();
		}

		return $urls;
	}

	/**
	 * Send one batch of purge requests out to the squids.
	 *
	 * @param string[] $urls URLs to purge
	 */
	private function sendPurges( array $urls ) {
		// The second argument is passed exactly as in the original call;
		// NOTE(review): its meaning is not visible here - confirm against
		// the CdnCacheUpdate constructor.
		$squid = new CdnCacheUpdate( $urls, count( $urls ) );
		$squid->doUpdate();
	}

	/**
	 * Remove all the rows in a result set with the highest value for column
	 * $column unless the number of rows is less than $limit. This returns the
	 * new array of rows and the highest value of column $column for the rows
	 * left. The ordering of rows is maintained.
	 *
	 * This is useful for paging on mostly-unique values that may sometimes
	 * have large clumps of identical values. It should be safe to do the next
	 * query on items with a value higher than the highest of the rows returned here.
	 * If this returns an empty array for a non-empty query result, then all the rows
	 * had the same column value and the query should be repeated with a higher LIMIT.
	 *
	 * @todo move this elsewhere
	 *
	 * @param ResultWrapper $res Query result sorted by $column (ascending)
	 * @param string $column
	 * @param int $limit
	 * @return array (array of rows, column value of the last row kept or null)
	 */
	protected function pageableSortedRows( ResultWrapper $res, $column, $limit ) {
		// Re-index from 0 so the last row is always at index count - 1
		$rows = iterator_to_array( $res, false );
		$count = count( $rows );
		if ( !$count ) {
			return [ [], null ]; // nothing to do
		}

		$lastValue = $rows[$count - 1]->$column; // should be the highest
		if ( $count < $limit ) {
			return [ $rows, $lastValue ]; // no more rows left
		}

		// The batch was full, so rows sharing the highest value may continue
		// beyond it: drop all of them and let the next query fetch them again.
		while ( $count > 0 && $rows[$count - 1]->$column === $lastValue ) {
			array_pop( $rows );
			--$count;
		}
		$lastValueLeft = $count ? $rows[$count - 1]->$column : null;

		return [ $rows, $lastValueLeft ];
	}
}
190
191
// Name of the maintenance class defined in this file.
// NOTE(review): presumably read by the script pulled in below - confirm.
$maintClass = 'PurgeChangedPages';

// Pull in the file named by the RUN_MAINTENANCE_IF_MAIN constant (at most once).
require_once RUN_MAINTENANCE_IF_MAIN;
193