Completed
Branch master (e2eefa)
by
unknown
25:58
created

TableCleanup   A

Complexity

Total Complexity 15

Size/Duplication

Total Lines 144
Duplicated Lines 4.86 %

Coupling/Cohesion

Components 1
Dependencies 4
Metric Value
dl 7
loc 144
rs 10
wmc 15
lcom 1
cbo 4

6 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A execute() 0 12 2
A init() 7 7 1
B progress() 0 29 2
B runTable() 0 58 8
A hexChar() 0 3 1

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
/**
3
 * Generic class to clean up a database table.
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along
16
 * with this program; if not, write to the Free Software Foundation, Inc.,
17
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
 * http://www.gnu.org/copyleft/gpl.html
19
 *
20
 * @file
21
 * @ingroup Maintenance
22
 */
23
24
require_once __DIR__ . '/Maintenance.php';
25
26
/**
27
 * Generic class to clean up a database table. Already subclasses Maintenance.
28
 *
29
 * @ingroup Maintenance
30
 */
31
class TableCleanup extends Maintenance {
	/**
	 * Parameters passed to runTable() by execute().
	 *
	 * NOTE(review): 'processRow' is not defined in this class; presumably
	 * subclasses are expected to provide it -- confirm against callers.
	 *
	 * @var array
	 */
	protected $defaultParams = [
		'table' => 'page',
		'conds' => [],
		'index' => 'page_id',
		'callback' => 'processRow',
	];

	/** @var bool Set from the 'dry-run' command line option in execute() */
	protected $dryrun = false;

	/** @var int Number of rows selected per query in runTable() */
	public $batchSize = 100;

	/** @var int progress() prints a status line every this many processed rows */
	public $reportInterval = 100;

	/** @var int Number of rows passed to progress() since init() */
	protected $processed;

	/** @var int Running total of the $updated values passed to progress() */
	protected $updated;

	/** @var int Row count given to init(); runTable() passes an estimate */
	protected $count;

	/** @var float Value of microtime( true ) when init() was called */
	protected $startTime;

	/** @var string Name of the table given to init() */
	protected $table;

	public function __construct() {
		parent::__construct();
		$this->addOption( 'dry-run', 'Perform a dry run' );
	}

	/**
	 * Read the 'dry-run' option, set up $wgUser as 'Conversion script' and
	 * run the cleanup with the default parameters.
	 */
	public function execute() {
		global $wgUser;
		$this->dryrun = $this->hasOption( 'dry-run' );
		if ( $this->dryrun ) {
			$wgUser = User::newFromName( 'Conversion script' );
			$this->output( "Checking for bad titles...\n" );
		} else {
			$wgUser = User::newSystemUser( 'Conversion script', [ 'steal' => true ] );
			$this->output( "Checking and fixing bad titles...\n" );
		}
		$this->runTable( $this->defaultParams );
	}

	/**
	 * Reset the progress counters and start the timer for a new table.
	 *
	 * @param int $count Number of rows expected; used by progress() to compute
	 *  the percentage done and the ETA
	 * @param string $table Table name shown in progress lines
	 */
	protected function init( $count, $table ) {
		$this->processed = 0;
		$this->updated = 0;
		$this->count = $count;
		$this->startTime = microtime( true );
		$this->table = $table;
	}

	/**
	 * Record one processed row and print a status line every
	 * $reportInterval rows.
	 *
	 * @param int $updated Amount to add to the updated-rows counter
	 */
	protected function progress( $updated ) {
		$this->updated += $updated;
		$this->processed++;

		// A non-positive interval would make the modulo below divide by zero;
		// treat it as "no progress reports".
		if ( $this->reportInterval <= 0 || $this->processed % $this->reportInterval != 0 ) {
			return;
		}

		// $count is only an estimate when set by runTable() and can be 0 even
		// though rows are being processed. Report 100% rather than dividing
		// by zero.
		$portion = $this->count > 0 ? $this->processed / $this->count : 1.0;
		$updateRate = $this->updated / $this->processed;

		$now = microtime( true );
		$delta = $now - $this->startTime;
		$estimatedTotalTime = $delta / $portion;
		$eta = $this->startTime + $estimatedTotalTime;
		// Guard against a zero elapsed time on a coarse clock.
		$rowsPerSecond = $delta > 0 ? $this->processed / $delta : 0.0;

		$this->output(
			sprintf( "%s %s: %6.2f%% done on %s; ETA %s [%d/%d] %.2f/sec <%.2f%% updated>\n",
				wfWikiID(),
				wfTimestamp( TS_DB, intval( $now ) ),
				$portion * 100.0,
				$this->table,
				wfTimestamp( TS_DB, intval( $eta ) ),
				$this->processed,
				$this->count,
				$rowsPerSecond,
				$updateRate * 100.0
			)
		);
		flush();
	}

	/**
	 * Walk a table in batches of $batchSize rows, ordered by the given index,
	 * and invoke a callback method of this object on every row.
	 *
	 * @param array $params Must contain exactly these keys:
	 *  - table: name of the table to select from
	 *  - conds: array of conditions applied to every batch query
	 *  - index: field name, or array of field names, used for ordering and
	 *    for building the "rows after the last one seen" condition
	 *  - callback: name of the method on $this called with each row
	 * @throws MWException If a key is missing or an unknown key is given
	 */
	public function runTable( $params ) {
		$dbr = $this->getDB( DB_SLAVE );

		$expectedKeys = [ 'table', 'conds', 'index', 'callback' ];
		$givenKeys = array_keys( $params );

		// All four keys are read below, so a missing one is an error.
		$missing = array_diff( $expectedKeys, $givenKeys );
		if ( $missing ) {
			throw new MWException( __METHOD__ . ': Missing parameter ' . implode( ', ', $missing ) );
		}
		// Unknown keys have always been rejected; name them in the message.
		$unknown = array_diff( $givenKeys, $expectedKeys );
		if ( $unknown ) {
			throw new MWException( __METHOD__ . ': Unknown parameter ' . implode( ', ', $unknown ) );
		}

		$table = $params['table'];
		// count(*) would melt the DB for huge tables, we can estimate here
		$count = $dbr->estimateRowCount( $table, '*', '', __METHOD__ );
		$this->init( $count, $table );
		$this->output( "Processing $table...\n" );

		$index = (array)$params['index'];
		$indexConds = [];
		$options = [
			'ORDER BY' => implode( ',', $index ),
			'LIMIT' => $this->batchSize
		];
		$callback = [ $this, $params['callback'] ];

		while ( true ) {
			$conds = array_merge( $params['conds'], $indexConds );
			$res = $dbr->select( $table, '*', $conds, __METHOD__, $options );
			if ( !$res->numRows() ) {
				// Done
				break;
			}

			// Remember the last row explicitly instead of relying on the loop
			// variable staying in scope after the foreach.
			$lastRow = null;
			foreach ( $res as $row ) {
				call_user_func( $callback, $row );
				$lastRow = $row;
			}

			if ( $res->numRows() < $this->batchSize ) {
				// Done
				break;
			}

			// Update the conditions to select the next batch.
			// Construct a condition string by starting with the least significant part
			// of the index, and adding more significant parts progressively to the left
			// of the string.
			// For an index (a, b) and a last row (A, B) this produces:
			//   a > A OR (a = A AND (b > B))
			$nextCond = '';
			foreach ( array_reverse( $index ) as $field ) {
				$encValue = $dbr->addQuotes( $lastRow->$field );
				if ( $nextCond === '' ) {
					$nextCond = "$field > $encValue";
				} else {
					$nextCond = "$field > $encValue OR ($field = $encValue AND ($nextCond))";
				}
			}
			$indexConds = [ $nextCond ];
		}

		$this->output( "Finished $table... $this->updated of $this->processed rows updated\n" );
	}

	/**
	 * Format the first byte of a match group as a "\xNN" hexadecimal escape.
	 *
	 * @param array $matches Match array; element 1 holds the character to encode
	 * @return string Literal backslash, "x" and two lowercase hex digits
	 */
	protected function hexChar( $matches ) {
		return sprintf( "\\x%02x", ord( $matches[1] ) );
	}
}
175