Complex classes like UpdateCollation often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use UpdateCollation, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
35 | class UpdateCollation extends Maintenance { |
||
36 | const BATCH_SIZE = 100; // Number of rows to process in one batch |
||
37 | const SYNC_INTERVAL = 5; // Wait for replica DBs after this many batches |
||
38 | |||
39 | public $sizeHistogram = []; |
||
40 | |||
41 | public function __construct() { |
||
68 | |||
69 | public function execute() { |
||
70 | global $wgCategoryCollation; |
||
71 | |||
72 | $dbw = $this->getDB( DB_MASTER ); |
||
73 | $dbr = $this->getDB( DB_REPLICA ); |
||
74 | $force = $this->getOption( 'force' ); |
||
75 | $dryRun = $this->getOption( 'dry-run' ); |
||
76 | $verboseStats = $this->getOption( 'verbose-stats' ); |
||
77 | if ( $this->hasOption( 'target-collation' ) ) { |
||
78 | $collationName = $this->getOption( 'target-collation' ); |
||
79 | $collation = Collation::factory( $collationName ); |
||
80 | } else { |
||
81 | $collationName = $wgCategoryCollation; |
||
82 | $collation = Collation::singleton(); |
||
83 | } |
||
84 | |||
85 | // Collation sanity check: in some cases the constructor will work, |
||
86 | // but this will raise an exception, breaking all category pages |
||
87 | $collation->getFirstLetter( 'MediaWiki' ); |
||
88 | |||
89 | // Locally at least, (my local is a rather old version of mysql) |
||
90 | // mysql seems to filesort if there is both an equality |
||
91 | // (but not for an inequality) condition on cl_collation in the |
||
92 | // WHERE and it is also the first item in the ORDER BY. |
||
93 | if ( $this->hasOption( 'previous-collation' ) ) { |
||
94 | $orderBy = 'cl_to, cl_type, cl_from'; |
||
95 | } else { |
||
96 | $orderBy = 'cl_collation, cl_to, cl_type, cl_from'; |
||
97 | } |
||
98 | $options = [ |
||
99 | 'LIMIT' => self::BATCH_SIZE, |
||
100 | 'ORDER BY' => $orderBy, |
||
101 | 'STRAIGHT_JOIN' // per T58041 |
||
102 | ]; |
||
103 | |||
104 | if ( $force ) { |
||
105 | $collationConds = []; |
||
106 | } else { |
||
107 | if ( $this->hasOption( 'previous-collation' ) ) { |
||
108 | $collationConds['cl_collation'] = $this->getOption( 'previous-collation' ); |
||
109 | } else { |
||
110 | $collationConds = [ 0 => |
||
111 | 'cl_collation != ' . $dbw->addQuotes( $collationName ) |
||
112 | ]; |
||
113 | } |
||
114 | |||
115 | $count = $dbr->estimateRowCount( |
||
116 | 'categorylinks', |
||
117 | '*', |
||
118 | $collationConds, |
||
119 | __METHOD__ |
||
120 | ); |
||
121 | // Improve estimate if feasible |
||
122 | if ( $count < 1000000 ) { |
||
123 | $count = $dbr->selectField( |
||
124 | 'categorylinks', |
||
125 | 'COUNT(*)', |
||
126 | $collationConds, |
||
127 | __METHOD__ |
||
128 | ); |
||
129 | } |
||
130 | if ( $count == 0 ) { |
||
131 | $this->output( "Collations up-to-date.\n" ); |
||
132 | |||
133 | return; |
||
134 | } |
||
135 | if ( $dryRun ) { |
||
136 | $this->output( "$count rows would be updated.\n" ); |
||
137 | } else { |
||
138 | $this->output( "Fixing collation for $count rows.\n" ); |
||
139 | } |
||
140 | wfWaitForSlaves(); |
||
141 | } |
||
142 | $count = 0; |
||
143 | $batchCount = 0; |
||
144 | $batchConds = []; |
||
145 | do { |
||
146 | $this->output( "Selecting next " . self::BATCH_SIZE . " rows..." ); |
||
147 | |||
148 | // cl_type must be selected as a number for proper paging because |
||
149 | // enums suck. |
||
150 | if ( $dbw->getType() === 'mysql' ) { |
||
151 | $clType = 'cl_type+0 AS "cl_type_numeric"'; |
||
152 | } else { |
||
153 | $clType = 'cl_type'; |
||
154 | } |
||
155 | $res = $dbw->select( |
||
156 | [ 'categorylinks', 'page' ], |
||
157 | [ 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation', |
||
158 | 'cl_sortkey', $clType, |
||
159 | 'page_namespace', 'page_title' |
||
160 | ], |
||
161 | array_merge( $collationConds, $batchConds, [ 'cl_from = page_id' ] ), |
||
162 | __METHOD__, |
||
163 | $options |
||
164 | ); |
||
165 | $this->output( " processing..." ); |
||
166 | |||
167 | if ( !$dryRun ) { |
||
168 | $this->beginTransaction( $dbw, __METHOD__ ); |
||
169 | } |
||
170 | foreach ( $res as $row ) { |
||
171 | $title = Title::newFromRow( $row ); |
||
172 | if ( !$row->cl_collation ) { |
||
173 | # This is an old-style row, so the sortkey needs to be |
||
174 | # converted. |
||
175 | if ( $row->cl_sortkey == $title->getText() |
||
176 | || $row->cl_sortkey == $title->getPrefixedText() |
||
177 | ) { |
||
178 | $prefix = ''; |
||
179 | } else { |
||
180 | # Custom sortkey, use it as a prefix |
||
181 | $prefix = $row->cl_sortkey; |
||
182 | } |
||
183 | } else { |
||
184 | $prefix = $row->cl_sortkey_prefix; |
||
185 | } |
||
186 | # cl_type will be wrong for lots of pages if cl_collation is 0, |
||
187 | # so let's update it while we're here. |
||
188 | if ( $title->getNamespace() == NS_CATEGORY ) { |
||
189 | $type = 'subcat'; |
||
190 | } elseif ( $title->getNamespace() == NS_FILE ) { |
||
191 | $type = 'file'; |
||
192 | } else { |
||
193 | $type = 'page'; |
||
194 | } |
||
195 | $newSortKey = $collation->getSortKey( |
||
196 | $title->getCategorySortkey( $prefix ) ); |
||
197 | if ( $verboseStats ) { |
||
198 | $this->updateSortKeySizeHistogram( $newSortKey ); |
||
199 | } |
||
200 | |||
201 | if ( !$dryRun ) { |
||
202 | $dbw->update( |
||
203 | 'categorylinks', |
||
204 | [ |
||
205 | 'cl_sortkey' => $newSortKey, |
||
206 | 'cl_sortkey_prefix' => $prefix, |
||
207 | 'cl_collation' => $collationName, |
||
208 | 'cl_type' => $type, |
||
209 | 'cl_timestamp = cl_timestamp', |
||
210 | ], |
||
211 | [ 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ], |
||
212 | __METHOD__ |
||
213 | ); |
||
214 | } |
||
215 | if ( $row ) { |
||
216 | $batchConds = [ $this->getBatchCondition( $row, $dbw ) ]; |
||
217 | } |
||
218 | } |
||
219 | if ( !$dryRun ) { |
||
220 | $this->commitTransaction( $dbw, __METHOD__ ); |
||
221 | } |
||
222 | |||
223 | $count += $res->numRows(); |
||
224 | $this->output( "$count done.\n" ); |
||
225 | |||
226 | if ( !$dryRun && ++$batchCount % self::SYNC_INTERVAL == 0 ) { |
||
227 | $this->output( "Waiting for replica DBs ... " ); |
||
228 | wfWaitForSlaves(); |
||
229 | $this->output( "done\n" ); |
||
230 | } |
||
231 | } while ( $res->numRows() == self::BATCH_SIZE ); |
||
232 | |||
233 | $this->output( "$count rows processed\n" ); |
||
234 | |||
235 | if ( $verboseStats ) { |
||
236 | $this->output( "\n" ); |
||
237 | $this->showSortKeySizeHistogram(); |
||
238 | } |
||
239 | } |
||
240 | |||
241 | /** |
||
242 | * Return an SQL expression selecting rows which sort above the given row, |
||
243 | * assuming an ordering of cl_collation, cl_to, cl_type, cl_from |
||
244 | * @param stdClass $row |
||
245 | * @param Database $dbw |
||
246 | * @return string |
||
247 | */ |
||
248 | function getBatchCondition( $row, $dbw ) { |
||
279 | |||
280 | function updateSortKeySizeHistogram( $key ) { |
||
287 | |||
288 | function showSortKeySizeHistogram() { |
||
343 | } |
||
344 | |||
347 |
The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols (classes, functions, constants, or similar) or have side effects, but not both. Side effects are anything that executes logic, such as printing output, changing ini settings, or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about PSR-1, please see the PHP-FIG site on PSR-1.