| Conditions | 23 |
| Paths | 4 |
| Total Lines | 212 |
| Code Lines | 134 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method

If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
| 1 | <?php |
||
/**
 * Find HistoryBlobStub rows in the text table and repoint them at the
 * external-store copy of their content.
 *
 * The text table is walked in batches of $this->batchSize rows, ordered by
 * old_id and starting after the 'start' option. Each candidate stub is
 * validated, matched against blob_tracking through the old_id it references,
 * and checked against the MD5 hash recorded in the stub. Rows that pass are
 * rewritten to an external DB:// URL and get a new blob_tracking row, unless
 * the 'dry-run' option is set, in which case nothing is written.
 *
 * A progress line, one line per problem or fix, and a final summary are
 * printed to standard output.
 */
function execute() {
	$replicaDb = $this->getDB( DB_REPLICA );
	$primaryDb = $this->getDB( DB_MASTER );

	$isDryRun = $this->getOption( 'dry-run' );
	if ( $isDryRun ) {
		print "Dry run only.\n";
	}

	$startId = $this->getOption( 'start', 0 );
	$numGood = 0;
	$numFixed = 0;
	$numBad = 0;

	$totalRevs = $replicaDb->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );

	// LOWER() does not work on the binary old_text field in MySQL 4.1+,
	// so the prefix is converted to latin1 before being lower-cased.
	$lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))';

	// These two conditions never change between batches.
	$flagsCondition = 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'';
	$classCondition = "$lowerLeft = 'o:15:\"historyblobstub\"'";

	while ( true ) {
		print "ID: $startId / $totalRevs\r";

		$candidates = $replicaDb->select(
			'text',
			[ 'old_id', 'old_flags', 'old_text' ],
			[ 'old_id > ' . (int)$startId, $flagsCondition, $classCondition ],
			__METHOD__,
			[ 'ORDER BY' => 'old_id', 'LIMIT' => $this->batchSize ]
		);

		if ( !$candidates->numRows() ) {
			// Nothing left beyond $startId: the scan is complete.
			break;
		}

		$referencedIds = [];
		$pendingStubs = [];

		foreach ( $candidates as $row ) {
			// Remember how far we got so the next batch resumes after this row.
			$startId = $row->old_id;

			// Validate the serialized payload; the first failing check wins.
			$decoded = unserialize( $row->old_text );
			if ( $decoded === false ) {
				$complaint = "{$row->old_id}: unrecoverable: cannot unserialize\n";
			} elseif ( !is_object( $decoded ) ) {
				$complaint = "{$row->old_id}: unrecoverable: unserialized to type " .
					gettype( $decoded ) . ", possible double-serialization\n";
			} elseif ( strtolower( get_class( $decoded ) ) !== 'historyblobstub' ) {
				$complaint = "{$row->old_id}: unrecoverable: unexpected object class " .
					get_class( $decoded ) . "\n";
			} else {
				$complaint = null;
			}

			if ( $complaint !== null ) {
				print $complaint;
				++$numBad;
				continue;
			}

			// A row without a UTF-8 flag is treated as legacy-encoded.
			$flagList = explode( ',', $row->old_flags );
			$hasUtf8Flag = in_array( 'utf-8', $flagList ) || in_array( 'utf8', $flagList );

			// Collect the stub so the whole batch can be looked up in one query.
			$referencedId = (int)$decoded->mOldId;
			$referencedIds[] = $referencedId;
			$pendingStubs[$row->old_id] = [
				'legacyEncoding' => !$hasUtf8Flag,
				'secondaryId' => $referencedId,
				'hash' => $decoded->mHash,
			];
		}

		$referencedIds = array_unique( $referencedIds );
		if ( count( $referencedIds ) === 0 ) {
			// Every row of this batch was rejected; move on to the next batch.
			continue;
		}

		// Fetch the blob_tracking rows for all referenced text IDs at once.
		$trackedBlobs = [];
		$trackingRows = $replicaDb->select(
			'blob_tracking',
			'*',
			[ 'bt_text_id' => $referencedIds ],
			__METHOD__
		);
		foreach ( $trackingRows as $trackingRow ) {
			$trackedBlobs[$trackingRow->bt_text_id] = $trackingRow;
		}

		foreach ( $pendingStubs as $primaryId => $stub ) {
			$referencedId = $stub['secondaryId'];

			if ( !isset( $trackedBlobs[$referencedId] ) ) {
				// The referenced row is untracked: inspect it to classify the stub.
				$secondaryRow = $replicaDb->selectRow(
					'text',
					[ 'old_flags', 'old_text' ],
					[ 'old_id' => $referencedId ],
					__METHOD__
				);

				$complaint = null;
				if ( !$secondaryRow ) {
					$complaint = "$primaryId: unrecoverable: secondary row is missing\n";
				} elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
					// Still intact, and since it is outside the tracked clusters
					// the current RCT run will not break it.
					++$numGood;
				} elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
					$complaint = "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
				} else {
					$complaint = "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
				}

				if ( $complaint !== null ) {
					print $complaint;
					++$numBad;
				}
				unset( $pendingStubs[$primaryId] );
				continue;
			}

			$trackRow = $trackedBlobs[$referencedId];

			// Make sure the tracked source row really holds the expected text.
			$url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}";
			$sourceText = ExternalStore::fetchFromURL( $url );
			if ( $sourceText === false ) {
				$complaint = "$primaryId: unrecoverable: source text missing\n";
			} elseif ( md5( $sourceText ) !== $stub['hash'] ) {
				$complaint = "$primaryId: unrecoverable: content hashes do not match\n";
			} else {
				$complaint = null;
			}

			if ( $complaint !== null ) {
				print $complaint;
				++$numBad;
				unset( $pendingStubs[$primaryId] );
				continue;
			}

			// Resolve page and revision, starting from the page of the tracked
			// row. If either cannot be determined the text is recorded as an
			// orphan (both IDs zero).
			$owningPageId = intval( $trackRow->bt_page );
			$owningRevId = $owningPageId
				? $this->findTextIdInPage( $owningPageId, $primaryId )
				: 0;
			if ( !$owningPageId || !$owningRevId ) {
				$owningPageId = 0;
				$owningRevId = 0;
			}

			if ( $stub['legacyEncoding'] ) {
				$replacementFlags = 'external';
			} else {
				$replacementFlags = 'external,utf-8';
			}

			if ( !$isDryRun ) {
				$this->beginTransaction( $primaryDb, __METHOD__ );

				// Point the text row at the original external copy.
				$primaryDb->update(
					'text',
					[ 'old_flags' => $replacementFlags, 'old_text' => $url ],
					[ 'old_id' => $primaryId ],
					__METHOD__
				);

				// Track the new reference as well, so it can be recompressed
				// without running trackBlobs.php again.
				$primaryDb->insert(
					'blob_tracking',
					[
						'bt_page' => $owningPageId,
						'bt_rev_id' => $owningRevId,
						'bt_text_id' => $primaryId,
						'bt_cluster' => $trackRow->bt_cluster,
						'bt_blob_id' => $trackRow->bt_blob_id,
						'bt_cgz_hash' => $stub['hash'],
						'bt_new_url' => null,
						'bt_moved' => 0,
					],
					__METHOD__
				);

				$this->commitTransaction( $primaryDb, __METHOD__ );
				$this->waitForSlaves();
			}

			print "$primaryId: resolved to $url\n";
			++$numFixed;
		}
	}

	print "\n";
	print "Fixed: $numFixed\n";
	print "Unrecoverable: $numBad\n";
	print "Good stubs: $numGood\n";
}
||
| 256 | |||
| 350 |
The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols (classes, functions, constants, or similar) or have side effects, but not both. Side effects are anything that executes logic, for example printing output, changing ini settings, or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about PSR-1, please see the PSR-1 page on the PHP-FIG site.