Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like ApiPageSet often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ApiPageSet, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
41 | class ApiPageSet extends ApiBase { |
||
42 | /** |
||
43 | * Constructor flag: The new instance of ApiPageSet will ignore the 'generator=' parameter |
||
44 | * @since 1.21 |
||
45 | */ |
||
46 | const DISABLE_GENERATORS = 1; |
||
47 | |||
48 | private $mDbSource; |
||
49 | private $mParams; |
||
50 | private $mResolveRedirects; |
||
51 | private $mConvertTitles; |
||
52 | private $mAllowGenerator; |
||
53 | |||
54 | private $mAllPages = []; // [ns][dbkey] => page_id or negative when missing |
||
55 | private $mTitles = []; |
||
56 | private $mGoodAndMissingPages = []; // [ns][dbkey] => page_id or negative when missing |
||
57 | private $mGoodPages = []; // [ns][dbkey] => page_id |
||
58 | private $mGoodTitles = []; |
||
59 | private $mMissingPages = []; // [ns][dbkey] => fake page_id |
||
60 | private $mMissingTitles = []; |
||
61 | /** @var array [fake_page_id] => [ 'title' => $title, 'invalidreason' => $reason ] */ |
||
62 | private $mInvalidTitles = []; |
||
63 | private $mMissingPageIDs = []; |
||
64 | private $mRedirectTitles = []; |
||
65 | private $mSpecialTitles = []; |
||
66 | private $mNormalizedTitles = []; |
||
67 | private $mInterwikiTitles = []; |
||
68 | /** @var Title[] */ |
||
69 | private $mPendingRedirectIDs = []; |
||
70 | private $mResolvedRedirectTitles = []; |
||
71 | private $mConvertedTitles = []; |
||
72 | private $mGoodRevIDs = []; |
||
73 | private $mLiveRevIDs = []; |
||
74 | private $mDeletedRevIDs = []; |
||
75 | private $mMissingRevIDs = []; |
||
76 | private $mGeneratorData = []; // [ns][dbkey] => data array |
||
77 | private $mFakePageId = -1; |
||
78 | private $mCacheMode = 'public'; |
||
79 | private $mRequestedPageFields = []; |
||
80 | /** @var int */ |
||
81 | private $mDefaultNamespace = NS_MAIN; |
||
82 | /** @var callable|null */ |
||
83 | private $mRedirectMergePolicy; |
||
84 | |||
/**
 * Append one output entry to $result for every item in $values.
 *
 * Each item is turned into an entry as follows:
 *  - a Title instance starts as an empty array that is passed, together
 *    with the Title, to ApiQueryBase::addTitleInfo();
 *  - any other item is wrapped as [ $name => item ] when $name is not null;
 *  - otherwise the item itself is used as the entry.
 * Every name listed in $flags is then set to true on the entry before it
 * is appended.
 *
 * NOTE(review): when $flags is non-empty and $name is null, non-Title items
 * are presumably expected to already be arrays -- confirm against callers.
 *
 * @param array &$result Output list, modified in place
 * @param array $values Items to add
 * @param string[] $flags Names of boolean flags to set on each entry
 * @param string|null $name If given, key under which a non-Title item is stored
 */
private static function addValues( array &$result, $values, $flags = [], $name = null ) {
	$wrapWithName = $name !== null;

	foreach ( $values as $item ) {
		if ( $item instanceof Title ) {
			$entry = [];
			ApiQueryBase::addTitleInfo( $entry, $item );
		} else {
			$entry = $wrapWithName ? [ $name => $item ] : $item;
		}

		foreach ( $flags as $flagName ) {
			$entry[$flagName] = true;
		}

		$result[] = $entry;
	}
}
||
108 | |||
109 | /** |
||
110 | * @param ApiBase $dbSource Module implementing getDB(). |
||
111 | * Allows PageSet to reuse existing db connection from the shared state like ApiQuery. |
||
112 | * @param int $flags Zero or more flags like DISABLE_GENERATORS |
||
113 | * @param int $defaultNamespace The namespace to use if none is specified by a prefix. |
||
114 | * @since 1.21 accepts $flags instead of two boolean values |
||
115 | */ |
||
116 | public function __construct( ApiBase $dbSource, $flags = 0, $defaultNamespace = NS_MAIN ) { |
||
126 | |||
127 | /** |
||
128 | * In case execute() is not called, call this method to mark all relevant parameters as used |
||
129 | * This prevents unused parameters from being reported as warnings |
||
130 | */ |
||
131 | public function executeDryRun() { |
||
134 | |||
135 | /** |
||
136 | * Populate the PageSet from the request parameters. |
||
137 | */ |
||
138 | public function execute() { |
||
141 | |||
142 | /** |
||
143 | * Populate the PageSet from the request parameters. |
||
144 | * @param bool $isDryRun If true, instantiates generator, but only to mark |
||
145 | * relevant parameters as used |
||
146 | */ |
||
147 | private function executeInternal( $isDryRun ) { |
||
232 | |||
233 | /** |
||
234 | * Check whether this PageSet is resolving redirects |
||
235 | * @return bool |
||
236 | */ |
||
237 | public function isResolvingRedirects() { |
||
240 | |||
241 | /** |
||
242 | * Return the parameter name that is the source of data for this PageSet |
||
243 | * |
||
244 | * If multiple source parameters are specified (e.g. titles and pageids), |
||
245 | * one will be named arbitrarily. |
||
246 | * |
||
247 | * @return string|null |
||
248 | */ |
||
249 | public function getDataSource() { |
||
265 | |||
266 | /** |
||
267 | * Request an additional field from the page table. |
||
268 | * Must be called before execute() |
||
269 | * @param string $fieldName Field name |
||
270 | */ |
||
271 | public function requestField( $fieldName ) { |
||
274 | |||
275 | /** |
||
276 | * Get the value of a custom field previously requested through |
||
277 | * requestField() |
||
278 | * @param string $fieldName Field name |
||
279 | * @return mixed Field value |
||
280 | */ |
||
281 | public function getCustomField( $fieldName ) { |
||
284 | |||
285 | /** |
||
286 | * Get the fields that have to be queried from the page table: |
||
287 | * the ones requested through requestField() and a few basic ones |
||
288 | * we always need |
||
289 | * @return array Array of field names |
||
290 | */ |
||
291 | public function getPageTableFields() { |
||
320 | |||
321 | /** |
||
322 | * Returns an array [ns][dbkey] => page_id for all requested titles. |
||
323 | * page_id is a unique negative number in case title was not found. |
||
324 | * Invalid titles will also have negative page IDs and will be in namespace 0 |
||
325 | * @return array |
||
326 | */ |
||
327 | public function getAllTitlesByNamespace() { |
||
330 | |||
331 | /** |
||
332 | * All Title objects provided. |
||
333 | * @return Title[] |
||
334 | */ |
||
335 | public function getTitles() { |
||
338 | |||
339 | /** |
||
340 | * Returns the number of unique pages (not revisions) in the set. |
||
341 | * @return int |
||
342 | */ |
||
343 | public function getTitleCount() { |
||
346 | |||
347 | /** |
||
348 | * Returns an array [ns][dbkey] => page_id for all good titles. |
||
349 | * @return array |
||
350 | */ |
||
351 | public function getGoodTitlesByNamespace() { |
||
354 | |||
355 | /** |
||
356 | * Title objects that were found in the database. |
||
357 | * @return Title[] Array page_id (int) => Title (obj) |
||
358 | */ |
||
359 | public function getGoodTitles() { |
||
362 | |||
363 | /** |
||
364 | * Returns the number of found unique pages (not revisions) in the set. |
||
365 | * @return int |
||
366 | */ |
||
367 | public function getGoodTitleCount() { |
||
370 | |||
371 | /** |
||
372 | * Returns an array [ns][dbkey] => fake_page_id for all missing titles. |
||
373 | * fake_page_id is a unique negative number. |
||
374 | * @return array |
||
375 | */ |
||
376 | public function getMissingTitlesByNamespace() { |
||
379 | |||
380 | /** |
||
381 | * Title objects that were NOT found in the database. |
||
382 | * The array's index will be negative for each item |
||
383 | * @return Title[] |
||
384 | */ |
||
385 | public function getMissingTitles() { |
||
388 | |||
389 | /** |
||
390 | * Returns an array [ns][dbkey] => page_id for all good and missing titles. |
||
391 | * @return array |
||
392 | */ |
||
393 | public function getGoodAndMissingTitlesByNamespace() { |
||
396 | |||
397 | /** |
||
398 | * Title objects for good and missing titles. |
||
399 | * @return array |
||
400 | */ |
||
401 | public function getGoodAndMissingTitles() { |
||
404 | |||
405 | /** |
||
406 | * Titles that were deemed invalid by Title::newFromText() |
||
407 | * The array's index will be unique and negative for each item |
||
408 | * @deprecated since 1.26, use self::getInvalidTitlesAndReasons() |
||
409 | * @return string[] Array of strings (not Title objects) |
||
410 | */ |
||
411 | public function getInvalidTitles() { |
||
417 | |||
418 | /** |
||
419 | * Titles that were deemed invalid by Title::newFromText() |
||
420 | * The array's index will be unique and negative for each item |
||
421 | * @return array[] Array of arrays with 'title' and 'invalidreason' properties |
||
422 | */ |
||
423 | public function getInvalidTitlesAndReasons() { |
||
426 | |||
427 | /** |
||
428 | * Page IDs that were not found in the database |
||
429 | * @return array Array of page IDs |
||
430 | */ |
||
431 | public function getMissingPageIDs() { |
||
434 | |||
435 | /** |
||
436 | * Get a list of redirect resolutions - maps a title to its redirect |
||
437 | * target, as an array of output-ready arrays |
||
438 | * @return Title[] |
||
439 | */ |
||
440 | public function getRedirectTitles() { |
||
443 | |||
444 | /** |
||
445 | * Get a list of redirect resolutions - maps a title to its redirect |
||
446 | * target. Includes generator data for redirect source when available. |
||
447 | * @param ApiResult $result |
||
448 | * @return array Array of prefixed_title (string) => Title object |
||
449 | * @since 1.21 |
||
450 | */ |
||
451 | public function getRedirectTitlesAsResult( $result = null ) { |
||
481 | |||
482 | /** |
||
483 | * Get a list of title normalizations - maps a title to its normalized |
||
484 | * version. |
||
485 | * @return array Array of raw_prefixed_title (string) => prefixed_title (string) |
||
486 | */ |
||
487 | public function getNormalizedTitles() { |
||
490 | |||
491 | /** |
||
492 | * Get a list of title normalizations - maps a title to its normalized |
||
493 | * version in the form of result array. |
||
494 | * @param ApiResult $result |
||
495 | * @return array Array of raw_prefixed_title (string) => prefixed_title (string) |
||
496 | * @since 1.21 |
||
497 | */ |
||
498 | public function getNormalizedTitlesAsResult( $result = null ) { |
||
516 | |||
517 | /** |
||
518 | * Get a list of title conversions - maps a title to its converted |
||
519 | * version. |
||
520 | * @return array Array of raw_prefixed_title (string) => prefixed_title (string) |
||
521 | */ |
||
522 | public function getConvertedTitles() { |
||
525 | |||
526 | /** |
||
527 | * Get a list of title conversions - maps a title to its converted |
||
528 | * version as a result array. |
||
529 | * @param ApiResult $result |
||
530 | * @return array Array of (from, to) strings |
||
531 | * @since 1.21 |
||
532 | */ |
||
533 | public function getConvertedTitlesAsResult( $result = null ) { |
||
547 | |||
548 | /** |
||
549 | * Get a list of interwiki titles - maps a title to its interwiki |
||
550 | * prefix. |
||
551 | * @return array Array of raw_prefixed_title (string) => interwiki_prefix (string) |
||
552 | */ |
||
553 | public function getInterwikiTitles() { |
||
556 | |||
557 | /** |
||
558 | * Get a list of interwiki titles - maps a title to its interwiki |
||
559 | * prefix as result. |
||
560 | * @param ApiResult $result |
||
561 | * @param bool $iwUrl |
||
562 | * @return array Array of raw_prefixed_title (string) => interwiki_prefix (string) |
||
563 | * @since 1.21 |
||
564 | */ |
||
565 | public function getInterwikiTitlesAsResult( $result = null, $iwUrl = false ) { |
||
584 | |||
585 | /** |
||
586 | * Get an array of invalid/special/missing titles. |
||
587 | * |
||
588 | * @param array $invalidChecks List of types of invalid titles to include. |
||
589 | * Recognized values are: |
||
590 | * - invalidTitles: Titles and reasons from $this->getInvalidTitlesAndReasons() |
||
591 | * - special: Titles from $this->getSpecialTitles() |
||
592 | * - missingIds: ids from $this->getMissingPageIDs() |
||
593 | * - missingRevIds: ids from $this->getMissingRevisionIDs() |
||
594 | * - missingTitles: Titles from $this->getMissingTitles() |
||
595 | * - interwikiTitles: Titles from $this->getInterwikiTitlesAsResult() |
||
596 | * @return array Array suitable for inclusion in the response |
||
597 | * @since 1.23 |
||
598 | */ |
||
599 | public function getInvalidTitlesAndRevisions( $invalidChecks = [ 'invalidTitles', |
||
644 | |||
645 | /** |
||
646 | * Get the list of valid revision IDs (requested with the revids= parameter) |
||
647 | * @return array Array of revID (int) => pageID (int) |
||
648 | */ |
||
649 | public function getRevisionIDs() { |
||
652 | |||
653 | /** |
||
654 | * Get the list of non-deleted revision IDs (requested with the revids= parameter) |
||
655 | * @return array Array of revID (int) => pageID (int) |
||
656 | */ |
||
657 | public function getLiveRevisionIDs() { |
||
660 | |||
661 | /** |
||
662 | * Get the list of revision IDs that were associated with deleted titles. |
||
663 | * @return array Array of revID (int) => pageID (int) |
||
664 | */ |
||
665 | public function getDeletedRevisionIDs() { |
||
668 | |||
669 | /** |
||
670 | * Revision IDs that were not found in the database |
||
671 | * @return array Array of revision IDs |
||
672 | */ |
||
673 | public function getMissingRevisionIDs() { |
||
676 | |||
677 | /** |
||
678 | * Revision IDs that were not found in the database as result array. |
||
679 | * @param ApiResult $result |
||
680 | * @return array Array of revision IDs |
||
681 | * @since 1.21 |
||
682 | */ |
||
683 | public function getMissingRevisionIDsAsResult( $result = null ) { |
||
696 | |||
697 | /** |
||
698 | * Get the list of titles with negative namespace |
||
699 | * @return Title[] |
||
700 | */ |
||
701 | public function getSpecialTitles() { |
||
704 | |||
705 | /** |
||
706 | * Returns the number of revisions (requested with revids= parameter). |
||
707 | * @return int Number of revisions. |
||
708 | */ |
||
709 | public function getRevisionCount() { |
||
712 | |||
713 | /** |
||
714 | * Populate this PageSet from a list of Titles |
||
715 | * @param array $titles Array of Title objects |
||
716 | */ |
||
717 | public function populateFromTitles( $titles ) { |
||
720 | |||
721 | /** |
||
722 | * Populate this PageSet from a list of page IDs |
||
723 | * @param array $pageIDs Array of page IDs |
||
724 | */ |
||
725 | public function populateFromPageIDs( $pageIDs ) { |
||
728 | |||
729 | /** |
||
730 | * Populate this PageSet from a rowset returned from the database |
||
731 | * |
||
732 | * Note that the query result must include the columns returned by |
||
733 | * $this->getPageTableFields(). |
||
734 | * |
||
735 | * @param IDatabase $db |
||
736 | * @param ResultWrapper $queryResult Query result object |
||
737 | */ |
||
738 | public function populateFromQueryResult( $db, $queryResult ) { |
||
741 | |||
742 | /** |
||
743 | * Populate this PageSet from a list of revision IDs |
||
744 | * @param array $revIDs Array of revision IDs |
||
745 | */ |
||
746 | public function populateFromRevisionIDs( $revIDs ) { |
||
749 | |||
750 | /** |
||
751 | * Extract all requested fields from the row received from the database |
||
752 | * @param stdClass $row Result row |
||
753 | */ |
||
754 | public function processDbRow( $row ) { |
||
776 | |||
777 | /** |
||
778 | * This method populates internal variables with page information |
||
779 | * based on the given array of title strings. |
||
780 | * |
||
781 | * Steps: |
||
782 | * #1 For each title, get data from `page` table |
||
783 | * #2 If page was not found in the DB, store it as missing |
||
784 | * |
||
785 | * Additionally, when resolving redirects: |
||
786 | * #3 If no more redirects left, stop. |
||
787 | * #4 For each redirect, get its target from the `redirect` table. |
||
788 | * #5 Substitute the original LinkBatch object with the new list |
||
789 | * #6 Repeat from step #1 |
||
790 | * |
||
791 | * @param array $titles Array of Title objects or strings |
||
792 | */ |
||
793 | private function initFromTitles( $titles ) { |
||
813 | |||
814 | /** |
||
815 | * Does the same as initFromTitles(), but is based on page IDs instead |
||
816 | * @param array $pageids Array of page IDs |
||
817 | */ |
||
818 | private function initFromPageIds( $pageids ) { |
||
845 | |||
846 | /** |
||
847 | * Iterate through the result of the query on 'page' table, |
||
848 | * and for each row create and store title object and save any extra fields requested. |
||
849 | * @param ResultWrapper $res DB Query result |
||
850 | * @param array $remaining Array of either pageID or ns/title elements (optional). |
||
851 | * If given, any missing items will go to $mMissingPageIDs and $mMissingTitles |
||
852 | * @param bool $processTitles Must be provided together with $remaining. |
||
853 | * If true, treat $remaining as an array of [ns][title] |
||
854 | * If false, treat it as an array of [pageIDs] |
||
855 | */ |
||
856 | private function initFromQueryResult( $res, &$remaining = null, $processTitles = null ) { |
||
921 | |||
922 | /** |
||
923 | * Does the same as initFromTitles(), but is based on revision IDs |
||
924 | * instead |
||
925 | * @param array $revids Array of revision IDs |
||
926 | */ |
||
927 | private function initFromRevIDs( $revids ) { |
||
1004 | |||
1005 | /** |
||
1006 | * Resolve any redirects in the result if redirect resolution was |
||
1007 | * requested. This function is called repeatedly until all redirects |
||
1008 | * have been resolved. |
||
1009 | */ |
||
1010 | private function resolvePendingRedirects() { |
||
1039 | |||
1040 | /** |
||
1041 | * Get the targets of the pending redirects from the database |
||
1042 | * |
||
1043 | * Also creates entries in the redirect table for redirects that don't |
||
1044 | * have one. |
||
1045 | * @return LinkBatch |
||
1046 | */ |
||
1047 | private function getRedirectTargets() { |
||
1101 | |||
1102 | /** |
||
1103 | * Get the cache mode for the data generated by this module. |
||
1104 | * All PageSet users should take into account whether this returns a more-restrictive |
||
1105 | * cache mode than the using module itself. For possible return values and other |
||
1106 | * details about cache modes, see ApiMain::setCacheMode() |
||
1107 | * |
||
1108 | * Public caching will only be allowed if *all* the modules that supply |
||
1109 | * data for a given request return a cache mode of public. |
||
1110 | * |
||
1111 | * @param array|null $params |
||
1112 | * @return string |
||
1113 | * @since 1.21 |
||
1114 | */ |
||
1115 | public function getCacheMode( $params = null ) { |
||
1118 | |||
1119 | /** |
||
1120 | * Given an array of title strings, convert them into Title objects. |
||
1121 | * Alternatively, an array of Title objects may be given. |
||
1122 | * This method validates access rights for the title, |
||
1123 | * and appends normalization values to the output. |
||
1124 | * |
||
1125 | * @param array $titles Array of Title objects or strings |
||
1126 | * @return LinkBatch |
||
1127 | */ |
||
1128 | private function processTitlesArray( $titles ) { |
||
1205 | |||
1206 | /** |
||
1207 | * Set data for a title. |
||
1208 | * |
||
1209 | * This data may be extracted into an ApiResult using |
||
1210 | * self::populateGeneratorData. This should generally be limited to |
||
1211 | * data that is likely to be particularly useful to end users rather than |
||
1212 | * just being a dump of everything returned in non-generator mode. |
||
1213 | * |
||
1214 | * Redirects here will *not* be followed, even if 'redirects' was |
||
1215 | * specified, since in the case of multiple redirects we can't know which |
||
1216 | * source's data to use on the target. |
||
1217 | * |
||
1218 | * @param Title $title |
||
1219 | * @param array $data |
||
1220 | */ |
||
1221 | public function setGeneratorData( Title $title, array $data ) { |
||
1226 | |||
1227 | /** |
||
1228 | * Controls how generator data about a redirect source is merged into |
||
1229 | * the generator data for the redirect target. When not set no data |
||
1230 | * is merged. Note that if multiple titles redirect to the same target |
||
1231 | * the order of operations is undefined. |
||
1232 | * |
||
1233 | * Example to include generated data from redirect in target, preferring |
||
1234 | * the data generated for the destination when there is a collision: |
||
1235 | * @code |
||
1236 | * $pageSet->setRedirectMergePolicy( function( array $current, array $new ) { |
||
1237 | * return $current + $new; |
||
1238 | * } ); |
||
1239 | * @endcode |
||
1240 | * |
||
1241 | * @param callable|null $callable Receives two array arguments, first the |
||
1242 | * generator data for the redirect target and second the generator data |
||
1243 | * for the redirect source. Returns the resulting generator data to use |
||
1244 | * for the redirect target. |
||
1245 | */ |
||
1246 | public function setRedirectMergePolicy( $callable ) { |
||
1249 | |||
1250 | /** |
||
1251 | * Populate the generator data for all titles in the result |
||
1252 | * |
||
1253 | * The page data may be inserted into an ApiResult object or into an |
||
1254 | * associative array. The $path parameter specifies the path within the |
||
1255 | * ApiResult or array to find the "pages" node. |
||
1256 | * |
||
1257 | * The "pages" node itself must be an associative array mapping the page ID |
||
1258 | * or fake page ID values returned by this pageset (see |
||
1259 | * self::getAllTitlesByNamespace() and self::getSpecialTitles()) to |
||
1260 | * associative arrays of page data. Each of those subarrays will have the |
||
1261 | * data from self::setGeneratorData() merged in. |
||
1262 | * |
||
1263 | * Data that was set by self::setGeneratorData() for pages not in the |
||
1264 | * "pages" node will be ignored. |
||
1265 | * |
||
1266 | * @param ApiResult|array &$result |
||
1267 | * @param array $path |
||
1268 | * @return bool Whether the data fit |
||
1269 | */ |
||
1270 | public function populateGeneratorData( &$result, array $path = [] ) { |
||
1355 | |||
1356 | /** |
||
1357 | * Get the database connection (read-only) |
||
1358 | * @return Database |
||
1359 | */ |
||
1360 | protected function getDB() { |
||
1363 | |||
1364 | /** |
||
1365 | * Returns the input array of integers with all values < 0 removed |
||
1366 | * |
||
1367 | * @param array $array |
||
1368 | * @return array |
||
1369 | */ |
||
1370 | private static function getPositiveIntegers( $array ) { |
||
1382 | |||
1383 | public function getAllowedParams( $flags = 0 ) { |
||
1434 | |||
1435 | protected function handleParamNormalization( $paramName, $value, $rawValue ) { |
||
1451 | |||
1452 | private static $generators = null; |
||
1453 | |||
1454 | /** |
||
1455 | * Get an array of all available generators |
||
1456 | * @return array |
||
1457 | */ |
||
1458 | private function getGenerators() { |
||
1480 | } |
||
1481 |
Unless you are absolutely sure that the expression can never be null because of other conditions, we strongly recommend adding an additional type check to your code: