| Total Complexity | 54 | 
| Total Lines | 503 | 
| Duplicated Lines | 0 % | 
| Changes | 1 | ||
| Bugs | 0 | Features | 0 | 
Complex classes like PageRepository often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use PageRepository, and based on these observations, apply Extract Interface, too.
| 1 | <?php  | 
            ||
| 24 | class PageRepository extends Repository  | 
            ||
| 25 | { | 
            ||
/**
 * Fetch API metadata for one page.
 *
 * Thin wrapper around getPagesInfo(): asks for a single title and hands back the first
 * entry of the resulting list.
 *
 * @param Project $project The project to which the page belongs.
 * @param string $pageTitle Page title.
 * @return string[]|null Array with some of the following keys: pageid, title, missing, displaytitle, url.
 *   Null when getPagesInfo() returns null, or when it returns an empty list.
 */
public function getPageInfo(Project $project, string $pageTitle): ?array
{
    $pages = $this->getPagesInfo($project, [$pageTitle]);

    if (null === $pages) {
        return null;
    }

    // array_shift() evaluates to null on an empty array, so no extra emptiness check is needed.
    return array_shift($pages);
}
            ||
| 38 | |||
/**
 * Fetch API metadata for several pages in one request.
 *
 * The titles are joined with '|' and sent as a single 'titles' parameter together with
 * prop=info|pageprops and formatversion=2.
 *
 * @param Project $project The project to which the pages belong.
 * @param string[] $pageTitles Array of page titles.
 * @return array|null Page entries from the response, keyed by each entry's 'title' value
 *   (entries carry some of: pageid, title, missing, displaytitle, url).
 *   Null when the response has no ['query']['pages'] element.
 */
public function getPagesInfo(Project $project, array $pageTitles): ?array
{
    $response = $this->executeApiRequest($project, [
        'prop' => 'info|pageprops',
        'inprop' => 'protection|talkid|watched|watchers|notificationtimestamp|subjectid|url|displaytitle',
        'converttitles' => '',
        'titles' => implode('|', $pageTitles),
        'formatversion' => 2,
    ]);

    if (!isset($response['query']['pages'])) {
        return null;
    }

    // Re-key the list by the title reported in the response.
    $pagesByTitle = [];
    foreach ($response['query']['pages'] as $entry) {
        $pagesByTitle[$entry['title']] = $entry;
    }

    return $pagesByTitle;
}
            ||
| 67 | |||
/**
 * Fetch the revision content (prop=revisions, rvprop=content) for a set of pages.
 *
 * @param Project $project The project to which the pages belong.
 * @param string[] $pageTitles Array of page titles.
 * @return string[] Map of the 'title' reported by the API to that page's content. A page whose
 *   response entry has no ['revisions'][0]['content'] maps to an empty string. An empty array is
 *   returned when the response has no ['query']['pages'] element.
 */
public function getPagesWikitext(Project $project, array $pageTitles): array
{
    $response = $this->executeApiRequest($project, [
        'prop' => 'revisions',
        'rvprop' => 'content',
        'titles' => implode('|', $pageTitles),
        'formatversion' => 2,
    ]);

    if (!isset($response['query']['pages'])) {
        return [];
    }

    $wikitextByTitle = [];
    foreach ($response['query']['pages'] as $entry) {
        // Missing (or null) content falls back to the empty string.
        $wikitextByTitle[$entry['title']] = $entry['revisions'][0]['content'] ?? '';
    }

    return $wikitextByTitle;
}
            ||
| 99 | |||
/**
 * Get all revisions of a single page, with caching.
 *
 * The cache key is derived from the arguments actually passed (func_get_args()) and the
 * 'page_revisions' prefix. On a cache miss the rows come from getRevisionsStmt() called
 * without a limit.
 *
 * @param Page $page The page.
 * @param User|null $user Specify to get only revisions by the given user.
 * @param false|int $start
 * @param false|int $end
 * @return string[] One row per revision, as selected by getRevisionsStmt(): id, timestamp, minor,
 *   length, length_change, user_id, username, comment, sha, deleted.
 */
public function getRevisions(Page $page, ?User $user = null, $start = false, $end = false): array
{
    $cacheKey = $this->getCacheKey(func_get_args(), 'page_revisions');

    if (!$this->cache->hasItem($cacheKey)) {
        $revisions = $this->getRevisionsStmt($page, $user, null, null, $start, $end)
            ->fetchAllAssociative();

        // Cache and return.
        return $this->setCache($cacheKey, $revisions);
    }

    return $this->cache->getItem($cacheKey)->get();
}
            ||
| 121 | |||
/**
 * Build and execute the revisions query for a page, returning the statement so that
 * callers can iterate row by row.
 *
 * The inner query selects newest-first (optionally limited); the outer query re-sorts the
 * selected rows oldest-first.
 *
 * @param Page $page The page.
 * @param User|null $user Specify to get only revisions by the given user.
 * @param ?int $limit Max number of revisions to process.
 * @param ?int $numRevisions Number of revisions, if known.
 *   NOTE(review): the LIMIT clause is only added when $limit is positive AND this argument is
 *   non-null; no separate count query is run in this method. A $limit passed without
 *   $numRevisions is therefore ignored.
 * @param false|int $start
 * @param false|int $end
 * @return ResultStatement
 */
public function getRevisionsStmt(
    Page $page,
    ?User $user = null,
    ?int $limit = null,
    ?int $numRevisions = null,
    $start = false,
    $end = false
): ResultStatement {
    // Use 'revision' if there's no user, otherwise default to revision_userindex.
    $revTable = $this->getTableName(
        $page->getProject()->getDatabaseName(),
        'revision',
        $user ? null : ''
    );
    $commentTable = $page->getProject()->getTableName('comment');
    $actorTable = $page->getProject()->getTableName('actor');

    $userClause = '';
    if ($user) {
        $userClause = "revs.rev_actor = :actorId AND ";
    }

    $limitClause = '';
    if ((int)$limit > 0 && null !== $numRevisions) {
        // $limit is typed ?int, so interpolating it here is safe.
        $limitClause = "LIMIT $limit";
    }

    $dateConditions = $this->getDateConditions($start, $end, false, 'revs.');

    $sql = "SELECT * FROM (
                SELECT
                    revs.rev_id AS `id`,
                    revs.rev_timestamp AS `timestamp`,
                    revs.rev_minor_edit AS `minor`,
                    revs.rev_len AS `length`,
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS `length_change`,
                    actor_user AS user_id,
                    actor_name AS username,
                    comment_text AS `comment`,
                    revs.rev_sha1 AS `sha`,
                    revs.rev_deleted AS `deleted`
                FROM $revTable AS revs
                LEFT JOIN $actorTable ON revs.rev_actor = actor_id
                LEFT JOIN $revTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
                LEFT OUTER JOIN $commentTable ON comment_id = revs.rev_comment_id
                WHERE $userClause revs.rev_page = :pageid $dateConditions
                ORDER BY revs.rev_timestamp DESC
                $limitClause
            ) a
            ORDER BY `timestamp` ASC";

    $bindings = ['pageid' => $page->getId()];
    if ($user) {
        // Bound only when the :actorId placeholder is present in $userClause.
        $bindings['actorId'] = $user->getActorId($page->getProject());
    }

    return $this->executeProjectsQuery($page->getProject(), $sql, $bindings);
}
            ||
| 187 | |||
/**
 * Get a count of the number of revisions of a single page, with caching.
 *
 * The cache key is derived from the arguments actually passed (func_get_args()) and the
 * 'page_numrevisions' prefix.
 *
 * @param Page $page The page.
 * @param User|null $user Specify to only count revisions by the given user.
 * @param false|int $start
 * @param false|int $end
 * @return int
 */
public function getNumRevisions(Page $page, ?User $user = null, $start = false, $end = false): int
{
    $cacheKey = $this->getCacheKey(func_get_args(), 'page_numrevisions');
    if ($this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }

    // In this case revision is faster than revision_userindex if we're not querying by user.
    $revTable = $page->getProject()->getTableName(
        'revision',
        $user && $this->isWMF ? '_userindex' : ''
    );
    $userClause = $user ? "rev_actor = :actorId AND " : "";

    $dateConditions = $this->getDateConditions($start, $end);

    $sql = "SELECT COUNT(*)
            FROM $revTable
            WHERE $userClause rev_page = :pageid $dateConditions";
    $params = ['pageid' => $page->getId()];
    if ($user) {
        // The key must match the :actorId placeholder used in $userClause; binding it under
        // any other name leaves the placeholder unbound and the query fails.
        $params['actorId'] = $user->getActorId($page->getProject());
    }

    $result = (int)$this->executeProjectsQuery($page->getProject(), $sql, $params)->fetchOne();

    // Cache and return.
    return $this->setCache($cacheKey, $result);
}
            ||
| 225 | |||
| 226 | /**  | 
            ||
| 227 | * Get any CheckWiki errors of a single page  | 
            ||
| 228 | * @param Page $page  | 
            ||
| 229 | * @return array Results from query  | 
            ||
| 230 | */  | 
            ||
| 231 | public function getCheckWikiErrors(Page $page): array  | 
            ||
| 258 | }  | 
            ||
| 259 | |||
/**
 * Look up the Wikidata label and description of the page's item, in the language of the
 * page's project.
 *
 * @param Page $page
 * @return string[][] Empty when the page has no Wikidata ID; otherwise rows in the format:
 *   [[
 *     'term' => string such as 'label',
 *     'term_text' => string (value for 'label'),
 *   ], ... ]
 */
public function getWikidataInfo(Page $page): array
{
    if (empty($page->getWikidataId())) {
        return [];
    }

    // The item-terms table is queried by numeric ID, so drop the leading 'Q'.
    $numericId = ltrim($page->getWikidataId(), 'Q');
    $language = $page->getProject()->getLang();
    $wdp = 'wikidatawiki_p';

    $sql = "SELECT wby_name AS term, wbx_text AS term_text
            FROM $wdp.wbt_item_terms
            JOIN $wdp.wbt_term_in_lang ON wbit_term_in_lang_id = wbtl_id
            JOIN $wdp.wbt_type ON wbtl_type_id = wby_id
            JOIN $wdp.wbt_text_in_lang ON wbtl_text_in_lang_id = wbxl_id
            JOIN $wdp.wbt_text ON wbxl_text_id = wbx_id
            WHERE wbit_item_id = :wikidataId
                AND wby_name IN ('label', 'description')
                AND wbxl_language = :lang";

    $bindings = [
        'lang' => $language,
        'wikidataId' => $numericId,
    ];

    $statement = $this->executeProjectsQuery('wikidatawiki', $sql, $bindings);

    return $statement->fetchAllAssociative();
}
            ||
| 294 | |||
| 295 | /**  | 
            ||
| 296 | * Get or count all wikidata items for the given page,  | 
            ||
| 297 | * not just languages of sister projects  | 
            ||
| 298 | * @param Page $page  | 
            ||
| 299 | * @param bool $count Set to true to get only a COUNT  | 
            ||
| 300 | * @return string[]|int Records as returned by the DB,  | 
            ||
| 301 | * or raw COUNT of the records.  | 
            ||
| 302 | */  | 
            ||
| 303 | public function getWikidataItems(Page $page, bool $count = false)  | 
            ||
| 320 | }  | 
            ||
| 321 | |||
/**
 * Count external links, outgoing links, incoming links and redirects for the given page
 * with a single UNION query.
 *
 * @param Page $page
 * @return int[] Integer counts keyed by 'links_ext_count', 'links_out_count',
 *   'links_in_count' and 'redirects_count' (one key per row the query returns).
 */
public function countLinksAndRedirects(Page $page): array
{
    $externalLinksTable = $page->getProject()->getTableName('externallinks');
    $pageLinksTable = $page->getProject()->getTableName('pagelinks');
    $linkTargetTable = $page->getProject()->getTableName('linktarget');
    $redirectTable = $page->getProject()->getTableName('redirect');

    $sql = "SELECT COUNT(*) AS value, 'links_ext' AS type
            FROM $externalLinksTable WHERE el_from = :id
            UNION
            SELECT COUNT(*) AS value, 'links_out' AS type
            FROM $pageLinksTable WHERE pl_from = :id
            UNION
            SELECT COUNT(*) AS value, 'links_in' AS type
            FROM $pageLinksTable
            JOIN $linkTargetTable ON lt_id = pl_target_id
            WHERE lt_namespace = :namespace AND lt_title = :title
            UNION
            SELECT COUNT(*) AS value, 'redirects' AS type
            FROM $redirectTable WHERE rd_namespace = :namespace AND rd_title = :title";

    $rows = $this->executeProjectsQuery($page->getProject(), $sql, [
        'id' => $page->getId(),
        // Spaces in the title are replaced with underscores before matching lt_title/rd_title.
        'title' => str_replace(' ', '_', $page->getTitleWithoutNamespace()),
        'namespace' => $page->getNamespace(),
    ]);

    // Pivot the (value, type) rows into a '<type>_count' => int map.
    $counts = [];
    foreach ($rows as $row) {
        $key = $row['type'] . '_count';
        $counts[$key] = (int)$row['value'];
    }

    return $counts;
}
            ||
| 365 | |||
/**
 * Count wikidata items for the given page, not just languages of sister projects.
 *
 * Convenience wrapper that calls getWikidataItems() with its $count flag set to true.
 *
 * @param Page $page
 * @return int Number of records.
 */
public function countWikidataItems(Page $page): int
{
    $onlyCount = true;

    return $this->getWikidataItems($page, $onlyCount);
}
            ||
| 375 | |||
| 376 | /**  | 
            ||
| 377 | * Get page views for the given page and timeframe.  | 
            ||
| 378 | * @fixme use Symfony Guzzle package.  | 
            ||
| 379 | * @param Page $page  | 
            ||
| 380 | * @param string|DateTime $start In the format YYYYMMDD  | 
            ||
| 381 | * @param string|DateTime $end In the format YYYYMMDD  | 
            ||
| 382 | * @return string[][][]  | 
            ||
| 383 | * @throws BadGatewayException  | 
            ||
| 384 | */  | 
            ||
| 385 | public function getPageviews(Page $page, $start, $end): array  | 
            ||
| 423 | }  | 
            ||
| 424 | }  | 
            ||
| 425 | |||
| 426 | /**  | 
            ||
| 427 | * Get the full HTML content of the page.  | 
            ||
| 428 | * @param Page $page  | 
            ||
| 429 | * @param int|null $revId What revision to query for.  | 
            ||
| 430 | * @return string  | 
            ||
| 431 | * @throws BadGatewayException  | 
            ||
| 432 | */  | 
            ||
| 433 | public function getHTMLContent(Page $page, ?int $revId = null): string  | 
            ||
| 460 | }  | 
            ||
| 461 | }  | 
            ||
| 462 | |||
| 463 | /**  | 
            ||
| 464 | * Get the ID of the revision of a page at the time of the given DateTime.  | 
            ||
| 465 | * @param Page $page  | 
            ||
| 466 | * @param DateTime $date  | 
            ||
| 467 | * @return int  | 
            ||
| 468 | */  | 
            ||
| 469 | public function getRevisionIdAtDate(Page $page, DateTime $date): int  | 
            ||
| 481 | }  | 
            ||
| 482 | |||
| 483 | /**  | 
            ||
| 484 | * Get HTML display titles of a set of pages (or the normal title if there's no display title).  | 
            ||
| 485 | * This will send t/50 API requests where t is the number of titles supplied.  | 
            ||
| 486 | * @param Project $project The project.  | 
            ||
| 487 | * @param string[] $pageTitles The titles to fetch.  | 
            ||
| 488 | * @return string[] Keys are the original supplied title, and values are the display titles.  | 
            ||
| 489 | * @static  | 
            ||
| 490 | */  | 
            ||
| 491 | public function displayTitles(Project $project, array $pageTitles): array  | 
            ||
| 527 | }  | 
            ||
| 528 | }  | 
            ||
| 529 |