| Total Complexity | 46 |
| Total Lines | 468 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like PageRepository often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use PageRepository, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 22 | class PageRepository extends Repository |
||
| 23 | { |
||
/**
 * Look up API metadata for one page.
 *
 * Delegates to getPagesInfo() with a single-element title list and hands back
 * the first entry of that result.
 *
 * @param Project $project The project to which the page belongs.
 * @param string $pageTitle Page title.
 * @return string[]|null Array with some of the following keys: pageid, title, missing, displaytitle, url.
 *   Null when getPagesInfo() returns null or an empty set.
 */
public function getPageInfo(Project $project, string $pageTitle): ?array
{
    $pages = $this->getPagesInfo($project, [$pageTitle]);

    if (null === $pages) {
        return null;
    }

    // Only one title was requested, so the first entry is the one we want.
    return array_shift($pages);
}
||
| 36 | |||
/**
 * Look up API metadata for several pages in one request.
 *
 * @param Project $project The project to which the pages belong.
 * @param string[] $pageTitles Array of page titles.
 * @return string[]|null Array keyed by the 'title' of each returned page, each element with some of the
 *   following keys: pageid, title, missing, displaytitle, url.
 *   Null when the API response contains no query.pages element.
 */
public function getPagesInfo(Project $project, array $pageTitles): ?array
{
    $res = $this->executeApiRequest($project, [
        'prop' => 'info|pageprops',
        'inprop' => 'protection|talkid|watched|watchers|notificationtimestamp|subjectid|url|displaytitle',
        'converttitles' => '',
        'titles' => implode('|', $pageTitles),
        'formatversion' => 2,
    ]);

    if (!isset($res['query']['pages'])) {
        return null;
    }

    // Re-key the list of pages by the title reported in the response.
    $infoByTitle = [];
    foreach ($res['query']['pages'] as $pageInfo) {
        $infoByTitle[$pageInfo['title']] = $pageInfo;
    }

    return $infoByTitle;
}
||
| 65 | |||
/**
 * Fetch the revision content (wikitext) of a set of pages.
 *
 * @param Project $project The project to which the pages belong.
 * @param string[] $pageTitles Array of page titles.
 * @return string[] Array keyed by the 'title' of each returned page, with the page text as the values.
 *   A page with no revision content in the response maps to an empty string.
 *   Empty array when the API response contains no query.pages element.
 */
public function getPagesWikitext(Project $project, array $pageTitles): array
{
    $res = $this->executeApiRequest($project, [
        'prop' => 'revisions',
        'rvprop' => 'content',
        'titles' => implode('|', $pageTitles),
        'formatversion' => 2,
    ]);

    if (!isset($res['query']['pages'])) {
        return [];
    }

    $textByTitle = [];
    foreach ($res['query']['pages'] as $page) {
        // Fall back to an empty string when the first revision carries no content.
        $textByTitle[$page['title']] = $page['revisions'][0]['content'] ?? '';
    }

    return $textByTitle;
}
||
| 97 | |||
/**
 * Fetch all revisions of a single page, with caching.
 *
 * The cache key is built from the arguments actually passed to this call.
 * On a miss, the rows come from getRevisionsStmt() with no limit applied.
 *
 * @param Page $page The page.
 * @param User|null $user Specify to get only revisions by the given user.
 * @param false|int $start
 * @param false|int $end
 * @return string[] Each member with keys: id, timestamp, length.
 */
public function getRevisions(Page $page, ?User $user = null, $start = false, $end = false): array
{
    $cacheKey = $this->getCacheKey(func_get_args(), 'page_revisions');

    if (!$this->cache->hasItem($cacheKey)) {
        $revisions = $this->getRevisionsStmt($page, $user, null, null, $start, $end)->fetchAll();

        // NOTE(review): relies on setCache() handing back the value it stores.
        return $this->setCache($cacheKey, $revisions);
    }

    return $this->cache->getItem($cacheKey)->get();
}
||
| 119 | |||
/**
 * Build and execute the revisions query for a page, returning the statement so that
 * callers can iterate row by row.
 *
 * The inner query selects revisions newest-first (optionally limited); the outer query
 * re-sorts the selected rows oldest-first.
 *
 * @param Page $page The page.
 * @param User|null $user Specify to get only revisions by the given user.
 * @param ?int $limit Max number of revisions to process.
 * @param ?int $numRevisions Number of revisions, if known. Within this method it only acts as a switch:
 *   the LIMIT clause is added only when $limit is positive AND $numRevisions is non-null.
 *   No separate count query is run here when it is omitted.
 * @param false|int $start
 * @param false|int $end
 * @return ResultStatement
 */
public function getRevisionsStmt(
    Page $page,
    ?User $user = null,
    ?int $limit = null,
    ?int $numRevisions = null,
    $start = false,
    $end = false
): ResultStatement {
    $project = $page->getProject();

    // Use 'revision' if there's no user, otherwise default to revision_userindex.
    $tableExtension = $user ? null : '';
    $revTable = $this->getTableName($project->getDatabaseName(), 'revision', $tableExtension);
    $commentTable = $project->getTableName('comment');
    $actorTable = $project->getTableName('actor');

    $userClause = '';
    if ($user) {
        $userClause = "revs.rev_actor = :actorId AND ";
    }

    $limitClause = '';
    if ((int)$limit > 0 && null !== $numRevisions) {
        $limitClause = "LIMIT $limit";
    }

    $dateConditions = $this->getDateConditions($start, $end, false, 'revs.');

    $sql = "SELECT * FROM (
                SELECT
                    revs.rev_id AS id,
                    revs.rev_timestamp AS timestamp,
                    revs.rev_minor_edit AS minor,
                    revs.rev_len AS length,
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
                    actor_user AS user_id,
                    actor_name AS username,
                    comment_text AS `comment`,
                    revs.rev_sha1 AS sha
                FROM $revTable AS revs
                LEFT JOIN $actorTable ON revs.rev_actor = actor_id
                LEFT JOIN $revTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
                LEFT OUTER JOIN $commentTable ON comment_id = revs.rev_comment_id
                WHERE $userClause revs.rev_page = :pageid $dateConditions
                ORDER BY revs.rev_timestamp DESC
                $limitClause
            ) a
            ORDER BY timestamp ASC";

    $bindings = ['pageid' => $page->getId()];
    if ($user) {
        // Only bound when the :actorId placeholder is present in $userClause.
        $bindings['actorId'] = $user->getActorId($project);
    }

    return $this->executeProjectsQuery($project, $sql, $bindings);
}
||
| 184 | |||
/**
 * Get a count of the number of revisions of a single page, with caching.
 *
 * The cache key is built from the arguments actually passed to this call.
 *
 * @param Page $page The page.
 * @param User|null $user Specify to only count revisions by the given user.
 * @param false|int $start
 * @param false|int $end
 * @return int
 */
public function getNumRevisions(Page $page, ?User $user = null, $start = false, $end = false): int
{
    $cacheKey = $this->getCacheKey(func_get_args(), 'page_numrevisions');
    if ($this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }

    // In this case revision is faster than revision_userindex if we're not querying by user.
    $revTable = $page->getProject()->getTableName(
        'revision',
        $user && $this->isLabs() ? '_userindex' : ''
    );
    $userClause = $user ? "rev_actor = :actorId AND " : "";

    $dateConditions = $this->getDateConditions($start, $end);

    $sql = "SELECT COUNT(*)
            FROM $revTable
            WHERE $userClause rev_page = :pageid $dateConditions";
    $params = ['pageid' => $page->getId()];
    if ($user) {
        // The key must match the :actorId placeholder in $userClause; binding it under
        // any other name leaves the placeholder without a value.
        $params['actorId'] = $user->getActorId($page->getProject());
    }

    $result = (int)$this->executeProjectsQuery($page->getProject(), $sql, $params)->fetchColumn(0);

    // Cache and return.
    return $this->setCache($cacheKey, $result);
}
||
| 222 | |||
| 223 | /** |
||
| 224 | * Get any CheckWiki errors of a single page |
||
| 225 | * @param Page $page |
||
| 226 | * @return array Results from query |
||
| 227 | */ |
||
| 228 | public function getCheckWikiErrors(Page $page): array |
||
| 256 | } |
||
| 257 | |||
/**
 * Fetch the Wikidata label and description of the page, in the language of the page's project.
 *
 * @param Page $page
 * @return string[] Empty when the page has no Wikidata ID; otherwise rows in the format:
 *    [[
 *         'term' => string such as 'label',
 *         'term_text' => string (value for 'label'),
 *     ], ... ]
 */
public function getWikidataInfo(Page $page): array
{
    $fullId = $page->getWikidataId();
    if (empty($fullId)) {
        return [];
    }

    $wikidataDb = 'wikidatawiki_p';

    $sql = "SELECT wby_name AS term, wbx_text AS term_text
            FROM $wikidataDb.wbt_item_terms
            JOIN $wikidataDb.wbt_term_in_lang ON wbit_term_in_lang_id = wbtl_id
            JOIN $wikidataDb.wbt_type ON wbtl_type_id = wby_id
            JOIN $wikidataDb.wbt_text_in_lang ON wbtl_text_in_lang_id = wbxl_id
            JOIN $wikidataDb.wbt_text ON wbxl_text_id = wbx_id
            WHERE wbit_item_id = :wikidataId
            AND wby_name IN ('label', 'description')
            AND wbxl_language = :lang";

    $bindings = [
        'lang' => $page->getProject()->getLang(),
        // The query matches on the ID with any leading 'Q' characters stripped.
        'wikidataId' => ltrim($fullId, 'Q'),
    ];

    $stmt = $this->executeProjectsQuery('wikidatawiki', $sql, $bindings);

    return $stmt->fetchAll();
}
||
| 292 | |||
/**
 * Get or count all wikidata items for the given page,
 *     not just languages of sister projects.
 *
 * @param Page $page
 * @param bool $count Set to true to get only a COUNT
 * @return string[]|int Records as returned by the DB, or raw COUNT of the records.
 *   When the page has no Wikidata ID: 0 if $count is true, otherwise an empty array.
 */
public function getWikidataItems(Page $page, bool $count = false)
{
    if (!$page->getWikidataId()) {
        return $count ? 0 : [];
    }

    // The query matches on the ID with any leading 'Q' characters stripped.
    $itemId = ltrim($page->getWikidataId(), 'Q');
    $columns = $count ? 'COUNT(*) AS count' : '*';

    $sql = "SELECT $columns
            FROM wikidatawiki_p.wb_items_per_site
            WHERE ips_item_id = :wikidataId";

    $rows = $this->executeProjectsQuery('wikidatawiki', $sql, [
        'wikidataId' => $itemId,
    ])->fetchAll();

    if ($count) {
        return (int) $rows[0]['count'];
    }

    return $rows;
}
||
| 319 | |||
/**
 * Count external links, outgoing links, incoming links and redirects for the given page.
 *
 * Runs a single UNION query that yields one (value, type) row per count, then
 * re-keys the rows as '<type>_count'.
 *
 * @param Page $page
 * @return int[] Counts with the keys 'links_ext_count', 'links_out_count',
 *   'links_in_count' and 'redirects_count'
 */
public function countLinksAndRedirects(Page $page): array
{
    $dbName = $page->getProject()->getDatabaseName();
    $externalLinksTable = $this->getTableName($dbName, 'externallinks');
    $pageLinksTable = $this->getTableName($dbName, 'pagelinks');
    $redirectTable = $this->getTableName($dbName, 'redirect');

    $sql = "SELECT COUNT(*) AS value, 'links_ext' AS type
            FROM $externalLinksTable WHERE el_from = :id
            UNION
            SELECT COUNT(*) AS value, 'links_out' AS type
            FROM $pageLinksTable WHERE pl_from = :id
            UNION
            SELECT COUNT(*) AS value, 'links_in' AS type
            FROM $pageLinksTable WHERE pl_namespace = :namespace AND pl_title = :title
            UNION
            SELECT COUNT(*) AS value, 'redirects' AS type
            FROM $redirectTable WHERE rd_namespace = :namespace AND rd_title = :title";

    $bindings = [
        'id' => $page->getId(),
        // Titles are matched with spaces replaced by underscores.
        'title' => str_replace(' ', '_', $page->getTitleWithoutNamespace()),
        'namespace' => $page->getNamespace(),
    ];

    $rows = $this->executeProjectsQuery($page->getProject(), $sql, $bindings);

    // Transform to associative array by 'type'
    $counts = [];
    foreach ($rows as $row) {
        $key = $row['type'] . '_count';
        $counts[$key] = (int)$row['value'];
    }

    return $counts;
}
||
| 360 | |||
/**
 * Count wikidata items for the given page, not just languages of sister projects.
 *
 * Thin wrapper around getWikidataItems() called in count mode.
 *
 * @param Page $page
 * @return int Number of records.
 */
public function countWikidataItems(Page $page): int
{
    $total = $this->getWikidataItems($page, true);

    return $total;
}
||
| 370 | |||
| 371 | /** |
||
| 372 | * Get page views for the given page and timeframe. |
||
| 373 | * @fixme use Symfony Guzzle package. |
||
| 374 | * @param Page $page |
||
| 375 | * @param string|DateTime $start In the format YYYYMMDD |
||
| 376 | * @param string|DateTime $end In the format YYYYMMDD |
||
| 377 | * @return string[] |
||
| 378 | */ |
||
| 379 | public function getPageviews(Page $page, $start, $end): array |
||
| 404 | } |
||
| 405 | |||
| 406 | /** |
||
| 407 | * Get the full HTML content of the page. |
||
| 408 | * @param Page $page |
||
| 409 | * @param int $revId What revision to query for. |
||
| 410 | * @return string |
||
| 411 | */ |
||
| 412 | public function getHTMLContent(Page $page, ?int $revId = null): string |
||
| 423 | } |
||
| 424 | |||
| 425 | /** |
||
| 426 | * Get the ID of the revision of a page at the time of the given DateTime. |
||
| 427 | * @param Page $page |
||
| 428 | * @param DateTime $date |
||
| 429 | * @return int |
||
| 430 | */ |
||
| 431 | public function getRevisionIdAtDate(Page $page, DateTime $date): int |
||
| 442 | } |
||
| 443 | |||
| 444 | /** |
||
| 445 | * Get HTML display titles of a set of pages (or the normal title if there's no display title). |
||
| 446 | * This will send t/50 API requests where t is the number of titles supplied. |
||
| 447 | * @param Project $project The project. |
||
| 448 | * @param string[] $pageTitles The titles to fetch. |
||
| 449 | * @return string[] Keys are the original supplied title, and values are the display titles. |
||
| 450 | * @static |
||
| 451 | */ |
||
| 452 | public function displayTitles(Project $project, array $pageTitles): array |
||
| 490 | } |
||
| 491 | } |
||
| 492 |
This function has been deprecated. The supplier of the function has supplied an explanatory message.
The explanatory message should give you some clue as to whether and when the function will be removed and what other function to use instead.