@@ -18,213 +18,213 @@ |
||
| 18 | 18 | class CheckExternalLinksTask extends BuildTask |
| 19 | 19 | { |
| 20 | 20 | |
| 21 | - private static $dependencies = [ |
|
| 22 | - 'LinkChecker' => '%$' . LinkChecker::class |
|
| 23 | - ]; |
|
| 24 | - |
|
| 25 | - /** |
|
| 26 | - * @var bool |
|
| 27 | - */ |
|
| 28 | - protected $silent = false; |
|
| 29 | - |
|
| 30 | - /** |
|
| 31 | - * @var LinkChecker |
|
| 32 | - */ |
|
| 33 | - protected $linkChecker; |
|
| 34 | - |
|
| 35 | - protected $title = 'Checking broken External links in the SiteTree'; |
|
| 36 | - |
|
| 37 | - protected $description = 'A task that records external broken links in the SiteTree'; |
|
| 38 | - |
|
| 39 | - protected $enabled = true; |
|
| 40 | - |
|
| 41 | - /** |
|
| 42 | - * Log a message |
|
| 43 | - * |
|
| 44 | - * @param string $message |
|
| 45 | - */ |
|
| 46 | - protected function log($message) |
|
| 47 | - { |
|
| 48 | - if (!$this->silent) { |
|
| 49 | - Debug::message($message); |
|
| 50 | - } |
|
| 51 | - } |
|
| 52 | - |
|
| 53 | - public function run($request) |
|
| 54 | - { |
|
| 55 | - $this->runLinksCheck(); |
|
| 56 | - } |
|
| 57 | - /** |
|
| 58 | - * Turn on or off message output |
|
| 59 | - * |
|
| 60 | - * @param bool $silent |
|
| 61 | - */ |
|
| 62 | - public function setSilent($silent) |
|
| 63 | - { |
|
| 64 | - $this->silent = $silent; |
|
| 65 | - } |
|
| 66 | - |
|
| 67 | - /** |
|
| 68 | - * @param LinkChecker $linkChecker |
|
| 69 | - */ |
|
| 70 | - public function setLinkChecker(LinkChecker $linkChecker) |
|
| 71 | - { |
|
| 72 | - $this->linkChecker = $linkChecker; |
|
| 73 | - } |
|
| 74 | - |
|
| 75 | - /** |
|
| 76 | - * @return LinkChecker |
|
| 77 | - */ |
|
| 78 | - public function getLinkChecker() |
|
| 79 | - { |
|
| 80 | - return $this->linkChecker; |
|
| 81 | - } |
|
| 82 | - |
|
| 83 | - /** |
|
| 84 | - * Check the status of a single link on a page |
|
| 85 | - * |
|
| 86 | - * @param BrokenExternalPageTrack $pageTrack |
|
| 87 | - * @param DOMNode $link |
|
| 88 | - */ |
|
| 89 | - protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) |
|
| 90 | - { |
|
| 91 | - $class = $link->getAttribute('class'); |
|
| 92 | - $href = $link->getAttribute('href'); |
|
| 93 | - $markedBroken = preg_match('/\b(ss-broken)\b/', $class); |
|
| 94 | - |
|
| 95 | - // Check link |
|
| 96 | - $httpCode = $this->linkChecker->checkLink($href); |
|
| 97 | - if ($httpCode === null) { |
|
| 98 | - return; // Null link means uncheckable, such as an internal link |
|
| 99 | - } |
|
| 100 | - |
|
| 101 | - // If this code is broken then mark as such |
|
| 102 | - if ($foundBroken = $this->isCodeBroken($httpCode)) { |
|
| 103 | - // Create broken record |
|
| 104 | - $brokenLink = new BrokenExternalLink(); |
|
| 105 | - $brokenLink->Link = $href; |
|
| 106 | - $brokenLink->HTTPCode = $httpCode; |
|
| 107 | - $brokenLink->TrackID = $pageTrack->ID; |
|
| 108 | - $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons |
|
| 109 | - $brokenLink->write(); |
|
| 110 | - } |
|
| 111 | - |
|
| 112 | - // Check if we need to update CSS class, otherwise return |
|
| 113 | - if ($markedBroken == $foundBroken) { |
|
| 114 | - return; |
|
| 115 | - } |
|
| 116 | - if ($foundBroken) { |
|
| 117 | - $class .= ' ss-broken'; |
|
| 118 | - } else { |
|
| 119 | - $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); |
|
| 120 | - } |
|
| 121 | - $link->setAttribute('class', trim($class)); |
|
| 122 | - } |
|
| 123 | - |
|
| 124 | - /** |
|
| 125 | - * Determine if the given HTTP code is "broken" |
|
| 126 | - * |
|
| 127 | - * @param int $httpCode |
|
| 128 | - * @return bool True if this is a broken code |
|
| 129 | - */ |
|
| 130 | - protected function isCodeBroken($httpCode) |
|
| 131 | - { |
|
| 132 | - // Null represents no request attempted |
|
| 133 | - if ($httpCode === null) { |
|
| 134 | - return false; |
|
| 135 | - } |
|
| 136 | - |
|
| 137 | - // do we have any whitelisted codes |
|
| 138 | - $ignoreCodes = $this->config()->get('IgnoreCodes'); |
|
| 139 | - if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) { |
|
| 140 | - return false; |
|
| 141 | - } |
|
| 142 | - |
|
| 143 | - // Check if code is outside valid range |
|
| 144 | - return $httpCode < 200 || $httpCode > 302; |
|
| 145 | - } |
|
| 146 | - |
|
| 147 | - /** |
|
| 148 | - * Runs the links checker and returns the track used |
|
| 149 | - * |
|
| 150 | - * @param int $limit Limit to number of pages to run, or null to run all |
|
| 151 | - * @return BrokenExternalPageTrackStatus |
|
| 152 | - */ |
|
| 153 | - public function runLinksCheck($limit = null) |
|
| 154 | - { |
|
| 155 | - // Check the current status |
|
| 156 | - $status = BrokenExternalPageTrackStatus::get_or_create(); |
|
| 157 | - |
|
| 158 | - // Calculate pages to run |
|
| 159 | - $pageTracks = $status->getIncompleteTracks(); |
|
| 160 | - if ($limit) { |
|
| 161 | - $pageTracks = $pageTracks->limit($limit); |
|
| 162 | - } |
|
| 163 | - |
|
| 164 | - // Check each page |
|
| 165 | - foreach ($pageTracks as $pageTrack) { |
|
| 166 | - // Flag as complete |
|
| 167 | - $pageTrack->Processed = 1; |
|
| 168 | - $pageTrack->write(); |
|
| 169 | - |
|
| 170 | - // Check value of html area |
|
| 171 | - $page = $pageTrack->Page(); |
|
| 172 | - $this->log("Checking {$page->Title}"); |
|
| 173 | - $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); |
|
| 174 | - if (!$htmlValue->isValid()) { |
|
| 175 | - continue; |
|
| 176 | - } |
|
| 177 | - |
|
| 178 | - // Check each link |
|
| 179 | - $links = $htmlValue->getElementsByTagName('a'); |
|
| 180 | - foreach ($links as $link) { |
|
| 181 | - $this->checkPageLink($pageTrack, $link); |
|
| 182 | - } |
|
| 183 | - |
|
| 184 | - // Update content of page based on link fixes / breakages |
|
| 185 | - $htmlValue->saveHTML(); |
|
| 186 | - $page->Content = $htmlValue->getContent(); |
|
| 187 | - $page->write(); |
|
| 188 | - |
|
| 189 | - // Once all links have been created for this page update HasBrokenLinks |
|
| 190 | - $count = $pageTrack->BrokenLinks()->count(); |
|
| 191 | - $this->log("Found {$count} broken links"); |
|
| 192 | - if ($count) { |
|
| 193 | - $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class); |
|
| 194 | - // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true |
|
| 195 | - DB::query(sprintf( |
|
| 196 | - 'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', |
|
| 197 | - $siteTreeTable, |
|
| 198 | - intval($pageTrack->ID) |
|
| 199 | - )); |
|
| 200 | - } |
|
| 201 | - } |
|
| 202 | - |
|
| 203 | - $status->updateJobInfo('Updating completed pages'); |
|
| 204 | - $status->updateStatus(); |
|
| 205 | - return $status; |
|
| 206 | - } |
|
| 207 | - |
|
| 208 | - private function updateCompletedPages($trackID = 0) |
|
| 209 | - { |
|
| 210 | - $noPages = BrokenExternalPageTrack::get() |
|
| 211 | - ->filter(array( |
|
| 212 | - 'TrackID' => $trackID, |
|
| 213 | - 'Processed' => 1 |
|
| 214 | - )) |
|
| 215 | - ->count(); |
|
| 216 | - $track = BrokenExternalPageTrackStatus::get_latest(); |
|
| 217 | - $track->CompletedPages = $noPages; |
|
| 218 | - $track->write(); |
|
| 219 | - return $noPages; |
|
| 220 | - } |
|
| 221 | - |
|
| 222 | - private function updateJobInfo($message) |
|
| 223 | - { |
|
| 224 | - $track = BrokenExternalPageTrackStatus::get_latest(); |
|
| 225 | - if ($track) { |
|
| 226 | - $track->JobInfo = $message; |
|
| 227 | - $track->write(); |
|
| 228 | - } |
|
| 229 | - } |
|
| 21 | + private static $dependencies = [ |
|
| 22 | + 'LinkChecker' => '%$' . LinkChecker::class |
|
| 23 | + ]; |
|
| 24 | + |
|
| 25 | + /** |
|
| 26 | + * @var bool |
|
| 27 | + */ |
|
| 28 | + protected $silent = false; |
|
| 29 | + |
|
| 30 | + /** |
|
| 31 | + * @var LinkChecker |
|
| 32 | + */ |
|
| 33 | + protected $linkChecker; |
|
| 34 | + |
|
| 35 | + protected $title = 'Checking broken External links in the SiteTree'; |
|
| 36 | + |
|
| 37 | + protected $description = 'A task that records external broken links in the SiteTree'; |
|
| 38 | + |
|
| 39 | + protected $enabled = true; |
|
| 40 | + |
|
| 41 | + /** |
|
| 42 | + * Log a message |
|
| 43 | + * |
|
| 44 | + * @param string $message |
|
| 45 | + */ |
|
| 46 | + protected function log($message) |
|
| 47 | + { |
|
| 48 | + if (!$this->silent) { |
|
| 49 | + Debug::message($message); |
|
| 50 | + } |
|
| 51 | + } |
|
| 52 | + |
|
| 53 | + public function run($request) |
|
| 54 | + { |
|
| 55 | + $this->runLinksCheck(); |
|
| 56 | + } |
|
| 57 | + /** |
|
| 58 | + * Turn on or off message output |
|
| 59 | + * |
|
| 60 | + * @param bool $silent |
|
| 61 | + */ |
|
| 62 | + public function setSilent($silent) |
|
| 63 | + { |
|
| 64 | + $this->silent = $silent; |
|
| 65 | + } |
|
| 66 | + |
|
| 67 | + /** |
|
| 68 | + * @param LinkChecker $linkChecker |
|
| 69 | + */ |
|
| 70 | + public function setLinkChecker(LinkChecker $linkChecker) |
|
| 71 | + { |
|
| 72 | + $this->linkChecker = $linkChecker; |
|
| 73 | + } |
|
| 74 | + |
|
| 75 | + /** |
|
| 76 | + * @return LinkChecker |
|
| 77 | + */ |
|
| 78 | + public function getLinkChecker() |
|
| 79 | + { |
|
| 80 | + return $this->linkChecker; |
|
| 81 | + } |
|
| 82 | + |
|
| 83 | + /** |
|
| 84 | + * Check the status of a single link on a page |
|
| 85 | + * |
|
| 86 | + * @param BrokenExternalPageTrack $pageTrack |
|
| 87 | + * @param DOMNode $link |
|
| 88 | + */ |
|
| 89 | + protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) |
|
| 90 | + { |
|
| 91 | + $class = $link->getAttribute('class'); |
|
| 92 | + $href = $link->getAttribute('href'); |
|
| 93 | + $markedBroken = preg_match('/\b(ss-broken)\b/', $class); |
|
| 94 | + |
|
| 95 | + // Check link |
|
| 96 | + $httpCode = $this->linkChecker->checkLink($href); |
|
| 97 | + if ($httpCode === null) { |
|
| 98 | + return; // Null link means uncheckable, such as an internal link |
|
| 99 | + } |
|
| 100 | + |
|
| 101 | + // If this code is broken then mark as such |
|
| 102 | + if ($foundBroken = $this->isCodeBroken($httpCode)) { |
|
| 103 | + // Create broken record |
|
| 104 | + $brokenLink = new BrokenExternalLink(); |
|
| 105 | + $brokenLink->Link = $href; |
|
| 106 | + $brokenLink->HTTPCode = $httpCode; |
|
| 107 | + $brokenLink->TrackID = $pageTrack->ID; |
|
| 108 | + $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons |
|
| 109 | + $brokenLink->write(); |
|
| 110 | + } |
|
| 111 | + |
|
| 112 | + // Check if we need to update CSS class, otherwise return |
|
| 113 | + if ($markedBroken == $foundBroken) { |
|
| 114 | + return; |
|
| 115 | + } |
|
| 116 | + if ($foundBroken) { |
|
| 117 | + $class .= ' ss-broken'; |
|
| 118 | + } else { |
|
| 119 | + $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); |
|
| 120 | + } |
|
| 121 | + $link->setAttribute('class', trim($class)); |
|
| 122 | + } |
|
| 123 | + |
|
| 124 | + /** |
|
| 125 | + * Determine if the given HTTP code is "broken" |
|
| 126 | + * |
|
| 127 | + * @param int $httpCode |
|
| 128 | + * @return bool True if this is a broken code |
|
| 129 | + */ |
|
| 130 | + protected function isCodeBroken($httpCode) |
|
| 131 | + { |
|
| 132 | + // Null represents no request attempted |
|
| 133 | + if ($httpCode === null) { |
|
| 134 | + return false; |
|
| 135 | + } |
|
| 136 | + |
|
| 137 | + // do we have any whitelisted codes |
|
| 138 | + $ignoreCodes = $this->config()->get('IgnoreCodes'); |
|
| 139 | + if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) { |
|
| 140 | + return false; |
|
| 141 | + } |
|
| 142 | + |
|
| 143 | + // Check if code is outside valid range |
|
| 144 | + return $httpCode < 200 || $httpCode > 302; |
|
| 145 | + } |
|
| 146 | + |
|
| 147 | + /** |
|
| 148 | + * Runs the links checker and returns the track used |
|
| 149 | + * |
|
| 150 | + * @param int $limit Limit to number of pages to run, or null to run all |
|
| 151 | + * @return BrokenExternalPageTrackStatus |
|
| 152 | + */ |
|
| 153 | + public function runLinksCheck($limit = null) |
|
| 154 | + { |
|
| 155 | + // Check the current status |
|
| 156 | + $status = BrokenExternalPageTrackStatus::get_or_create(); |
|
| 157 | + |
|
| 158 | + // Calculate pages to run |
|
| 159 | + $pageTracks = $status->getIncompleteTracks(); |
|
| 160 | + if ($limit) { |
|
| 161 | + $pageTracks = $pageTracks->limit($limit); |
|
| 162 | + } |
|
| 163 | + |
|
| 164 | + // Check each page |
|
| 165 | + foreach ($pageTracks as $pageTrack) { |
|
| 166 | + // Flag as complete |
|
| 167 | + $pageTrack->Processed = 1; |
|
| 168 | + $pageTrack->write(); |
|
| 169 | + |
|
| 170 | + // Check value of html area |
|
| 171 | + $page = $pageTrack->Page(); |
|
| 172 | + $this->log("Checking {$page->Title}"); |
|
| 173 | + $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); |
|
| 174 | + if (!$htmlValue->isValid()) { |
|
| 175 | + continue; |
|
| 176 | + } |
|
| 177 | + |
|
| 178 | + // Check each link |
|
| 179 | + $links = $htmlValue->getElementsByTagName('a'); |
|
| 180 | + foreach ($links as $link) { |
|
| 181 | + $this->checkPageLink($pageTrack, $link); |
|
| 182 | + } |
|
| 183 | + |
|
| 184 | + // Update content of page based on link fixes / breakages |
|
| 185 | + $htmlValue->saveHTML(); |
|
| 186 | + $page->Content = $htmlValue->getContent(); |
|
| 187 | + $page->write(); |
|
| 188 | + |
|
| 189 | + // Once all links have been created for this page update HasBrokenLinks |
|
| 190 | + $count = $pageTrack->BrokenLinks()->count(); |
|
| 191 | + $this->log("Found {$count} broken links"); |
|
| 192 | + if ($count) { |
|
| 193 | + $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class); |
|
| 194 | + // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true |
|
| 195 | + DB::query(sprintf( |
|
| 196 | + 'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', |
|
| 197 | + $siteTreeTable, |
|
| 198 | + intval($pageTrack->ID) |
|
| 199 | + )); |
|
| 200 | + } |
|
| 201 | + } |
|
| 202 | + |
|
| 203 | + $status->updateJobInfo('Updating completed pages'); |
|
| 204 | + $status->updateStatus(); |
|
| 205 | + return $status; |
|
| 206 | + } |
|
| 207 | + |
|
| 208 | + private function updateCompletedPages($trackID = 0) |
|
| 209 | + { |
|
| 210 | + $noPages = BrokenExternalPageTrack::get() |
|
| 211 | + ->filter(array( |
|
| 212 | + 'TrackID' => $trackID, |
|
| 213 | + 'Processed' => 1 |
|
| 214 | + )) |
|
| 215 | + ->count(); |
|
| 216 | + $track = BrokenExternalPageTrackStatus::get_latest(); |
|
| 217 | + $track->CompletedPages = $noPages; |
|
| 218 | + $track->write(); |
|
| 219 | + return $noPages; |
|
| 220 | + } |
|
| 221 | + |
|
| 222 | + private function updateJobInfo($message) |
|
| 223 | + { |
|
| 224 | + $track = BrokenExternalPageTrackStatus::get_latest(); |
|
| 225 | + if ($track) { |
|
| 226 | + $track->JobInfo = $message; |
|
| 227 | + $track->write(); |
|
| 228 | + } |
|
| 229 | + } |
|
| 230 | 230 | } |