| @@ -18,213 +18,213 @@ | ||
| 18 | 18 | class CheckExternalLinksTask extends BuildTask | 
| 19 | 19 |  { | 
| 20 | 20 | |
| 21 | - private static $dependencies = [ | |
| 22 | - 'LinkChecker' => '%$' . LinkChecker::class | |
| 23 | - ]; | |
| 24 | - | |
| 25 | - /** | |
| 26 | - * @var bool | |
| 27 | - */ | |
| 28 | - protected $silent = false; | |
| 29 | - | |
| 30 | - /** | |
| 31 | - * @var LinkChecker | |
| 32 | - */ | |
| 33 | - protected $linkChecker; | |
| 34 | - | |
| 35 | - protected $title = 'Checking broken External links in the SiteTree'; | |
| 36 | - | |
| 37 | - protected $description = 'A task that records external broken links in the SiteTree'; | |
| 38 | - | |
| 39 | - protected $enabled = true; | |
| 40 | - | |
| 41 | - /** | |
| 42 | - * Log a message | |
| 43 | - * | |
| 44 | - * @param string $message | |
| 45 | - */ | |
| 46 | - protected function log($message) | |
| 47 | -    { | |
| 48 | -        if (!$this->silent) { | |
| 49 | - Debug::message($message); | |
| 50 | - } | |
| 51 | - } | |
| 52 | - | |
| 53 | - public function run($request) | |
| 54 | -    { | |
| 55 | - $this->runLinksCheck(); | |
| 56 | - } | |
| 57 | - /** | |
| 58 | - * Turn on or off message output | |
| 59 | - * | |
| 60 | - * @param bool $silent | |
| 61 | - */ | |
| 62 | - public function setSilent($silent) | |
| 63 | -    { | |
| 64 | - $this->silent = $silent; | |
| 65 | - } | |
| 66 | - | |
| 67 | - /** | |
| 68 | - * @param LinkChecker $linkChecker | |
| 69 | - */ | |
| 70 | - public function setLinkChecker(LinkChecker $linkChecker) | |
| 71 | -    { | |
| 72 | - $this->linkChecker = $linkChecker; | |
| 73 | - } | |
| 74 | - | |
| 75 | - /** | |
| 76 | - * @return LinkChecker | |
| 77 | - */ | |
| 78 | - public function getLinkChecker() | |
| 79 | -    { | |
| 80 | - return $this->linkChecker; | |
| 81 | - } | |
| 82 | - | |
| 83 | - /** | |
| 84 | - * Check the status of a single link on a page | |
| 85 | - * | |
| 86 | - * @param BrokenExternalPageTrack $pageTrack | |
| 87 | - * @param DOMNode $link | |
| 88 | - */ | |
| 89 | - protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) | |
| 90 | -    { | |
| 91 | -        $class = $link->getAttribute('class'); | |
| 92 | -        $href = $link->getAttribute('href'); | |
| 93 | -        $markedBroken = preg_match('/\b(ss-broken)\b/', $class); | |
| 94 | - | |
| 95 | - // Check link | |
| 96 | - $httpCode = $this->linkChecker->checkLink($href); | |
| 97 | -        if ($httpCode === null) { | |
| 98 | - return; // Null link means uncheckable, such as an internal link | |
| 99 | - } | |
| 100 | - | |
| 101 | - // If this code is broken then mark as such | |
| 102 | -        if ($foundBroken = $this->isCodeBroken($httpCode)) { | |
| 103 | - // Create broken record | |
| 104 | - $brokenLink = new BrokenExternalLink(); | |
| 105 | - $brokenLink->Link = $href; | |
| 106 | - $brokenLink->HTTPCode = $httpCode; | |
| 107 | - $brokenLink->TrackID = $pageTrack->ID; | |
| 108 | - $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons | |
| 109 | - $brokenLink->write(); | |
| 110 | - } | |
| 111 | - | |
| 112 | - // Check if we need to update CSS class, otherwise return | |
| 113 | -        if ($markedBroken == $foundBroken) { | |
| 114 | - return; | |
| 115 | - } | |
| 116 | -        if ($foundBroken) { | |
| 117 | - $class .= ' ss-broken'; | |
| 118 | -        } else { | |
| 119 | -            $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); | |
| 120 | - } | |
| 121 | -        $link->setAttribute('class', trim($class)); | |
| 122 | - } | |
| 123 | - | |
| 124 | - /** | |
| 125 | - * Determine if the given HTTP code is "broken" | |
| 126 | - * | |
| 127 | - * @param int $httpCode | |
| 128 | - * @return bool True if this is a broken code | |
| 129 | - */ | |
| 130 | - protected function isCodeBroken($httpCode) | |
| 131 | -    { | |
| 132 | - // Null represents no request attempted | |
| 133 | -        if ($httpCode === null) { | |
| 134 | - return false; | |
| 135 | - } | |
| 136 | - | |
| 137 | - // do we have any whitelisted codes | |
| 138 | -        $ignoreCodes = $this->config()->get('IgnoreCodes'); | |
| 139 | -        if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) { | |
| 140 | - return false; | |
| 141 | - } | |
| 142 | - | |
| 143 | - // Check if code is outside valid range | |
| 144 | - return $httpCode < 200 || $httpCode > 302; | |
| 145 | - } | |
| 146 | - | |
| 147 | - /** | |
| 148 | - * Runs the links checker and returns the track used | |
| 149 | - * | |
| 150 | - * @param int $limit Limit to number of pages to run, or null to run all | |
| 151 | - * @return BrokenExternalPageTrackStatus | |
| 152 | - */ | |
| 153 | - public function runLinksCheck($limit = null) | |
| 154 | -    { | |
| 155 | - // Check the current status | |
| 156 | - $status = BrokenExternalPageTrackStatus::get_or_create(); | |
| 157 | - | |
| 158 | - // Calculate pages to run | |
| 159 | - $pageTracks = $status->getIncompleteTracks(); | |
| 160 | -        if ($limit) { | |
| 161 | - $pageTracks = $pageTracks->limit($limit); | |
| 162 | - } | |
| 163 | - | |
| 164 | - // Check each page | |
| 165 | -        foreach ($pageTracks as $pageTrack) { | |
| 166 | - // Flag as complete | |
| 167 | - $pageTrack->Processed = 1; | |
| 168 | - $pageTrack->write(); | |
| 169 | - | |
| 170 | - // Check value of html area | |
| 171 | - $page = $pageTrack->Page(); | |
| 172 | -            $this->log("Checking {$page->Title}"); | |
| 173 | -            $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); | |
| 174 | -            if (!$htmlValue->isValid()) { | |
| 175 | - continue; | |
| 176 | - } | |
| 177 | - | |
| 178 | - // Check each link | |
| 179 | -            $links = $htmlValue->getElementsByTagName('a'); | |
| 180 | -            foreach ($links as $link) { | |
| 181 | - $this->checkPageLink($pageTrack, $link); | |
| 182 | - } | |
| 183 | - | |
| 184 | - // Update content of page based on link fixes / breakages | |
| 185 | - $htmlValue->saveHTML(); | |
| 186 | - $page->Content = $htmlValue->getContent(); | |
| 187 | - $page->write(); | |
| 188 | - | |
| 189 | - // Once all links have been created for this page update HasBrokenLinks | |
| 190 | - $count = $pageTrack->BrokenLinks()->count(); | |
| 191 | -            $this->log("Found {$count} broken links"); | |
| 192 | -            if ($count) { | |
| 193 | - $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class); | |
| 194 | - // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true | |
| 195 | - DB::query(sprintf( | |
| 196 | - 'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', | |
| 197 | - $siteTreeTable, | |
| 198 | - intval($pageTrack->ID) | |
| 199 | - )); | |
| 200 | - } | |
| 201 | - } | |
| 202 | - | |
| 203 | -        $status->updateJobInfo('Updating completed pages'); | |
| 204 | - $status->updateStatus(); | |
| 205 | - return $status; | |
| 206 | - } | |
| 207 | - | |
| 208 | - private function updateCompletedPages($trackID = 0) | |
| 209 | -    { | |
| 210 | - $noPages = BrokenExternalPageTrack::get() | |
| 211 | - ->filter(array( | |
| 212 | - 'TrackID' => $trackID, | |
| 213 | - 'Processed' => 1 | |
| 214 | - )) | |
| 215 | - ->count(); | |
| 216 | - $track = BrokenExternalPageTrackStatus::get_latest(); | |
| 217 | - $track->CompletedPages = $noPages; | |
| 218 | - $track->write(); | |
| 219 | - return $noPages; | |
| 220 | - } | |
| 221 | - | |
| 222 | - private function updateJobInfo($message) | |
| 223 | -    { | |
| 224 | - $track = BrokenExternalPageTrackStatus::get_latest(); | |
| 225 | -        if ($track) { | |
| 226 | - $track->JobInfo = $message; | |
| 227 | - $track->write(); | |
| 228 | - } | |
| 229 | - } | |
| 21 | + private static $dependencies = [ | |
| 22 | + 'LinkChecker' => '%$' . LinkChecker::class | |
| 23 | + ]; | |
| 24 | + | |
| 25 | + /** | |
| 26 | + * @var bool | |
| 27 | + */ | |
| 28 | + protected $silent = false; | |
| 29 | + | |
| 30 | + /** | |
| 31 | + * @var LinkChecker | |
| 32 | + */ | |
| 33 | + protected $linkChecker; | |
| 34 | + | |
| 35 | + protected $title = 'Checking broken External links in the SiteTree'; | |
| 36 | + | |
| 37 | + protected $description = 'A task that records external broken links in the SiteTree'; | |
| 38 | + | |
| 39 | + protected $enabled = true; | |
| 40 | + | |
| 41 | + /** | |
| 42 | + * Log a message | |
| 43 | + * | |
| 44 | + * @param string $message | |
| 45 | + */ | |
| 46 | + protected function log($message) | |
| 47 | +	{ | |
| 48 | +		if (!$this->silent) { | |
| 49 | + Debug::message($message); | |
| 50 | + } | |
| 51 | + } | |
| 52 | + | |
| 53 | + public function run($request) | |
| 54 | +	{ | |
| 55 | + $this->runLinksCheck(); | |
| 56 | + } | |
| 57 | + /** | |
| 58 | + * Turn on or off message output | |
| 59 | + * | |
| 60 | + * @param bool $silent | |
| 61 | + */ | |
| 62 | + public function setSilent($silent) | |
| 63 | +	{ | |
| 64 | + $this->silent = $silent; | |
| 65 | + } | |
| 66 | + | |
| 67 | + /** | |
| 68 | + * @param LinkChecker $linkChecker | |
| 69 | + */ | |
| 70 | + public function setLinkChecker(LinkChecker $linkChecker) | |
| 71 | +	{ | |
| 72 | + $this->linkChecker = $linkChecker; | |
| 73 | + } | |
| 74 | + | |
| 75 | + /** | |
| 76 | + * @return LinkChecker | |
| 77 | + */ | |
| 78 | + public function getLinkChecker() | |
| 79 | +	{ | |
| 80 | + return $this->linkChecker; | |
| 81 | + } | |
| 82 | + | |
| 83 | + /** | |
| 84 | + * Check the status of a single link on a page | |
| 85 | + * | |
| 86 | + * @param BrokenExternalPageTrack $pageTrack | |
| 87 | + * @param DOMNode $link | |
| 88 | + */ | |
| 89 | + protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) | |
| 90 | +	{ | |
| 91 | +		$class = $link->getAttribute('class'); | |
| 92 | +		$href = $link->getAttribute('href'); | |
| 93 | +		$markedBroken = preg_match('/\b(ss-broken)\b/', $class); | |
| 94 | + | |
| 95 | + // Check link | |
| 96 | + $httpCode = $this->linkChecker->checkLink($href); | |
| 97 | +		if ($httpCode === null) { | |
| 98 | + return; // Null link means uncheckable, such as an internal link | |
| 99 | + } | |
| 100 | + | |
| 101 | + // If this code is broken then mark as such | |
| 102 | +		if ($foundBroken = $this->isCodeBroken($httpCode)) { | |
| 103 | + // Create broken record | |
| 104 | + $brokenLink = new BrokenExternalLink(); | |
| 105 | + $brokenLink->Link = $href; | |
| 106 | + $brokenLink->HTTPCode = $httpCode; | |
| 107 | + $brokenLink->TrackID = $pageTrack->ID; | |
| 108 | + $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons | |
| 109 | + $brokenLink->write(); | |
| 110 | + } | |
| 111 | + | |
| 112 | + // Check if we need to update CSS class, otherwise return | |
| 113 | +		if ($markedBroken == $foundBroken) { | |
| 114 | + return; | |
| 115 | + } | |
| 116 | +		if ($foundBroken) { | |
| 117 | + $class .= ' ss-broken'; | |
| 118 | +		} else { | |
| 119 | +			$class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); | |
| 120 | + } | |
| 121 | +		$link->setAttribute('class', trim($class)); | |
| 122 | + } | |
| 123 | + | |
| 124 | + /** | |
| 125 | + * Determine if the given HTTP code is "broken" | |
| 126 | + * | |
| 127 | + * @param int $httpCode | |
| 128 | + * @return bool True if this is a broken code | |
| 129 | + */ | |
| 130 | + protected function isCodeBroken($httpCode) | |
| 131 | +	{ | |
| 132 | + // Null represents no request attempted | |
| 133 | +		if ($httpCode === null) { | |
| 134 | + return false; | |
| 135 | + } | |
| 136 | + | |
| 137 | + // do we have any whitelisted codes | |
| 138 | +		$ignoreCodes = $this->config()->get('IgnoreCodes'); | |
| 139 | +		if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) { | |
| 140 | + return false; | |
| 141 | + } | |
| 142 | + | |
| 143 | + // Check if code is outside valid range | |
| 144 | + return $httpCode < 200 || $httpCode > 302; | |
| 145 | + } | |
| 146 | + | |
| 147 | + /** | |
| 148 | + * Runs the links checker and returns the track used | |
| 149 | + * | |
| 150 | + * @param int $limit Limit to number of pages to run, or null to run all | |
| 151 | + * @return BrokenExternalPageTrackStatus | |
| 152 | + */ | |
| 153 | + public function runLinksCheck($limit = null) | |
| 154 | +	{ | |
| 155 | + // Check the current status | |
| 156 | + $status = BrokenExternalPageTrackStatus::get_or_create(); | |
| 157 | + | |
| 158 | + // Calculate pages to run | |
| 159 | + $pageTracks = $status->getIncompleteTracks(); | |
| 160 | +		if ($limit) { | |
| 161 | + $pageTracks = $pageTracks->limit($limit); | |
| 162 | + } | |
| 163 | + | |
| 164 | + // Check each page | |
| 165 | +		foreach ($pageTracks as $pageTrack) { | |
| 166 | + // Flag as complete | |
| 167 | + $pageTrack->Processed = 1; | |
| 168 | + $pageTrack->write(); | |
| 169 | + | |
| 170 | + // Check value of html area | |
| 171 | + $page = $pageTrack->Page(); | |
| 172 | +			$this->log("Checking {$page->Title}"); | |
| 173 | +			$htmlValue = Injector::inst()->create('HTMLValue', $page->Content); | |
| 174 | +			if (!$htmlValue->isValid()) { | |
| 175 | + continue; | |
| 176 | + } | |
| 177 | + | |
| 178 | + // Check each link | |
| 179 | +			$links = $htmlValue->getElementsByTagName('a'); | |
| 180 | +			foreach ($links as $link) { | |
| 181 | + $this->checkPageLink($pageTrack, $link); | |
| 182 | + } | |
| 183 | + | |
| 184 | + // Update content of page based on link fixes / breakages | |
| 185 | + $htmlValue->saveHTML(); | |
| 186 | + $page->Content = $htmlValue->getContent(); | |
| 187 | + $page->write(); | |
| 188 | + | |
| 189 | + // Once all links have been created for this page update HasBrokenLinks | |
| 190 | + $count = $pageTrack->BrokenLinks()->count(); | |
| 191 | +			$this->log("Found {$count} broken links"); | |
| 192 | +			if ($count) { | |
| 193 | + $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class); | |
| 194 | + // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true | |
| 195 | + DB::query(sprintf( | |
| 196 | + 'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', | |
| 197 | + $siteTreeTable, | |
| 198 | + intval($pageTrack->ID) | |
| 199 | + )); | |
| 200 | + } | |
| 201 | + } | |
| 202 | + | |
| 203 | +		$status->updateJobInfo('Updating completed pages'); | |
| 204 | + $status->updateStatus(); | |
| 205 | + return $status; | |
| 206 | + } | |
| 207 | + | |
| 208 | + private function updateCompletedPages($trackID = 0) | |
| 209 | +	{ | |
| 210 | + $noPages = BrokenExternalPageTrack::get() | |
| 211 | + ->filter(array( | |
| 212 | + 'TrackID' => $trackID, | |
| 213 | + 'Processed' => 1 | |
| 214 | + )) | |
| 215 | + ->count(); | |
| 216 | + $track = BrokenExternalPageTrackStatus::get_latest(); | |
| 217 | + $track->CompletedPages = $noPages; | |
| 218 | + $track->write(); | |
| 219 | + return $noPages; | |
| 220 | + } | |
| 221 | + | |
| 222 | + private function updateJobInfo($message) | |
| 223 | +	{ | |
| 224 | + $track = BrokenExternalPageTrackStatus::get_latest(); | |
| 225 | +		if ($track) { | |
| 226 | + $track->JobInfo = $message; | |
| 227 | + $track->write(); | |
| 228 | + } | |
| 229 | + } | |
| 230 | 230 | } |