@@ -18,229 +18,229 @@ |
||
18 | 18 | |
19 | 19 | class CheckExternalLinksTask extends BuildTask |
20 | 20 | { |
21 | - private static $dependencies = [ |
|
22 | - 'LinkChecker' => '%$' . LinkChecker::class |
|
23 | - ]; |
|
24 | - |
|
25 | - private static $segment = 'CheckExternalLinksTask'; |
|
26 | - |
|
27 | - /** |
|
28 | - * Define a list of HTTP response codes that should not be treated as "broken", where they usually |
|
29 | - * might be. |
|
30 | - * |
|
31 | - * @config |
|
32 | - * @var array |
|
33 | - */ |
|
34 | - private static $ignore_codes = []; |
|
35 | - |
|
36 | - /** |
|
37 | - * @var bool |
|
38 | - */ |
|
39 | - protected $silent = false; |
|
40 | - |
|
41 | - /** |
|
42 | - * @var LinkChecker |
|
43 | - */ |
|
44 | - protected $linkChecker; |
|
45 | - |
|
46 | - protected $title = 'Checking broken External links in the SiteTree'; |
|
47 | - |
|
48 | - protected $description = 'A task that records external broken links in the SiteTree'; |
|
49 | - |
|
50 | - protected $enabled = true; |
|
51 | - |
|
52 | - /** |
|
53 | - * Log a message |
|
54 | - * |
|
55 | - * @param string $message |
|
56 | - */ |
|
57 | - protected function log($message) |
|
58 | - { |
|
59 | - if (!$this->silent) { |
|
60 | - Debug::message($message); |
|
61 | - } |
|
62 | - } |
|
63 | - |
|
64 | - public function run($request) |
|
65 | - { |
|
66 | - $this->runLinksCheck(); |
|
67 | - } |
|
68 | - /** |
|
69 | - * Turn on or off message output |
|
70 | - * |
|
71 | - * @param bool $silent |
|
72 | - */ |
|
73 | - public function setSilent($silent) |
|
74 | - { |
|
75 | - $this->silent = $silent; |
|
76 | - } |
|
77 | - |
|
78 | - /** |
|
79 | - * @param LinkChecker $linkChecker |
|
80 | - */ |
|
81 | - public function setLinkChecker(LinkChecker $linkChecker) |
|
82 | - { |
|
83 | - $this->linkChecker = $linkChecker; |
|
84 | - } |
|
85 | - |
|
86 | - /** |
|
87 | - * @return LinkChecker |
|
88 | - */ |
|
89 | - public function getLinkChecker() |
|
90 | - { |
|
91 | - return $this->linkChecker; |
|
92 | - } |
|
93 | - |
|
94 | - /** |
|
95 | - * Check the status of a single link on a page |
|
96 | - * |
|
97 | - * @param BrokenExternalPageTrack $pageTrack |
|
98 | - * @param DOMNode $link |
|
99 | - */ |
|
100 | - protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) |
|
101 | - { |
|
102 | - $class = $link->getAttribute('class'); |
|
103 | - $href = $link->getAttribute('href'); |
|
104 | - $markedBroken = preg_match('/\b(ss-broken)\b/', $class); |
|
105 | - |
|
106 | - // Check link |
|
107 | - $httpCode = $this->linkChecker->checkLink($href); |
|
108 | - if ($httpCode === null) { |
|
109 | - return; // Null link means uncheckable, such as an internal link |
|
110 | - } |
|
111 | - |
|
112 | - // If this code is broken then mark as such |
|
113 | - if ($foundBroken = $this->isCodeBroken($httpCode)) { |
|
114 | - // Create broken record |
|
115 | - $brokenLink = new BrokenExternalLink(); |
|
116 | - $brokenLink->Link = $href; |
|
117 | - $brokenLink->HTTPCode = $httpCode; |
|
118 | - $brokenLink->TrackID = $pageTrack->ID; |
|
119 | - $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons |
|
120 | - $brokenLink->write(); |
|
121 | - } |
|
122 | - |
|
123 | - // Check if we need to update CSS class, otherwise return |
|
124 | - if ($markedBroken == $foundBroken) { |
|
125 | - return; |
|
126 | - } |
|
127 | - if ($foundBroken) { |
|
128 | - $class .= ' ss-broken'; |
|
129 | - } else { |
|
130 | - $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); |
|
131 | - } |
|
132 | - $link->setAttribute('class', trim($class)); |
|
133 | - } |
|
134 | - |
|
135 | - /** |
|
136 | - * Determine if the given HTTP code is "broken" |
|
137 | - * |
|
138 | - * @param int $httpCode |
|
139 | - * @return bool True if this is a broken code |
|
140 | - */ |
|
141 | - protected function isCodeBroken($httpCode) |
|
142 | - { |
|
143 | - // Null represents no request attempted |
|
144 | - if ($httpCode === null) { |
|
145 | - return false; |
|
146 | - } |
|
147 | - |
|
148 | - // do we have any whitelisted codes |
|
149 | - $ignoreCodes = $this->config()->get('ignore_codes'); |
|
150 | - if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) { |
|
151 | - return false; |
|
152 | - } |
|
153 | - |
|
154 | - // Check if code is outside valid range |
|
155 | - return $httpCode < 200 || $httpCode > 302; |
|
156 | - } |
|
157 | - |
|
158 | - /** |
|
159 | - * Runs the links checker and returns the track used |
|
160 | - * |
|
161 | - * @param int $limit Limit to number of pages to run, or null to run all |
|
162 | - * @return BrokenExternalPageTrackStatus |
|
163 | - */ |
|
164 | - public function runLinksCheck($limit = null) |
|
165 | - { |
|
166 | - // Check the current status |
|
167 | - $status = BrokenExternalPageTrackStatus::get_or_create(); |
|
168 | - |
|
169 | - // Calculate pages to run |
|
170 | - $pageTracks = $status->getIncompleteTracks(); |
|
171 | - if ($limit) { |
|
172 | - $pageTracks = $pageTracks->limit($limit); |
|
173 | - } |
|
174 | - |
|
175 | - // Check each page |
|
176 | - foreach ($pageTracks as $pageTrack) { |
|
177 | - // Flag as complete |
|
178 | - $pageTrack->Processed = 1; |
|
179 | - $pageTrack->write(); |
|
180 | - |
|
181 | - // Check value of html area |
|
182 | - $page = $pageTrack->Page(); |
|
183 | - $this->log("Checking {$page->Title}"); |
|
184 | - $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); |
|
185 | - if (!$htmlValue->isValid()) { |
|
186 | - continue; |
|
187 | - } |
|
188 | - |
|
189 | - // Check each link |
|
190 | - $links = $htmlValue->getElementsByTagName('a'); |
|
191 | - foreach ($links as $link) { |
|
192 | - $this->checkPageLink($pageTrack, $link); |
|
193 | - } |
|
194 | - |
|
195 | - // Update content of page based on link fixes / breakages |
|
196 | - $htmlValue->saveHTML(); |
|
197 | - $page->Content = $htmlValue->getContent(); |
|
198 | - try { |
|
199 | - $page->write(); |
|
200 | - } catch (ValidationException $ex) { |
|
201 | - $this->log("Exception caught for {$page->Title}, skipping. Message: " . $ex->getMessage()); |
|
202 | - continue; |
|
203 | - } |
|
204 | - |
|
205 | - // Once all links have been created for this page update HasBrokenLinks |
|
206 | - $count = $pageTrack->BrokenLinks()->count(); |
|
207 | - $this->log("Found {$count} broken links"); |
|
208 | - if ($count) { |
|
209 | - $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class); |
|
210 | - // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true |
|
211 | - DB::query(sprintf( |
|
212 | - 'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', |
|
213 | - $siteTreeTable, |
|
214 | - intval($pageTrack->ID) |
|
215 | - )); |
|
216 | - } |
|
217 | - } |
|
218 | - |
|
219 | - $status->updateJobInfo('Updating completed pages'); |
|
220 | - $status->updateStatus(); |
|
221 | - return $status; |
|
222 | - } |
|
223 | - |
|
224 | - private function updateCompletedPages($trackID = 0) |
|
225 | - { |
|
226 | - $noPages = BrokenExternalPageTrack::get() |
|
227 | - ->filter(array( |
|
228 | - 'TrackID' => $trackID, |
|
229 | - 'Processed' => 1 |
|
230 | - )) |
|
231 | - ->count(); |
|
232 | - $track = BrokenExternalPageTrackStatus::get_latest(); |
|
233 | - $track->CompletedPages = $noPages; |
|
234 | - $track->write(); |
|
235 | - return $noPages; |
|
236 | - } |
|
237 | - |
|
238 | - private function updateJobInfo($message) |
|
239 | - { |
|
240 | - $track = BrokenExternalPageTrackStatus::get_latest(); |
|
241 | - if ($track) { |
|
242 | - $track->JobInfo = $message; |
|
243 | - $track->write(); |
|
244 | - } |
|
245 | - } |
|
21 | + private static $dependencies = [ |
|
22 | + 'LinkChecker' => '%$' . LinkChecker::class |
|
23 | + ]; |
|
24 | + |
|
25 | + private static $segment = 'CheckExternalLinksTask'; |
|
26 | + |
|
27 | + /** |
|
28 | + * Define a list of HTTP response codes that should not be treated as "broken", where they usually |
|
29 | + * might be. |
|
30 | + * |
|
31 | + * @config |
|
32 | + * @var array |
|
33 | + */ |
|
34 | + private static $ignore_codes = []; |
|
35 | + |
|
36 | + /** |
|
37 | + * @var bool |
|
38 | + */ |
|
39 | + protected $silent = false; |
|
40 | + |
|
41 | + /** |
|
42 | + * @var LinkChecker |
|
43 | + */ |
|
44 | + protected $linkChecker; |
|
45 | + |
|
46 | + protected $title = 'Checking broken External links in the SiteTree'; |
|
47 | + |
|
48 | + protected $description = 'A task that records external broken links in the SiteTree'; |
|
49 | + |
|
50 | + protected $enabled = true; |
|
51 | + |
|
52 | + /** |
|
53 | + * Log a message |
|
54 | + * |
|
55 | + * @param string $message |
|
56 | + */ |
|
57 | + protected function log($message) |
|
58 | + { |
|
59 | + if (!$this->silent) { |
|
60 | + Debug::message($message); |
|
61 | + } |
|
62 | + } |
|
63 | + |
|
64 | + public function run($request) |
|
65 | + { |
|
66 | + $this->runLinksCheck(); |
|
67 | + } |
|
68 | + /** |
|
69 | + * Turn on or off message output |
|
70 | + * |
|
71 | + * @param bool $silent |
|
72 | + */ |
|
73 | + public function setSilent($silent) |
|
74 | + { |
|
75 | + $this->silent = $silent; |
|
76 | + } |
|
77 | + |
|
78 | + /** |
|
79 | + * @param LinkChecker $linkChecker |
|
80 | + */ |
|
81 | + public function setLinkChecker(LinkChecker $linkChecker) |
|
82 | + { |
|
83 | + $this->linkChecker = $linkChecker; |
|
84 | + } |
|
85 | + |
|
86 | + /** |
|
87 | + * @return LinkChecker |
|
88 | + */ |
|
89 | + public function getLinkChecker() |
|
90 | + { |
|
91 | + return $this->linkChecker; |
|
92 | + } |
|
93 | + |
|
94 | + /** |
|
95 | + * Check the status of a single link on a page |
|
96 | + * |
|
97 | + * @param BrokenExternalPageTrack $pageTrack |
|
98 | + * @param DOMNode $link |
|
99 | + */ |
|
100 | + protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) |
|
101 | + { |
|
102 | + $class = $link->getAttribute('class'); |
|
103 | + $href = $link->getAttribute('href'); |
|
104 | + $markedBroken = preg_match('/\b(ss-broken)\b/', $class); |
|
105 | + |
|
106 | + // Check link |
|
107 | + $httpCode = $this->linkChecker->checkLink($href); |
|
108 | + if ($httpCode === null) { |
|
109 | + return; // Null link means uncheckable, such as an internal link |
|
110 | + } |
|
111 | + |
|
112 | + // If this code is broken then mark as such |
|
113 | + if ($foundBroken = $this->isCodeBroken($httpCode)) { |
|
114 | + // Create broken record |
|
115 | + $brokenLink = new BrokenExternalLink(); |
|
116 | + $brokenLink->Link = $href; |
|
117 | + $brokenLink->HTTPCode = $httpCode; |
|
118 | + $brokenLink->TrackID = $pageTrack->ID; |
|
119 | + $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons |
|
120 | + $brokenLink->write(); |
|
121 | + } |
|
122 | + |
|
123 | + // Check if we need to update CSS class, otherwise return |
|
124 | + if ($markedBroken == $foundBroken) { |
|
125 | + return; |
|
126 | + } |
|
127 | + if ($foundBroken) { |
|
128 | + $class .= ' ss-broken'; |
|
129 | + } else { |
|
130 | + $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); |
|
131 | + } |
|
132 | + $link->setAttribute('class', trim($class)); |
|
133 | + } |
|
134 | + |
|
135 | + /** |
|
136 | + * Determine if the given HTTP code is "broken" |
|
137 | + * |
|
138 | + * @param int $httpCode |
|
139 | + * @return bool True if this is a broken code |
|
140 | + */ |
|
141 | + protected function isCodeBroken($httpCode) |
|
142 | + { |
|
143 | + // Null represents no request attempted |
|
144 | + if ($httpCode === null) { |
|
145 | + return false; |
|
146 | + } |
|
147 | + |
|
148 | + // do we have any whitelisted codes |
|
149 | + $ignoreCodes = $this->config()->get('ignore_codes'); |
|
150 | + if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) { |
|
151 | + return false; |
|
152 | + } |
|
153 | + |
|
154 | + // Check if code is outside valid range |
|
155 | + return $httpCode < 200 || $httpCode > 302; |
|
156 | + } |
|
157 | + |
|
158 | + /** |
|
159 | + * Runs the links checker and returns the track used |
|
160 | + * |
|
161 | + * @param int $limit Limit to number of pages to run, or null to run all |
|
162 | + * @return BrokenExternalPageTrackStatus |
|
163 | + */ |
|
164 | + public function runLinksCheck($limit = null) |
|
165 | + { |
|
166 | + // Check the current status |
|
167 | + $status = BrokenExternalPageTrackStatus::get_or_create(); |
|
168 | + |
|
169 | + // Calculate pages to run |
|
170 | + $pageTracks = $status->getIncompleteTracks(); |
|
171 | + if ($limit) { |
|
172 | + $pageTracks = $pageTracks->limit($limit); |
|
173 | + } |
|
174 | + |
|
175 | + // Check each page |
|
176 | + foreach ($pageTracks as $pageTrack) { |
|
177 | + // Flag as complete |
|
178 | + $pageTrack->Processed = 1; |
|
179 | + $pageTrack->write(); |
|
180 | + |
|
181 | + // Check value of html area |
|
182 | + $page = $pageTrack->Page(); |
|
183 | + $this->log("Checking {$page->Title}"); |
|
184 | + $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); |
|
185 | + if (!$htmlValue->isValid()) { |
|
186 | + continue; |
|
187 | + } |
|
188 | + |
|
189 | + // Check each link |
|
190 | + $links = $htmlValue->getElementsByTagName('a'); |
|
191 | + foreach ($links as $link) { |
|
192 | + $this->checkPageLink($pageTrack, $link); |
|
193 | + } |
|
194 | + |
|
195 | + // Update content of page based on link fixes / breakages |
|
196 | + $htmlValue->saveHTML(); |
|
197 | + $page->Content = $htmlValue->getContent(); |
|
198 | + try { |
|
199 | + $page->write(); |
|
200 | + } catch (ValidationException $ex) { |
|
201 | + $this->log("Exception caught for {$page->Title}, skipping. Message: " . $ex->getMessage()); |
|
202 | + continue; |
|
203 | + } |
|
204 | + |
|
205 | + // Once all links have been created for this page update HasBrokenLinks |
|
206 | + $count = $pageTrack->BrokenLinks()->count(); |
|
207 | + $this->log("Found {$count} broken links"); |
|
208 | + if ($count) { |
|
209 | + $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class); |
|
210 | + // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true |
|
211 | + DB::query(sprintf( |
|
212 | + 'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', |
|
213 | + $siteTreeTable, |
|
214 | + intval($pageTrack->ID) |
|
215 | + )); |
|
216 | + } |
|
217 | + } |
|
218 | + |
|
219 | + $status->updateJobInfo('Updating completed pages'); |
|
220 | + $status->updateStatus(); |
|
221 | + return $status; |
|
222 | + } |
|
223 | + |
|
224 | + private function updateCompletedPages($trackID = 0) |
|
225 | + { |
|
226 | + $noPages = BrokenExternalPageTrack::get() |
|
227 | + ->filter(array( |
|
228 | + 'TrackID' => $trackID, |
|
229 | + 'Processed' => 1 |
|
230 | + )) |
|
231 | + ->count(); |
|
232 | + $track = BrokenExternalPageTrackStatus::get_latest(); |
|
233 | + $track->CompletedPages = $noPages; |
|
234 | + $track->write(); |
|
235 | + return $noPages; |
|
236 | + } |
|
237 | + |
|
238 | + private function updateJobInfo($message) |
|
239 | + { |
|
240 | + $track = BrokenExternalPageTrackStatus::get_latest(); |
|
241 | + if ($track) { |
|
242 | + $track->JobInfo = $message; |
|
243 | + $track->write(); |
|
244 | + } |
|
245 | + } |
|
246 | 246 | } |
@@ -19,7 +19,7 @@ discard block |
||
19 | 19 | class CheckExternalLinksTask extends BuildTask |
20 | 20 | { |
21 | 21 | private static $dependencies = [ |
22 | - 'LinkChecker' => '%$' . LinkChecker::class |
|
22 | + 'LinkChecker' => '%$'.LinkChecker::class |
|
23 | 23 | ]; |
24 | 24 | |
25 | 25 | private static $segment = 'CheckExternalLinksTask'; |
@@ -198,7 +198,7 @@ discard block |
||
198 | 198 | try { |
199 | 199 | $page->write(); |
200 | 200 | } catch (ValidationException $ex) { |
201 | - $this->log("Exception caught for {$page->Title}, skipping. Message: " . $ex->getMessage()); |
|
201 | + $this->log("Exception caught for {$page->Title}, skipping. Message: ".$ex->getMessage()); |
|
202 | 202 | continue; |
203 | 203 | } |
204 | 204 |