@@ -17,224 +17,224 @@ |
||
17 | 17 | |
18 | 18 | class CheckExternalLinksTask extends BuildTask |
19 | 19 | { |
20 | - private static $dependencies = [ |
|
21 | - 'LinkChecker' => '%$' . LinkChecker::class |
|
22 | - ]; |
|
23 | - |
|
24 | - private static $segment = 'CheckExternalLinksTask'; |
|
25 | - |
|
26 | - /** |
|
27 | - * Define a list of HTTP response codes that should not be treated as "broken", where they usually |
|
28 | - * might be. |
|
29 | - * |
|
30 | - * @config |
|
31 | - * @var array |
|
32 | - */ |
|
33 | - private static $ignore_codes = []; |
|
34 | - |
|
35 | - /** |
|
36 | - * @var bool |
|
37 | - */ |
|
38 | - protected $silent = false; |
|
39 | - |
|
40 | - /** |
|
41 | - * @var LinkChecker |
|
42 | - */ |
|
43 | - protected $linkChecker; |
|
44 | - |
|
45 | - protected $title = 'Checking broken External links in the SiteTree'; |
|
46 | - |
|
47 | - protected $description = 'A task that records external broken links in the SiteTree'; |
|
48 | - |
|
49 | - protected $enabled = true; |
|
50 | - |
|
51 | - /** |
|
52 | - * Log a message |
|
53 | - * |
|
54 | - * @param string $message |
|
55 | - */ |
|
56 | - protected function log($message) |
|
57 | - { |
|
58 | - if (!$this->silent) { |
|
59 | - Debug::message($message); |
|
60 | - } |
|
61 | - } |
|
62 | - |
|
63 | - public function run($request) |
|
64 | - { |
|
65 | - $this->runLinksCheck(); |
|
66 | - } |
|
67 | - /** |
|
68 | - * Turn on or off message output |
|
69 | - * |
|
70 | - * @param bool $silent |
|
71 | - */ |
|
72 | - public function setSilent($silent) |
|
73 | - { |
|
74 | - $this->silent = $silent; |
|
75 | - } |
|
76 | - |
|
77 | - /** |
|
78 | - * @param LinkChecker $linkChecker |
|
79 | - */ |
|
80 | - public function setLinkChecker(LinkChecker $linkChecker) |
|
81 | - { |
|
82 | - $this->linkChecker = $linkChecker; |
|
83 | - } |
|
84 | - |
|
85 | - /** |
|
86 | - * @return LinkChecker |
|
87 | - */ |
|
88 | - public function getLinkChecker() |
|
89 | - { |
|
90 | - return $this->linkChecker; |
|
91 | - } |
|
92 | - |
|
93 | - /** |
|
94 | - * Check the status of a single link on a page |
|
95 | - * |
|
96 | - * @param BrokenExternalPageTrack $pageTrack |
|
97 | - * @param DOMNode $link |
|
98 | - */ |
|
99 | - protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) |
|
100 | - { |
|
101 | - $class = $link->getAttribute('class'); |
|
102 | - $href = $link->getAttribute('href'); |
|
103 | - $markedBroken = preg_match('/\b(ss-broken)\b/', $class); |
|
104 | - |
|
105 | - // Check link |
|
106 | - $httpCode = $this->linkChecker->checkLink($href); |
|
107 | - if ($httpCode === null) { |
|
108 | - return; // Null link means uncheckable, such as an internal link |
|
109 | - } |
|
110 | - |
|
111 | - // If this code is broken then mark as such |
|
112 | - if ($foundBroken = $this->isCodeBroken($httpCode)) { |
|
113 | - // Create broken record |
|
114 | - $brokenLink = new BrokenExternalLink(); |
|
115 | - $brokenLink->Link = $href; |
|
116 | - $brokenLink->HTTPCode = $httpCode; |
|
117 | - $brokenLink->TrackID = $pageTrack->ID; |
|
118 | - $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons |
|
119 | - $brokenLink->write(); |
|
120 | - } |
|
121 | - |
|
122 | - // Check if we need to update CSS class, otherwise return |
|
123 | - if ($markedBroken == $foundBroken) { |
|
124 | - return; |
|
125 | - } |
|
126 | - if ($foundBroken) { |
|
127 | - $class .= ' ss-broken'; |
|
128 | - } else { |
|
129 | - $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); |
|
130 | - } |
|
131 | - $link->setAttribute('class', trim($class)); |
|
132 | - } |
|
133 | - |
|
134 | - /** |
|
135 | - * Determine if the given HTTP code is "broken" |
|
136 | - * |
|
137 | - * @param int $httpCode |
|
138 | - * @return bool True if this is a broken code |
|
139 | - */ |
|
140 | - protected function isCodeBroken($httpCode) |
|
141 | - { |
|
142 | - // Null represents no request attempted |
|
143 | - if ($httpCode === null) { |
|
144 | - return false; |
|
145 | - } |
|
146 | - |
|
147 | - // do we have any whitelisted codes |
|
148 | - $ignoreCodes = $this->config()->get('ignore_codes'); |
|
149 | - if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) { |
|
150 | - return false; |
|
151 | - } |
|
152 | - |
|
153 | - // Check if code is outside valid range |
|
154 | - return $httpCode < 200 || $httpCode > 302; |
|
155 | - } |
|
156 | - |
|
157 | - /** |
|
158 | - * Runs the links checker and returns the track used |
|
159 | - * |
|
160 | - * @param int $limit Limit to number of pages to run, or null to run all |
|
161 | - * @return BrokenExternalPageTrackStatus |
|
162 | - */ |
|
163 | - public function runLinksCheck($limit = null) |
|
164 | - { |
|
165 | - // Check the current status |
|
166 | - $status = BrokenExternalPageTrackStatus::get_or_create(); |
|
167 | - |
|
168 | - // Calculate pages to run |
|
169 | - $pageTracks = $status->getIncompleteTracks(); |
|
170 | - if ($limit) { |
|
171 | - $pageTracks = $pageTracks->limit($limit); |
|
172 | - } |
|
173 | - |
|
174 | - // Check each page |
|
175 | - foreach ($pageTracks as $pageTrack) { |
|
176 | - // Flag as complete |
|
177 | - $pageTrack->Processed = 1; |
|
178 | - $pageTrack->write(); |
|
179 | - |
|
180 | - // Check value of html area |
|
181 | - $page = $pageTrack->Page(); |
|
182 | - $this->log("Checking {$page->Title}"); |
|
183 | - $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); |
|
184 | - if (!$htmlValue->isValid()) { |
|
185 | - continue; |
|
186 | - } |
|
187 | - |
|
188 | - // Check each link |
|
189 | - $links = $htmlValue->getElementsByTagName('a'); |
|
190 | - foreach ($links as $link) { |
|
191 | - $this->checkPageLink($pageTrack, $link); |
|
192 | - } |
|
193 | - |
|
194 | - // Update content of page based on link fixes / breakages |
|
195 | - $htmlValue->saveHTML(); |
|
196 | - $page->Content = $htmlValue->getContent(); |
|
197 | - $page->write(); |
|
198 | - |
|
199 | - // Once all links have been created for this page update HasBrokenLinks |
|
200 | - $count = $pageTrack->BrokenLinks()->count(); |
|
201 | - $this->log("Found {$count} broken links"); |
|
202 | - if ($count) { |
|
203 | - $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class); |
|
204 | - // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true |
|
205 | - DB::query(sprintf( |
|
206 | - 'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', |
|
207 | - $siteTreeTable, |
|
208 | - intval($pageTrack->ID) |
|
209 | - )); |
|
210 | - } |
|
211 | - } |
|
212 | - |
|
213 | - $status->updateJobInfo('Updating completed pages'); |
|
214 | - $status->updateStatus(); |
|
215 | - return $status; |
|
216 | - } |
|
217 | - |
|
218 | - private function updateCompletedPages($trackID = 0) |
|
219 | - { |
|
220 | - $noPages = BrokenExternalPageTrack::get() |
|
221 | - ->filter(array( |
|
222 | - 'TrackID' => $trackID, |
|
223 | - 'Processed' => 1 |
|
224 | - )) |
|
225 | - ->count(); |
|
226 | - $track = BrokenExternalPageTrackStatus::get_latest(); |
|
227 | - $track->CompletedPages = $noPages; |
|
228 | - $track->write(); |
|
229 | - return $noPages; |
|
230 | - } |
|
231 | - |
|
232 | - private function updateJobInfo($message) |
|
233 | - { |
|
234 | - $track = BrokenExternalPageTrackStatus::get_latest(); |
|
235 | - if ($track) { |
|
236 | - $track->JobInfo = $message; |
|
237 | - $track->write(); |
|
238 | - } |
|
239 | - } |
|
20 | + private static $dependencies = [ |
|
21 | + 'LinkChecker' => '%$' . LinkChecker::class |
|
22 | + ]; |
|
23 | + |
|
24 | + private static $segment = 'CheckExternalLinksTask'; |
|
25 | + |
|
26 | + /** |
|
27 | + * Define a list of HTTP response codes that should not be treated as "broken", where they usually |
|
28 | + * might be. |
|
29 | + * |
|
30 | + * @config |
|
31 | + * @var array |
|
32 | + */ |
|
33 | + private static $ignore_codes = []; |
|
34 | + |
|
35 | + /** |
|
36 | + * @var bool |
|
37 | + */ |
|
38 | + protected $silent = false; |
|
39 | + |
|
40 | + /** |
|
41 | + * @var LinkChecker |
|
42 | + */ |
|
43 | + protected $linkChecker; |
|
44 | + |
|
45 | + protected $title = 'Checking broken External links in the SiteTree'; |
|
46 | + |
|
47 | + protected $description = 'A task that records external broken links in the SiteTree'; |
|
48 | + |
|
49 | + protected $enabled = true; |
|
50 | + |
|
51 | + /** |
|
52 | + * Log a message |
|
53 | + * |
|
54 | + * @param string $message |
|
55 | + */ |
|
56 | + protected function log($message) |
|
57 | + { |
|
58 | + if (!$this->silent) { |
|
59 | + Debug::message($message); |
|
60 | + } |
|
61 | + } |
|
62 | + |
|
63 | + public function run($request) |
|
64 | + { |
|
65 | + $this->runLinksCheck(); |
|
66 | + } |
|
67 | + /** |
|
68 | + * Turn on or off message output |
|
69 | + * |
|
70 | + * @param bool $silent |
|
71 | + */ |
|
72 | + public function setSilent($silent) |
|
73 | + { |
|
74 | + $this->silent = $silent; |
|
75 | + } |
|
76 | + |
|
77 | + /** |
|
78 | + * @param LinkChecker $linkChecker |
|
79 | + */ |
|
80 | + public function setLinkChecker(LinkChecker $linkChecker) |
|
81 | + { |
|
82 | + $this->linkChecker = $linkChecker; |
|
83 | + } |
|
84 | + |
|
85 | + /** |
|
86 | + * @return LinkChecker |
|
87 | + */ |
|
88 | + public function getLinkChecker() |
|
89 | + { |
|
90 | + return $this->linkChecker; |
|
91 | + } |
|
92 | + |
|
93 | + /** |
|
94 | + * Check the status of a single link on a page |
|
95 | + * |
|
96 | + * @param BrokenExternalPageTrack $pageTrack |
|
97 | + * @param DOMNode $link |
|
98 | + */ |
|
99 | + protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) |
|
100 | + { |
|
101 | + $class = $link->getAttribute('class'); |
|
102 | + $href = $link->getAttribute('href'); |
|
103 | + $markedBroken = preg_match('/\b(ss-broken)\b/', $class); |
|
104 | + |
|
105 | + // Check link |
|
106 | + $httpCode = $this->linkChecker->checkLink($href); |
|
107 | + if ($httpCode === null) { |
|
108 | + return; // Null link means uncheckable, such as an internal link |
|
109 | + } |
|
110 | + |
|
111 | + // If this code is broken then mark as such |
|
112 | + if ($foundBroken = $this->isCodeBroken($httpCode)) { |
|
113 | + // Create broken record |
|
114 | + $brokenLink = new BrokenExternalLink(); |
|
115 | + $brokenLink->Link = $href; |
|
116 | + $brokenLink->HTTPCode = $httpCode; |
|
117 | + $brokenLink->TrackID = $pageTrack->ID; |
|
118 | + $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons |
|
119 | + $brokenLink->write(); |
|
120 | + } |
|
121 | + |
|
122 | + // Check if we need to update CSS class, otherwise return |
|
123 | + if ($markedBroken == $foundBroken) { |
|
124 | + return; |
|
125 | + } |
|
126 | + if ($foundBroken) { |
|
127 | + $class .= ' ss-broken'; |
|
128 | + } else { |
|
129 | + $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); |
|
130 | + } |
|
131 | + $link->setAttribute('class', trim($class)); |
|
132 | + } |
|
133 | + |
|
134 | + /** |
|
135 | + * Determine if the given HTTP code is "broken" |
|
136 | + * |
|
137 | + * @param int $httpCode |
|
138 | + * @return bool True if this is a broken code |
|
139 | + */ |
|
140 | + protected function isCodeBroken($httpCode) |
|
141 | + { |
|
142 | + // Null represents no request attempted |
|
143 | + if ($httpCode === null) { |
|
144 | + return false; |
|
145 | + } |
|
146 | + |
|
147 | + // do we have any whitelisted codes |
|
148 | + $ignoreCodes = $this->config()->get('ignore_codes'); |
|
149 | + if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) { |
|
150 | + return false; |
|
151 | + } |
|
152 | + |
|
153 | + // Check if code is outside valid range |
|
154 | + return $httpCode < 200 || $httpCode > 302; |
|
155 | + } |
|
156 | + |
|
157 | + /** |
|
158 | + * Runs the links checker and returns the track used |
|
159 | + * |
|
160 | + * @param int $limit Limit to number of pages to run, or null to run all |
|
161 | + * @return BrokenExternalPageTrackStatus |
|
162 | + */ |
|
163 | + public function runLinksCheck($limit = null) |
|
164 | + { |
|
165 | + // Check the current status |
|
166 | + $status = BrokenExternalPageTrackStatus::get_or_create(); |
|
167 | + |
|
168 | + // Calculate pages to run |
|
169 | + $pageTracks = $status->getIncompleteTracks(); |
|
170 | + if ($limit) { |
|
171 | + $pageTracks = $pageTracks->limit($limit); |
|
172 | + } |
|
173 | + |
|
174 | + // Check each page |
|
175 | + foreach ($pageTracks as $pageTrack) { |
|
176 | + // Flag as complete |
|
177 | + $pageTrack->Processed = 1; |
|
178 | + $pageTrack->write(); |
|
179 | + |
|
180 | + // Check value of html area |
|
181 | + $page = $pageTrack->Page(); |
|
182 | + $this->log("Checking {$page->Title}"); |
|
183 | + $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); |
|
184 | + if (!$htmlValue->isValid()) { |
|
185 | + continue; |
|
186 | + } |
|
187 | + |
|
188 | + // Check each link |
|
189 | + $links = $htmlValue->getElementsByTagName('a'); |
|
190 | + foreach ($links as $link) { |
|
191 | + $this->checkPageLink($pageTrack, $link); |
|
192 | + } |
|
193 | + |
|
194 | + // Update content of page based on link fixes / breakages |
|
195 | + $htmlValue->saveHTML(); |
|
196 | + $page->Content = $htmlValue->getContent(); |
|
197 | + $page->write(); |
|
198 | + |
|
199 | + // Once all links have been created for this page update HasBrokenLinks |
|
200 | + $count = $pageTrack->BrokenLinks()->count(); |
|
201 | + $this->log("Found {$count} broken links"); |
|
202 | + if ($count) { |
|
203 | + $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class); |
|
204 | + // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true |
|
205 | + DB::query(sprintf( |
|
206 | + 'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', |
|
207 | + $siteTreeTable, |
|
208 | + intval($pageTrack->ID) |
|
209 | + )); |
|
210 | + } |
|
211 | + } |
|
212 | + |
|
213 | + $status->updateJobInfo('Updating completed pages'); |
|
214 | + $status->updateStatus(); |
|
215 | + return $status; |
|
216 | + } |
|
217 | + |
|
218 | + private function updateCompletedPages($trackID = 0) |
|
219 | + { |
|
220 | + $noPages = BrokenExternalPageTrack::get() |
|
221 | + ->filter(array( |
|
222 | + 'TrackID' => $trackID, |
|
223 | + 'Processed' => 1 |
|
224 | + )) |
|
225 | + ->count(); |
|
226 | + $track = BrokenExternalPageTrackStatus::get_latest(); |
|
227 | + $track->CompletedPages = $noPages; |
|
228 | + $track->write(); |
|
229 | + return $noPages; |
|
230 | + } |
|
231 | + |
|
232 | + private function updateJobInfo($message) |
|
233 | + { |
|
234 | + $track = BrokenExternalPageTrackStatus::get_latest(); |
|
235 | + if ($track) { |
|
236 | + $track->JobInfo = $message; |
|
237 | + $track->write(); |
|
238 | + } |
|
239 | + } |
|
240 | 240 | } |
@@ -18,7 +18,7 @@ |
||
18 | 18 | class CheckExternalLinksTask extends BuildTask |
19 | 19 | { |
20 | 20 | private static $dependencies = [ |
21 | - 'LinkChecker' => '%$' . LinkChecker::class |
|
21 | + 'LinkChecker' => '%$'.LinkChecker::class |
|
22 | 22 | ]; |
23 | 23 | |
24 | 24 | private static $segment = 'CheckExternalLinksTask'; |
@@ -17,68 +17,68 @@ |
||
17 | 17 | */ |
18 | 18 | class BrokenExternalLink extends DataObject |
19 | 19 | { |
20 | - private static $table_name = 'BrokenExternalLink'; |
|
20 | + private static $table_name = 'BrokenExternalLink'; |
|
21 | 21 | |
22 | - private static $db = array( |
|
23 | - 'Link' => 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. |
|
24 | - 'HTTPCode' =>'Int' |
|
25 | - ); |
|
22 | + private static $db = array( |
|
23 | + 'Link' => 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. |
|
24 | + 'HTTPCode' =>'Int' |
|
25 | + ); |
|
26 | 26 | |
27 | - private static $has_one = array( |
|
28 | - 'Track' => BrokenExternalPageTrack::class, |
|
29 | - 'Status' => BrokenExternalPageTrackStatus::class |
|
30 | - ); |
|
27 | + private static $has_one = array( |
|
28 | + 'Track' => BrokenExternalPageTrack::class, |
|
29 | + 'Status' => BrokenExternalPageTrackStatus::class |
|
30 | + ); |
|
31 | 31 | |
32 | - private static $summary_fields = array( |
|
33 | - 'Created' => 'Checked', |
|
34 | - 'Link' => 'External Link', |
|
35 | - 'HTTPCodeDescription' => 'HTTP Error Code', |
|
36 | - 'Page.Title' => 'Page link is on' |
|
37 | - ); |
|
32 | + private static $summary_fields = array( |
|
33 | + 'Created' => 'Checked', |
|
34 | + 'Link' => 'External Link', |
|
35 | + 'HTTPCodeDescription' => 'HTTP Error Code', |
|
36 | + 'Page.Title' => 'Page link is on' |
|
37 | + ); |
|
38 | 38 | |
39 | - private static $searchable_fields = array( |
|
40 | - 'HTTPCode' => array('title' => 'HTTP Code') |
|
41 | - ); |
|
39 | + private static $searchable_fields = array( |
|
40 | + 'HTTPCode' => array('title' => 'HTTP Code') |
|
41 | + ); |
|
42 | 42 | |
43 | - /** |
|
44 | - * @return SiteTree |
|
45 | - */ |
|
46 | - public function Page() |
|
47 | - { |
|
48 | - return $this->Track()->Page(); |
|
49 | - } |
|
43 | + /** |
|
44 | + * @return SiteTree |
|
45 | + */ |
|
46 | + public function Page() |
|
47 | + { |
|
48 | + return $this->Track()->Page(); |
|
49 | + } |
|
50 | 50 | |
51 | - public function canEdit($member = false) |
|
52 | - { |
|
53 | - return false; |
|
54 | - } |
|
51 | + public function canEdit($member = false) |
|
52 | + { |
|
53 | + return false; |
|
54 | + } |
|
55 | 55 | |
56 | - public function canView($member = false) |
|
57 | - { |
|
58 | - $member = $member ? $member : Security::getCurrentUser(); |
|
59 | - $codes = array('content-authors', 'administrators'); |
|
60 | - return Permission::checkMember($member, $codes); |
|
61 | - } |
|
56 | + public function canView($member = false) |
|
57 | + { |
|
58 | + $member = $member ? $member : Security::getCurrentUser(); |
|
59 | + $codes = array('content-authors', 'administrators'); |
|
60 | + return Permission::checkMember($member, $codes); |
|
61 | + } |
|
62 | 62 | |
63 | - /** |
|
64 | - * Retrieve a human readable description of a response code |
|
65 | - * |
|
66 | - * @return string |
|
67 | - */ |
|
68 | - public function getHTTPCodeDescription() |
|
69 | - { |
|
70 | - $code = $this->HTTPCode; |
|
63 | + /** |
|
64 | + * Retrieve a human readable description of a response code |
|
65 | + * |
|
66 | + * @return string |
|
67 | + */ |
|
68 | + public function getHTTPCodeDescription() |
|
69 | + { |
|
70 | + $code = $this->HTTPCode; |
|
71 | 71 | |
72 | - try { |
|
73 | - $response = HTTPResponse::create('', $code); |
|
74 | - // Assume that $code = 0 means there was no response |
|
75 | - $description = $code ? |
|
76 | - $response->getStatusDescription() : |
|
77 | - _t(__CLASS__ . '.NOTAVAILABLE', 'Server Not Available'); |
|
78 | - } catch (InvalidArgumentException $e) { |
|
79 | - $description = _t(__CLASS__ . '.UNKNOWNRESPONSE', 'Unknown Response Code'); |
|
80 | - } |
|
72 | + try { |
|
73 | + $response = HTTPResponse::create('', $code); |
|
74 | + // Assume that $code = 0 means there was no response |
|
75 | + $description = $code ? |
|
76 | + $response->getStatusDescription() : |
|
77 | + _t(__CLASS__ . '.NOTAVAILABLE', 'Server Not Available'); |
|
78 | + } catch (InvalidArgumentException $e) { |
|
79 | + $description = _t(__CLASS__ . '.UNKNOWNRESPONSE', 'Unknown Response Code'); |
|
80 | + } |
|
81 | 81 | |
82 | - return sprintf("%d (%s)", $code, $description); |
|
83 | - } |
|
82 | + return sprintf("%d (%s)", $code, $description); |
|
83 | + } |
|
84 | 84 | } |