1 | <?php |
||
2 | |||
3 | namespace SilverStripe\ExternalLinks\Tasks; |
||
4 | |||
5 | use DOMNode; |
||
6 | use SilverStripe\CMS\Model\SiteTree; |
||
7 | use SilverStripe\Core\Config\Config; |
||
8 | use SilverStripe\Core\Injector\Injector; |
||
9 | use SilverStripe\Dev\BuildTask; |
||
10 | use SilverStripe\Dev\Debug; |
||
11 | use SilverStripe\ExternalLinks\Model\BrokenExternalLink; |
||
12 | use SilverStripe\ExternalLinks\Model\BrokenExternalPageTrack; |
||
13 | use SilverStripe\ExternalLinks\Model\BrokenExternalPageTrackStatus; |
||
14 | use SilverStripe\ExternalLinks\Tasks\LinkChecker; |
||
15 | use SilverStripe\ORM\DataObject; |
||
16 | use SilverStripe\ORM\DB; |
||
17 | use SilverStripe\ORM\ValidationException; |
||
18 | |||
/**
 * Build task that walks the incomplete page tracks of the current check status, tests every
 * anchor found in each page's Content, records links that respond with a "broken" HTTP code and
 * keeps the "ss-broken" CSS class on those anchors in sync with the result.
 */
class CheckExternalLinksTask extends BuildTask
{
    private static $dependencies = [
        'LinkChecker' => '%$' . LinkChecker::class
    ];

    private static $segment = 'CheckExternalLinksTask';

    /**
     * Define a list of HTTP response codes that should not be treated as "broken", where they usually
     * might be.
     *
     * @config
     * @var array
     */
    private static $ignore_codes = [];

    /**
     * When true, {@see log()} discards messages instead of printing them.
     *
     * @var bool
     */
    protected $silent = false;

    /**
     * Service used to resolve a link to an HTTP status code.
     *
     * @var LinkChecker
     */
    protected $linkChecker;

    protected $title = 'Checking broken External links in the SiteTree';

    protected $description = 'A task that records external broken links in the SiteTree';

    protected $enabled = true;

    /**
     * Log a message, unless output has been silenced via {@see setSilent()}.
     *
     * @param string $message
     */
    protected function log($message)
    {
        if (!$this->silent) {
            Debug::message($message);
        }
    }

    /**
     * Task entry point: runs the link check without a page limit.
     *
     * @param mixed $request Not used by this task
     */
    public function run($request)
    {
        $this->runLinksCheck();
    }

    /**
     * Turn on or off message output
     *
     * @param bool $silent
     */
    public function setSilent($silent)
    {
        $this->silent = $silent;
    }

    /**
     * @param LinkChecker $linkChecker
     */
    public function setLinkChecker(LinkChecker $linkChecker)
    {
        $this->linkChecker = $linkChecker;
    }

    /**
     * @return LinkChecker
     */
    public function getLinkChecker()
    {
        return $this->linkChecker;
    }

    /**
     * Check the status of a single link on a page
     *
     * Writes a BrokenExternalLink record when the link resolves to a broken code, then adds or
     * removes the "ss-broken" class on the node so that it matches the result.
     *
     * @param BrokenExternalPageTrack $pageTrack Track the broken link record is attached to
     * @param DOMNode $link Anchor node to check; nodes that are not elements are ignored
     */
    protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link)
    {
        // Attributes only exist on element nodes; a plain DOMNode has no getAttribute()/setAttribute()
        if (!$link instanceof \DOMElement) {
            return;
        }

        $href = $link->getAttribute('href');

        // Check link
        $httpCode = $this->linkChecker->checkLink($href);
        if ($httpCode === null) {
            return; // Null link means uncheckable, such as an internal link
        }

        // Treat the class attribute as a list of whitespace separated names, so that a name which
        // merely contains "ss-broken" (e.g. "ss-broken-foo") is neither detected nor rewritten
        $classNames = preg_split('/\s+/', $link->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY) ?: [];
        $markedBroken = in_array('ss-broken', $classNames, true);

        // If this code is broken then mark as such
        $foundBroken = (bool) $this->isCodeBroken($httpCode);
        if ($foundBroken) {
            // Create broken record
            $brokenLink = new BrokenExternalLink();
            $brokenLink->Link = $href;
            $brokenLink->HTTPCode = $httpCode;
            $brokenLink->TrackID = $pageTrack->ID;
            $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons
            $brokenLink->write();
        }

        // Check if we need to update CSS class, otherwise return
        if ($markedBroken === $foundBroken) {
            return;
        }
        if ($foundBroken) {
            $classNames[] = 'ss-broken';
        } else {
            $classNames = array_diff($classNames, ['ss-broken']);
        }
        $link->setAttribute('class', implode(' ', $classNames));
    }

    /**
     * Determine if the given HTTP code is "broken"
     *
     * A code is broken when it lies outside the inclusive range 200-302, unless it is listed in
     * the `ignore_codes` config. Null is never broken.
     *
     * @param int $httpCode
     * @return bool True if this is a broken code
     */
    protected function isCodeBroken($httpCode)
    {
        // Null represents no request attempted
        if ($httpCode === null) {
            return false;
        }

        // do we have any whitelisted codes
        // NOTE(review): the comparison is intentionally loose; configured codes may presumably be
        // strings as well as integers - confirm before making this strict
        $ignoreCodes = $this->config()->get('ignore_codes');
        if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) {
            return false;
        }

        // Check if code is outside valid range
        return $httpCode < 200 || $httpCode > 302;
    }

    /**
     * Runs the links checker and returns the track used
     *
     * Every visited track is flagged as processed before its page is inspected, so a page that is
     * skipped (missing page, invalid HTML, failed write) is not picked up again for this status.
     *
     * @param int $limit Limit to number of pages to run, or null to run all. Any falsy value
     *                   (including 0) applies no limit.
     * @return BrokenExternalPageTrackStatus
     */
    public function runLinksCheck($limit = null)
    {
        // Check the current status
        $status = BrokenExternalPageTrackStatus::get_or_create();

        // Calculate pages to run
        $pageTracks = $status->getIncompleteTracks();
        if ($limit) {
            $pageTracks = $pageTracks->limit($limit);
        }

        // Check each page
        foreach ($pageTracks as $pageTrack) {
            // Flag as complete
            $pageTrack->Processed = 1;
            $pageTrack->write();

            // Skip tracks whose page is gone: writing such a record further down would store a
            // blank page instead of updating an existing one.
            // NOTE(review): assumes Page() yields an unsaved, empty record when the page no
            // longer exists - confirm against the relation definition
            $page = $pageTrack->Page();
            if (!$page || !$page->exists()) {
                continue;
            }

            // Check value of html area
            $this->log("Checking {$page->Title}");
            $htmlValue = Injector::inst()->create('HTMLValue', $page->Content);
            if (!$htmlValue->isValid()) {
                continue;
            }

            // Check each link
            $links = $htmlValue->getElementsByTagName('a');
            foreach ($links as $link) {
                $this->checkPageLink($pageTrack, $link);
            }

            // Update content of page based on link fixes / breakages
            $htmlValue->saveHTML();
            $page->Content = $htmlValue->getContent();
            try {
                $page->write();
            } catch (ValidationException $ex) {
                $this->log("Exception caught for {$page->Title}, skipping. Message: " . $ex->getMessage());
                continue;
            }

            // Once all links have been created for this page update HasBrokenLinks
            $count = $pageTrack->BrokenLinks()->count();
            $this->log("Found {$count} broken links");
            if ($count) {
                $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class);
                // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true.
                // The row to flag is the page itself; the track's own ID is a key of a different table.
                DB::query(sprintf(
                    'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'',
                    $siteTreeTable,
                    intval($page->ID)
                ));
            }
        }

        $status->updateJobInfo('Updating completed pages');
        $status->updateStatus();
        return $status;
    }

    /**
     * Count the processed page tracks of the given status and store that number as
     * CompletedPages on the latest status record, when one exists.
     *
     * @param int $trackID Value matched against the TrackID field of the page tracks
     * @return int Number of processed page tracks found
     */
    private function updateCompletedPages($trackID = 0)
    {
        $noPages = BrokenExternalPageTrack::get()
            ->filter([
                'TrackID' => $trackID,
                'Processed' => 1
            ])
            ->count();

        // There may be no status record yet; only persist the count when there is one
        $track = BrokenExternalPageTrackStatus::get_latest();
        if ($track) {
            $track->CompletedPages = $noPages;
            $track->write();
        }
        return $noPages;
    }

    /**
     * Store a progress message as JobInfo on the latest status record, when one exists.
     *
     * @param string $message
     */
    private function updateJobInfo($message)
    {
        $track = BrokenExternalPageTrackStatus::get_latest();
        if ($track) {
            $track->JobInfo = $message;
            $track->write();
        }
    }
}
||
247 |