1 | <?php |
||
2 | |||
3 | namespace SilverStripe\ExternalLinks\Tasks; |
||
4 | |||
5 | use DOMNode; |
||
6 | use SilverStripe\CMS\Model\SiteTree; |
||
7 | use SilverStripe\Core\Config\Config; |
||
8 | use SilverStripe\Core\Injector\Injector; |
||
9 | use SilverStripe\Dev\BuildTask; |
||
10 | use SilverStripe\Dev\Debug; |
||
11 | use SilverStripe\ExternalLinks\Model\BrokenExternalLink; |
||
12 | use SilverStripe\ExternalLinks\Model\BrokenExternalPageTrack; |
||
13 | use SilverStripe\ExternalLinks\Model\BrokenExternalPageTrackStatus; |
||
14 | use SilverStripe\ExternalLinks\Tasks\LinkChecker; |
||
15 | use SilverStripe\ORM\DataObject; |
||
16 | use SilverStripe\ORM\DB; |
||
17 | |||
/**
 * Build task that scans the HTML content of tracked pages for links, asks the
 * injected LinkChecker for the HTTP status of each one, records broken links as
 * BrokenExternalLink records and keeps the "ss-broken" CSS class on the anchors
 * in sync with the result.
 */
class CheckExternalLinksTask extends BuildTask
{
    /**
     * Injector dependency specification; supplies the LinkChecker service
     * through {@see setLinkChecker()}.
     *
     * @var array
     */
    private static $dependencies = [
        'LinkChecker' => '%$' . LinkChecker::class
    ];

    /**
     * URL segment of this task.
     *
     * @var string
     */
    private static $segment = 'CheckExternalLinksTask';

    /**
     * Define a list of HTTP response codes that should not be treated as "broken", where they usually
     * might be.
     *
     * @config
     * @var array
     */
    private static $ignore_codes = [];

    /**
     * When true, {@see log()} produces no output.
     *
     * @var bool
     */
    protected $silent = false;

    /**
     * Service used to resolve the HTTP status code of a link.
     *
     * @var LinkChecker
     */
    protected $linkChecker;

    /**
     * @var string
     */
    protected $title = 'Checking broken External links in the SiteTree';

    /**
     * @var string
     */
    protected $description = 'A task that records external broken links in the SiteTree';

    /**
     * @var bool
     */
    protected $enabled = true;

    /**
     * Log a message, unless output has been silenced via {@see setSilent()}.
     *
     * @param string $message
     */
    protected function log($message)
    {
        if ($this->silent) {
            return;
        }

        Debug::message($message);
    }

    /**
     * Task entry point: checks all incomplete page tracks without a limit.
     *
     * @param mixed $request Request that triggered the task (not used)
     */
    public function run($request)
    {
        $this->runLinksCheck();
    }

    /**
     * Turn on or off message output
     *
     * @param bool $silent
     */
    public function setSilent($silent)
    {
        $this->silent = $silent;
    }

    /**
     * @param LinkChecker $linkChecker
     */
    public function setLinkChecker(LinkChecker $linkChecker)
    {
        $this->linkChecker = $linkChecker;
    }

    /**
     * @return LinkChecker
     */
    public function getLinkChecker()
    {
        return $this->linkChecker;
    }

    /**
     * Check the status of a single link on a page.
     *
     * Writes a BrokenExternalLink record when the link's HTTP code is considered
     * broken, and adds or removes the "ss-broken" CSS class on the anchor so it
     * matches the outcome. The caller is responsible for saving the modified DOM.
     *
     * @param BrokenExternalPageTrack $pageTrack Track record the link belongs to
     * @param DOMNode $link Anchor node; getAttribute()/setAttribute() are called on it,
     *                      so in practice it needs to be an element node
     */
    protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link)
    {
        $class = $link->getAttribute('class');
        $href = $link->getAttribute('href');
        // Normalise preg_match()'s int result so it can be compared strictly below
        $markedBroken = (bool) preg_match('/\b(ss-broken)\b/', $class);

        // Check link
        $httpCode = $this->linkChecker->checkLink($href);
        if ($httpCode === null) {
            return; // Null link means uncheckable, such as an internal link
        }

        // If this code is broken then mark as such
        $foundBroken = $this->isCodeBroken($httpCode);
        if ($foundBroken) {
            // Create broken record
            $brokenLink = new BrokenExternalLink();
            $brokenLink->Link = $href;
            $brokenLink->HTTPCode = $httpCode;
            $brokenLink->TrackID = $pageTrack->ID;
            $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons
            $brokenLink->write();
        }

        // Check if we need to update CSS class, otherwise return
        if ($markedBroken === $foundBroken) {
            return;
        }

        if ($foundBroken) {
            $class .= ' ss-broken';
        } else {
            $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class);
        }
        $link->setAttribute('class', trim($class));
    }

    /**
     * Determine if the given HTTP code is "broken"
     *
     * @param int $httpCode
     * @return bool True if this is a broken code
     */
    protected function isCodeBroken($httpCode)
    {
        // Null represents no request attempted
        if ($httpCode === null) {
            return false;
        }

        // Codes listed in the ignore_codes config are never reported as broken.
        // The comparison is intentionally non-strict so that codes configured as
        // strings still match integer response codes.
        $ignoreCodes = $this->config()->get('ignore_codes');
        if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) {
            return false;
        }

        // Check if code is outside valid range
        return $httpCode < 200 || $httpCode > 302;
    }

    /**
     * Runs the links checker and returns the track used
     *
     * @param int|null $limit Limit to number of pages to run, or null to run all.
     *                        Any falsy value (including 0) applies no limit.
     * @return BrokenExternalPageTrackStatus
     */
    public function runLinksCheck($limit = null)
    {
        // Check the current status
        $status = BrokenExternalPageTrackStatus::get_or_create();

        // Calculate pages to run
        $pageTracks = $status->getIncompleteTracks();
        if ($limit) {
            $pageTracks = $pageTracks->limit($limit);
        }

        // Check each page
        foreach ($pageTracks as $pageTrack) {
            // Flag as complete up front so this track is not picked up again
            $pageTrack->Processed = 1;
            $pageTrack->write();

            // The page may have been removed since the track was created; there is
            // nothing to check then, and writing it below must be avoided.
            $page = $pageTrack->Page();
            if (!$page || !$page->exists()) {
                $this->log("Skipping track {$pageTrack->ID}: page no longer exists");
                continue;
            }

            // Check value of html area
            $this->log("Checking {$page->Title}");
            $htmlValue = Injector::inst()->create('HTMLValue', $page->Content);
            if (!$htmlValue->isValid()) {
                continue;
            }

            // Check each link
            $links = $htmlValue->getElementsByTagName('a');
            foreach ($links as $link) {
                $this->checkPageLink($pageTrack, $link);
            }

            // Update content of page based on link fixes / breakages
            $htmlValue->saveHTML();
            $page->Content = $htmlValue->getContent();
            $page->write();

            // Once all links have been created for this page update HasBrokenLinks
            $count = $pageTrack->BrokenLinks()->count();
            $this->log("Found {$count} broken links");
            if ($count) {
                $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class);
                // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true.
                // The row to flag is the page itself, so filter on the page's ID rather than
                // the ID of the tracking record.
                DB::query(sprintf(
                    'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'',
                    $siteTreeTable,
                    intval($page->ID)
                ));
            }
        }

        $status->updateJobInfo('Updating completed pages');
        $status->updateStatus();
        return $status;
    }

    /**
     * Count the processed page tracks for the given ID and store that number as
     * CompletedPages on the latest status record, if one exists.
     *
     * NOTE(review): the filter uses a "TrackID" field on BrokenExternalPageTrack, while
     * checkPageLink() reads "StatusID" from the same model - confirm the field name.
     *
     * @param int $trackID
     * @return int Number of processed pages
     */
    private function updateCompletedPages($trackID = 0)
    {
        $noPages = BrokenExternalPageTrack::get()
            ->filter([
                'TrackID' => $trackID,
                'Processed' => 1
            ])
            ->count();

        // Mirror updateJobInfo(): there may be no status record yet
        $track = BrokenExternalPageTrackStatus::get_latest();
        if ($track) {
            $track->CompletedPages = $noPages;
            $track->write();
        }

        return $noPages;
    }

    /**
     * Store a progress message as JobInfo on the latest status record, if one exists.
     *
     * @param string $message
     */
    private function updateJobInfo($message)
    {
        $track = BrokenExternalPageTrackStatus::get_latest();
        if ($track) {
            $track->JobInfo = $message;
            $track->write();
        }
    }
}
||
241 |