Passed
Push — master ( 673916...a119ad )
by
unknown
02:00
created

src/Tasks/CheckExternalLinksTask.php (7 issues)

1
<?php
2
3
namespace SilverStripe\ExternalLinks\Tasks;
4
5
use DOMNode;
6
use SilverStripe\CMS\Model\SiteTree;
7
use SilverStripe\Core\Config\Config;
8
use SilverStripe\Core\Injector\Injector;
9
use SilverStripe\Dev\BuildTask;
10
use SilverStripe\Dev\Debug;
11
use SilverStripe\ExternalLinks\Model\BrokenExternalLink;
12
use SilverStripe\ExternalLinks\Model\BrokenExternalPageTrack;
13
use SilverStripe\ExternalLinks\Model\BrokenExternalPageTrackStatus;
14
use SilverStripe\ExternalLinks\Tasks\LinkChecker;
15
use SilverStripe\ORM\DataObject;
16
use SilverStripe\ORM\DB;
17
18
class CheckExternalLinksTask extends BuildTask
19
{
20
    /**
     * Injector dependencies for this task. The 'LinkChecker' entry is a '%$'-prefixed
     * reference to the LinkChecker class.
     *
     * NOTE(review): presumably resolved by the Injector and handed to setLinkChecker() — confirm.
     *
     * @var array
     */
    private static $dependencies = [
21
        'LinkChecker' => '%$' . LinkChecker::class
22
    ];
23
24
    /**
     * Segment name of this task.
     *
     * NOTE(review): presumably the segment BuildTask uses to address the task — confirm.
     *
     * @var string
     */
    private static $segment = 'CheckExternalLinksTask';
25
26
    /**
27
     * Define a list of HTTP response codes that should not be treated as "broken", where they usually
28
     * might be.
29
     *
     * Read by isCodeBroken(); a code found in this list is reported as not broken.
     *
30
     * @config
31
     * @var array
32
     */
33
    private static $ignore_codes = [];
34
35
    /**
36
     * When true, log() does not output messages. Changed through setSilent().
     *
     * @var bool
37
     */
38
    protected $silent = false;
39
40
    /**
41
     * Checker used by checkPageLink() to obtain the HTTP status of a link.
     *
     * @var LinkChecker
42
     */
43
    protected $linkChecker;
44
45
    /**
     * Human-readable title of the task
     *
     * @var string
     */
    protected $title = 'Checking broken External links in the SiteTree';
46
47
    /**
     * Human-readable description of what the task does
     *
     * @var string
     */
    protected $description = 'A task that records external broken links in the SiteTree';
48
49
    /**
     * Whether the task is enabled.
     *
     * NOTE(review): presumably consulted by BuildTask before running — confirm.
     *
     * @var bool
     */
    protected $enabled = true;
50
51
    /**
     * Emit a debug message, unless output has been silenced via setSilent()
     *
     * @param string $message Text to output
     */
    protected function log($message)
    {
        // Silent mode: drop the message without producing any output
        if ($this->silent) {
            return;
        }

        Debug::message($message);
    }
62
63
    /**
     * Task entry point: checks every incomplete page track (no page limit)
     *
     * @param mixed $request The incoming request; it is not used by this task
     */
    public function run($request)
    {
        $this->runLinksCheck();
    }
67
    /**
     * Enable or disable the messages written by log()
     *
     * @param bool $silent True to suppress output, false to allow it
     */
    public function setSilent($silent)
    {
        $this->silent = $silent;
    }
76
77
    /**
     * Assign the checker that checkPageLink() uses to look up link statuses
     *
     * @param LinkChecker $linkChecker Checker instance to use
     */
    public function setLinkChecker(LinkChecker $linkChecker)
    {
        $this->linkChecker = $linkChecker;
    }
84
85
    /**
     * Get the checker currently assigned to this task
     *
     * @return LinkChecker The assigned checker (unset until setLinkChecker() has been called)
     */
    public function getLinkChecker()
    {
        return $this->linkChecker;
    }
92
93
    /**
     * Check the status of a single link on a page
     *
     * Asks the link checker for the HTTP status of the link's href. When that status counts
     * as broken (see isCodeBroken()) a BrokenExternalLink record is written against the given
     * page track. The "ss-broken" CSS class on the link is then added or removed so that the
     * markup matches the result of the check.
     *
     * @param BrokenExternalPageTrack $pageTrack Track record the broken link is recorded against
     * @param DOMNode $link Anchor node to check; nodes that are not elements are skipped
     */
    protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link)
    {
        // Only element nodes carry attributes: DOMNode itself declares neither
        // getAttribute() nor setAttribute(), so any other node type cannot be checked
        if (!($link instanceof \DOMElement)) {
            return;
        }

        $class = $link->getAttribute('class');
        $href = $link->getAttribute('href');
        // preg_match() yields an int (or false on error); normalise so it can be
        // compared strictly against the boolean check result below
        $markedBroken = (bool) preg_match('/\b(ss-broken)\b/', $class);

        // Check link
        $httpCode = $this->linkChecker->checkLink($href);
        if ($httpCode === null) {
            return; // Null link means uncheckable, such as an internal link
        }

        // If this code is broken then mark as such
        $foundBroken = (bool) $this->isCodeBroken($httpCode);
        if ($foundBroken) {
            // Create broken record. Link, HTTPCode, TrackID and StatusID are not declared
            // properties; static analysis reports them as handled by the models' magic
            // __set/__get (a @property annotation on the models would document them)
            $brokenLink = new BrokenExternalLink();
            $brokenLink->Link = $href;
            $brokenLink->HTTPCode = $httpCode;
            $brokenLink->TrackID = $pageTrack->ID;
            $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons
            $brokenLink->write();
        }

        // Nothing to update when the CSS class already reflects the check result
        if ($markedBroken === $foundBroken) {
            return;
        }

        if ($foundBroken) {
            $class .= ' ss-broken';
        } else {
            // Collapse the removed class and its surrounding whitespace into one space
            $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class);
        }
        $link->setAttribute('class', trim($class));
    }
133
134
    /**
     * Decide whether an HTTP status code should be reported as a broken link
     *
     * @param int $httpCode Status code to classify; null means no request was attempted
     * @return bool True if this is a broken code
     */
    protected function isCodeBroken($httpCode)
    {
        // No request was attempted, so there is nothing to flag
        if ($httpCode === null) {
            return false;
        }

        // Codes listed in the ignore_codes config are never reported as broken
        $whitelist = $this->config()->get('ignore_codes');
        $isWhitelisted = is_array($whitelist) && in_array($httpCode, $whitelist);
        if ($isWhitelisted) {
            return false;
        }

        // Everything outside the inclusive 200-302 range counts as broken
        $belowRange = $httpCode < 200;
        $aboveRange = $httpCode > 302;

        return $belowRange || $aboveRange;
    }
156
157
    /**
     * Runs the links checker and returns the track used
     *
     * Every incomplete page track is flagged as processed, each anchor in the page's Content
     * is passed to checkPageLink(), the resulting HTML is written back to the page, and pages
     * with at least one recorded broken link get HasBrokenLink set on their SiteTree row.
     *
     * @param int|null $limit Maximum number of pages to process. Null, or any other falsy
     *                        value such as 0, processes every incomplete track.
     * @return BrokenExternalPageTrackStatus
     */
    public function runLinksCheck($limit = null)
    {
        // Check the current status
        $status = BrokenExternalPageTrackStatus::get_or_create();

        // Calculate pages to run
        $pageTracks = $status->getIncompleteTracks();
        // Deliberately a truthiness check: both null and 0 mean "no limit". Static analysis
        // flags this as ambiguous, but a strict null check would start passing 0 to limit()
        if ($limit) {
            $pageTracks = $pageTracks->limit($limit);
        }

        // Check each page
        foreach ($pageTracks as $pageTrack) {
            // Flag as complete
            $pageTrack->Processed = 1;
            $pageTrack->write();

            // Check value of html area
            $page = $pageTrack->Page();
            $this->log("Checking {$page->Title}");
            $htmlValue = Injector::inst()->create('HTMLValue', $page->Content);
            if (!$htmlValue->isValid()) {
                continue;
            }

            // Check each link
            $links = $htmlValue->getElementsByTagName('a');
            foreach ($links as $link) {
                $this->checkPageLink($pageTrack, $link);
            }

            // Update content of page based on link fixes / breakages
            $htmlValue->saveHTML();
            $page->Content = $htmlValue->getContent();
            $page->write();

            // Once all links have been created for this page update HasBrokenLinks
            $count = $pageTrack->BrokenLinks()->count();
            $this->log("Found {$count} broken links");
            if ($count) {
                $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class);
                // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true.
                // The row is keyed on the page's own ID: the table belongs to SiteTree, whereas
                // $pageTrack->ID identifies the BrokenExternalPageTrack record, not the page.
                DB::query(sprintf(
                    'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'',
                    $siteTreeTable,
                    intval($page->ID)
                ));
            }
        }

        $status->updateJobInfo('Updating completed pages');
        $status->updateStatus();
        return $status;
    }
217
218
    /**
     * Count the processed page tracks and store that count on the latest status record
     *
     * NOTE(review): no call to this method is visible within this class — confirm it is still needed.
     *
     * @param int $trackID Value the page tracks' TrackID filter is matched against
     * @return int Number of matching page tracks with Processed = 1
     */
    private function updateCompletedPages($trackID = 0)
    {
        $noPages = BrokenExternalPageTrack::get()
            ->filter([
                'TrackID' => $trackID,
                'Processed' => 1,
            ])
            ->count();

        // get_latest() can come back empty (updateJobInfo() guards the same lookup),
        // so only write the count when there is a status record to hold it
        $track = BrokenExternalPageTrackStatus::get_latest();
        if ($track) {
            $track->CompletedPages = $noPages;
            $track->write();
        }

        return $noPages;
    }
231
232
    /**
     * Record a progress message on the latest status record, when one exists
     *
     * NOTE(review): no call to this method is visible within this class — confirm it is still needed.
     *
     * @param string $message Text stored in the record's JobInfo field
     */
    private function updateJobInfo($message)
    {
        $latest = BrokenExternalPageTrackStatus::get_latest();
        if (!$latest) {
            // No status record to update
            return;
        }

        // JobInfo is not a declared property (static analysis flags this); it is assigned dynamically
        $latest->JobInfo = $message;
        $latest->write();
    }
240
}
241