Issues (51)

src/Tasks/CheckExternalLinksTask.php (12 issues)

1
<?php
2
3
namespace SilverStripe\ExternalLinks\Tasks;
4
5
use DOMNode;
6
use SilverStripe\CMS\Model\SiteTree;
7
use SilverStripe\Core\Config\Config;
8
use SilverStripe\Core\Injector\Injector;
9
use SilverStripe\Dev\BuildTask;
10
use SilverStripe\Dev\Debug;
11
use SilverStripe\ExternalLinks\Model\BrokenExternalLink;
12
use SilverStripe\ExternalLinks\Model\BrokenExternalPageTrack;
13
use SilverStripe\ExternalLinks\Model\BrokenExternalPageTrackStatus;
14
use SilverStripe\ExternalLinks\Tasks\LinkChecker;
15
use SilverStripe\ORM\DataObject;
16
use SilverStripe\ORM\DB;
17
use SilverStripe\ORM\ValidationException;
18
19
class CheckExternalLinksTask extends BuildTask
20
{
21
    private static $dependencies = [
0 ignored issues
show
The private property $dependencies is not used, and could be removed.
Loading history...
22
        'LinkChecker' => '%$' . LinkChecker::class
23
    ];
24
25
    private static $segment = 'CheckExternalLinksTask';
0 ignored issues
show
The private property $segment is not used, and could be removed.
Loading history...
26
27
    /**
28
     * Define a list of HTTP response codes that should not be treated as "broken", where they usually
29
     * might be.
30
     *
31
     * @config
32
     * @var array
33
     */
34
    private static $ignore_codes = [];
0 ignored issues
show
The private property $ignore_codes is not used, and could be removed.
Loading history...
35
36
    /**
37
     * @var bool
38
     */
39
    protected $silent = false;
40
41
    /**
42
     * @var LinkChecker
43
     */
44
    protected $linkChecker;
45
46
    protected $title = 'Checking broken External links in the SiteTree';
47
48
    protected $description = 'A task that records external broken links in the SiteTree';
49
50
    protected $enabled = true;
51
52
    /**
     * Emit a debug message unless output has been silenced.
     *
     * @param string $message Text handed to Debug::message()
     */
    protected function log($message)
    {
        // Nothing is printed while the silent flag is set
        if ($this->silent) {
            return;
        }

        Debug::message($message);
    }
63
64
    /**
     * Task entry point: runs the link check with no page limit.
     *
     * @param mixed $request Not read by this implementation
     */
    public function run($request)
    {
        // null is runLinksCheck()'s own default, spelled out here
        $this->runLinksCheck(null);
    }
68
    /**
     * Enable or disable suppression of the messages written through log().
     *
     * @param bool $silent True to suppress output, false to allow it
     */
    public function setSilent($silent)
    {
        $this->silent = $silent;
    }
77
78
    /**
     * Assign the checker used to resolve the HTTP status of each link.
     *
     * @param LinkChecker $linkChecker Checker whose checkLink() is called per link
     */
    public function setLinkChecker(LinkChecker $linkChecker)
    {
        $this->linkChecker = $linkChecker;
    }
85
86
    /**
     * Retrieve the checker previously assigned with setLinkChecker().
     *
     * @return LinkChecker
     */
    public function getLinkChecker()
    {
        return $this->linkChecker;
    }
93
94
    /**
     * Check the status of a single link on a page
     *
     * Reads the anchor's href, asks the link checker for its HTTP code, writes a
     * BrokenExternalLink record when that code is considered broken, and keeps the
     * anchor's "ss-broken" CSS class in sync with the result.
     *
     * @param BrokenExternalPageTrack $pageTrack Track that a broken link is recorded against
     * @param DOMNode $link Anchor node; only element nodes are processed
     */
    protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link)
    {
        // getAttribute()/setAttribute() exist on elements only, not on every DOMNode
        if (!$link instanceof \DOMElement) {
            return;
        }

        $class = $link->getAttribute('class');
        $href = $link->getAttribute('href');

        // Match "ss-broken" only as a complete class token: a plain \b boundary
        // would also match inside hyphenated names such as "ss-broken-icon".
        $markedBroken = preg_match('/(?<![\w-])ss-broken(?![\w-])/', $class) === 1;

        // Check link
        $httpCode = $this->linkChecker->checkLink($href);
        if ($httpCode === null) {
            return; // Null link means uncheckable, such as an internal link
        }

        // If this code is broken then mark as such
        $foundBroken = (bool) $this->isCodeBroken($httpCode);
        if ($foundBroken) {
            // Create broken record
            $brokenLink = new BrokenExternalLink();
            $brokenLink->Link = $href;
            $brokenLink->HTTPCode = $httpCode;
            $brokenLink->TrackID = $pageTrack->ID;
            $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons
            $brokenLink->write();
        }

        // Check if we need to update CSS class, otherwise return
        if ($markedBroken === $foundBroken) {
            return;
        }

        if ($foundBroken) {
            $class .= ' ss-broken';
        } else {
            // Drop the marker together with its surrounding whitespace
            $class = preg_replace('/\s*(?<![\w-])ss-broken(?![\w-])\s*/', ' ', $class);
        }
        $link->setAttribute('class', trim($class));
    }
134
135
    /**
     * Decide whether an HTTP status code counts as a broken link.
     *
     * A null code is never broken, nor is any code listed in the "ignore_codes"
     * config. Every other code is broken when it lies outside 200-302 inclusive.
     *
     * @param int $httpCode
     * @return bool True if this is a broken code
     */
    protected function isCodeBroken($httpCode)
    {
        // Null represents no request attempted
        if ($httpCode === null) {
            return false;
        }

        // Codes that are not whitelisted are judged by the valid range
        $whitelist = $this->config()->get('ignore_codes');
        if (!is_array($whitelist) || !in_array($httpCode, $whitelist)) {
            return $httpCode < 200 || $httpCode > 302;
        }

        // Whitelisted codes are never reported as broken
        return false;
    }
157
158
    /**
     * Runs the links checker and returns the track used
     *
     * Each incomplete page track is flagged as processed, every anchor in the page's
     * Content is checked, the (possibly re-classed) HTML is written back to the page,
     * and the page's SiteTree row is flagged when broken links were recorded.
     *
     * @param int $limit Limit to number of pages to run, or null to run all
     *                   (0 is also treated as "no limit")
     * @return BrokenExternalPageTrackStatus
     */
    public function runLinksCheck($limit = null)
    {
        // Check the current status
        $status = BrokenExternalPageTrackStatus::get_or_create();

        // Calculate pages to run
        $pageTracks = $status->getIncompleteTracks();
        // Explicit form of the previous truthiness test: null and 0 both mean "all"
        if ($limit !== null && (int) $limit !== 0) {
            $pageTracks = $pageTracks->limit($limit);
        }

        // Check each page
        foreach ($pageTracks as $pageTrack) {
            // Flag as complete
            $pageTrack->Processed = 1;
            $pageTrack->write();

            // Check value of html area
            $page = $pageTrack->Page();
            // NOTE(review): assumes Page() yields an unsaved placeholder record when the
            // tracked page is gone (usual has_one behaviour) - confirm. Writing such a
            // placeholder below would create a new, empty page.
            if (!$page || !$page->exists()) {
                continue;
            }

            $this->log("Checking {$page->Title}");
            $htmlValue = Injector::inst()->create('HTMLValue', $page->Content);
            if (!$htmlValue->isValid()) {
                continue;
            }

            // Check each link
            $links = $htmlValue->getElementsByTagName('a');
            foreach ($links as $link) {
                $this->checkPageLink($pageTrack, $link);
            }

            // Update content of page based on link fixes / breakages
            $htmlValue->saveHTML();
            $page->Content = $htmlValue->getContent();
            try {
                $page->write();
            } catch (ValidationException $ex) {
                $this->log("Exception caught for {$page->Title}, skipping. Message: " . $ex->getMessage());
                continue;
            }

            // Once all links have been created for this page update HasBrokenLinks
            $count = $pageTrack->BrokenLinks()->count();
            $this->log("Found {$count} broken links");
            if ($count) {
                $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class);
                // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true.
                // The row is addressed by the page's ID: the track's own ID identifies the
                // tracking record, not the SiteTree row.
                DB::query(sprintf(
                    'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'',
                    $siteTreeTable,
                    intval($page->ID)
                ));
            }
        }

        $status->updateJobInfo('Updating completed pages');
        $status->updateStatus();
        return $status;
    }
223
224
    /**
     * Count the processed page tracks matching the given ID and store that total
     * as CompletedPages on the latest status record.
     *
     * NOTE(review): the filter uses a "TrackID" field on BrokenExternalPageTrack, while
     * checkPageLink() reads "StatusID" from the same model - confirm the field name.
     *
     * @param int $trackID Value matched against the "TrackID" filter
     * @return int Number of processed page tracks found
     */
    private function updateCompletedPages($trackID = 0)
    {
        $noPages = BrokenExternalPageTrack::get()
            ->filter([
                'TrackID' => $trackID,
                'Processed' => 1,
            ])
            ->count();

        // Same guard as updateJobInfo(): only write when a status record was returned
        $track = BrokenExternalPageTrackStatus::get_latest();
        if ($track) {
            $track->CompletedPages = $noPages;
            $track->write();
        }

        return $noPages;
    }
237
238
    /**
     * Store a progress message as JobInfo on the latest status record, if one is returned.
     *
     * @param string $message Text assigned to the record's JobInfo field
     */
    private function updateJobInfo($message)
    {
        $latest = BrokenExternalPageTrackStatus::get_latest();
        if (!$latest) {
            // No status record to annotate
            return;
        }

        $latest->JobInfo = $message;
        $latest->write();
    }
246
}
247