Completed
Pull Request — master (#27)
by
unknown
03:13
created

CheckExternalLinksTask   A

Complexity

Total Complexity 25

Size/Duplication

Total Lines 208
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
dl 0
loc 208
rs 10
c 0
b 0
f 0
wmc 25

10 Methods

Rating   Name   Duplication   Size   Complexity  
B runLinksCheck() 0 51 6
B isCodeBroken() 0 15 5
A setSilent() 0 3 1
A setLinkChecker() 0 3 1
A updateJobInfo() 0 6 2
B checkPageLink() 0 33 5
A updateCompletedPages() 0 12 1
A run() 0 3 1
A log() 0 4 2
A getLinkChecker() 0 3 1
1
<?php
2
3
namespace SilverStripe\ExternalLinks\Tasks;
4
5
use DOMNode;
6
7
8
9
10
11
use SilverStripe\Dev\Debug;
12
use SilverStripe\ExternalLinks\Model\BrokenExternalPageTrack;
13
use SilverStripe\ExternalLinks\Model\BrokenExternalLink;
14
use SilverStripe\Core\Config\Config;
15
use SilverStripe\ExternalLinks\Model\BrokenExternalPageTrackStatus;
16
use SilverStripe\Core\Injector\Injector;
17
use SilverStripe\ORM\DB;
18
use SilverStripe\Dev\BuildTask;
19
20
/**
 * Build task that walks the incomplete page tracks of the current
 * BrokenExternalPageTrackStatus, checks every <a> element found in each
 * page's Content, records broken links as BrokenExternalLink rows and keeps
 * the "ss-broken" CSS class on the anchors in sync with the result.
 */
class CheckExternalLinksTask extends BuildTask
{
    /**
     * Injector wiring for the link checker service.
     *
     * Nothing in this class reads the property directly (static analysers
     * therefore report it as unused); it is framework configuration that the
     * SilverStripe Injector is expected to resolve through setLinkChecker().
     *
     * @var array
     */
    private static $dependencies = array(
        'LinkChecker' => '%$LinkChecker'
    );

    /**
     * When true, log() discards its messages.
     *
     * @var bool
     */
    protected $silent = false;

    /**
     * Service that resolves a URL to an HTTP status code (or null).
     *
     * @var LinkChecker
     */
    protected $linkChecker;

    protected $title = 'Checking broken External links in the SiteTree';

    protected $description = 'A task that records external broken links in the SiteTree';

    protected $enabled = true;

    /**
     * Emit a debug message unless the task has been silenced.
     *
     * @param string $message
     */
    protected function log($message)
    {
        if (!$this->silent) {
            Debug::message($message);
        }
    }

    /**
     * Task entry point: runs the link check over all incomplete page tracks.
     *
     * @param mixed $request Request supplied by the task runner; not used here
     */
    public function run($request)
    {
        $this->runLinksCheck();
    }

    /**
     * Turn on or off message output
     *
     * @param bool $silent True to suppress log() output
     */
    public function setSilent($silent)
    {
        $this->silent = $silent;
    }

    /**
     * @param LinkChecker $linkChecker Service used by checkPageLink()
     */
    public function setLinkChecker(LinkChecker $linkChecker)
    {
        $this->linkChecker = $linkChecker;
    }

    /**
     * @return LinkChecker
     */
    public function getLinkChecker()
    {
        return $this->linkChecker;
    }

    /**
     * Check the status of a single link on a page.
     *
     * Writes a BrokenExternalLink record when the link's HTTP code is
     * considered broken, and adds or removes the "ss-broken" CSS class on the
     * node so that it reflects the outcome of this check.
     *
     * @param BrokenExternalPageTrack $pageTrack Track the broken link is recorded against
     * @param DOMNode $link Anchor node; must support getAttribute()/setAttribute()
     */
    protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link)
    {
        $class = $link->getAttribute('class');
        $href = $link->getAttribute('href');

        // preg_match() yields an int (or false on error); normalise to bool so
        // it can be compared strictly with the result of isCodeBroken() below.
        $markedBroken = (bool) preg_match('/\b(ss-broken)\b/', $class);

        // Check link
        $httpCode = $this->linkChecker->checkLink($href);
        if ($httpCode === null) {
            return; // Null link means uncheckable, such as an internal link
        }

        // If this code is broken then mark as such
        $foundBroken = (bool) $this->isCodeBroken($httpCode);
        if ($foundBroken) {
            // Create broken record
            $brokenLink = new BrokenExternalLink();
            $brokenLink->Link = $href;
            $brokenLink->HTTPCode = $httpCode;
            $brokenLink->TrackID = $pageTrack->ID;
            $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons
            $brokenLink->write();
        }

        // Nothing to do when the CSS class already matches the check result
        if ($markedBroken === $foundBroken) {
            return;
        }

        if ($foundBroken) {
            $class .= ' ss-broken';
        } else {
            $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class);
        }
        $link->setAttribute('class', trim($class));
    }

    /**
     * Determine if the given HTTP code is "broken"
     *
     * A code is broken when it lies outside the 200-302 range, unless it is
     * listed in the 'IgnoreCodes' config of 'CheckExternalLinks'.
     *
     * @param int|null $httpCode
     * @return bool True if this is a broken code
     */
    protected function isCodeBroken($httpCode)
    {
        // Null represents no request attempted
        if ($httpCode === null) {
            return false;
        }

        // do we have any whitelisted codes
        // NOTE(review): the loose in_array() is kept on purpose, as configured
        // codes may be strings or ints - confirm before making it strict.
        $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes');
        if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) {
            return false;
        }

        // Check if code is outside valid range
        return $httpCode < 200 || $httpCode > 302;
    }

    /**
     * Runs the links checker and returns the track used
     *
     * Each incomplete page track is flagged as processed, every anchor in the
     * page's Content is checked, the (possibly re-classed) HTML is written
     * back to the page, and HasBrokenLink is set on the page when at least
     * one broken link was recorded for the track.
     *
     * @param int|null $limit Limit to number of pages to run; null (or any
     *                        other falsy value such as 0) runs all pages
     * @return BrokenExternalPageTrackStatus
     */
    public function runLinksCheck($limit = null)
    {
        // Check the current status
        $status = BrokenExternalPageTrackStatus::get_or_create();

        // Calculate pages to run
        $pageTracks = $status->getIncompleteTracks();
        // Truthiness check is deliberate: 0 is treated like null (no limit)
        if ($limit) {
            $pageTracks = $pageTracks->limit($limit);
        }

        // Check each page
        foreach ($pageTracks as $pageTrack) {
            // Flag as complete
            $pageTrack->Processed = 1;
            $pageTrack->write();

            // Check value of html area
            $page = $pageTrack->Page();
            $this->log("Checking {$page->Title}");
            $htmlValue = Injector::inst()->create('HTMLValue', $page->Content);
            if (!$htmlValue->isValid()) {
                continue;
            }

            // Check each link
            $links = $htmlValue->getElementsByTagName('a');
            foreach ($links as $link) {
                $this->checkPageLink($pageTrack, $link);
            }

            // Update content of page based on link fixes / breakages
            $htmlValue->saveHTML();
            $page->Content = $htmlValue->getContent();
            $page->write();

            // Once all links have been created for this page update HasBrokenLinks
            $count = $pageTrack->BrokenLinks()->count();
            $this->log("Found {$count} broken links");
            if ($count) {
                // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true.
                // The row to flag is the page itself, so the page's ID is used
                // rather than the ID of the tracking record.
                DB::query(sprintf(
                    'UPDATE "SiteTree" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'',
                    intval($page->ID)
                ));
            }
        }

        $status->updateJobInfo('Updating completed pages');
        $status->updateStatus();
        return $status;
    }

    /**
     * Count the processed page tracks matching $trackID and store that count
     * as CompletedPages on the latest status record, when one exists.
     *
     * Not called from within this class at present.
     * NOTE(review): the filter uses 'TrackID' on BrokenExternalPageTrack while
     * checkPageLink() reads 'StatusID' from that model - confirm the field name.
     *
     * @param int $trackID
     * @return int Number of processed page tracks found
     */
    private function updateCompletedPages($trackID = 0)
    {
        $noPages = BrokenExternalPageTrack::get()
            ->filter(array(
                'TrackID' => $trackID,
                'Processed' => 1
            ))
            ->count();

        // get_latest() can yield no record (updateJobInfo() guards the same
        // call), so only write when there is a status to update.
        $track = BrokenExternalPageTrackStatus::get_latest();
        if ($track) {
            $track->CompletedPages = $noPages;
            $track->write();
        }

        return $noPages;
    }

    /**
     * Store a progress message on the latest status record, if there is one.
     *
     * @param string $message
     */
    private function updateJobInfo($message)
    {
        $track = BrokenExternalPageTrackStatus::get_latest();
        if ($track) {
            $track->JobInfo = $message;
            $track->write();
        }
    }
}
231