@@ -7,9 +7,9 @@ |
||
7 | 7 | |
8 | 8 | class ExternalLinksTestPage extends Page implements TestOnly |
9 | 9 | { |
10 | - private static $table_name = 'ExternalLinksTestPage'; |
|
10 | + private static $table_name = 'ExternalLinksTestPage'; |
|
11 | 11 | |
12 | - private static $db = array( |
|
13 | - 'ExpectedContent' => 'HTMLText' |
|
14 | - ); |
|
12 | + private static $db = array( |
|
13 | + 'ExpectedContent' => 'HTMLText' |
|
14 | + ); |
|
15 | 15 | } |
@@ -16,150 +16,150 @@ |
||
16 | 16 | class ExternalLinksTest extends SapphireTest |
17 | 17 | { |
18 | 18 | |
19 | - protected static $fixture_file = 'ExternalLinksTest.yml'; |
|
20 | - |
|
21 | - protected static $extra_dataobjects = array( |
|
22 | - ExternalLinksTestPage::class |
|
23 | - ); |
|
24 | - |
|
25 | - public function setUpOnce() |
|
26 | - { |
|
27 | - if (class_exists(Phockito::class)) { |
|
28 | - Phockito::include_hamcrest(false); |
|
29 | - } |
|
30 | - |
|
31 | - parent::setUpOnce(); |
|
32 | - } |
|
33 | - |
|
34 | - public function setUp() |
|
35 | - { |
|
36 | - parent::setUp(); |
|
37 | - |
|
38 | - // Check dependencies |
|
39 | - if (!class_exists(Phockito::class)) { |
|
40 | - $this->skipTest = true; |
|
41 | - return $this->markTestSkipped("These tests need the Phockito module installed to run"); |
|
42 | - } |
|
43 | - |
|
44 | - // Mock link checker |
|
45 | - $checker = Phockito::mock(LinkChecker::class); |
|
46 | - Phockito::when($checker) |
|
47 | - ->checkLink('http://www.working.com') |
|
48 | - ->return(200); |
|
49 | - |
|
50 | - Phockito::when($checker) |
|
51 | - ->checkLink('http://www.broken.com/url/thing') // 404 on working site |
|
52 | - ->return(404); |
|
53 | - |
|
54 | - Phockito::when($checker) |
|
55 | - ->checkLink('http://www.broken.com') // 403 on working site |
|
56 | - ->return(403); |
|
57 | - |
|
58 | - Phockito::when($checker) |
|
59 | - ->checkLink('http://www.nodomain.com') // no ping |
|
60 | - ->return(0); |
|
61 | - |
|
62 | - Phockito::when($checker) |
|
63 | - ->checkLink('/internal/link') |
|
64 | - ->return(null); |
|
65 | - |
|
66 | - Phockito::when($checker) |
|
67 | - ->checkLink('[sitetree_link,id=9999]') |
|
68 | - ->return(null); |
|
69 | - |
|
70 | - Phockito::when($checker) |
|
71 | - ->checkLink('home') |
|
72 | - ->return(null); |
|
73 | - |
|
74 | - Phockito::when($checker) |
|
75 | - ->checkLink('broken-internal') |
|
76 | - ->return(null); |
|
77 | - |
|
78 | - Phockito::when($checker) |
|
79 | - ->checkLink('[sitetree_link,id=1]') |
|
80 | - ->return(null); |
|
81 | - |
|
82 | - Phockito::when($checker) |
|
83 | - ->checkLink(Hamcrest_Matchers::anything()) // anything else is 404 |
|
84 | - ->return(404); |
|
85 | - |
|
86 | - Injector::inst()->registerService($checker, LinkChecker::class); |
|
87 | - } |
|
88 | - |
|
89 | - public function testLinks() |
|
90 | - { |
|
91 | - // Run link checker |
|
92 | - $task = CheckExternalLinksTask::create(); |
|
93 | - $task->setSilent(true); // Be quiet during the test! |
|
94 | - $task->runLinksCheck(); |
|
95 | - |
|
96 | - // Get all links checked |
|
97 | - $status = BrokenExternalPageTrackStatus::get_latest(); |
|
98 | - $this->assertEquals('Completed', $status->Status); |
|
99 | - $this->assertEquals(5, $status->TotalPages); |
|
100 | - $this->assertEquals(5, $status->CompletedPages); |
|
101 | - |
|
102 | - // Check all pages have had the correct HTML adjusted |
|
103 | - for ($i = 1; $i <= 5; $i++) { |
|
104 | - $page = $this->objFromFixture('ExternalLinksTestPage', 'page'.$i); |
|
105 | - $this->assertNotEmpty($page->Content); |
|
106 | - $this->assertEquals( |
|
107 | - $page->ExpectedContent, |
|
108 | - $page->Content, |
|
109 | - "Assert that the content of page{$i} has been updated" |
|
110 | - ); |
|
111 | - } |
|
112 | - |
|
113 | - // Check that the correct report of broken links is generated |
|
114 | - $links = $status |
|
115 | - ->BrokenLinks() |
|
116 | - ->sort('Link'); |
|
117 | - |
|
118 | - $this->assertEquals(4, $links->count()); |
|
119 | - $this->assertEquals( |
|
120 | - array( |
|
121 | - 'http://www.broken.com', |
|
122 | - 'http://www.broken.com/url/thing', |
|
123 | - 'http://www.broken.com/url/thing', |
|
124 | - 'http://www.nodomain.com' |
|
125 | - ), |
|
126 | - array_values($links->map('ID', 'Link')->toArray()) |
|
127 | - ); |
|
128 | - |
|
129 | - // Check response codes are correct |
|
130 | - $expected = array( |
|
131 | - 'http://www.broken.com' => 403, |
|
132 | - 'http://www.broken.com/url/thing' => 404, |
|
133 | - 'http://www.nodomain.com' => 0 |
|
134 | - ); |
|
135 | - $actual = $links->map('Link', 'HTTPCode')->toArray(); |
|
136 | - $this->assertEquals($expected, $actual); |
|
137 | - |
|
138 | - // Check response descriptions are correct |
|
139 | - i18n::set_locale('en_NZ'); |
|
140 | - $expected = array( |
|
141 | - 'http://www.broken.com' => '403 (Forbidden)', |
|
142 | - 'http://www.broken.com/url/thing' => '404 (Not Found)', |
|
143 | - 'http://www.nodomain.com' => '0 (Server Not Available)' |
|
144 | - ); |
|
145 | - $actual = $links->map('Link', 'HTTPCodeDescription')->toArray(); |
|
146 | - $this->assertEquals($expected, $actual); |
|
147 | - } |
|
148 | - |
|
149 | - /** |
|
150 | - * Test that broken links appears in the reports list |
|
151 | - */ |
|
152 | - public function testReportExists() |
|
153 | - { |
|
154 | - $reports = Report::get_reports(); |
|
155 | - $reportNames = array(); |
|
156 | - foreach ($reports as $report) { |
|
157 | - $reportNames[] = $report->class; |
|
158 | - } |
|
159 | - $this->assertContains( |
|
160 | - BrokenExternalLinksReport::class, |
|
161 | - $reportNames, |
|
162 | - 'BrokenExternalLinksReport is in reports list' |
|
163 | - ); |
|
164 | - } |
|
19 | + protected static $fixture_file = 'ExternalLinksTest.yml'; |
|
20 | + |
|
21 | + protected static $extra_dataobjects = array( |
|
22 | + ExternalLinksTestPage::class |
|
23 | + ); |
|
24 | + |
|
25 | + public function setUpOnce() |
|
26 | + { |
|
27 | + if (class_exists(Phockito::class)) { |
|
28 | + Phockito::include_hamcrest(false); |
|
29 | + } |
|
30 | + |
|
31 | + parent::setUpOnce(); |
|
32 | + } |
|
33 | + |
|
34 | + public function setUp() |
|
35 | + { |
|
36 | + parent::setUp(); |
|
37 | + |
|
38 | + // Check dependencies |
|
39 | + if (!class_exists(Phockito::class)) { |
|
40 | + $this->skipTest = true; |
|
41 | + return $this->markTestSkipped("These tests need the Phockito module installed to run"); |
|
42 | + } |
|
43 | + |
|
44 | + // Mock link checker |
|
45 | + $checker = Phockito::mock(LinkChecker::class); |
|
46 | + Phockito::when($checker) |
|
47 | + ->checkLink('http://www.working.com') |
|
48 | + ->return(200); |
|
49 | + |
|
50 | + Phockito::when($checker) |
|
51 | + ->checkLink('http://www.broken.com/url/thing') // 404 on working site |
|
52 | + ->return(404); |
|
53 | + |
|
54 | + Phockito::when($checker) |
|
55 | + ->checkLink('http://www.broken.com') // 403 on working site |
|
56 | + ->return(403); |
|
57 | + |
|
58 | + Phockito::when($checker) |
|
59 | + ->checkLink('http://www.nodomain.com') // no ping |
|
60 | + ->return(0); |
|
61 | + |
|
62 | + Phockito::when($checker) |
|
63 | + ->checkLink('/internal/link') |
|
64 | + ->return(null); |
|
65 | + |
|
66 | + Phockito::when($checker) |
|
67 | + ->checkLink('[sitetree_link,id=9999]') |
|
68 | + ->return(null); |
|
69 | + |
|
70 | + Phockito::when($checker) |
|
71 | + ->checkLink('home') |
|
72 | + ->return(null); |
|
73 | + |
|
74 | + Phockito::when($checker) |
|
75 | + ->checkLink('broken-internal') |
|
76 | + ->return(null); |
|
77 | + |
|
78 | + Phockito::when($checker) |
|
79 | + ->checkLink('[sitetree_link,id=1]') |
|
80 | + ->return(null); |
|
81 | + |
|
82 | + Phockito::when($checker) |
|
83 | + ->checkLink(Hamcrest_Matchers::anything()) // anything else is 404 |
|
84 | + ->return(404); |
|
85 | + |
|
86 | + Injector::inst()->registerService($checker, LinkChecker::class); |
|
87 | + } |
|
88 | + |
|
89 | + public function testLinks() |
|
90 | + { |
|
91 | + // Run link checker |
|
92 | + $task = CheckExternalLinksTask::create(); |
|
93 | + $task->setSilent(true); // Be quiet during the test! |
|
94 | + $task->runLinksCheck(); |
|
95 | + |
|
96 | + // Get all links checked |
|
97 | + $status = BrokenExternalPageTrackStatus::get_latest(); |
|
98 | + $this->assertEquals('Completed', $status->Status); |
|
99 | + $this->assertEquals(5, $status->TotalPages); |
|
100 | + $this->assertEquals(5, $status->CompletedPages); |
|
101 | + |
|
102 | + // Check all pages have had the correct HTML adjusted |
|
103 | + for ($i = 1; $i <= 5; $i++) { |
|
104 | + $page = $this->objFromFixture('ExternalLinksTestPage', 'page'.$i); |
|
105 | + $this->assertNotEmpty($page->Content); |
|
106 | + $this->assertEquals( |
|
107 | + $page->ExpectedContent, |
|
108 | + $page->Content, |
|
109 | + "Assert that the content of page{$i} has been updated" |
|
110 | + ); |
|
111 | + } |
|
112 | + |
|
113 | + // Check that the correct report of broken links is generated |
|
114 | + $links = $status |
|
115 | + ->BrokenLinks() |
|
116 | + ->sort('Link'); |
|
117 | + |
|
118 | + $this->assertEquals(4, $links->count()); |
|
119 | + $this->assertEquals( |
|
120 | + array( |
|
121 | + 'http://www.broken.com', |
|
122 | + 'http://www.broken.com/url/thing', |
|
123 | + 'http://www.broken.com/url/thing', |
|
124 | + 'http://www.nodomain.com' |
|
125 | + ), |
|
126 | + array_values($links->map('ID', 'Link')->toArray()) |
|
127 | + ); |
|
128 | + |
|
129 | + // Check response codes are correct |
|
130 | + $expected = array( |
|
131 | + 'http://www.broken.com' => 403, |
|
132 | + 'http://www.broken.com/url/thing' => 404, |
|
133 | + 'http://www.nodomain.com' => 0 |
|
134 | + ); |
|
135 | + $actual = $links->map('Link', 'HTTPCode')->toArray(); |
|
136 | + $this->assertEquals($expected, $actual); |
|
137 | + |
|
138 | + // Check response descriptions are correct |
|
139 | + i18n::set_locale('en_NZ'); |
|
140 | + $expected = array( |
|
141 | + 'http://www.broken.com' => '403 (Forbidden)', |
|
142 | + 'http://www.broken.com/url/thing' => '404 (Not Found)', |
|
143 | + 'http://www.nodomain.com' => '0 (Server Not Available)' |
|
144 | + ); |
|
145 | + $actual = $links->map('Link', 'HTTPCodeDescription')->toArray(); |
|
146 | + $this->assertEquals($expected, $actual); |
|
147 | + } |
|
148 | + |
|
149 | + /** |
|
150 | + * Test that broken links appears in the reports list |
|
151 | + */ |
|
152 | + public function testReportExists() |
|
153 | + { |
|
154 | + $reports = Report::get_reports(); |
|
155 | + $reportNames = array(); |
|
156 | + foreach ($reports as $report) { |
|
157 | + $reportNames[] = $report->class; |
|
158 | + } |
|
159 | + $this->assertContains( |
|
160 | + BrokenExternalLinksReport::class, |
|
161 | + $reportNames, |
|
162 | + 'BrokenExternalLinksReport is in reports list' |
|
163 | + ); |
|
164 | + } |
|
165 | 165 | } |
@@ -11,59 +11,59 @@ |
||
11 | 11 | class CMSExternalLinksController extends Controller |
12 | 12 | { |
13 | 13 | |
14 | - private static $allowed_actions = array('getJobStatus', 'start'); |
|
14 | + private static $allowed_actions = array('getJobStatus', 'start'); |
|
15 | 15 | |
16 | - /* |
|
16 | + /* |
|
17 | 17 | * Respond to Ajax requests for info on a running job |
18 | 18 | * |
19 | 19 | * @return string JSON string detailing status of the job |
20 | 20 | */ |
21 | - public function getJobStatus() |
|
22 | - { |
|
23 | - // Set headers |
|
24 | - HTTP::set_cache_age(0); |
|
25 | - HTTP::add_cache_headers($this->response); |
|
26 | - $this->response |
|
27 | - ->addHeader('Content-Type', 'application/json') |
|
28 | - ->addHeader('Content-Encoding', 'UTF-8') |
|
29 | - ->addHeader('X-Content-Type-Options', 'nosniff'); |
|
21 | + public function getJobStatus() |
|
22 | + { |
|
23 | + // Set headers |
|
24 | + HTTP::set_cache_age(0); |
|
25 | + HTTP::add_cache_headers($this->response); |
|
26 | + $this->response |
|
27 | + ->addHeader('Content-Type', 'application/json') |
|
28 | + ->addHeader('Content-Encoding', 'UTF-8') |
|
29 | + ->addHeader('X-Content-Type-Options', 'nosniff'); |
|
30 | 30 | |
31 | - // Format status |
|
32 | - $track = BrokenExternalPageTrackStatus::get_latest(); |
|
33 | - if ($track) { |
|
34 | - return json_encode(array( |
|
35 | - 'TrackID' => $track->ID, |
|
36 | - 'Status' => $track->Status, |
|
37 | - 'Completed' => $track->getCompletedPages(), |
|
38 | - 'Total' => $track->getTotalPages() |
|
39 | - )); |
|
40 | - } |
|
41 | - } |
|
31 | + // Format status |
|
32 | + $track = BrokenExternalPageTrackStatus::get_latest(); |
|
33 | + if ($track) { |
|
34 | + return json_encode(array( |
|
35 | + 'TrackID' => $track->ID, |
|
36 | + 'Status' => $track->Status, |
|
37 | + 'Completed' => $track->getCompletedPages(), |
|
38 | + 'Total' => $track->getTotalPages() |
|
39 | + )); |
|
40 | + } |
|
41 | + } |
|
42 | 42 | |
43 | 43 | |
44 | - /* |
|
44 | + /* |
|
45 | 45 | * Starts a broken external link check |
46 | 46 | */ |
47 | - public function start() |
|
48 | - { |
|
49 | - // return if the a job is already running |
|
50 | - $status = BrokenExternalPageTrackStatus::get_latest(); |
|
51 | - if ($status && $status->Status == 'Running') { |
|
52 | - return; |
|
53 | - } |
|
47 | + public function start() |
|
48 | + { |
|
49 | + // return if the a job is already running |
|
50 | + $status = BrokenExternalPageTrackStatus::get_latest(); |
|
51 | + if ($status && $status->Status == 'Running') { |
|
52 | + return; |
|
53 | + } |
|
54 | 54 | |
55 | - // Create a new job |
|
56 | - if (class_exists('QueuedJobService')) { |
|
57 | - // Force the creation of a new run |
|
58 | - BrokenExternalPageTrackStatus::create_status(); |
|
59 | - $checkLinks = new CheckExternalLinksJob(); |
|
60 | - singleton('QueuedJobService')->queueJob($checkLinks); |
|
61 | - } else { |
|
62 | - //TODO this hangs as it waits for the connection to be released |
|
63 | - // should return back and continue processing |
|
64 | - // http://us3.php.net/manual/en/features.connection-handling.php |
|
65 | - $task = CheckExternalLinksTask::create(); |
|
66 | - $task->runLinksCheck(); |
|
67 | - } |
|
68 | - } |
|
55 | + // Create a new job |
|
56 | + if (class_exists('QueuedJobService')) { |
|
57 | + // Force the creation of a new run |
|
58 | + BrokenExternalPageTrackStatus::create_status(); |
|
59 | + $checkLinks = new CheckExternalLinksJob(); |
|
60 | + singleton('QueuedJobService')->queueJob($checkLinks); |
|
61 | + } else { |
|
62 | + //TODO this hangs as it waits for the connection to be released |
|
63 | + // should return back and continue processing |
|
64 | + // http://us3.php.net/manual/en/features.connection-handling.php |
|
65 | + $task = CheckExternalLinksTask::create(); |
|
66 | + $task->runLinksCheck(); |
|
67 | + } |
|
68 | + } |
|
69 | 69 | } |
@@ -13,27 +13,27 @@ |
||
13 | 13 | */ |
14 | 14 | class BrokenExternalPageTrack extends DataObject |
15 | 15 | { |
16 | - private static $table_name = 'BrokenExternalPageTrack'; |
|
16 | + private static $table_name = 'BrokenExternalPageTrack'; |
|
17 | 17 | |
18 | - private static $db = array( |
|
19 | - 'Processed' => 'Boolean' |
|
20 | - ); |
|
18 | + private static $db = array( |
|
19 | + 'Processed' => 'Boolean' |
|
20 | + ); |
|
21 | 21 | |
22 | - private static $has_one = array( |
|
23 | - 'Page' => SiteTree::class, |
|
24 | - 'Status' => BrokenExternalPageTrackStatus::class |
|
25 | - ); |
|
22 | + private static $has_one = array( |
|
23 | + 'Page' => SiteTree::class, |
|
24 | + 'Status' => BrokenExternalPageTrackStatus::class |
|
25 | + ); |
|
26 | 26 | |
27 | - private static $has_many = array( |
|
28 | - 'BrokenLinks' => BrokenExternalLink::class |
|
29 | - ); |
|
27 | + private static $has_many = array( |
|
28 | + 'BrokenLinks' => BrokenExternalLink::class |
|
29 | + ); |
|
30 | 30 | |
31 | - /** |
|
32 | - * @return SiteTree |
|
33 | - */ |
|
34 | - public function Page() |
|
35 | - { |
|
36 | - return Versioned::get_by_stage(SiteTree::class, 'Stage') |
|
37 | - ->byID($this->PageID); |
|
38 | - } |
|
31 | + /** |
|
32 | + * @return SiteTree |
|
33 | + */ |
|
34 | + public function Page() |
|
35 | + { |
|
36 | + return Versioned::get_by_stage(SiteTree::class, 'Stage') |
|
37 | + ->byID($this->PageID); |
|
38 | + } |
|
39 | 39 | } |
@@ -18,132 +18,132 @@ |
||
18 | 18 | */ |
19 | 19 | class BrokenExternalPageTrackStatus extends DataObject |
20 | 20 | { |
21 | - private static $table_name = 'BrokenExternalPageTrackStatus'; |
|
22 | - |
|
23 | - private static $db = array( |
|
24 | - 'Status' => 'Enum("Completed, Running", "Running")', |
|
25 | - 'JobInfo' => 'Varchar(255)' |
|
26 | - ); |
|
27 | - |
|
28 | - private static $has_many = array( |
|
29 | - 'TrackedPages' => BrokenExternalPageTrack::class, |
|
30 | - 'BrokenLinks' => BrokenExternalLink::class |
|
31 | - ); |
|
32 | - |
|
33 | - /** |
|
34 | - * Get the latest track status |
|
35 | - * |
|
36 | - * @return self |
|
37 | - */ |
|
38 | - public static function get_latest() |
|
39 | - { |
|
40 | - return self::get() |
|
41 | - ->sort('ID', 'DESC') |
|
42 | - ->first(); |
|
43 | - } |
|
44 | - |
|
45 | - /** |
|
46 | - * Gets the list of Pages yet to be checked |
|
47 | - * |
|
48 | - * @return DataList |
|
49 | - */ |
|
50 | - public function getIncompletePageList() |
|
51 | - { |
|
52 | - $pageIDs = $this |
|
53 | - ->getIncompleteTracks() |
|
54 | - ->column('PageID'); |
|
55 | - if ($pageIDs) { |
|
56 | - return Versioned::get_by_stage(SiteTree::class, 'Stage') |
|
57 | - ->byIDs($pageIDs); |
|
58 | - } |
|
59 | - } |
|
60 | - |
|
61 | - /** |
|
62 | - * Get the list of incomplete BrokenExternalPageTrack |
|
63 | - * |
|
64 | - * @return DataList |
|
65 | - */ |
|
66 | - public function getIncompleteTracks() |
|
67 | - { |
|
68 | - return $this |
|
69 | - ->TrackedPages() |
|
70 | - ->filter('Processed', 0); |
|
71 | - } |
|
72 | - |
|
73 | - /** |
|
74 | - * Get total pages count |
|
75 | - */ |
|
76 | - public function getTotalPages() |
|
77 | - { |
|
78 | - return $this->TrackedPages()->count(); |
|
79 | - } |
|
80 | - |
|
81 | - /** |
|
82 | - * Get completed pages count |
|
83 | - */ |
|
84 | - public function getCompletedPages() |
|
85 | - { |
|
86 | - return $this |
|
87 | - ->TrackedPages() |
|
88 | - ->filter('Processed', 1) |
|
89 | - ->count(); |
|
90 | - } |
|
91 | - |
|
92 | - /** |
|
93 | - * Returns the latest run, or otherwise creates a new one |
|
94 | - * |
|
95 | - * @return self |
|
96 | - */ |
|
97 | - public static function get_or_create() |
|
98 | - { |
|
99 | - // Check the current status |
|
100 | - $status = self::get_latest(); |
|
101 | - if ($status && $status->Status == 'Running') { |
|
102 | - $status->updateStatus(); |
|
103 | - return $status; |
|
104 | - } |
|
105 | - |
|
106 | - return self::create_status(); |
|
107 | - } |
|
108 | - |
|
109 | - /* |
|
21 | + private static $table_name = 'BrokenExternalPageTrackStatus'; |
|
22 | + |
|
23 | + private static $db = array( |
|
24 | + 'Status' => 'Enum("Completed, Running", "Running")', |
|
25 | + 'JobInfo' => 'Varchar(255)' |
|
26 | + ); |
|
27 | + |
|
28 | + private static $has_many = array( |
|
29 | + 'TrackedPages' => BrokenExternalPageTrack::class, |
|
30 | + 'BrokenLinks' => BrokenExternalLink::class |
|
31 | + ); |
|
32 | + |
|
33 | + /** |
|
34 | + * Get the latest track status |
|
35 | + * |
|
36 | + * @return self |
|
37 | + */ |
|
38 | + public static function get_latest() |
|
39 | + { |
|
40 | + return self::get() |
|
41 | + ->sort('ID', 'DESC') |
|
42 | + ->first(); |
|
43 | + } |
|
44 | + |
|
45 | + /** |
|
46 | + * Gets the list of Pages yet to be checked |
|
47 | + * |
|
48 | + * @return DataList |
|
49 | + */ |
|
50 | + public function getIncompletePageList() |
|
51 | + { |
|
52 | + $pageIDs = $this |
|
53 | + ->getIncompleteTracks() |
|
54 | + ->column('PageID'); |
|
55 | + if ($pageIDs) { |
|
56 | + return Versioned::get_by_stage(SiteTree::class, 'Stage') |
|
57 | + ->byIDs($pageIDs); |
|
58 | + } |
|
59 | + } |
|
60 | + |
|
61 | + /** |
|
62 | + * Get the list of incomplete BrokenExternalPageTrack |
|
63 | + * |
|
64 | + * @return DataList |
|
65 | + */ |
|
66 | + public function getIncompleteTracks() |
|
67 | + { |
|
68 | + return $this |
|
69 | + ->TrackedPages() |
|
70 | + ->filter('Processed', 0); |
|
71 | + } |
|
72 | + |
|
73 | + /** |
|
74 | + * Get total pages count |
|
75 | + */ |
|
76 | + public function getTotalPages() |
|
77 | + { |
|
78 | + return $this->TrackedPages()->count(); |
|
79 | + } |
|
80 | + |
|
81 | + /** |
|
82 | + * Get completed pages count |
|
83 | + */ |
|
84 | + public function getCompletedPages() |
|
85 | + { |
|
86 | + return $this |
|
87 | + ->TrackedPages() |
|
88 | + ->filter('Processed', 1) |
|
89 | + ->count(); |
|
90 | + } |
|
91 | + |
|
92 | + /** |
|
93 | + * Returns the latest run, or otherwise creates a new one |
|
94 | + * |
|
95 | + * @return self |
|
96 | + */ |
|
97 | + public static function get_or_create() |
|
98 | + { |
|
99 | + // Check the current status |
|
100 | + $status = self::get_latest(); |
|
101 | + if ($status && $status->Status == 'Running') { |
|
102 | + $status->updateStatus(); |
|
103 | + return $status; |
|
104 | + } |
|
105 | + |
|
106 | + return self::create_status(); |
|
107 | + } |
|
108 | + |
|
109 | + /* |
|
110 | 110 | * Create and prepare a new status |
111 | 111 | * |
112 | 112 | * @return self |
113 | 113 | */ |
114 | - public static function create_status() |
|
115 | - { |
|
116 | - // If the script is to be started create a new status |
|
117 | - $status = self::create(); |
|
118 | - $status->updateJobInfo('Creating new tracking object'); |
|
119 | - |
|
120 | - // Setup all pages to test |
|
121 | - $pageIDs = Versioned::get_by_stage(SiteTree::class, 'Stage') |
|
122 | - ->column('ID'); |
|
123 | - foreach ($pageIDs as $pageID) { |
|
124 | - $trackPage = BrokenExternalPageTrack::create(); |
|
125 | - $trackPage->PageID = $pageID; |
|
126 | - $trackPage->StatusID = $status->ID; |
|
127 | - $trackPage->write(); |
|
128 | - } |
|
129 | - |
|
130 | - return $status; |
|
131 | - } |
|
132 | - |
|
133 | - public function updateJobInfo($message) |
|
134 | - { |
|
135 | - $this->JobInfo = $message; |
|
136 | - $this->write(); |
|
137 | - } |
|
138 | - |
|
139 | - /** |
|
140 | - * Self check status |
|
141 | - */ |
|
142 | - public function updateStatus() |
|
143 | - { |
|
144 | - if ($this->CompletedPages == $this->TotalPages) { |
|
145 | - $this->Status = 'Completed'; |
|
146 | - $this->updateJobInfo('Setting to completed'); |
|
147 | - } |
|
148 | - } |
|
114 | + public static function create_status() |
|
115 | + { |
|
116 | + // If the script is to be started create a new status |
|
117 | + $status = self::create(); |
|
118 | + $status->updateJobInfo('Creating new tracking object'); |
|
119 | + |
|
120 | + // Setup all pages to test |
|
121 | + $pageIDs = Versioned::get_by_stage(SiteTree::class, 'Stage') |
|
122 | + ->column('ID'); |
|
123 | + foreach ($pageIDs as $pageID) { |
|
124 | + $trackPage = BrokenExternalPageTrack::create(); |
|
125 | + $trackPage->PageID = $pageID; |
|
126 | + $trackPage->StatusID = $status->ID; |
|
127 | + $trackPage->write(); |
|
128 | + } |
|
129 | + |
|
130 | + return $status; |
|
131 | + } |
|
132 | + |
|
133 | + public function updateJobInfo($message) |
|
134 | + { |
|
135 | + $this->JobInfo = $message; |
|
136 | + $this->write(); |
|
137 | + } |
|
138 | + |
|
139 | + /** |
|
140 | + * Self check status |
|
141 | + */ |
|
142 | + public function updateStatus() |
|
143 | + { |
|
144 | + if ($this->CompletedPages == $this->TotalPages) { |
|
145 | + $this->Status = 'Completed'; |
|
146 | + $this->updateJobInfo('Setting to completed'); |
|
147 | + } |
|
148 | + } |
|
149 | 149 | } |
@@ -18,67 +18,67 @@ |
||
18 | 18 | */ |
19 | 19 | class BrokenExternalLink extends DataObject |
20 | 20 | { |
21 | - private static $table_name = 'BrokenExternalLink'; |
|
21 | + private static $table_name = 'BrokenExternalLink'; |
|
22 | 22 | |
23 | - private static $db = array( |
|
24 | - 'Link' => 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. |
|
25 | - 'HTTPCode' =>'Int' |
|
26 | - ); |
|
23 | + private static $db = array( |
|
24 | + 'Link' => 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. |
|
25 | + 'HTTPCode' =>'Int' |
|
26 | + ); |
|
27 | 27 | |
28 | - private static $has_one = array( |
|
29 | - 'Track' => BrokenExternalPageTrack::class, |
|
30 | - 'Status' => BrokenExternalPageTrackStatus::class |
|
31 | - ); |
|
28 | + private static $has_one = array( |
|
29 | + 'Track' => BrokenExternalPageTrack::class, |
|
30 | + 'Status' => BrokenExternalPageTrackStatus::class |
|
31 | + ); |
|
32 | 32 | |
33 | - private static $summary_fields = array( |
|
34 | - 'Created' => 'Checked', |
|
35 | - 'Link' => 'External Link', |
|
36 | - 'HTTPCodeDescription' => 'HTTP Error Code', |
|
37 | - 'Page.Title' => 'Page link is on' |
|
38 | - ); |
|
33 | + private static $summary_fields = array( |
|
34 | + 'Created' => 'Checked', |
|
35 | + 'Link' => 'External Link', |
|
36 | + 'HTTPCodeDescription' => 'HTTP Error Code', |
|
37 | + 'Page.Title' => 'Page link is on' |
|
38 | + ); |
|
39 | 39 | |
40 | - private static $searchable_fields = array( |
|
41 | - 'HTTPCode' => array('title' => 'HTTP Code') |
|
42 | - ); |
|
40 | + private static $searchable_fields = array( |
|
41 | + 'HTTPCode' => array('title' => 'HTTP Code') |
|
42 | + ); |
|
43 | 43 | |
44 | - /** |
|
45 | - * @return SiteTree |
|
46 | - */ |
|
47 | - public function Page() |
|
48 | - { |
|
49 | - return $this->Track()->Page(); |
|
50 | - } |
|
44 | + /** |
|
45 | + * @return SiteTree |
|
46 | + */ |
|
47 | + public function Page() |
|
48 | + { |
|
49 | + return $this->Track()->Page(); |
|
50 | + } |
|
51 | 51 | |
52 | - public function canEdit($member = false) |
|
53 | - { |
|
54 | - return false; |
|
55 | - } |
|
52 | + public function canEdit($member = false) |
|
53 | + { |
|
54 | + return false; |
|
55 | + } |
|
56 | 56 | |
57 | - public function canView($member = false) |
|
58 | - { |
|
59 | - $member = $member ? $member : Member::currentUser(); |
|
60 | - $codes = array('content-authors', 'administrators'); |
|
61 | - return Permission::checkMember($member, $codes); |
|
62 | - } |
|
57 | + public function canView($member = false) |
|
58 | + { |
|
59 | + $member = $member ? $member : Member::currentUser(); |
|
60 | + $codes = array('content-authors', 'administrators'); |
|
61 | + return Permission::checkMember($member, $codes); |
|
62 | + } |
|
63 | 63 | |
64 | - /** |
|
65 | - * Retrieve a human readable description of a response code |
|
66 | - * |
|
67 | - * @return string |
|
68 | - */ |
|
69 | - public function getHTTPCodeDescription() |
|
70 | - { |
|
71 | - $code = $this->HTTPCode; |
|
72 | - if (empty($code)) { |
|
73 | - // Assume that $code = 0 means there was no response |
|
74 | - $description = _t('BrokenExternalLink.NOTAVAILABLE', 'Server Not Available'); |
|
75 | - } elseif (($descriptions = Config::inst()->get(HTTPResponse::class, 'status_codes')) |
|
76 | - && isset($descriptions[$code]) |
|
77 | - ) { |
|
78 | - $description = $descriptions[$code]; |
|
79 | - } else { |
|
80 | - $description = _t('BrokenExternalLink.UNKNOWNRESPONSE', 'Unknown Response Code'); |
|
81 | - } |
|
82 | - return sprintf("%d (%s)", $code, $description); |
|
83 | - } |
|
64 | + /** |
|
65 | + * Retrieve a human readable description of a response code |
|
66 | + * |
|
67 | + * @return string |
|
68 | + */ |
|
69 | + public function getHTTPCodeDescription() |
|
70 | + { |
|
71 | + $code = $this->HTTPCode; |
|
72 | + if (empty($code)) { |
|
73 | + // Assume that $code = 0 means there was no response |
|
74 | + $description = _t('BrokenExternalLink.NOTAVAILABLE', 'Server Not Available'); |
|
75 | + } elseif (($descriptions = Config::inst()->get(HTTPResponse::class, 'status_codes')) |
|
76 | + && isset($descriptions[$code]) |
|
77 | + ) { |
|
78 | + $description = $descriptions[$code]; |
|
79 | + } else { |
|
80 | + $description = _t('BrokenExternalLink.UNKNOWNRESPONSE', 'Unknown Response Code'); |
|
81 | + } |
|
82 | + return sprintf("%d (%s)", $code, $description); |
|
83 | + } |
|
84 | 84 | } |
@@ -8,11 +8,11 @@ |
||
8 | 8 | interface LinkChecker |
9 | 9 | { |
10 | 10 | |
11 | - /** |
|
12 | - * Determine the http status code for a given link |
|
13 | - * |
|
14 | - * @param string $href URL to check |
|
15 | - * @return int HTTP status code, or null if not checkable (not a link) |
|
16 | - */ |
|
17 | - public function checkLink($href); |
|
11 | + /** |
|
12 | + * Determine the http status code for a given link |
|
13 | + * |
|
14 | + * @param string $href URL to check |
|
15 | + * @return int HTTP status code, or null if not checkable (not a link) |
|
16 | + */ |
|
17 | + public function checkLink($href); |
|
18 | 18 | } |
@@ -10,51 +10,51 @@ |
||
10 | 10 | class CurlLinkChecker implements LinkChecker |
11 | 11 | { |
12 | 12 | |
13 | - /** |
|
14 | - * Return cache |
|
15 | - * |
|
16 | - * @return Zend_Cache_Frontend |
|
17 | - */ |
|
18 | - protected function getCache() |
|
19 | - { |
|
20 | - return SS_Cache::factory( |
|
21 | - __CLASS__, |
|
22 | - 'Output', |
|
23 | - array('automatic_serialization' => true) |
|
24 | - ); |
|
25 | - } |
|
26 | - |
|
27 | - /** |
|
28 | - * Determine the http status code for a given link |
|
29 | - * |
|
30 | - * @param string $href URL to check |
|
31 | - * @return int HTTP status code, or null if not checkable (not a link) |
|
32 | - */ |
|
33 | - public function checkLink($href) |
|
34 | - { |
|
35 | - // Skip non-external links |
|
36 | - if (!preg_match('/^https?[^:]*:\/\//', $href)) { |
|
37 | - return null; |
|
38 | - } |
|
39 | - |
|
40 | - // Check if we have a cached result |
|
41 | - $cacheKey = md5($href); |
|
42 | - $result = $this->getCache()->load($cacheKey); |
|
43 | - if ($result !== false) { |
|
44 | - return $result; |
|
45 | - } |
|
46 | - |
|
47 | - // No cached result so just request |
|
48 | - $handle = curl_init($href); |
|
49 | - curl_setopt($handle, CURLOPT_RETURNTRANSFER, true); |
|
50 | - curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); |
|
51 | - curl_setopt($handle, CURLOPT_TIMEOUT, 10); |
|
52 | - curl_exec($handle); |
|
53 | - $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); |
|
54 | - curl_close($handle); |
|
55 | - |
|
56 | - // Cache result |
|
57 | - $this->getCache()->save($httpCode, $cacheKey); |
|
58 | - return $httpCode; |
|
59 | - } |
|
13 | + /** |
|
14 | + * Return cache |
|
15 | + * |
|
16 | + * @return Zend_Cache_Frontend |
|
17 | + */ |
|
18 | + protected function getCache() |
|
19 | + { |
|
20 | + return SS_Cache::factory( |
|
21 | + __CLASS__, |
|
22 | + 'Output', |
|
23 | + array('automatic_serialization' => true) |
|
24 | + ); |
|
25 | + } |
|
26 | + |
|
27 | + /** |
|
28 | + * Determine the http status code for a given link |
|
29 | + * |
|
30 | + * @param string $href URL to check |
|
31 | + * @return int HTTP status code, or null if not checkable (not a link) |
|
32 | + */ |
|
33 | + public function checkLink($href) |
|
34 | + { |
|
35 | + // Skip non-external links |
|
36 | + if (!preg_match('/^https?[^:]*:\/\//', $href)) { |
|
37 | + return null; |
|
38 | + } |
|
39 | + |
|
40 | + // Check if we have a cached result |
|
41 | + $cacheKey = md5($href); |
|
42 | + $result = $this->getCache()->load($cacheKey); |
|
43 | + if ($result !== false) { |
|
44 | + return $result; |
|
45 | + } |
|
46 | + |
|
47 | + // No cached result so just request |
|
48 | + $handle = curl_init($href); |
|
49 | + curl_setopt($handle, CURLOPT_RETURNTRANSFER, true); |
|
50 | + curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); |
|
51 | + curl_setopt($handle, CURLOPT_TIMEOUT, 10); |
|
52 | + curl_exec($handle); |
|
53 | + $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); |
|
54 | + curl_close($handle); |
|
55 | + |
|
56 | + // Cache result |
|
57 | + $this->getCache()->save($httpCode, $cacheKey); |
|
58 | + return $httpCode; |
|
59 | + } |
|
60 | 60 | } |
@@ -20,211 +20,211 @@ |
||
20 | 20 | class CheckExternalLinksTask extends BuildTask |
21 | 21 | { |
22 | 22 | |
23 | - private static $dependencies = array( |
|
24 | - 'LinkChecker' => '%$LinkChecker' |
|
25 | - ); |
|
26 | - |
|
27 | - /** |
|
28 | - * @var bool |
|
29 | - */ |
|
30 | - protected $silent = false; |
|
31 | - |
|
32 | - /** |
|
33 | - * @var LinkChecker |
|
34 | - */ |
|
35 | - protected $linkChecker; |
|
36 | - |
|
37 | - protected $title = 'Checking broken External links in the SiteTree'; |
|
38 | - |
|
39 | - protected $description = 'A task that records external broken links in the SiteTree'; |
|
40 | - |
|
41 | - protected $enabled = true; |
|
42 | - |
|
43 | - /** |
|
44 | - * Log a message |
|
45 | - * |
|
46 | - * @param string $message |
|
47 | - */ |
|
48 | - protected function log($message) |
|
49 | - { |
|
50 | - if (!$this->silent) { |
|
51 | - Debug::message($message); |
|
52 | - } |
|
53 | - } |
|
54 | - |
|
55 | - public function run($request) |
|
56 | - { |
|
57 | - $this->runLinksCheck(); |
|
58 | - } |
|
59 | - /** |
|
60 | - * Turn on or off message output |
|
61 | - * |
|
62 | - * @param bool $silent |
|
63 | - */ |
|
64 | - public function setSilent($silent) |
|
65 | - { |
|
66 | - $this->silent = $silent; |
|
67 | - } |
|
68 | - |
|
69 | - /** |
|
70 | - * @param LinkChecker $linkChecker |
|
71 | - */ |
|
72 | - public function setLinkChecker(LinkChecker $linkChecker) |
|
73 | - { |
|
74 | - $this->linkChecker = $linkChecker; |
|
75 | - } |
|
76 | - |
|
77 | - /** |
|
78 | - * @return LinkChecker |
|
79 | - */ |
|
80 | - public function getLinkChecker() |
|
81 | - { |
|
82 | - return $this->linkChecker; |
|
83 | - } |
|
84 | - |
|
85 | - /** |
|
86 | - * Check the status of a single link on a page |
|
87 | - * |
|
88 | - * @param BrokenExternalPageTrack $pageTrack |
|
89 | - * @param DOMNode $link |
|
90 | - */ |
|
91 | - protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) |
|
92 | - { |
|
93 | - $class = $link->getAttribute('class'); |
|
94 | - $href = $link->getAttribute('href'); |
|
95 | - $markedBroken = preg_match('/\b(ss-broken)\b/', $class); |
|
96 | - |
|
97 | - // Check link |
|
98 | - $httpCode = $this->linkChecker->checkLink($href); |
|
99 | - if ($httpCode === null) { |
|
100 | - return; // Null link means uncheckable, such as an internal link |
|
101 | - } |
|
102 | - |
|
103 | - // If this code is broken then mark as such |
|
104 | - if ($foundBroken = $this->isCodeBroken($httpCode)) { |
|
105 | - // Create broken record |
|
106 | - $brokenLink = new BrokenExternalLink(); |
|
107 | - $brokenLink->Link = $href; |
|
108 | - $brokenLink->HTTPCode = $httpCode; |
|
109 | - $brokenLink->TrackID = $pageTrack->ID; |
|
110 | - $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons |
|
111 | - $brokenLink->write(); |
|
112 | - } |
|
113 | - |
|
114 | - // Check if we need to update CSS class, otherwise return |
|
115 | - if ($markedBroken == $foundBroken) { |
|
116 | - return; |
|
117 | - } |
|
118 | - if ($foundBroken) { |
|
119 | - $class .= ' ss-broken'; |
|
120 | - } else { |
|
121 | - $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); |
|
122 | - } |
|
123 | - $link->setAttribute('class', trim($class)); |
|
124 | - } |
|
125 | - |
|
126 | - /** |
|
127 | - * Determine if the given HTTP code is "broken" |
|
128 | - * |
|
129 | - * @param int $httpCode |
|
130 | - * @return bool True if this is a broken code |
|
131 | - */ |
|
132 | - protected function isCodeBroken($httpCode) |
|
133 | - { |
|
134 | - // Null represents no request attempted |
|
135 | - if ($httpCode === null) { |
|
136 | - return false; |
|
137 | - } |
|
138 | - |
|
139 | - // do we have any whitelisted codes |
|
140 | - $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes'); |
|
141 | - if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) { |
|
142 | - return false; |
|
143 | - } |
|
144 | - |
|
145 | - // Check if code is outside valid range |
|
146 | - return $httpCode < 200 || $httpCode > 302; |
|
147 | - } |
|
148 | - |
|
149 | - /** |
|
150 | - * Runs the links checker and returns the track used |
|
151 | - * |
|
152 | - * @param int $limit Limit to number of pages to run, or null to run all |
|
153 | - * @return BrokenExternalPageTrackStatus |
|
154 | - */ |
|
155 | - public function runLinksCheck($limit = null) |
|
156 | - { |
|
157 | - // Check the current status |
|
158 | - $status = BrokenExternalPageTrackStatus::get_or_create(); |
|
159 | - |
|
160 | - // Calculate pages to run |
|
161 | - $pageTracks = $status->getIncompleteTracks(); |
|
162 | - if ($limit) { |
|
163 | - $pageTracks = $pageTracks->limit($limit); |
|
164 | - } |
|
165 | - |
|
166 | - // Check each page |
|
167 | - foreach ($pageTracks as $pageTrack) { |
|
168 | - // Flag as complete |
|
169 | - $pageTrack->Processed = 1; |
|
170 | - $pageTrack->write(); |
|
171 | - |
|
172 | - // Check value of html area |
|
173 | - $page = $pageTrack->Page(); |
|
174 | - $this->log("Checking {$page->Title}"); |
|
175 | - $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); |
|
176 | - if (!$htmlValue->isValid()) { |
|
177 | - continue; |
|
178 | - } |
|
179 | - |
|
180 | - // Check each link |
|
181 | - $links = $htmlValue->getElementsByTagName('a'); |
|
182 | - foreach ($links as $link) { |
|
183 | - $this->checkPageLink($pageTrack, $link); |
|
184 | - } |
|
185 | - |
|
186 | - // Update content of page based on link fixes / breakages |
|
187 | - $htmlValue->saveHTML(); |
|
188 | - $page->Content = $htmlValue->getContent(); |
|
189 | - $page->write(); |
|
190 | - |
|
191 | - // Once all links have been created for this page update HasBrokenLinks |
|
192 | - $count = $pageTrack->BrokenLinks()->count(); |
|
193 | - $this->log("Found {$count} broken links"); |
|
194 | - if ($count) { |
|
195 | - // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true |
|
196 | - DB::query(sprintf( |
|
197 | - 'UPDATE "SiteTree" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', |
|
198 | - intval($pageTrack->ID) |
|
199 | - )); |
|
200 | - } |
|
201 | - } |
|
202 | - |
|
203 | - $status->updateJobInfo('Updating completed pages'); |
|
204 | - $status->updateStatus(); |
|
205 | - return $status; |
|
206 | - } |
|
207 | - |
|
208 | - private function updateCompletedPages($trackID = 0) |
|
209 | - { |
|
210 | - $noPages = BrokenExternalPageTrack::get() |
|
211 | - ->filter(array( |
|
212 | - 'TrackID' => $trackID, |
|
213 | - 'Processed' => 1 |
|
214 | - )) |
|
215 | - ->count(); |
|
216 | - $track = BrokenExternalPageTrackStatus::get_latest(); |
|
217 | - $track->CompletedPages = $noPages; |
|
218 | - $track->write(); |
|
219 | - return $noPages; |
|
220 | - } |
|
221 | - |
|
222 | - private function updateJobInfo($message) |
|
223 | - { |
|
224 | - $track = BrokenExternalPageTrackStatus::get_latest(); |
|
225 | - if ($track) { |
|
226 | - $track->JobInfo = $message; |
|
227 | - $track->write(); |
|
228 | - } |
|
229 | - } |
|
23 | + private static $dependencies = array( |
|
24 | + 'LinkChecker' => '%$LinkChecker' |
|
25 | + ); |
|
26 | + |
|
27 | + /** |
|
28 | + * @var bool |
|
29 | + */ |
|
30 | + protected $silent = false; |
|
31 | + |
|
32 | + /** |
|
33 | + * @var LinkChecker |
|
34 | + */ |
|
35 | + protected $linkChecker; |
|
36 | + |
|
37 | + protected $title = 'Checking broken External links in the SiteTree'; |
|
38 | + |
|
39 | + protected $description = 'A task that records external broken links in the SiteTree'; |
|
40 | + |
|
41 | + protected $enabled = true; |
|
42 | + |
|
43 | + /** |
|
44 | + * Log a message |
|
45 | + * |
|
46 | + * @param string $message |
|
47 | + */ |
|
48 | + protected function log($message) |
|
49 | + { |
|
50 | + if (!$this->silent) { |
|
51 | + Debug::message($message); |
|
52 | + } |
|
53 | + } |
|
54 | + |
|
55 | + public function run($request) |
|
56 | + { |
|
57 | + $this->runLinksCheck(); |
|
58 | + } |
|
59 | + /** |
|
60 | + * Turn on or off message output |
|
61 | + * |
|
62 | + * @param bool $silent |
|
63 | + */ |
|
64 | + public function setSilent($silent) |
|
65 | + { |
|
66 | + $this->silent = $silent; |
|
67 | + } |
|
68 | + |
|
69 | + /** |
|
70 | + * @param LinkChecker $linkChecker |
|
71 | + */ |
|
72 | + public function setLinkChecker(LinkChecker $linkChecker) |
|
73 | + { |
|
74 | + $this->linkChecker = $linkChecker; |
|
75 | + } |
|
76 | + |
|
77 | + /** |
|
78 | + * @return LinkChecker |
|
79 | + */ |
|
80 | + public function getLinkChecker() |
|
81 | + { |
|
82 | + return $this->linkChecker; |
|
83 | + } |
|
84 | + |
|
85 | + /** |
|
86 | + * Check the status of a single link on a page |
|
87 | + * |
|
88 | + * @param BrokenExternalPageTrack $pageTrack |
|
89 | + * @param DOMNode $link |
|
90 | + */ |
|
91 | + protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) |
|
92 | + { |
|
93 | + $class = $link->getAttribute('class'); |
|
94 | + $href = $link->getAttribute('href'); |
|
95 | + $markedBroken = preg_match('/\b(ss-broken)\b/', $class); |
|
96 | + |
|
97 | + // Check link |
|
98 | + $httpCode = $this->linkChecker->checkLink($href); |
|
99 | + if ($httpCode === null) { |
|
100 | + return; // Null link means uncheckable, such as an internal link |
|
101 | + } |
|
102 | + |
|
103 | + // If this code is broken then mark as such |
|
104 | + if ($foundBroken = $this->isCodeBroken($httpCode)) { |
|
105 | + // Create broken record |
|
106 | + $brokenLink = new BrokenExternalLink(); |
|
107 | + $brokenLink->Link = $href; |
|
108 | + $brokenLink->HTTPCode = $httpCode; |
|
109 | + $brokenLink->TrackID = $pageTrack->ID; |
|
110 | + $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons |
|
111 | + $brokenLink->write(); |
|
112 | + } |
|
113 | + |
|
114 | + // Check if we need to update CSS class, otherwise return |
|
115 | + if ($markedBroken == $foundBroken) { |
|
116 | + return; |
|
117 | + } |
|
118 | + if ($foundBroken) { |
|
119 | + $class .= ' ss-broken'; |
|
120 | + } else { |
|
121 | + $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); |
|
122 | + } |
|
123 | + $link->setAttribute('class', trim($class)); |
|
124 | + } |
|
125 | + |
|
126 | + /** |
|
127 | + * Determine if the given HTTP code is "broken" |
|
128 | + * |
|
129 | + * @param int $httpCode |
|
130 | + * @return bool True if this is a broken code |
|
131 | + */ |
|
132 | + protected function isCodeBroken($httpCode) |
|
133 | + { |
|
134 | + // Null represents no request attempted |
|
135 | + if ($httpCode === null) { |
|
136 | + return false; |
|
137 | + } |
|
138 | + |
|
139 | + // do we have any whitelisted codes |
|
140 | + $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes'); |
|
141 | + if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) { |
|
142 | + return false; |
|
143 | + } |
|
144 | + |
|
145 | + // Check if code is outside valid range |
|
146 | + return $httpCode < 200 || $httpCode > 302; |
|
147 | + } |
|
148 | + |
|
149 | + /** |
|
150 | + * Runs the links checker and returns the track used |
|
151 | + * |
|
152 | + * @param int $limit Limit to number of pages to run, or null to run all |
|
153 | + * @return BrokenExternalPageTrackStatus |
|
154 | + */ |
|
155 | + public function runLinksCheck($limit = null) |
|
156 | + { |
|
157 | + // Check the current status |
|
158 | + $status = BrokenExternalPageTrackStatus::get_or_create(); |
|
159 | + |
|
160 | + // Calculate pages to run |
|
161 | + $pageTracks = $status->getIncompleteTracks(); |
|
162 | + if ($limit) { |
|
163 | + $pageTracks = $pageTracks->limit($limit); |
|
164 | + } |
|
165 | + |
|
166 | + // Check each page |
|
167 | + foreach ($pageTracks as $pageTrack) { |
|
168 | + // Flag as complete |
|
169 | + $pageTrack->Processed = 1; |
|
170 | + $pageTrack->write(); |
|
171 | + |
|
172 | + // Check value of html area |
|
173 | + $page = $pageTrack->Page(); |
|
174 | + $this->log("Checking {$page->Title}"); |
|
175 | + $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); |
|
176 | + if (!$htmlValue->isValid()) { |
|
177 | + continue; |
|
178 | + } |
|
179 | + |
|
180 | + // Check each link |
|
181 | + $links = $htmlValue->getElementsByTagName('a'); |
|
182 | + foreach ($links as $link) { |
|
183 | + $this->checkPageLink($pageTrack, $link); |
|
184 | + } |
|
185 | + |
|
186 | + // Update content of page based on link fixes / breakages |
|
187 | + $htmlValue->saveHTML(); |
|
188 | + $page->Content = $htmlValue->getContent(); |
|
189 | + $page->write(); |
|
190 | + |
|
191 | + // Once all links have been created for this page update HasBrokenLinks |
|
192 | + $count = $pageTrack->BrokenLinks()->count(); |
|
193 | + $this->log("Found {$count} broken links"); |
|
194 | + if ($count) { |
|
195 | + // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true |
|
196 | + DB::query(sprintf( |
|
197 | + 'UPDATE "SiteTree" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', |
|
198 | + intval($pageTrack->ID) |
|
199 | + )); |
|
200 | + } |
|
201 | + } |
|
202 | + |
|
203 | + $status->updateJobInfo('Updating completed pages'); |
|
204 | + $status->updateStatus(); |
|
205 | + return $status; |
|
206 | + } |
|
207 | + |
|
208 | + private function updateCompletedPages($trackID = 0) |
|
209 | + { |
|
210 | + $noPages = BrokenExternalPageTrack::get() |
|
211 | + ->filter(array( |
|
212 | + 'TrackID' => $trackID, |
|
213 | + 'Processed' => 1 |
|
214 | + )) |
|
215 | + ->count(); |
|
216 | + $track = BrokenExternalPageTrackStatus::get_latest(); |
|
217 | + $track->CompletedPages = $noPages; |
|
218 | + $track->write(); |
|
219 | + return $noPages; |
|
220 | + } |
|
221 | + |
|
222 | + private function updateJobInfo($message) |
|
223 | + { |
|
224 | + $track = BrokenExternalPageTrackStatus::get_latest(); |
|
225 | + if ($track) { |
|
226 | + $track->JobInfo = $message; |
|
227 | + $track->write(); |
|
228 | + } |
|
229 | + } |
|
230 | 230 | } |