@@ -7,9 +7,9 @@ |
||
| 7 | 7 | |
| 8 | 8 | class ExternalLinksTestPage extends Page implements TestOnly |
| 9 | 9 | { |
| 10 | - private static $table_name = 'ExternalLinksTestPage'; |
|
| 10 | + private static $table_name = 'ExternalLinksTestPage'; |
|
| 11 | 11 | |
| 12 | - private static $db = array( |
|
| 13 | - 'ExpectedContent' => 'HTMLText' |
|
| 14 | - ); |
|
| 12 | + private static $db = array( |
|
| 13 | + 'ExpectedContent' => 'HTMLText' |
|
| 14 | + ); |
|
| 15 | 15 | } |
@@ -16,150 +16,150 @@ |
||
| 16 | 16 | class ExternalLinksTest extends SapphireTest |
| 17 | 17 | { |
| 18 | 18 | |
| 19 | - protected static $fixture_file = 'ExternalLinksTest.yml'; |
|
| 20 | - |
|
| 21 | - protected static $extra_dataobjects = array( |
|
| 22 | - ExternalLinksTestPage::class |
|
| 23 | - ); |
|
| 24 | - |
|
| 25 | - public function setUpOnce() |
|
| 26 | - { |
|
| 27 | - if (class_exists(Phockito::class)) { |
|
| 28 | - Phockito::include_hamcrest(false); |
|
| 29 | - } |
|
| 30 | - |
|
| 31 | - parent::setUpOnce(); |
|
| 32 | - } |
|
| 33 | - |
|
| 34 | - public function setUp() |
|
| 35 | - { |
|
| 36 | - parent::setUp(); |
|
| 37 | - |
|
| 38 | - // Check dependencies |
|
| 39 | - if (!class_exists(Phockito::class)) { |
|
| 40 | - $this->skipTest = true; |
|
| 41 | - return $this->markTestSkipped("These tests need the Phockito module installed to run"); |
|
| 42 | - } |
|
| 43 | - |
|
| 44 | - // Mock link checker |
|
| 45 | - $checker = Phockito::mock(LinkChecker::class); |
|
| 46 | - Phockito::when($checker) |
|
| 47 | - ->checkLink('http://www.working.com') |
|
| 48 | - ->return(200); |
|
| 49 | - |
|
| 50 | - Phockito::when($checker) |
|
| 51 | - ->checkLink('http://www.broken.com/url/thing') // 404 on working site |
|
| 52 | - ->return(404); |
|
| 53 | - |
|
| 54 | - Phockito::when($checker) |
|
| 55 | - ->checkLink('http://www.broken.com') // 403 on working site |
|
| 56 | - ->return(403); |
|
| 57 | - |
|
| 58 | - Phockito::when($checker) |
|
| 59 | - ->checkLink('http://www.nodomain.com') // no ping |
|
| 60 | - ->return(0); |
|
| 61 | - |
|
| 62 | - Phockito::when($checker) |
|
| 63 | - ->checkLink('/internal/link') |
|
| 64 | - ->return(null); |
|
| 65 | - |
|
| 66 | - Phockito::when($checker) |
|
| 67 | - ->checkLink('[sitetree_link,id=9999]') |
|
| 68 | - ->return(null); |
|
| 69 | - |
|
| 70 | - Phockito::when($checker) |
|
| 71 | - ->checkLink('home') |
|
| 72 | - ->return(null); |
|
| 73 | - |
|
| 74 | - Phockito::when($checker) |
|
| 75 | - ->checkLink('broken-internal') |
|
| 76 | - ->return(null); |
|
| 77 | - |
|
| 78 | - Phockito::when($checker) |
|
| 79 | - ->checkLink('[sitetree_link,id=1]') |
|
| 80 | - ->return(null); |
|
| 81 | - |
|
| 82 | - Phockito::when($checker) |
|
| 83 | - ->checkLink(Hamcrest_Matchers::anything()) // anything else is 404 |
|
| 84 | - ->return(404); |
|
| 85 | - |
|
| 86 | - Injector::inst()->registerService($checker, LinkChecker::class); |
|
| 87 | - } |
|
| 88 | - |
|
| 89 | - public function testLinks() |
|
| 90 | - { |
|
| 91 | - // Run link checker |
|
| 92 | - $task = CheckExternalLinksTask::create(); |
|
| 93 | - $task->setSilent(true); // Be quiet during the test! |
|
| 94 | - $task->runLinksCheck(); |
|
| 95 | - |
|
| 96 | - // Get all links checked |
|
| 97 | - $status = BrokenExternalPageTrackStatus::get_latest(); |
|
| 98 | - $this->assertEquals('Completed', $status->Status); |
|
| 99 | - $this->assertEquals(5, $status->TotalPages); |
|
| 100 | - $this->assertEquals(5, $status->CompletedPages); |
|
| 101 | - |
|
| 102 | - // Check all pages have had the correct HTML adjusted |
|
| 103 | - for ($i = 1; $i <= 5; $i++) { |
|
| 104 | - $page = $this->objFromFixture('ExternalLinksTestPage', 'page'.$i); |
|
| 105 | - $this->assertNotEmpty($page->Content); |
|
| 106 | - $this->assertEquals( |
|
| 107 | - $page->ExpectedContent, |
|
| 108 | - $page->Content, |
|
| 109 | - "Assert that the content of page{$i} has been updated" |
|
| 110 | - ); |
|
| 111 | - } |
|
| 112 | - |
|
| 113 | - // Check that the correct report of broken links is generated |
|
| 114 | - $links = $status |
|
| 115 | - ->BrokenLinks() |
|
| 116 | - ->sort('Link'); |
|
| 117 | - |
|
| 118 | - $this->assertEquals(4, $links->count()); |
|
| 119 | - $this->assertEquals( |
|
| 120 | - array( |
|
| 121 | - 'http://www.broken.com', |
|
| 122 | - 'http://www.broken.com/url/thing', |
|
| 123 | - 'http://www.broken.com/url/thing', |
|
| 124 | - 'http://www.nodomain.com' |
|
| 125 | - ), |
|
| 126 | - array_values($links->map('ID', 'Link')->toArray()) |
|
| 127 | - ); |
|
| 128 | - |
|
| 129 | - // Check response codes are correct |
|
| 130 | - $expected = array( |
|
| 131 | - 'http://www.broken.com' => 403, |
|
| 132 | - 'http://www.broken.com/url/thing' => 404, |
|
| 133 | - 'http://www.nodomain.com' => 0 |
|
| 134 | - ); |
|
| 135 | - $actual = $links->map('Link', 'HTTPCode')->toArray(); |
|
| 136 | - $this->assertEquals($expected, $actual); |
|
| 137 | - |
|
| 138 | - // Check response descriptions are correct |
|
| 139 | - i18n::set_locale('en_NZ'); |
|
| 140 | - $expected = array( |
|
| 141 | - 'http://www.broken.com' => '403 (Forbidden)', |
|
| 142 | - 'http://www.broken.com/url/thing' => '404 (Not Found)', |
|
| 143 | - 'http://www.nodomain.com' => '0 (Server Not Available)' |
|
| 144 | - ); |
|
| 145 | - $actual = $links->map('Link', 'HTTPCodeDescription')->toArray(); |
|
| 146 | - $this->assertEquals($expected, $actual); |
|
| 147 | - } |
|
| 148 | - |
|
| 149 | - /** |
|
| 150 | - * Test that broken links appears in the reports list |
|
| 151 | - */ |
|
| 152 | - public function testReportExists() |
|
| 153 | - { |
|
| 154 | - $reports = Report::get_reports(); |
|
| 155 | - $reportNames = array(); |
|
| 156 | - foreach ($reports as $report) { |
|
| 157 | - $reportNames[] = $report->class; |
|
| 158 | - } |
|
| 159 | - $this->assertContains( |
|
| 160 | - BrokenExternalLinksReport::class, |
|
| 161 | - $reportNames, |
|
| 162 | - 'BrokenExternalLinksReport is in reports list' |
|
| 163 | - ); |
|
| 164 | - } |
|
| 19 | + protected static $fixture_file = 'ExternalLinksTest.yml'; |
|
| 20 | + |
|
| 21 | + protected static $extra_dataobjects = array( |
|
| 22 | + ExternalLinksTestPage::class |
|
| 23 | + ); |
|
| 24 | + |
|
| 25 | + public function setUpOnce() |
|
| 26 | + { |
|
| 27 | + if (class_exists(Phockito::class)) { |
|
| 28 | + Phockito::include_hamcrest(false); |
|
| 29 | + } |
|
| 30 | + |
|
| 31 | + parent::setUpOnce(); |
|
| 32 | + } |
|
| 33 | + |
|
| 34 | + public function setUp() |
|
| 35 | + { |
|
| 36 | + parent::setUp(); |
|
| 37 | + |
|
| 38 | + // Check dependencies |
|
| 39 | + if (!class_exists(Phockito::class)) { |
|
| 40 | + $this->skipTest = true; |
|
| 41 | + return $this->markTestSkipped("These tests need the Phockito module installed to run"); |
|
| 42 | + } |
|
| 43 | + |
|
| 44 | + // Mock link checker |
|
| 45 | + $checker = Phockito::mock(LinkChecker::class); |
|
| 46 | + Phockito::when($checker) |
|
| 47 | + ->checkLink('http://www.working.com') |
|
| 48 | + ->return(200); |
|
| 49 | + |
|
| 50 | + Phockito::when($checker) |
|
| 51 | + ->checkLink('http://www.broken.com/url/thing') // 404 on working site |
|
| 52 | + ->return(404); |
|
| 53 | + |
|
| 54 | + Phockito::when($checker) |
|
| 55 | + ->checkLink('http://www.broken.com') // 403 on working site |
|
| 56 | + ->return(403); |
|
| 57 | + |
|
| 58 | + Phockito::when($checker) |
|
| 59 | + ->checkLink('http://www.nodomain.com') // no ping |
|
| 60 | + ->return(0); |
|
| 61 | + |
|
| 62 | + Phockito::when($checker) |
|
| 63 | + ->checkLink('/internal/link') |
|
| 64 | + ->return(null); |
|
| 65 | + |
|
| 66 | + Phockito::when($checker) |
|
| 67 | + ->checkLink('[sitetree_link,id=9999]') |
|
| 68 | + ->return(null); |
|
| 69 | + |
|
| 70 | + Phockito::when($checker) |
|
| 71 | + ->checkLink('home') |
|
| 72 | + ->return(null); |
|
| 73 | + |
|
| 74 | + Phockito::when($checker) |
|
| 75 | + ->checkLink('broken-internal') |
|
| 76 | + ->return(null); |
|
| 77 | + |
|
| 78 | + Phockito::when($checker) |
|
| 79 | + ->checkLink('[sitetree_link,id=1]') |
|
| 80 | + ->return(null); |
|
| 81 | + |
|
| 82 | + Phockito::when($checker) |
|
| 83 | + ->checkLink(Hamcrest_Matchers::anything()) // anything else is 404 |
|
| 84 | + ->return(404); |
|
| 85 | + |
|
| 86 | + Injector::inst()->registerService($checker, LinkChecker::class); |
|
| 87 | + } |
|
| 88 | + |
|
| 89 | + public function testLinks() |
|
| 90 | + { |
|
| 91 | + // Run link checker |
|
| 92 | + $task = CheckExternalLinksTask::create(); |
|
| 93 | + $task->setSilent(true); // Be quiet during the test! |
|
| 94 | + $task->runLinksCheck(); |
|
| 95 | + |
|
| 96 | + // Get all links checked |
|
| 97 | + $status = BrokenExternalPageTrackStatus::get_latest(); |
|
| 98 | + $this->assertEquals('Completed', $status->Status); |
|
| 99 | + $this->assertEquals(5, $status->TotalPages); |
|
| 100 | + $this->assertEquals(5, $status->CompletedPages); |
|
| 101 | + |
|
| 102 | + // Check all pages have had the correct HTML adjusted |
|
| 103 | + for ($i = 1; $i <= 5; $i++) { |
|
| 104 | + $page = $this->objFromFixture('ExternalLinksTestPage', 'page'.$i); |
|
| 105 | + $this->assertNotEmpty($page->Content); |
|
| 106 | + $this->assertEquals( |
|
| 107 | + $page->ExpectedContent, |
|
| 108 | + $page->Content, |
|
| 109 | + "Assert that the content of page{$i} has been updated" |
|
| 110 | + ); |
|
| 111 | + } |
|
| 112 | + |
|
| 113 | + // Check that the correct report of broken links is generated |
|
| 114 | + $links = $status |
|
| 115 | + ->BrokenLinks() |
|
| 116 | + ->sort('Link'); |
|
| 117 | + |
|
| 118 | + $this->assertEquals(4, $links->count()); |
|
| 119 | + $this->assertEquals( |
|
| 120 | + array( |
|
| 121 | + 'http://www.broken.com', |
|
| 122 | + 'http://www.broken.com/url/thing', |
|
| 123 | + 'http://www.broken.com/url/thing', |
|
| 124 | + 'http://www.nodomain.com' |
|
| 125 | + ), |
|
| 126 | + array_values($links->map('ID', 'Link')->toArray()) |
|
| 127 | + ); |
|
| 128 | + |
|
| 129 | + // Check response codes are correct |
|
| 130 | + $expected = array( |
|
| 131 | + 'http://www.broken.com' => 403, |
|
| 132 | + 'http://www.broken.com/url/thing' => 404, |
|
| 133 | + 'http://www.nodomain.com' => 0 |
|
| 134 | + ); |
|
| 135 | + $actual = $links->map('Link', 'HTTPCode')->toArray(); |
|
| 136 | + $this->assertEquals($expected, $actual); |
|
| 137 | + |
|
| 138 | + // Check response descriptions are correct |
|
| 139 | + i18n::set_locale('en_NZ'); |
|
| 140 | + $expected = array( |
|
| 141 | + 'http://www.broken.com' => '403 (Forbidden)', |
|
| 142 | + 'http://www.broken.com/url/thing' => '404 (Not Found)', |
|
| 143 | + 'http://www.nodomain.com' => '0 (Server Not Available)' |
|
| 144 | + ); |
|
| 145 | + $actual = $links->map('Link', 'HTTPCodeDescription')->toArray(); |
|
| 146 | + $this->assertEquals($expected, $actual); |
|
| 147 | + } |
|
| 148 | + |
|
| 149 | + /** |
|
| 150 | + * Test that broken links appears in the reports list |
|
| 151 | + */ |
|
| 152 | + public function testReportExists() |
|
| 153 | + { |
|
| 154 | + $reports = Report::get_reports(); |
|
| 155 | + $reportNames = array(); |
|
| 156 | + foreach ($reports as $report) { |
|
| 157 | + $reportNames[] = $report->class; |
|
| 158 | + } |
|
| 159 | + $this->assertContains( |
|
| 160 | + BrokenExternalLinksReport::class, |
|
| 161 | + $reportNames, |
|
| 162 | + 'BrokenExternalLinksReport is in reports list' |
|
| 163 | + ); |
|
| 164 | + } |
|
| 165 | 165 | } |
@@ -11,59 +11,59 @@ |
||
| 11 | 11 | class CMSExternalLinksController extends Controller |
| 12 | 12 | { |
| 13 | 13 | |
| 14 | - private static $allowed_actions = array('getJobStatus', 'start'); |
|
| 14 | + private static $allowed_actions = array('getJobStatus', 'start'); |
|
| 15 | 15 | |
| 16 | - /* |
|
| 16 | + /* |
|
| 17 | 17 | * Respond to Ajax requests for info on a running job |
| 18 | 18 | * |
| 19 | 19 | * @return string JSON string detailing status of the job |
| 20 | 20 | */ |
| 21 | - public function getJobStatus() |
|
| 22 | - { |
|
| 23 | - // Set headers |
|
| 24 | - HTTP::set_cache_age(0); |
|
| 25 | - HTTP::add_cache_headers($this->response); |
|
| 26 | - $this->response |
|
| 27 | - ->addHeader('Content-Type', 'application/json') |
|
| 28 | - ->addHeader('Content-Encoding', 'UTF-8') |
|
| 29 | - ->addHeader('X-Content-Type-Options', 'nosniff'); |
|
| 21 | + public function getJobStatus() |
|
| 22 | + { |
|
| 23 | + // Set headers |
|
| 24 | + HTTP::set_cache_age(0); |
|
| 25 | + HTTP::add_cache_headers($this->response); |
|
| 26 | + $this->response |
|
| 27 | + ->addHeader('Content-Type', 'application/json') |
|
| 28 | + ->addHeader('Content-Encoding', 'UTF-8') |
|
| 29 | + ->addHeader('X-Content-Type-Options', 'nosniff'); |
|
| 30 | 30 | |
| 31 | - // Format status |
|
| 32 | - $track = BrokenExternalPageTrackStatus::get_latest(); |
|
| 33 | - if ($track) { |
|
| 34 | - return json_encode(array( |
|
| 35 | - 'TrackID' => $track->ID, |
|
| 36 | - 'Status' => $track->Status, |
|
| 37 | - 'Completed' => $track->getCompletedPages(), |
|
| 38 | - 'Total' => $track->getTotalPages() |
|
| 39 | - )); |
|
| 40 | - } |
|
| 41 | - } |
|
| 31 | + // Format status |
|
| 32 | + $track = BrokenExternalPageTrackStatus::get_latest(); |
|
| 33 | + if ($track) { |
|
| 34 | + return json_encode(array( |
|
| 35 | + 'TrackID' => $track->ID, |
|
| 36 | + 'Status' => $track->Status, |
|
| 37 | + 'Completed' => $track->getCompletedPages(), |
|
| 38 | + 'Total' => $track->getTotalPages() |
|
| 39 | + )); |
|
| 40 | + } |
|
| 41 | + } |
|
| 42 | 42 | |
| 43 | 43 | |
| 44 | - /* |
|
| 44 | + /* |
|
| 45 | 45 | * Starts a broken external link check |
| 46 | 46 | */ |
| 47 | - public function start() |
|
| 48 | - { |
|
| 49 | - // return if the a job is already running |
|
| 50 | - $status = BrokenExternalPageTrackStatus::get_latest(); |
|
| 51 | - if ($status && $status->Status == 'Running') { |
|
| 52 | - return; |
|
| 53 | - } |
|
| 47 | + public function start() |
|
| 48 | + { |
|
| 49 | + // return if the a job is already running |
|
| 50 | + $status = BrokenExternalPageTrackStatus::get_latest(); |
|
| 51 | + if ($status && $status->Status == 'Running') { |
|
| 52 | + return; |
|
| 53 | + } |
|
| 54 | 54 | |
| 55 | - // Create a new job |
|
| 56 | - if (class_exists('QueuedJobService')) { |
|
| 57 | - // Force the creation of a new run |
|
| 58 | - BrokenExternalPageTrackStatus::create_status(); |
|
| 59 | - $checkLinks = new CheckExternalLinksJob(); |
|
| 60 | - singleton('QueuedJobService')->queueJob($checkLinks); |
|
| 61 | - } else { |
|
| 62 | - //TODO this hangs as it waits for the connection to be released |
|
| 63 | - // should return back and continue processing |
|
| 64 | - // http://us3.php.net/manual/en/features.connection-handling.php |
|
| 65 | - $task = CheckExternalLinksTask::create(); |
|
| 66 | - $task->runLinksCheck(); |
|
| 67 | - } |
|
| 68 | - } |
|
| 55 | + // Create a new job |
|
| 56 | + if (class_exists('QueuedJobService')) { |
|
| 57 | + // Force the creation of a new run |
|
| 58 | + BrokenExternalPageTrackStatus::create_status(); |
|
| 59 | + $checkLinks = new CheckExternalLinksJob(); |
|
| 60 | + singleton('QueuedJobService')->queueJob($checkLinks); |
|
| 61 | + } else { |
|
| 62 | + //TODO this hangs as it waits for the connection to be released |
|
| 63 | + // should return back and continue processing |
|
| 64 | + // http://us3.php.net/manual/en/features.connection-handling.php |
|
| 65 | + $task = CheckExternalLinksTask::create(); |
|
| 66 | + $task->runLinksCheck(); |
|
| 67 | + } |
|
| 68 | + } |
|
| 69 | 69 | } |
@@ -13,27 +13,27 @@ |
||
| 13 | 13 | */ |
| 14 | 14 | class BrokenExternalPageTrack extends DataObject |
| 15 | 15 | { |
| 16 | - private static $table_name = 'BrokenExternalPageTrack'; |
|
| 16 | + private static $table_name = 'BrokenExternalPageTrack'; |
|
| 17 | 17 | |
| 18 | - private static $db = array( |
|
| 19 | - 'Processed' => 'Boolean' |
|
| 20 | - ); |
|
| 18 | + private static $db = array( |
|
| 19 | + 'Processed' => 'Boolean' |
|
| 20 | + ); |
|
| 21 | 21 | |
| 22 | - private static $has_one = array( |
|
| 23 | - 'Page' => SiteTree::class, |
|
| 24 | - 'Status' => BrokenExternalPageTrackStatus::class |
|
| 25 | - ); |
|
| 22 | + private static $has_one = array( |
|
| 23 | + 'Page' => SiteTree::class, |
|
| 24 | + 'Status' => BrokenExternalPageTrackStatus::class |
|
| 25 | + ); |
|
| 26 | 26 | |
| 27 | - private static $has_many = array( |
|
| 28 | - 'BrokenLinks' => BrokenExternalLink::class |
|
| 29 | - ); |
|
| 27 | + private static $has_many = array( |
|
| 28 | + 'BrokenLinks' => BrokenExternalLink::class |
|
| 29 | + ); |
|
| 30 | 30 | |
| 31 | - /** |
|
| 32 | - * @return SiteTree |
|
| 33 | - */ |
|
| 34 | - public function Page() |
|
| 35 | - { |
|
| 36 | - return Versioned::get_by_stage(SiteTree::class, 'Stage') |
|
| 37 | - ->byID($this->PageID); |
|
| 38 | - } |
|
| 31 | + /** |
|
| 32 | + * @return SiteTree |
|
| 33 | + */ |
|
| 34 | + public function Page() |
|
| 35 | + { |
|
| 36 | + return Versioned::get_by_stage(SiteTree::class, 'Stage') |
|
| 37 | + ->byID($this->PageID); |
|
| 38 | + } |
|
| 39 | 39 | } |
@@ -18,132 +18,132 @@ |
||
| 18 | 18 | */ |
| 19 | 19 | class BrokenExternalPageTrackStatus extends DataObject |
| 20 | 20 | { |
| 21 | - private static $table_name = 'BrokenExternalPageTrackStatus'; |
|
| 22 | - |
|
| 23 | - private static $db = array( |
|
| 24 | - 'Status' => 'Enum("Completed, Running", "Running")', |
|
| 25 | - 'JobInfo' => 'Varchar(255)' |
|
| 26 | - ); |
|
| 27 | - |
|
| 28 | - private static $has_many = array( |
|
| 29 | - 'TrackedPages' => BrokenExternalPageTrack::class, |
|
| 30 | - 'BrokenLinks' => BrokenExternalLink::class |
|
| 31 | - ); |
|
| 32 | - |
|
| 33 | - /** |
|
| 34 | - * Get the latest track status |
|
| 35 | - * |
|
| 36 | - * @return self |
|
| 37 | - */ |
|
| 38 | - public static function get_latest() |
|
| 39 | - { |
|
| 40 | - return self::get() |
|
| 41 | - ->sort('ID', 'DESC') |
|
| 42 | - ->first(); |
|
| 43 | - } |
|
| 44 | - |
|
| 45 | - /** |
|
| 46 | - * Gets the list of Pages yet to be checked |
|
| 47 | - * |
|
| 48 | - * @return DataList |
|
| 49 | - */ |
|
| 50 | - public function getIncompletePageList() |
|
| 51 | - { |
|
| 52 | - $pageIDs = $this |
|
| 53 | - ->getIncompleteTracks() |
|
| 54 | - ->column('PageID'); |
|
| 55 | - if ($pageIDs) { |
|
| 56 | - return Versioned::get_by_stage(SiteTree::class, 'Stage') |
|
| 57 | - ->byIDs($pageIDs); |
|
| 58 | - } |
|
| 59 | - } |
|
| 60 | - |
|
| 61 | - /** |
|
| 62 | - * Get the list of incomplete BrokenExternalPageTrack |
|
| 63 | - * |
|
| 64 | - * @return DataList |
|
| 65 | - */ |
|
| 66 | - public function getIncompleteTracks() |
|
| 67 | - { |
|
| 68 | - return $this |
|
| 69 | - ->TrackedPages() |
|
| 70 | - ->filter('Processed', 0); |
|
| 71 | - } |
|
| 72 | - |
|
| 73 | - /** |
|
| 74 | - * Get total pages count |
|
| 75 | - */ |
|
| 76 | - public function getTotalPages() |
|
| 77 | - { |
|
| 78 | - return $this->TrackedPages()->count(); |
|
| 79 | - } |
|
| 80 | - |
|
| 81 | - /** |
|
| 82 | - * Get completed pages count |
|
| 83 | - */ |
|
| 84 | - public function getCompletedPages() |
|
| 85 | - { |
|
| 86 | - return $this |
|
| 87 | - ->TrackedPages() |
|
| 88 | - ->filter('Processed', 1) |
|
| 89 | - ->count(); |
|
| 90 | - } |
|
| 91 | - |
|
| 92 | - /** |
|
| 93 | - * Returns the latest run, or otherwise creates a new one |
|
| 94 | - * |
|
| 95 | - * @return self |
|
| 96 | - */ |
|
| 97 | - public static function get_or_create() |
|
| 98 | - { |
|
| 99 | - // Check the current status |
|
| 100 | - $status = self::get_latest(); |
|
| 101 | - if ($status && $status->Status == 'Running') { |
|
| 102 | - $status->updateStatus(); |
|
| 103 | - return $status; |
|
| 104 | - } |
|
| 105 | - |
|
| 106 | - return self::create_status(); |
|
| 107 | - } |
|
| 108 | - |
|
| 109 | - /* |
|
| 21 | + private static $table_name = 'BrokenExternalPageTrackStatus'; |
|
| 22 | + |
|
| 23 | + private static $db = array( |
|
| 24 | + 'Status' => 'Enum("Completed, Running", "Running")', |
|
| 25 | + 'JobInfo' => 'Varchar(255)' |
|
| 26 | + ); |
|
| 27 | + |
|
| 28 | + private static $has_many = array( |
|
| 29 | + 'TrackedPages' => BrokenExternalPageTrack::class, |
|
| 30 | + 'BrokenLinks' => BrokenExternalLink::class |
|
| 31 | + ); |
|
| 32 | + |
|
| 33 | + /** |
|
| 34 | + * Get the latest track status |
|
| 35 | + * |
|
| 36 | + * @return self |
|
| 37 | + */ |
|
| 38 | + public static function get_latest() |
|
| 39 | + { |
|
| 40 | + return self::get() |
|
| 41 | + ->sort('ID', 'DESC') |
|
| 42 | + ->first(); |
|
| 43 | + } |
|
| 44 | + |
|
| 45 | + /** |
|
| 46 | + * Gets the list of Pages yet to be checked |
|
| 47 | + * |
|
| 48 | + * @return DataList |
|
| 49 | + */ |
|
| 50 | + public function getIncompletePageList() |
|
| 51 | + { |
|
| 52 | + $pageIDs = $this |
|
| 53 | + ->getIncompleteTracks() |
|
| 54 | + ->column('PageID'); |
|
| 55 | + if ($pageIDs) { |
|
| 56 | + return Versioned::get_by_stage(SiteTree::class, 'Stage') |
|
| 57 | + ->byIDs($pageIDs); |
|
| 58 | + } |
|
| 59 | + } |
|
| 60 | + |
|
| 61 | + /** |
|
| 62 | + * Get the list of incomplete BrokenExternalPageTrack |
|
| 63 | + * |
|
| 64 | + * @return DataList |
|
| 65 | + */ |
|
| 66 | + public function getIncompleteTracks() |
|
| 67 | + { |
|
| 68 | + return $this |
|
| 69 | + ->TrackedPages() |
|
| 70 | + ->filter('Processed', 0); |
|
| 71 | + } |
|
| 72 | + |
|
| 73 | + /** |
|
| 74 | + * Get total pages count |
|
| 75 | + */ |
|
| 76 | + public function getTotalPages() |
|
| 77 | + { |
|
| 78 | + return $this->TrackedPages()->count(); |
|
| 79 | + } |
|
| 80 | + |
|
| 81 | + /** |
|
| 82 | + * Get completed pages count |
|
| 83 | + */ |
|
| 84 | + public function getCompletedPages() |
|
| 85 | + { |
|
| 86 | + return $this |
|
| 87 | + ->TrackedPages() |
|
| 88 | + ->filter('Processed', 1) |
|
| 89 | + ->count(); |
|
| 90 | + } |
|
| 91 | + |
|
| 92 | + /** |
|
| 93 | + * Returns the latest run, or otherwise creates a new one |
|
| 94 | + * |
|
| 95 | + * @return self |
|
| 96 | + */ |
|
| 97 | + public static function get_or_create() |
|
| 98 | + { |
|
| 99 | + // Check the current status |
|
| 100 | + $status = self::get_latest(); |
|
| 101 | + if ($status && $status->Status == 'Running') { |
|
| 102 | + $status->updateStatus(); |
|
| 103 | + return $status; |
|
| 104 | + } |
|
| 105 | + |
|
| 106 | + return self::create_status(); |
|
| 107 | + } |
|
| 108 | + |
|
| 109 | + /* |
|
| 110 | 110 | * Create and prepare a new status |
| 111 | 111 | * |
| 112 | 112 | * @return self |
| 113 | 113 | */ |
| 114 | - public static function create_status() |
|
| 115 | - { |
|
| 116 | - // If the script is to be started create a new status |
|
| 117 | - $status = self::create(); |
|
| 118 | - $status->updateJobInfo('Creating new tracking object'); |
|
| 119 | - |
|
| 120 | - // Setup all pages to test |
|
| 121 | - $pageIDs = Versioned::get_by_stage(SiteTree::class, 'Stage') |
|
| 122 | - ->column('ID'); |
|
| 123 | - foreach ($pageIDs as $pageID) { |
|
| 124 | - $trackPage = BrokenExternalPageTrack::create(); |
|
| 125 | - $trackPage->PageID = $pageID; |
|
| 126 | - $trackPage->StatusID = $status->ID; |
|
| 127 | - $trackPage->write(); |
|
| 128 | - } |
|
| 129 | - |
|
| 130 | - return $status; |
|
| 131 | - } |
|
| 132 | - |
|
| 133 | - public function updateJobInfo($message) |
|
| 134 | - { |
|
| 135 | - $this->JobInfo = $message; |
|
| 136 | - $this->write(); |
|
| 137 | - } |
|
| 138 | - |
|
| 139 | - /** |
|
| 140 | - * Self check status |
|
| 141 | - */ |
|
| 142 | - public function updateStatus() |
|
| 143 | - { |
|
| 144 | - if ($this->CompletedPages == $this->TotalPages) { |
|
| 145 | - $this->Status = 'Completed'; |
|
| 146 | - $this->updateJobInfo('Setting to completed'); |
|
| 147 | - } |
|
| 148 | - } |
|
| 114 | + public static function create_status() |
|
| 115 | + { |
|
| 116 | + // If the script is to be started create a new status |
|
| 117 | + $status = self::create(); |
|
| 118 | + $status->updateJobInfo('Creating new tracking object'); |
|
| 119 | + |
|
| 120 | + // Setup all pages to test |
|
| 121 | + $pageIDs = Versioned::get_by_stage(SiteTree::class, 'Stage') |
|
| 122 | + ->column('ID'); |
|
| 123 | + foreach ($pageIDs as $pageID) { |
|
| 124 | + $trackPage = BrokenExternalPageTrack::create(); |
|
| 125 | + $trackPage->PageID = $pageID; |
|
| 126 | + $trackPage->StatusID = $status->ID; |
|
| 127 | + $trackPage->write(); |
|
| 128 | + } |
|
| 129 | + |
|
| 130 | + return $status; |
|
| 131 | + } |
|
| 132 | + |
|
| 133 | + public function updateJobInfo($message) |
|
| 134 | + { |
|
| 135 | + $this->JobInfo = $message; |
|
| 136 | + $this->write(); |
|
| 137 | + } |
|
| 138 | + |
|
| 139 | + /** |
|
| 140 | + * Self check status |
|
| 141 | + */ |
|
| 142 | + public function updateStatus() |
|
| 143 | + { |
|
| 144 | + if ($this->CompletedPages == $this->TotalPages) { |
|
| 145 | + $this->Status = 'Completed'; |
|
| 146 | + $this->updateJobInfo('Setting to completed'); |
|
| 147 | + } |
|
| 148 | + } |
|
| 149 | 149 | } |
@@ -18,67 +18,67 @@ |
||
| 18 | 18 | */ |
| 19 | 19 | class BrokenExternalLink extends DataObject |
| 20 | 20 | { |
| 21 | - private static $table_name = 'BrokenExternalLink'; |
|
| 21 | + private static $table_name = 'BrokenExternalLink'; |
|
| 22 | 22 | |
| 23 | - private static $db = array( |
|
| 24 | - 'Link' => 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. |
|
| 25 | - 'HTTPCode' =>'Int' |
|
| 26 | - ); |
|
| 23 | + private static $db = array( |
|
| 24 | + 'Link' => 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. |
|
| 25 | + 'HTTPCode' =>'Int' |
|
| 26 | + ); |
|
| 27 | 27 | |
| 28 | - private static $has_one = array( |
|
| 29 | - 'Track' => BrokenExternalPageTrack::class, |
|
| 30 | - 'Status' => BrokenExternalPageTrackStatus::class |
|
| 31 | - ); |
|
| 28 | + private static $has_one = array( |
|
| 29 | + 'Track' => BrokenExternalPageTrack::class, |
|
| 30 | + 'Status' => BrokenExternalPageTrackStatus::class |
|
| 31 | + ); |
|
| 32 | 32 | |
| 33 | - private static $summary_fields = array( |
|
| 34 | - 'Created' => 'Checked', |
|
| 35 | - 'Link' => 'External Link', |
|
| 36 | - 'HTTPCodeDescription' => 'HTTP Error Code', |
|
| 37 | - 'Page.Title' => 'Page link is on' |
|
| 38 | - ); |
|
| 33 | + private static $summary_fields = array( |
|
| 34 | + 'Created' => 'Checked', |
|
| 35 | + 'Link' => 'External Link', |
|
| 36 | + 'HTTPCodeDescription' => 'HTTP Error Code', |
|
| 37 | + 'Page.Title' => 'Page link is on' |
|
| 38 | + ); |
|
| 39 | 39 | |
| 40 | - private static $searchable_fields = array( |
|
| 41 | - 'HTTPCode' => array('title' => 'HTTP Code') |
|
| 42 | - ); |
|
| 40 | + private static $searchable_fields = array( |
|
| 41 | + 'HTTPCode' => array('title' => 'HTTP Code') |
|
| 42 | + ); |
|
| 43 | 43 | |
| 44 | - /** |
|
| 45 | - * @return SiteTree |
|
| 46 | - */ |
|
| 47 | - public function Page() |
|
| 48 | - { |
|
| 49 | - return $this->Track()->Page(); |
|
| 50 | - } |
|
| 44 | + /** |
|
| 45 | + * @return SiteTree |
|
| 46 | + */ |
|
| 47 | + public function Page() |
|
| 48 | + { |
|
| 49 | + return $this->Track()->Page(); |
|
| 50 | + } |
|
| 51 | 51 | |
| 52 | - public function canEdit($member = false) |
|
| 53 | - { |
|
| 54 | - return false; |
|
| 55 | - } |
|
| 52 | + public function canEdit($member = false) |
|
| 53 | + { |
|
| 54 | + return false; |
|
| 55 | + } |
|
| 56 | 56 | |
| 57 | - public function canView($member = false) |
|
| 58 | - { |
|
| 59 | - $member = $member ? $member : Member::currentUser(); |
|
| 60 | - $codes = array('content-authors', 'administrators'); |
|
| 61 | - return Permission::checkMember($member, $codes); |
|
| 62 | - } |
|
| 57 | + public function canView($member = false) |
|
| 58 | + { |
|
| 59 | + $member = $member ? $member : Member::currentUser(); |
|
| 60 | + $codes = array('content-authors', 'administrators'); |
|
| 61 | + return Permission::checkMember($member, $codes); |
|
| 62 | + } |
|
| 63 | 63 | |
| 64 | - /** |
|
| 65 | - * Retrieve a human readable description of a response code |
|
| 66 | - * |
|
| 67 | - * @return string |
|
| 68 | - */ |
|
| 69 | - public function getHTTPCodeDescription() |
|
| 70 | - { |
|
| 71 | - $code = $this->HTTPCode; |
|
| 72 | - if (empty($code)) { |
|
| 73 | - // Assume that $code = 0 means there was no response |
|
| 74 | - $description = _t('BrokenExternalLink.NOTAVAILABLE', 'Server Not Available'); |
|
| 75 | - } elseif (($descriptions = Config::inst()->get(HTTPResponse::class, 'status_codes')) |
|
| 76 | - && isset($descriptions[$code]) |
|
| 77 | - ) { |
|
| 78 | - $description = $descriptions[$code]; |
|
| 79 | - } else { |
|
| 80 | - $description = _t('BrokenExternalLink.UNKNOWNRESPONSE', 'Unknown Response Code'); |
|
| 81 | - } |
|
| 82 | - return sprintf("%d (%s)", $code, $description); |
|
| 83 | - } |
|
| 64 | + /** |
|
| 65 | + * Retrieve a human readable description of a response code |
|
| 66 | + * |
|
| 67 | + * @return string |
|
| 68 | + */ |
|
| 69 | + public function getHTTPCodeDescription() |
|
| 70 | + { |
|
| 71 | + $code = $this->HTTPCode; |
|
| 72 | + if (empty($code)) { |
|
| 73 | + // Assume that $code = 0 means there was no response |
|
| 74 | + $description = _t('BrokenExternalLink.NOTAVAILABLE', 'Server Not Available'); |
|
| 75 | + } elseif (($descriptions = Config::inst()->get(HTTPResponse::class, 'status_codes')) |
|
| 76 | + && isset($descriptions[$code]) |
|
| 77 | + ) { |
|
| 78 | + $description = $descriptions[$code]; |
|
| 79 | + } else { |
|
| 80 | + $description = _t('BrokenExternalLink.UNKNOWNRESPONSE', 'Unknown Response Code'); |
|
| 81 | + } |
|
| 82 | + return sprintf("%d (%s)", $code, $description); |
|
| 83 | + } |
|
| 84 | 84 | } |
@@ -8,11 +8,11 @@ |
||
| 8 | 8 | interface LinkChecker |
| 9 | 9 | { |
| 10 | 10 | |
| 11 | - /** |
|
| 12 | - * Determine the http status code for a given link |
|
| 13 | - * |
|
| 14 | - * @param string $href URL to check |
|
| 15 | - * @return int HTTP status code, or null if not checkable (not a link) |
|
| 16 | - */ |
|
| 17 | - public function checkLink($href); |
|
| 11 | + /** |
|
| 12 | + * Determine the http status code for a given link |
|
| 13 | + * |
|
| 14 | + * @param string $href URL to check |
|
| 15 | + * @return int HTTP status code, or null if not checkable (not a link) |
|
| 16 | + */ |
|
| 17 | + public function checkLink($href); |
|
| 18 | 18 | } |
@@ -10,51 +10,51 @@ |
||
class CurlLinkChecker implements LinkChecker
{

    /**
     * Return the cache used to remember status codes of links already checked
     *
     * @return Zend_Cache_Frontend
     */
    protected function getCache()
    {
        return SS_Cache::factory(
            __CLASS__,
            'Output',
            array('automatic_serialization' => true)
        );
    }

    /**
     * Determine the http status code for a given link
     *
     * Results are cached under the md5 hash of the URL, so a URL is only
     * requested again once its cache entry is gone.
     *
     * @param string $href URL to check
     * @return int|null HTTP status code (0 when no response was obtained),
     *                  or null if not checkable (not a link)
     */
    public function checkLink($href)
    {
        // Skip non-external links
        if (!preg_match('/^https?[^:]*:\/\//', $href)) {
            return null;
        }

        // Check if we have a cached result; the cache signals a miss with
        // boolean false, so a cached code of 0 is still treated as a hit
        $cache = $this->getCache();
        $cacheKey = md5($href);
        $cached = $cache->load($cacheKey);
        if ($cached !== false) {
            return $cached;
        }

        // No cached result so just request, then remember the outcome
        $httpCode = $this->requestStatusCode($href);
        $cache->save($httpCode, $cacheKey);
        return $httpCode;
    }

    /**
     * Perform the HTTP request for a URL and return the response status code
     *
     * @param string $href URL to request
     * @return int HTTP status code, or 0 if the request could not be made
     */
    private function requestStatusCode($href)
    {
        // curl_init() returns false on failure; the handle must not be
        // passed to the other curl_* functions in that case
        $handle = curl_init($href);
        if ($handle === false) {
            return 0;
        }

        try {
            // Capture the body instead of echoing it to the output
            curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
            // Give up after 5s connecting / 10s for the whole transfer
            curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
            curl_setopt($handle, CURLOPT_TIMEOUT, 10);
            curl_exec($handle);
            return (int) curl_getinfo($handle, CURLINFO_HTTP_CODE);
        } finally {
            // Always release the handle, even if a curl call throws
            curl_close($handle);
        }
    }
}
@@ -20,211 +20,211 @@ |
||
class CheckExternalLinksTask extends BuildTask
{

    private static $dependencies = array(
        'LinkChecker' => '%$LinkChecker'
    );

    /**
     * When true, log() produces no output
     *
     * @var bool
     */
    protected $silent = false;

    /**
     * Service used to resolve the HTTP status code of each link
     *
     * @var LinkChecker
     */
    protected $linkChecker;

    protected $title = 'Checking broken External links in the SiteTree';

    protected $description = 'A task that records external broken links in the SiteTree';

    protected $enabled = true;

    /**
     * Log a message, unless the task has been silenced
     *
     * @param string $message
     */
    protected function log($message)
    {
        if (!$this->silent) {
            Debug::message($message);
        }
    }

    /**
     * Task entry point: check all outstanding pages
     *
     * @param mixed $request Unused
     */
    public function run($request)
    {
        $this->runLinksCheck();
    }

    /**
     * Turn on or off message output
     *
     * @param bool $silent
     */
    public function setSilent($silent)
    {
        $this->silent = $silent;
    }

    /**
     * @param LinkChecker $linkChecker
     */
    public function setLinkChecker(LinkChecker $linkChecker)
    {
        $this->linkChecker = $linkChecker;
    }

    /**
     * @return LinkChecker
     */
    public function getLinkChecker()
    {
        return $this->linkChecker;
    }

    /**
     * Check the status of a single link on a page
     *
     * Writes a BrokenExternalLink record when the link is broken and keeps
     * the "ss-broken" CSS class on the link in sync with the result.
     *
     * @param BrokenExternalPageTrack $pageTrack
     * @param DOMNode $link
     */
    protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link)
    {
        // Attributes only exist on element nodes; getAttribute()/setAttribute()
        // are not defined on other DOMNode types
        if (!($link instanceof \DOMElement)) {
            return;
        }

        $class = $link->getAttribute('class');
        $href = $link->getAttribute('href');
        $markedBroken = (bool) preg_match('/\b(ss-broken)\b/', $class);

        // Check link
        $httpCode = $this->linkChecker->checkLink($href);
        if ($httpCode === null) {
            return; // Null link means uncheckable, such as an internal link
        }

        // If this code is broken then mark as such
        $foundBroken = (bool) $this->isCodeBroken($httpCode);
        if ($foundBroken) {
            // Create broken record
            $brokenLink = new BrokenExternalLink();
            $brokenLink->Link = $href;
            $brokenLink->HTTPCode = $httpCode;
            $brokenLink->TrackID = $pageTrack->ID;
            $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons
            $brokenLink->write();
        }

        // Check if we need to update CSS class, otherwise return
        if ($markedBroken === $foundBroken) {
            return;
        }
        if ($foundBroken) {
            $class .= ' ss-broken';
        } else {
            $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class);
        }
        $link->setAttribute('class', trim($class));
    }

    /**
     * Determine if the given HTTP code is "broken"
     *
     * @param int $httpCode
     * @return bool True if this is a broken code
     */
    protected function isCodeBroken($httpCode)
    {
        // Null represents no request attempted
        if ($httpCode === null) {
            return false;
        }

        // do we have any whitelisted codes
        $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes');
        if (is_array($ignoreCodes)) {
            // Normalise to integers and compare strictly, so that numeric
            // strings still match but non-numeric junk never does
            $ignoreCodes = array_map('intval', array_filter($ignoreCodes, 'is_numeric'));
            if (in_array((int) $httpCode, $ignoreCodes, true)) {
                return false;
            }
        }

        // Check if code is outside valid range
        return $httpCode < 200 || $httpCode > 302;
    }

    /**
     * Runs the links checker and returns the track used
     *
     * @param int $limit Limit to number of pages to run, or null to run all
     * @return BrokenExternalPageTrackStatus
     */
    public function runLinksCheck($limit = null)
    {
        // Check the current status
        $status = BrokenExternalPageTrackStatus::get_or_create();

        // Calculate pages to run
        $pageTracks = $status->getIncompleteTracks();
        if ($limit) {
            $pageTracks = $pageTracks->limit($limit);
        }

        // Check each page
        foreach ($pageTracks as $pageTrack) {
            // Flag as complete
            $pageTrack->Processed = 1;
            $pageTrack->write();

            // Skip tracks whose page can no longer be loaded, rather than
            // dereferencing or writing a missing record
            $page = $pageTrack->Page();
            if (!$page || !$page->exists()) {
                continue;
            }

            // Check value of html area
            $this->log("Checking {$page->Title}");
            $htmlValue = Injector::inst()->create('HTMLValue', $page->Content);
            if (!$htmlValue->isValid()) {
                continue;
            }

            // Check each link
            $links = $htmlValue->getElementsByTagName('a');
            foreach ($links as $link) {
                $this->checkPageLink($pageTrack, $link);
            }

            // Update content of page based on link fixes / breakages
            $htmlValue->saveHTML();
            $page->Content = $htmlValue->getContent();
            $page->write();

            // Once all links have been created for this page update HasBrokenLinks
            $count = $pageTrack->BrokenLinks()->count();
            $this->log("Found {$count} broken links");
            if ($count) {
                // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true.
                // The row to flag is the page itself: the track's own ID
                // identifies the tracking record, not a SiteTree record.
                DB::query(sprintf(
                    'UPDATE "SiteTree" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'',
                    intval($page->ID)
                ));
            }
        }

        $status->updateJobInfo('Updating completed pages');
        $status->updateStatus();
        return $status;
    }

    /**
     * Store the number of processed page tracks on the latest status record
     *
     * @param int $trackID Value matched against the TrackID filter
     * @return int Number of processed page tracks found
     */
    private function updateCompletedPages($trackID = 0)
    {
        $noPages = BrokenExternalPageTrack::get()
            ->filter(array(
                'TrackID' => $trackID,
                'Processed' => 1
            ))
            ->count();
        $track = BrokenExternalPageTrackStatus::get_latest();
        // Same guard as updateJobInfo(): there may be no status record yet
        if ($track) {
            $track->CompletedPages = $noPages;
            $track->write();
        }
        return $noPages;
    }

    /**
     * Store a progress message on the latest status record, if one exists
     *
     * @param string $message
     */
    private function updateJobInfo($message)
    {
        $track = BrokenExternalPageTrackStatus::get_latest();
        if ($track) {
            $track->JobInfo = $message;
            $track->write();
        }
    }
}