| Total Complexity | 3 |
| Total Lines | 43 |
| Duplicated Lines | 0 % |
| Coverage | 100% |
| Changes | 2 | ||
| Bugs | 0 | Features | 0 |
| 1 | <?php |
||
| 15 | class Crawler implements CrawlerInterface |
||
| 16 | { |
||
/**
 * Returns an array of articles to scrape.
 *
 * Requests the CNN Philippines base URL, takes the first link inside every
 * ".cbwidget-list > li" entry, prefixes its "href" with the base URL, passes
 * the collected URLs through verify() and returns the result in reverse
 * order. Entries that have no anchor, or whose anchor has no "href", are
 * skipped instead of aborting the crawl or yielding the bare base URL.
 *
 * @return string[]
 */
public function crawl()
{
    $base = 'https://cnnphilippines.com';

    $response = Client::request($base);

    $crawler = new DomCrawler((string) $response);

    $callback = function (DomCrawler $node) use ($base)
    {
        $anchor = $node->filter('a');

        // NOTE(review): assumes DomCrawler is Symfony's Crawler, whose attr()
        // throws on an empty node list; guard so one link-less entry does not
        // break the whole crawl.
        if ($anchor->count() === 0) {
            return null;
        }

        $href = $anchor->attr('href');

        // An anchor without "href" would otherwise produce just the base URL.
        if ($href === null) {
            return null;
        }

        return $base . $href;
    };

    $links = $crawler->filter('.cbwidget-list > li')->each($callback);

    // Drop the skipped entries and reindex so verify() still receives a
    // plain list of strings.
    $links = array_values(array_filter($links, 'is_string'));

    $news = $this->verify($links);

    return array_reverse((array) $news);
}
||
| 41 | |||
| 42 | /** |
||
| 43 | * Returns the allowed article URLs to scrape. |
||
| 44 | * |
||
| 45 | * @param string[] $items |
||
| 46 | * @return string[] |
||
| 47 | */ |
||
| 48 | protected function verify($items) |
||
| 58 | } |
||
| 59 | } |
||
| 60 |