| Total Complexity | 3 |
| Total Lines | 49 |
| Duplicated Lines | 0 % |
| Coverage | 83.33% |
| Changes | 2 | ||
| Bugs | 0 | Features | 0 |
| 1 | <?php |
||
| 15 | class Crawler implements CrawlerInterface |
||
| 16 | { |
||
/**
 * Collects the article links listed on the first page of the site search.
 *
 * The search page is requested in descending publish-date order, every
 * result entry is turned into a link (site host followed by the "href" of
 * the entry's heading anchor), and the links are passed through verify()
 * before being returned in reversed order.
 *
 * @return string[]
 */
public function crawl()
{
    $host = 'https://cnnphilippines.com';

    $endpoint = 'https://cnnphilippines.com/search/?order=DESC';

    $params = '&page=1&q=a&sort=PUBLISHDATE';

    // Fetch the search results page and load its markup into a DOM crawler.
    $html = Client::request($endpoint . $params);

    $document = new DomCrawler((string) $html);

    // Map each result entry to the host plus the "href" of its heading anchor.
    $links = $document->filter('.results > .media')->each(
        function (DomCrawler $entry) use ($host) {
            $anchor = $entry->filter('.media-heading > a');

            return $host . $anchor->attr('href');
        }
    );

    // Let verify() process the collected links, then flip their order.
    return array_reverse((array) $this->verify($links));
}
||
| 47 | |||
| 48 | /** |
||
| 49 | * Returns the allowed article URLs to scrape. |
||
| 50 | * |
||
| 51 | * @param string[] $items |
||
| 52 | * @return string[] |
||
| 53 | */ |
||
| 54 | protected function verify($items) |
||
| 64 | } |
||
| 65 | } |
||
| 66 |