Total Complexity | 3 |
Total Lines | 43 |
Duplicated Lines | 0 % |
Coverage | 100% |
Changes | 2 | ||
Bugs | 0 | Features | 0 |
1 | <?php |
||
15 | class Crawler implements CrawlerInterface |
||
16 | { |
||
/**
 * Collects the article links listed on the CNN Philippines site.
 *
 * Requests the page at the base address, selects every "li" found
 * directly under ".cbwidget-list" and reads the "href" of the anchor
 * inside it, prefixed with the base address. The resulting links are
 * passed through verify() and handed back in reverse order.
 *
 * @return string[]
 */
public function crawl()
{
    $site = 'https://cnnphilippines.com';

    $html = (string) Client::request($site);

    $document = new DomCrawler($html);

    // Each matched list item is mapped to the full URL of its anchor.
    $links = $document->filter('.cbwidget-list > li')->each(function (DomCrawler $item) use ($site)
    {
        $anchor = $item->filter('a');

        return $site . $anchor->attr('href');
    });

    // verify() keeps only the allowed article URLs before reversing.
    return array_reverse((array) $this->verify($links));
}
||
41 | |||
42 | /** |
||
43 | * Returns the allowed article URLs to scrape. |
||
44 | * |
||
45 | * @param string[] $items |
||
46 | * @return string[] |
||
47 | */ |
||
48 | protected function verify($items) |
||
58 | } |
||
59 | } |
||
60 |