Total Complexity | 3 |
Total Lines | 49 |
Duplicated Lines | 0 % |
Coverage | 83.33% |
Changes | 2 | ||
Bugs | 0 | Features | 0 |
1 | <?php |
||
15 | class Crawler implements CrawlerInterface |
||
16 | { |
||
/**
 * Returns the article URLs listed on the first page of the site search.
 *
 * Requests the search listing, reads the heading link of every result
 * entry, prefixes it with the site base URL, passes the collected URLs
 * through verify() and returns that result in reversed order. Result
 * entries that have no heading link, or whose link has no "href" value,
 * are skipped.
 *
 * @return string[]
 */
public function crawl()
{
    $base = 'https://cnnphilippines.com';

    // Same request URL as before; built from $base to avoid repeating the host.
    $url = $base . '/search/?order=DESC';

    $query = '&page=1&q=a&sort=PUBLISHDATE';

    $response = Client::request($url . $query);

    $callback = function (DomCrawler $node) use ($base)
    {
        $link = $node->filter('.media-heading > a');

        // attr() throws on an empty node list, so an entry without a
        // heading link is skipped instead of aborting the whole crawl.
        if ($link->count() === 0)
        {
            return null;
        }

        $href = $link->attr('href');

        // Without this check a missing "href" would produce the bare
        // base URL, which is not an article.
        if ($href === null || $href === '')
        {
            return null;
        }

        return $base . $href;
    };

    $crawler = new DomCrawler((string) $response);

    $news = $crawler->filter('.results > .media');

    // Drop the skipped entries and reindex so verify() receives a plain list.
    $links = array_filter($news->each($callback), function ($item)
    {
        return $item !== null;
    });

    $news = $this->verify(array_values($links));

    return array_reverse((array) $news);
}
||
47 | |||
48 | /** |
||
49 | * Returns the allowed article URLs to scrape. |
||
50 | * |
||
51 | * @param string[] $items |
||
52 | * @return string[] |
||
53 | */ |
||
54 | protected function verify($items) |
||
64 | } |
||
65 | } |
||
66 |