Passed
Push — master ( 48eaaa...d014fd )
by Rougin
09:32
created

Crawler::verify()   A

Complexity

Conditions 2
Paths 1

Size

Total Lines 10
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 4
nc 1
nop 1
dl 0
loc 10
ccs 5
cts 5
cp 1
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Pilipinews\Website\Cnn;
4
5
use Pilipinews\Common\Client;
6
use Pilipinews\Common\Crawler as DomCrawler;
7
use Pilipinews\Common\Interfaces\CrawlerInterface;
8
9
/**
10
 * CNN Philippines Crawler
11
 *
12
 * @package Pilipinews
13
 * @author  Rougin Gutib <[email protected]>
14
 */
15
class Crawler implements CrawlerInterface
{
    /**
     * Collects article links from the first page of the site's search
     * results and returns those accepted by verify(), in reversed order.
     *
     * @return string[]
     */
    public function crawl()
    {
        // Prefix prepended to every "href" value found in the results.
        $host = 'https://cnnphilippines.com';

        // Search endpoint followed by its paging and sorting parameters.
        $search = 'https://cnnphilippines.com/search/?order=DESC';

        $search .= '&page=1&q=a&sort=PUBLISHDATE';

        $html = (string) Client::request($search);

        $page = new DomCrawler($html);

        $entries = $page->filter('.results > .media');

        // Each result entry is turned into a link built from its heading anchor.
        $links = $entries->each(function (DomCrawler $entry) use ($host)
        {
            $anchor = $entry->filter('.media-heading > a');

            return $host . $anchor->attr('href');
        });

        $allowed = $this->verify($links);

        return array_reverse((array) $allowed);
    }

    /**
     * Keeps only the links that contain the "/news/" segment.
     *
     * The relative order of the kept links is preserved and the
     * result is re-indexed from zero.
     *
     * @param  string[] $items
     * @return string[]
     */
    protected function verify($items)
    {
        $allowed = array();

        foreach ((array) $items as $link)
        {
            // Anything without the "/news/" segment is dropped.
            if (strpos($link, '/news/') === false)
            {
                continue;
            }

            $allowed[] = $link;
        }

        return $allowed;
    }
}
66