Passed
Push — master ( 4c7cf7...48eaaa )
by Rougin
08:49
created

Crawler::verify()   A

Complexity

Conditions 2
Paths 1

Size

Total Lines 12
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2.1481

Importance

Changes 0
Metric Value
cc 2
eloc 5
nc 1
nop 1
dl 0
loc 12
ccs 4
cts 6
cp 0.6667
crap 2.1481
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Pilipinews\Website\Cnn;
4
5
use Pilipinews\Common\Client;
6
use Pilipinews\Common\Crawler as DomCrawler;
7
use Pilipinews\Common\Interfaces\CrawlerInterface;
8
9
/**
10
 * CNN Philippines Crawler
11
 *
12
 * @package Pilipinews
13
 * @author  Rougin Gutib <[email protected]>
14
 */
15
class Crawler implements CrawlerInterface
{
    /**
     * Collects the article links listed on the CNN Philippines search page.
     *
     * The search page is requested once, the "href" of the heading anchor
     * is read from every result entry, the links are narrowed down through
     * verify(), and the remaining ones are returned in reversed order.
     *
     * @return string[]
     */
    public function crawl()
    {
        $base = 'http://cnnphilippines.com/search/?order=DESC';

        $params = '&page=1&q=a&sort=PUBLISHDATE';

        // Reads the "href" attribute of the heading anchor inside one result entry.
        $extract = function (DomCrawler $entry) {
            $anchor = $entry->filter('.media-heading > a');

            return (string) $anchor->attr('href');
        };

        $html = (string) Client::request($base . (string) $params);

        $document = new DomCrawler($html);

        $links = $document->filter('.results > .media')->each($extract);

        $allowed = $this->verify($links);

        // NOTE(review): the request asks for a descending sort, so the reversal
        // presumably yields oldest-first order; confirm against the live page.
        return array_reverse((array) $allowed);
    }

    /**
     * Keeps only the links that contain the "es.com/news/" fragment.
     *
     * The result is re-indexed from zero and keeps the order of the input.
     *
     * @param  string[] $items
     * @return string[]
     */
    protected function verify($items)
    {
        $allowed = array();

        foreach ((array) $items as $link) {
            // Anything without the news path fragment is not scraped.
            if (strpos($link, 'es.com/news/') !== false) {
                $allowed[] = $link;
            }
        }

        return $allowed;
    }
}
68