Crawler::verify()   A
last analyzed

Complexity

Conditions 2
Paths 1

Size

Total Lines 10
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 2
eloc 4
c 2
b 0
f 0
nc 1
nop 1
dl 0
loc 10
ccs 5
cts 5
cp 1
crap 2
rs 10
1
<?php
2
3
namespace Pilipinews\Website\Cnn;
4
5
use Pilipinews\Common\Client;
6
use Pilipinews\Common\Crawler as DomCrawler;
7
use Pilipinews\Common\Interfaces\CrawlerInterface;
8
9
/**
10
 * CNN Philippines Crawler
11
 *
12
 * @package Pilipinews
13
 * @author  Rougin Gutib <[email protected]>
14
 */
15
class Crawler implements CrawlerInterface
{
    /**
     * Collects the article links listed on the CNN Philippines home page.
     *
     * Each ".cbwidget-list > li" item contributes the href of its anchor,
     * prefixed with the site's base URL. The links are then limited to the
     * ones accepted by verify() and returned in the reverse of the order
     * in which they appear on the page.
     *
     * @return string[]
     */
    public function crawl()
    {
        $base = 'https://cnnphilippines.com';

        $html = (string) Client::request($base);

        $page = new DomCrawler($html);

        $links = $page->filter('.cbwidget-list > li')->each(
            function (DomCrawler $item) use ($base)
            {
                // The href is appended to the base URL exactly as found.
                return $base . $item->filter('a')->attr('href');
            }
        );

        return array_reverse((array) $this->verify($links));
    }

    /**
     * Keeps only the URLs that are allowed to be scraped.
     *
     * A URL is allowed when it contains the "/news/" path segment. The
     * result is reindexed from zero, keeping the original relative order.
     *
     * @param  string[] $items
     * @return string[]
     */
    protected function verify($items)
    {
        $allowed = array_filter((array) $items, function ($link)
        {
            return strpos($link, '/news/') !== false;
        });

        return array_values($allowed);
    }
}
60