Crawler::crawl()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 18
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 8
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 18
ccs 10
cts 10
cp 1
crap 1
rs 10
1
<?php
2
3
namespace Pilipinews\Website\Abscbn;
4
5
use Pilipinews\Common\Client;
6
use Pilipinews\Common\Crawler as DomCrawler;
7
use Pilipinews\Common\Interfaces\CrawlerInterface;
8
9
/**
10
 * ABS-CBN News Crawler
11
 *
12
 * @package Pilipinews
13
 * @author  Rougin Gutib <[email protected]>
14
 */
15
class Crawler implements CrawlerInterface
{
    /**
     * Listing page that is requested to collect the article links.
     *
     * @var string
     */
    protected $link = 'https://news.abs-cbn.com/news';

    /**
     * Returns an array of articles to scrape.
     *
     * Requests the listing page, collects the anchors matched by the
     * "#latest-news li > p > a" selector, converts each "href" into an
     * absolute URL, drops the links rejected by verify(), and returns the
     * remaining links in reverse of the order in which they were collected.
     *
     * @return string[]
     */
    public function crawl()
    {
        $response = Client::request($this->link);

        $callback = function (DomCrawler $node)
        {
            $url = 'https://news.abs-cbn.com';

            $href = (string) $node->attr('href');

            // An already absolute link must not be prefixed again. Doing so
            // would produce "https://news.abs-cbn.comhttps://..." which still
            // contains ".com/news/" and would slip through verify().
            if (preg_match('/^https?:\/\//i', $href) === 1)
            {
                return $href;
            }

            return $url . $href;
        };

        $crawler = new DomCrawler((string) $response);

        $news = $crawler->filter('#latest-news li > p > a');

        $news = $this->verify($news->each($callback));

        // array_filter() is kept as a guard against empty entries; because
        // the result is a list, array_reverse() re-indexes it from zero.
        return array_reverse(array_filter((array) $news));
    }

    /**
     * Returns the allowed article URLs to scrape.
     *
     * A link is kept only when it contains ".com/news/" and does not
     * contain "news/multimedia". The returned array is re-indexed.
     *
     * @param  string[] $items
     * @return string[]
     */
    protected function verify($items)
    {
        $callback = function ($link)
        {
            $news = strpos($link, '.com/news/') !== false;

            $media = strpos($link, 'news/multimedia') === false;

            // Rejected links become null so array_filter() below drops them.
            return ($news && $media) ? $link : null;
        };

        $items = array_map($callback, (array) $items);

        return array_values(array_filter($items));
    }
}
69