Crawler::crawl()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 21
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 9
c 1
b 0
f 0
dl 0
loc 21
ccs 12
cts 12
cp 1
rs 9.9666
cc 2
nc 2
nop 0
crap 2
1
<?php
2
3
namespace Pilipinews\Website\Bulletin;
4
5
use Pilipinews\Common\Client;
6
use Pilipinews\Common\Crawler as DomCrawler;
7
use Pilipinews\Common\Interfaces\CrawlerInterface;
8
9
/**
 * Manila Bulletin Crawler
 *
 * Gathers article links from the configured Manila Bulletin category pages.
 *
 * @package Pilipinews
 * @author  Rougin Gutib <[email protected]>
 */
class Crawler implements CrawlerInterface
{
    /**
     * Category pages that are scanned for article links.
     *
     * @var string[]
     */
    protected $categories = array(
        'https://mb.com.ph/category/news/national/',
        'https://mb.com.ph/category/news/metro/',
    );

    /**
     * Returns an array of articles to scrape.
     *
     * Each category page is passed to Client::request() in the order it is
     * listed. The "href" of every article's title anchor is collected, and
     * the combined list is handed back in reverse order of collection.
     *
     * @return string[]
     */
    public function crawl()
    {
        $links = array();

        foreach ((array) $this->categories as $category)
        {
            $page = new DomCrawler(Client::request($category));

            $entries = $page->filter('.articles-list > .article');

            // Map every article element to the URL of its title anchor.
            $hrefs = $entries->each(function (DomCrawler $entry)
            {
                return (string) $entry->filter('h4.title > a')->attr('href');
            });

            $links = array_merge($links, $hrefs);
        }

        // Entries gathered last come first in the returned list.
        $collected = (array) $links;

        return array_reverse($collected);
    }
}
53