Crawler   A
last analyzed

Complexity

Total Complexity 4

Size/Duplication

Total Lines 54
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 2
Bugs 0 Features 0
Metric Value
eloc 18
c 2
b 0
f 0
dl 0
loc 54
ccs 15
cts 15
cp 1
rs 10
wmc 4

2 Methods

Rating   Name   Duplication   Size   Complexity  
A crawl() 0 16 3
A items() 0 16 1
1
<?php
2
3
namespace Pilipinews\Website\Pna;
4
5
use Pilipinews\Common\Client;
6
use Pilipinews\Common\Crawler as DomCrawler;
7
use Pilipinews\Common\Interfaces\CrawlerInterface;
8
9
/**
10
 * Philippine News Agency Crawler
11
 *
12
 * @package Pilipinews
13
 * @author  Rougin Gutib <[email protected]>
14
 */
15
class Crawler implements CrawlerInterface
{
    /**
     * Category listing pages that are scanned for article links.
     *
     * @var string[]
     */
    protected $categories = array(
        'https://www.pna.gov.ph/categories/national',
        'https://www.pna.gov.ph/categories/provincial',
    );

    /**
     * Returns an array of articles to scrape.
     *
     * The links of every category are interleaved round-robin (first link
     * of each category, then the second link of each category, and so on)
     * and the combined list is returned in reverse order.
     *
     * @return string[]
     */
    public function crawl()
    {
        $result = array();

        $longest = 0;

        foreach ($this->categories as $category)
        {
            $items = $this->items($category);

            $longest = max($longest, count($items));

            $result[] = $items;
        }

        $articles = array();

        // Walk position by position across all categories so that lists of
        // different lengths neither inject missing entries nor lose links.
        for ($index = 0; $index < $longest; $index++)
        {
            foreach ($result as $items)
            {
                if (isset($items[$index]))
                {
                    $articles[] = $items[$index];
                }
            }
        }

        return array_reverse($articles);
    }

    /**
     * Returns the article links found on the specified category page.
     *
     * @param  string $link URL of the category page to be requested.
     * @return string[]
     */
    protected function items($link)
    {
        $pattern = '.articles > .article.media';

        $base = 'https://www.pna.gov.ph';

        $callback = function (DomCrawler $node) use ($base)
        {
            $anchor = $node->filter('.media-heading > a');

            // The "href" value is appended to the site's base URL as is.
            return $base . (string) $anchor->attr('href');
        };

        $crawler = new DomCrawler(Client::request($link));

        return $crawler->filter($pattern)->each($callback);
    }
}
71