Passed
Push — master ( f60a2c...2d7c5c )
by Dev
10:34 queued 19s
created

ExtractLinks   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 102
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 38
dl 0
loc 102
ccs 36
cts 36
cp 1
rs 10
c 0
b 0
f 0
wmc 17

6 Methods

Rating   Name   Duplication   Size   Complexity  
B getUrl() 0 19 7
A isItALink() 0 5 2
A extractLinks() 0 16 4
A getElements() 0 6 2
A __construct() 0 2 1
A get() 0 9 1
1
<?php
2
3
namespace PiedWeb\UrlHarvester;
4
5
use phpUri;
6
7
/**
 * Extracts the links found in a parsed HTML document and resolves them
 * against a base URL.
 *
 * Instances cannot be created from outside: use the static entry point get().
 */
class ExtractLinks
{
    /** Selector used to harvest only anchors carrying an href attribute. */
    const SELECT_A = 'a[href]';

    /** Comma-separated attribute names; every element carrying one of them is harvested. */
    const SELECT_ALL = 'href,src';

    /**
     * @var \simple_html_dom Parsed document the elements are searched in
     */
    private $dom;

    /**
     * @var string URL the harvested values are joined with
     */
    private $baseUrl;

    /**
     * @var string Either self::SELECT_A or a comma-separated list of attribute names
     */
    private $selector;

    /**
     * Harvest the links of a document.
     *
     * @param \simple_html_dom $dom      Parsed document to search
     * @param string           $baseUrl  To get absolute urls
     * @param string           $selector self::SELECT_A, self::SELECT_ALL or any
     *                                   comma-separated list of attribute names
     *
     * @return Link[] One Link per element for which a usable URL was found
     */
    public static function get(\simple_html_dom $dom, string $baseUrl, $selector = self::SELECT_A)
    {
        $self = new self();

        $self->selector = $selector;
        $self->baseUrl = $baseUrl;
        $self->dom = $dom;

        return $self->extractLinks();
    }

    /**
     * Private on purpose: get() is the only way to build and run an extractor.
     */
    private function __construct()
    {
    }

    /**
     * Build a Link for every matched element that yields a usable URL.
     *
     * @return Link[]
     */
    private function extractLinks(): array
    {
        $links = [];
        $elements = $this->getElements();

        // Guard against a falsy result from getElements() before iterating.
        if (!$elements) {
            return $links;
        }

        foreach ($elements as $element) {
            $href = $this->getUrl($element);

            if (null !== $href) {
                $links[] = new Link($href, $element);
            }
        }

        return $links;
    }

    /**
     * Query the document with the configured selector.
     *
     * SELECT_A is passed to find() as is. Any other selector is treated as a
     * comma-separated attribute list and turned into an attribute query,
     * e.g. 'href,src' becomes '[href],*[src]'.
     *
     * @return mixed Whatever \simple_html_dom::find() returns for the query
     */
    private function getElements()
    {
        if (self::SELECT_A === $this->selector) {
            return $this->dom->find($this->selector);
        }

        return $this->dom->find('['.implode('],*[', explode(',', $this->selector)).']');
    }

    /**
     * Read the URL carried by an element and resolve it against the base URL.
     *
     * @param mixed $element Element returned by getElements()
     *
     * @return string|null The resolved URL, the raw attribute value when it
     *                     could not be resolved, or null when the element has
     *                     no usable URL (missing, mailto: or javascript:)
     */
    private function getUrl($element)
    {
        // Defined up front so every execution path below has a value to test.
        $href = null;

        if (self::SELECT_A === $this->selector) {
            $href = $element->href;
        } else {
            // The first attribute of the list with a non-null value wins.
            // NOTE(review): this relies on the element returning null for a
            // missing attribute - confirm against the simple_html_dom version in use.
            foreach (explode(',', $this->selector) as $attribute) {
                $href = $element->$attribute;
                if (null !== $href) {
                    break;
                }
            }
        }

        // Stop here rather than handing null to the string functions and to phpUri.
        if (null === $href || !$this->isItALink($href)) {
            return null;
        }

        $parsed = phpUri::parse($this->baseUrl)->join($href);

        // Fall back on the raw value when the join produced nothing usable.
        return $parsed ? $parsed : $href;
    }

    /**
     * Tell whether a value is a navigable link rather than a mailto: or
     * javascript: pseudo-link.
     *
     * Both schemes are matched case-insensitively because URI schemes are
     * case-insensitive ("JavaScript:" is the same scheme as "javascript:").
     *
     * @param mixed $href Raw attribute value
     *
     * @return bool False when the value starts with mailto: or javascript:
     */
    private function isItALink($href): bool
    {
        return
            0 !== stripos($href, 'mailto:')
            && 0 !== stripos($href, 'javascript:');
    }
}
111