Passed
Push — master ( 9abeb8...36c770 )
by Dev
13:09
created

ExtractLinks::getElements()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 6
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 4
c 1
b 0
f 0
dl 0
loc 6
ccs 3
cts 3
cp 1
rs 10
cc 2
nc 2
nop 0
crap 2

1 Method

Rating   Name   Duplication   Size   Complexity  
A ExtractLinks::isWebLink() 0 3 1
1
<?php
2
3
namespace PiedWeb\UrlHarvester;
4
5
/**
 * Extracts the web links found in a harvested document.
 *
 * The CSS selector decides both which elements are inspected and which
 * attributes may carry the URL: every attribute named between square
 * brackets in the selector is tried, in order of appearance.
 */
class ExtractLinks
{
    /** CSS selector matching anchors that carry an href attribute. */
    public const SELECT_A = 'a[href]';

    /** CSS selector matching any element that carries an href or a src attribute. */
    public const SELECT_ALL = '[href],[src]';

    /** @var Harvest */
    private $harvest;

    /** @var string */
    private $selector;

    /**
     * Builds the list of links found in the harvested document.
     *
     * @param Harvest $harvest  harvested page whose DOM is searched
     * @param string  $selector CSS selector (see the SELECT_* constants)
     *
     * @return array one Link instance per element exposing a usable web URL
     */
    public static function get(Harvest $harvest, $selector = self::SELECT_A): array
    {
        $self = new self();

        $self->selector = $selector;
        $self->harvest = $harvest;

        return $self->extractLinks();
    }

    /**
     * Instances are only created through self::get().
     */
    private function __construct()
    {
    }

    /**
     * Walks every element matched by the selector and wraps each usable URL in a Link.
     *
     * @return array
     */
    private function extractLinks(): array
    {
        $links = [];

        // When nothing matches, the loop simply does not run and an empty array is returned.
        $elements = $this->harvest->getDom()->filter($this->selector);

        foreach ($elements as $element) {
            $url = $this->extractUrl($element);
            if (null !== $url) {
                $links[] = new Link($url, $this->harvest, $element);
            }
        }

        return $links;
    }

    /**
     * Reads the URL from the first selector attribute present on the element.
     *
     * @return string|null absolute url, or null when the element carries none
     *                     of the selector attributes or the value is not a web link
     */
    private function extractUrl(\DOMElement $element): ?string
    {
        $url = null;

        foreach ($this->getSelectorAttributes() as $attribute) {
            // DOMElement::getAttribute() returns '' (never null) for a missing
            // attribute, so presence must be tested explicitly; otherwise the
            // first attribute always wins and e.g. `src` is never read.
            if ($element->hasAttribute($attribute)) {
                $url = $element->getAttribute($attribute);
                break;
            }
        }

        if (null === $url || !self::isWebLink($url)) {
            return null;
        }

        return $this->harvest->url()->resolve($url);
    }

    /**
     * Lists the attribute names referenced between square brackets in the selector.
     *
     * 'a[href]' gives ['href'] and '[href],[src]' gives ['href', 'src'].
     *
     * @return string[] attribute names, in order of appearance, without duplicates
     */
    private function getSelectorAttributes(): array
    {
        // Capture only the name that follows each '[', ignoring tag names,
        // whitespace and attribute operators/values such as ^="http".
        if (!preg_match_all('/\[\s*([\w:.-]+)/', $this->selector, $matches)) {
            return [];
        }

        return array_values(array_unique($matches[1]));
    }

    /**
     * Tells whether the string looks like a web link: an optional http(s)
     * scheme and host followed by a path/query/fragment made of the
     * characters allowed by the pattern below.
     *
     * @return int|false raw preg_match() result: 1 on match, 0 otherwise, false on regex error
     */
    public static function isWebLink(string $url)
    {
        return preg_match('@^((?:(http:|https:)//([\w\d-]+\.)+[\w\d-]+){0,1}(/?[\w~,;\-\./?%&+#=]*))$@', $url);
    }
}
78