Passed
Push — master ( 54e7bb...9abeb8 )
by Dev
14:25
created

Link::normalizeUrl()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 9
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 4
c 0
b 0
f 0
dl 0
loc 9
ccs 1
cts 1
cp 1
rs 10
cc 2
nc 2
nop 1
crap 2
1
<?php
2
3
/**
4
 * Entity.
5
 */
6
7
namespace PiedWeb\UrlHarvester;
8
9
use Symfony\Component\DomCrawler\Crawler as DomCrawler;
10
11
/**
 * A link harvested from a page (or produced by a redirection): its normalized
 * URL, the DOM element it came from (if any), its anchor text and its type.
 */
class Link
{
    /** @var string Target URL after normalizeUrl(). */
    private $url;

    /** @var string|null Anchor text; stays null when the link was built without a DOM element. */
    private $anchor;

    /** @var \DOMElement|null Element the link was extracted from; null e.g. for redirections. */
    private $element;

    /**
     * @var int|null One of the LINK_* constants, or null when no type was given
     *               and none could be derived from the element
     */
    private $type;

    public const LINK_A = 1;
    public const LINK_SRC = 4;
    public const LINK_3XX = 2;
    public const LINK_301 = 3;

    /**
     * @param string           $url     Raw URL; it is trimmed and normalized
     * @param \DOMElement|null $element Source element, used for the anchor text and rel attribute
     * @param int|null         $type    One of the LINK_* constants; when null it is derived from $element
     */
    public function __construct(string $url, ?\DOMElement $element = null, ?int $type = null)
    {
        $this->url = self::normalizeUrl($url);
        if (null !== $element) {
            $this->setAnchor($element);
        }
        $this->element = $element;

        $this->type = $type ?? (null !== $element ? $this->getTypeFromElement($element) : null);
    }

    /**
     * Build a link that has no DOM element behind it (a redirection target).
     *
     * @param string   $url       Redirection target
     * @param int|null $redirType One of the LINK_* constants; defaults to LINK_3XX
     */
    public static function createRedirection(string $url, ?int $redirType = null): self
    {
        return new self($url, null, $redirType ?? self::LINK_3XX);
    }

    /**
     * Build a LINK_A link from an <a> element.
     *
     * @param \DOMElement $element Must be an `a` element with a non-empty href
     *
     * @return self
     *
     * @throws \Exception When the element is not an `a` tag or has no href
     */
    public static function create(\DOMElement $element)
    {
        if (!self::isAnchorWithHref($element)) {
            throw new \Exception('no link found in '.$element->nodeValue);
        }

        return new self($element->getAttribute('href'), $element, self::LINK_A);
    }

    /**
     * Derive the link type from the element.
     *
     * @return int|null LINK_A for an `a` element carrying an href, null otherwise
     */
    protected function getTypeFromElement(\DOMElement $element)
    {
        return self::isAnchorWithHref($element) ? self::LINK_A : null;
    }

    /**
     * Tell whether the element is an `a` tag with a non-empty href.
     *
     * The href is compared against the empty string (what getAttribute()
     * returns for a missing attribute) rather than tested for truthiness,
     * so that the valid relative link href="0" is not rejected.
     */
    private static function isAnchorWithHref(\DOMElement $element): bool
    {
        return 'a' === $element->tagName && '' !== $element->getAttribute('href');
    }

    /**
     * Trim the URL and append a trailing slash when it consists only of a
     * scheme and a host (eg: `http://example.com` becomes `http://example.com/`).
     *
     * @return string
     */
    protected static function normalizeUrl(string $url)
    {
        $url = trim($url);

        // The pattern is anchored on both ends and the host stops at `?` and `#`:
        // a URL carrying a query or a fragment (or embedding another
        // `scheme://host` inside its query) must not get a slash glued to its end.
        // An empty string keeps being turned into `/`.
        if ('' === $url || 1 === preg_match('@^[^:/?#]+://?[^/?#]+\z@', $url)) {
            $url .= '/';
        }

        return $url;
    }

    /**
     * @return int|null One of the LINK_* constants, or null when unknown
     */
    protected function getType(): ?int
    {
        return $this->type;
    }

    /**
     * Compute the anchor from the element: its text content, or, when that is
     * empty, the `alt` attribute of the first descendant-or-self node having one.
     *
     * @return $this
     */
    protected function setAnchor(\DOMElement $element)
    {
        // Get classic text anchor
        $this->anchor = $element->textContent;

        // If get nothing, then maybe we can get an alternative text (eg: img).
        // Strict comparison: a text anchor of "0" is a real anchor, not a missing one.
        if ('' === $this->anchor) {
            $alt = (new DomCrawler($element))->filter('*[alt]');
            if ($alt->count() > 0) {
                $this->anchor = $alt->eq(0)->attr('alt') ?? '';
            }
        }

        // Keep at most 99 bytes of the cleaned anchor (totally subjective).
        // NOTE(review): substr() is byte-wise, so a multibyte character may be cut in half.
        $this->anchor = substr(Helper::clean($this->anchor), 0, 99);

        return $this;
    }

    /**
     * @return string The normalized URL, fragment included
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * @return string The URL without its fragment (everything from the first `#`)
     */
    public function getPageUrl()
    {
        return self::stripFragment($this->url);
    }

    /**
     * Return the URL with the leading $base removed when the URL starts with it.
     *
     * @param string $base Prefix to strip (eg: `https://example.com`)
     * @param bool   $page When true, the fragment is removed as well
     *
     * @return string
     */
    public function getPageUrlWithoutBase(string $base, bool $page = false)
    {
        $url = $this->url;

        // An empty base strips nothing; skipping it avoids strpos()'s
        // "Empty needle" warning on PHP 7.
        if ('' !== $base && 0 === strpos($url, $base)) {
            $url = substr($url, strlen($base));
        }

        return true === $page ? self::stripFragment($url) : $url;
    }

    /**
     * Drop everything from the first `#` onwards.
     */
    private static function stripFragment(string $url): string
    {
        return explode('#', $url, 2)[0];
    }

    /**
     * @return string|null The anchor text, or null when the link has no element
     */
    public function getAnchor()
    {
        return $this->anchor;
    }

    /**
     * @return \DOMElement|null The source element, or null when the link has none
     */
    public function getElement()
    {
        return $this->element;
    }

    /**
     * Tell whether the link may be followed, ie: it has no element or its
     * element's `rel` attribute does not contain `nofollow`.
     *
     * @return bool
     */
    public function mayFollow()
    {
        if (null === $this->element) {
            return true;
        }

        // getAttribute() returns a string ('' when the attribute is missing),
        // so no null check is needed on its result.
        return false === strpos($this->element->getAttribute('rel'), 'nofollow');
    }

    /**
     * @return string|null The element's `rel` attribute ('' when it has none),
     *                     or null when the link has no element
     */
    public function getRel(): ?string
    {
        // Links built without an element (eg: via createRedirection()) used to
        // trigger a call to getAttribute() on null here, as reported by
        // static analysis.
        if (null === $this->element) {
            return null;
        }

        return $this->element->getAttribute('rel');
    }
}
146