Completed
Push — master ( 505018...16cf89 )
by D.
13s
created

RegexBasedLinkParser::parse()   A

Complexity

Conditions 4
Paths 2

Size

Total Lines 14
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 14
rs 9.2
cc 4
eloc 9
nc 2
nop 0
1
<?php
2
/**
3
 * This file is part of sitemap-common.
4
 *
5
 * (c) 2016 Daniele Moraschi
6
 *
7
 * For the full copyright and license information, please view the LICENSE
8
 * file that was distributed with this source code.
9
 */
10
11
namespace SiteMap\Parse;
12
13
use SiteMap\Http\Url;
14
use SiteMap\Http\UrlUtil;
15
16
/**
 * Collects the href targets of anchor tags found in a web page by means of a
 * regular expression.
 *
 * Typical use: hand the page URL and its markup to setContent(), then call
 * findLinks() (or parse()) to obtain every matched href after it has been
 * passed through UrlUtil::getAbsoluteLink() together with the page URL.
 */
final class RegexBasedLinkParser implements LinkParser, Parser
{
    /**
     * Pattern body (no delimiters, no modifiers) matching an <a ...>...</a>
     * element that carries an href attribute.
     *
     * Capture groups: 1 = optional opening quote, 2 = href value,
     * 3 = content between the opening and closing tag.
     *
     * NOTE(review): inside a character class PCRE does not interpret "\1" as
     * a back-reference, so the class in group 2 does not actually exclude the
     * opening quote; the end of the value is found by the back-reference that
     * follows the group. Left unchanged to preserve current matching.
     *
     * @var string  REGEX
     */
    const REGEX = "<a\s[^>]*href=([\"\']??)([^\\1 >]*?)\\1[^>]*>(.*)<\/a>";

    /**
     * URL of the page being parsed; null until setContent() is called.
     *
     * @var Url|null
     */
    private $url;

    /**
     * Markup of the page being parsed. Defaults to an empty string so that
     * parse() never hands null to preg_match_all().
     *
     * @var string
     */
    private $webPageContent = '';

    /**
     * Links extracted from the current content. Defaults to an empty array so
     * that parse() always returns an array, even before setContent().
     *
     * @var array $pages
     */
    private $pages = array();

    /**
     * Whether the current content has already been scanned. Tracked
     * separately from $pages so that a page without links is not scanned
     * again on every call.
     *
     * @var bool
     */
    private $parsed = false;

    /**
     * Sets the page to parse and discards any previously extracted links.
     *
     * @param Url $url        URL of the page, used to resolve the hrefs
     * @param mixed $content  page markup; it is cast to string
     * @return $this
     */
    public function setContent(Url $url, $content)
    {
        $this->pages = array();
        $this->parsed = false;
        $this->url = $url;
        $this->webPageContent = (string) $content;
        return $this;
    }

    /**
     * Alias of parse().
     *
     * @return array
     */
    public function findLinks()
    {
        return $this->parse();
    }

    /**
     * Extracts the links of the current content.
     *
     * The content is scanned once per setContent() call; later calls return
     * the stored result. Without a prior setContent() an empty array is
     * returned.
     *
     * @return array  one trimmed link per matched anchor, in document order
     */
    public function parse()
    {
        if ($this->parsed) {
            return $this->pages;
        }

        $links = array();

        // Modifiers: s = "." also matches newlines, i = case-insensitive,
        // U = quantifier greediness is inverted.
        $found = preg_match_all(
            '/' . self::REGEX . '/siU',
            $this->webPageContent,
            $matches,
            PREG_SET_ORDER
        );

        // preg_match_all() yields 0 when nothing matched and false on error;
        // both leave the result empty.
        if ($found) {
            foreach ($matches as $match) {
                $links[] = trim(UrlUtil::getAbsoluteLink($this->url, $match[2]));
            }
        }

        // State is updated only after every link has been resolved, so an
        // exception thrown while resolving does not leave a partial result.
        $this->pages = $links;
        $this->parsed = true;

        return $this->pages;
    }
}