Completed
Pull Request — master (#257)
by
unknown
01:57
created

Explorer::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 5
ccs 0
cts 4
cp 0
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 2
crap 2
1
<?php declare(strict_types=1);
2
/*
3
 * This file is part of the feed-io package.
4
 *
5
 * (c) Alexandre Debril <[email protected]>
6
 *
7
 * For the full copyright and license information, please view the LICENSE
8
 * file that was distributed with this source code.
9
 */
10
11
namespace FeedIo;
12
13
use FeedIo\Adapter\ClientInterface;
14
use Psr\Log\LoggerInterface;
15
16
/**
 * Discovers the feeds advertised by a web page.
 *
 * The page is fetched through the injected client, parsed as HTML, and every
 * <link> element whose "type" attribute is one of VALID_TYPES contributes its
 * "href" attribute to the result.
 */
class Explorer
{

    /**
     * @var \FeedIo\Adapter\ClientInterface
     */
    protected $client;

    /**
     * @var \Psr\Log\LoggerInterface
     */
    protected $logger;

    /**
     * MIME types (lowercase) that identify a <link> element as a feed.
     */
    const VALID_TYPES = [
        'application/atom+xml',
        'application/rss+xml'
    ];

    /**
     * @param \FeedIo\Adapter\ClientInterface $client used to fetch the page
     * @param \Psr\Log\LoggerInterface        $logger receives an info entry per discovery
     */
    public function __construct(ClientInterface $client, LoggerInterface $logger)
    {
        $this->client = $client;
        $this->logger = $logger;
    }

    /**
     * Discover feeds from the webpage's headers
     *
     * libxml's global error handling (and, before PHP 8, the entity loader
     * flag) is switched for the duration of the parsing and always restored,
     * even when the parsing throws.
     *
     * @param  string $url address of the page to inspect
     * @return array  href values of the feed links, in document order
     */
    public function discover(string $url) : array
    {
        $this->logger->info("discover feeds from {$url}");
        $stream = $this->client->getResponse($url, new \DateTime);

        $internalErrors = libxml_use_internal_errors(true);

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, which
        // requires libxml >= 2.9.0 where external entity loading is disabled
        // by default. Only toggle the flag on older runtimes.
        $toggleEntityLoader = \PHP_VERSION_ID < 80000;
        $entityLoaderDisabled = $toggleEntityLoader ? libxml_disable_entity_loader(true) : null;

        try {
            return $this->extractFeeds($stream->getBody());
        } finally {
            if (!$internalErrors) {
                // Internal errors were enabled by this method only: drop what
                // the HTML parser buffered so it does not pile up across calls.
                // When the caller had already enabled them, its buffer is left alone.
                libxml_clear_errors();
            }
            libxml_use_internal_errors($internalErrors);
            if ($toggleEntityLoader) {
                libxml_disable_entity_loader($entityLoaderDisabled);
            }
        }
    }

    /**
     * Extract feeds Urls from HTML stream
     *
     * @param  string $html markup of the page; an empty string yields no feed
     * @return array  href attribute of every <link> accepted by isFeedLink()
     */
    protected function extractFeeds(string $html) : array
    {
        // DOMDocument::loadHTML() rejects an empty source (warning or
        // ValueError depending on the PHP version), so bail out early.
        if ($html === '') {
            return [];
        }

        $dom = new \DOMDocument();
        $dom->loadHTML($html);

        $feeds = [];
        foreach ($dom->getElementsByTagName('link') as $link) {
            if ($this->isFeedLink($link)) {
                $feeds[] = $link->getAttribute('href');
            }
        }

        return $feeds;
    }

    /**
     * Tells if the given Element contains a valid Feed Url
     *
     * The "type" attribute is trimmed and lowercased before being compared,
     * because MIME types are case-insensitive.
     *
     * @param  \DOMElement $element
     * @return bool true when the element's type is one of VALID_TYPES
     */
    protected function isFeedLink(\DOMElement $element) : bool
    {
        if (!$element->hasAttribute('type')) {
            return false;
        }

        $type = strtolower(trim($element->getAttribute('type')));

        return in_array($type, self::VALID_TYPES, true);
    }
}
97