Completed
Push — master ( 70ef2d...0e5aaa )
by Alex
16s queued 10s
created

Explorer::extractFeeds()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 15
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 15
ccs 9
cts 9
cp 1
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 9
nc 3
nop 1
crap 3
1
<?php declare(strict_types=1);
2
/*
3
 * This file is part of the feed-io package.
4
 *
5
 * (c) Alexandre Debril <[email protected]>
6
 *
7
 * For the full copyright and license information, please view the LICENSE
8
 * file that was distributed with this source code.
9
 */
10
11
namespace FeedIo;
12
13
use FeedIo\Adapter\ClientInterface;
14
use Psr\Log\LoggerInterface;
15
16
/**
 * Discovers feed URLs advertised by a web page.
 *
 * The page is fetched through the injected client, parsed as HTML, and every
 * <link> element whose "type" attribute is one of VALID_TYPES contributes its
 * "href" to the result.
 */
class Explorer
{

    /**
     * @var \FeedIo\Adapter\ClientInterface
     */
    protected $client;

    /**
     * @var \Psr\Log\LoggerInterface
     */
    protected $logger;

    /**
     * Media types (lower-case) that identify a feed in a <link> element.
     */
    const VALID_TYPES = [
        'application/atom+xml',
        'application/rss+xml'
    ];

    /**
     * @param \FeedIo\Adapter\ClientInterface $client
     * @param \Psr\Log\LoggerInterface        $logger
     */
    public function __construct(ClientInterface $client, LoggerInterface $logger)
    {
        $this->client = $client;
        $this->logger = $logger;
    }

    /**
     * Discover feeds advertised by the <link> elements of the page at $url.
     *
     * libxml is switched to internal error handling while the page is parsed
     * and its previous state is restored afterwards, even if parsing throws.
     *
     * @param  string $url
     * @return array list of href values, in document order
     */
    public function discover(string $url) : array
    {
        $this->logger->info("discover feeds from {$url}");
        $stream = $this->client->getResponse($url, new \DateTime);

        $internalErrors = libxml_use_internal_errors(true);

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0 and
        // raises E_DEPRECATED there, so it is only toggled on older runtimes.
        $toggleEntityLoader = \PHP_VERSION_ID < 80000;
        $entityLoaderDisabled = $toggleEntityLoader ? libxml_disable_entity_loader(true) : null;

        try {
            // The cast turns a null body into '' (handled by extractFeeds).
            // NOTE(review): assumes getBody() yields a string, null or a
            // stringable object - confirm against the response interface.
            return $this->extractFeeds((string) $stream->getBody());
        } finally {
            if (!$internalErrors) {
                // Nobody was collecting libxml errors before this call, so
                // drop the ones buffered while parsing instead of keeping them.
                libxml_clear_errors();
            }
            libxml_use_internal_errors($internalErrors);
            if ($toggleEntityLoader) {
                libxml_disable_entity_loader($entityLoaderDisabled);
            }
        }
    }

    /**
     * Extract feeds Urls from HTML stream
     *
     * @param  string $html
     * @return array href values of the feed <link> elements; empty when $html
     *               is blank or cannot be loaded
     */
    protected function extractFeeds(string $html) : array
    {
        // DOMDocument::loadHTML() rejects an empty source (warning on PHP 7,
        // ValueError on PHP 8); a blank document has no feeds anyway.
        if ('' === trim($html)) {
            return [];
        }

        $dom = new \DOMDocument();
        if (false === $dom->loadHTML($html)) {
            return [];
        }

        $feeds = [];
        foreach ($dom->getElementsByTagName('link') as $link) {
            if (!$this->isFeedLink($link)) {
                continue;
            }

            $href = $link->getAttribute('href');
            // A feed link without a target gives the caller nothing to fetch.
            if ('' !== $href) {
                $feeds[] = $href;
            }
        }

        return $feeds;
    }

    /**
     * Tells if the given Element contains a valid Feed Url
     *
     * The "type" attribute is trimmed and lower-cased before being compared
     * with VALID_TYPES, so "Application/RSS+XML" is accepted too.
     *
     * @param  \DOMElement $element
     * @return bool
     */
    protected function isFeedLink(\DOMElement $element) : bool
    {
        if (!$element->hasAttribute('type')) {
            return false;
        }

        $type = strtolower(trim($element->getAttribute('type')));

        return in_array($type, self::VALID_TYPES, true);
    }
}
97