Passed
Push — master ( 730218...5f362f )
by Matthias
10:39 queued 08:10
created

Consumer::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 4
cts 4
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 3
nc 1
nop 2
crap 1
1
<?php
2
3
namespace Fusonic\OpenGraph;
4
5
use DOMElement;
6
use Fusonic\Linq\Linq;
7
use Fusonic\OpenGraph\Objects\ObjectBase;
8
use Fusonic\OpenGraph\Objects\Website;
9
use LogicException;
10
use Psr\Http\Client\ClientExceptionInterface;
11
use Psr\Http\Client\ClientInterface;
12
use Psr\Http\Message\RequestFactoryInterface;
13
use Symfony\Component\DomCrawler\Crawler;
14
15
/**
16
 * Consumer that extracts Open Graph data from either a URL or a HTML string.
17
 */
18
class Consumer
19
{
20
    /**
     * PSR-18 HTTP client that loadUrl() sends its request with.
     * Null when no client was passed to the constructor.
     */
    private ?ClientInterface $client;
0 ignored issues
show
Bug introduced by
This code did not parse for me. Apparently, there is an error somewhere around this line:

Syntax error, unexpected '?', expecting T_FUNCTION or T_CONST
Loading history...
21
    /**
     * PSR-17 request factory that loadUrl() builds its GET request with.
     * Null when no factory was passed to the constructor.
     */
    private ?RequestFactoryInterface $requestFactory;
22
23
    /**
     * When enabled, values the page does not provide as Open Graph data are filled in from
     * regular markup: the canonical link for the URL, the title element for the title and a
     * description meta tag for the description. If the URL is still unset afterwards,
     * loadHtml() assigns the fallback URL it was given.
     */
27
    public bool $useFallbackMode = false;
28
29
    /**
     * When enabled, crawler will throw exceptions for some crawling errors like unexpected
     * Open Graph elements.
     *
     * The flag is handed to assignProperties() of the object that is being populated.
     */
33
    public bool $debug = false;
34
35
    /**
     * Creates a consumer, optionally wired with the HTTP collaborators that loadUrl() relies on.
     *
     * Both arguments can be left out when only loadHtml() is going to be called.
     *
     * @param ClientInterface|null         $client         PSR-18 client used to send the request.
     * @param RequestFactoryInterface|null $requestFactory PSR-17 factory used to build the request.
     */
    public function __construct(?ClientInterface $client = null, ?RequestFactoryInterface $requestFactory = null)
    {
        $this->requestFactory = $requestFactory;
        $this->client = $client;
    }
44
45
    /**
     * Fetches HTML content from the given URL and then crawls it for Open Graph data.
     *
     * @param string $url URL to be crawled.
     *
     * @return ObjectBase
     *
     * @throws LogicException           If the consumer was created without a client or a request factory.
     * @throws ClientExceptionInterface If the client fails to send the request.
     */
    public function loadUrl(string $url): ObjectBase
    {
        // Both collaborators are optional in the constructor, so each of them has to be checked:
        // a missing request factory would otherwise end in a fatal "member function on null" error.
        if ($this->client === null || $this->requestFactory === null) {
            throw new LogicException(
                "To use loadUrl() you must provide \$client and \$requestFactory when instantiating the consumer."
            );
        }

        $request = $this->requestFactory->createRequest("GET", $url);
        $response = $this->client->sendRequest($request);

        // The requested URL doubles as the fallback URL for pages that do not declare one.
        return $this->loadHtml($response->getBody()->getContents(), $url);
    }
67
68
    /**
     * Crawls the given HTML string for OpenGraph data.
     *
     * @param string      $html        HTML string, usually whole content of crawled web resource.
     * @param string|null $fallbackUrl URL assigned to the result when fallback mode is enabled and
     *                                 no URL was found in the crawled data.
     *
     * @return ObjectBase
     */
    public function loadHtml(string $html, ?string $fallbackUrl = null): ObjectBase
    {
        // Extract all data that can be found
        $page = $this->extractOpenGraphData($html);

        // Use the user's URL as fallback
        if ($this->useFallbackMode && $page->url === null) {
            $page->url = $fallbackUrl;
        }

        return $page;
    }
89
90 16
    /**
     * Parses the HTML and builds an Open Graph object from its "og:" meta tags.
     *
     * Meta tags are matched on both the "name" and the "property" attribute. In fallback mode a
     * missing URL, title or description is taken from the canonical link, the title element and
     * the description meta tag respectively.
     *
     * @param string $content HTML markup to crawl.
     *
     * @return ObjectBase
     */
    private function extractOpenGraphData(string $content): ObjectBase
    {
        $crawler = new Crawler();
        $crawler->addHtmlContent($content, 'UTF-8');

        // Collect every meta tag whose "name" or "property" attribute starts with "og:".
        // All "name" matches come first, followed by all "property" matches.
        $properties = [];
        foreach (['name', 'property'] as $attribute) {
            /** @var DOMElement $tag */
            foreach ($crawler->filter("meta[{$attribute}^='og:']") as $tag) {
                $name = strtolower(trim($tag->getAttribute($attribute)));
                $value = trim($tag->getAttribute("content"));
                $properties[] = new Property($name, $value);
            }
        }

        // Website is the only object type created here, so it is used no matter which
        // "og:type" the page declares.
        $object = new Website();

        // Assign all properties to the object
        $object->assignProperties($properties, $this->debug);

        if (!$this->useFallbackMode) {
            return $object;
        }

        // Fallback for url
        if (!$object->url) {
            $urlElement = $crawler->filter("link[rel='canonical']")->first();
            if ($urlElement->count() > 0) {
                // attr() may yield null when the attribute is absent; the cast keeps null away from trim().
                $object->url = trim((string) $urlElement->attr("href"));
            }
        }

        // Fallback for title
        if (!$object->title) {
            $titleElement = $crawler->filter("title")->first();
            if ($titleElement->count() > 0) {
                $object->title = trim($titleElement->text());
            }
        }

        // Fallback for description. The "property" form is tried first because it is what this
        // method has always matched; the "name" form is the one defined for HTML meta descriptions.
        if (!$object->description) {
            foreach (['property', 'name'] as $attribute) {
                $descriptionElement = $crawler->filter("meta[{$attribute}='description']")->first();
                if ($descriptionElement->count() > 0) {
                    $object->description = trim((string) $descriptionElement->attr("content"));
                    break;
                }
            }
        }

        return $object;
    }
156
}
157