Passed
Push — dev ( 8e8e3b...6bb8f6 )
by Dispositif
03:16 queued 15s
created

ExternPage   A

Complexity

Total Complexity 18

Size/Duplication

Total Lines 151
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
eloc 40
dl 0
loc 151
rs 10
c 1
b 0
f 1
wmc 18

7 Methods

Rating   Name   Duplication   Size   Complexity  
A getPrettyDomainName() 0 13 5
A parseLdJson() 0 22 4
A getSubDomain() 0 6 2
A getUrl() 0 3 1
A parseMetaTags() 0 24 3
A getData() 0 6 1
A __construct() 0 8 2
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
11
namespace App\Domain;
12
13
use App\Application\Http\ExternHttpClient;
14
use App\Infrastructure\TagParser;
15
use Exception;
16
use Psr\Log\LoggerInterface;
17
18
/**
 * Represents the web page targeted by an external link (a page outside the wiki).
 *
 * Holds the page URL and its raw HTML, exposes the metadata embedded in the
 * markup (JSON-LD and <meta> tags) and offers helpers to display the domain.
 *
 * @package App\Domain
 */
class ExternPage
{
    /**
     * Fragments which, when found anywhere in the sub-domain, prevent
     * getPrettyDomainName() from shortening it to its last two labels.
     */
    private const UNSHORTENED_MARKERS = ['.co.uk', '.co.ma', 'site.google.'];

    /**
     * @var string URL of the page, accepted by ExternHttpClient::isWebURL()
     */
    private $url;

    /**
     * @var string raw HTML of the page
     */
    private $html;

    /**
     * @var LoggerInterface|null optional logger, used to report recoverable errors
     */
    private $log;

    /**
     * ExternPage constructor.
     *
     * @param string               $url  URL of the page
     * @param string               $html raw HTML source of the page
     * @param LoggerInterface|null $log  optional logger
     *
     * @throws Exception when ExternHttpClient::isWebURL() rejects $url
     */
    public function __construct(string $url, string $html, ?LoggerInterface $log = null)
    {
        if (!ExternHttpClient::isWebURL($url)) {
            throw new Exception('string is not an URL');
        }
        $this->url = $url;
        $this->html = $html;
        $this->log = $log;
    }

    /**
     * @return string the URL given to the constructor
     */
    public function getUrl(): string
    {
        return $this->url;
    }

    /**
     * Collect the metadata found in the page HTML.
     *
     * @return array ['JSON-LD' => array, 'meta' => array]; each entry is an
     *               empty array when nothing usable was found
     * @throws Exception
     */
    public function getData(): array
    {
        $ld = $this->parseLdJson($this->html);
        $meta = $this->parseMetaTags($this->html);

        return ['JSON-LD' => $ld, 'meta' => $meta];
    }

    /**
     * Extract LD-JSON metadata from <script type="application/ld+json">.
     *
     * Returns the first script block that decodes to an array. Empty blocks
     * and blocks that do not decode to an array are skipped, so that a later
     * valid block can still be used.
     *
     * @param string $html
     *
     * @return array decoded JSON-LD data, or an empty array when none is usable
     * @throws Exception
     */
    private function parseLdJson(string $html): array
    {
        $parser = new TagParser();
        $results = $parser->importHtml($html)->xpathResults(
            '//script[@type="application/ld+json"]'
        );

        foreach ($results as $result) {
            $json = trim($result);
            if ('' === $json) {
                continue;
            }

            $data = json_decode($json, true);
            if (is_array($data)) {
                return $data;
            }
            // Invalid JSON or a scalar value: try the next script block.
        }

        return [];
    }

    /**
     * Extract <meta> tags as a [name => content] map.
     *
     * The key is taken from the "name", "property" or "http-equiv" attribute.
     * When the same key appears several times, the last occurrence wins
     * (array_combine() semantics).
     *
     * todo move? /refac/delete?
     *
     * @param string $str HTML source
     *
     * @return array
     */
    private function parseMetaTags(string $str): array
    {
        $pattern = '
              ~<\s*meta\s
              # using lookahead to capture type to $1
                (?=[^>]*?
                \b(?:name|property|http-equiv)\s*=\s*
                (?|"\s*([^"]*?)\s*"|\'\s*([^\']*?)\s*\'|
                ([^"\'>]*?)(?=\s*/?\s*>|\s\w+\s*=))
              )
              # capture content to $2
              [^>]*?\bcontent\s*=\s*
                (?|"\s*([^"]*?)\s*"|\'\s*([^\']*?)\s*\'|
                ([^"\'>]*?)(?=\s*/?\s*>|\s\w+\s*=))
              [^>]*>
              ~ix';

        if (!preg_match_all($pattern, $str, $out)) {
            return [];
        }

        // array_combine() may return false on older PHP versions: normalize to [].
        $combine = array_combine($out[1], $out[2]);

        return $combine ?: [];
    }

    /**
     * Domain name in a display-friendly form.
     *
     * test.com => Test.com
     * bla.test.com => Test.com
     * test.co.uk => test.co.uk (national commercial subdomain, kept as is)
     * site.google.com => site.google.com ;)
     *
     * @return string
     */
    public function getPrettyDomainName(): string
    {
        $subDomain = $this->getSubDomain();

        foreach (self::UNSHORTENED_MARKERS as $marker) {
            // Strict comparison with false: a marker found at offset 0
            // (e.g. "site.google.com") is still a match.
            if (false !== strpos($subDomain, $marker)) {
                return $subDomain;
            }
        }

        // bla.test.com => Test.com
        if (preg_match('#\w+\.\w+$#', $subDomain, $matches)) {
            return ucfirst($matches[0]);
        }

        return $subDomain;
    }

    /**
     * Sub-domain of the page URL, as computed by ExternDomains::extractSubDomain().
     *
     * An extraction failure is reported to the logger (when one was provided)
     * and an empty string is returned, so the declared string return type is
     * always honoured.
     *
     * @return string the sub-domain, or '' when it cannot be extracted
     */
    public function getSubDomain(): string
    {
        try {
            return ExternDomains::extractSubDomain($this->url);
        } catch (Exception $e) {
            if (null !== $this->log) {
                $this->log->warning(
                    'Unable to extract the sub-domain: '.$e->getMessage(),
                    ['url' => $this->url]
                );
            }

            return '';
        }
    }
}
179