ZendBasedParser::getUserAgent()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 3
rs 10
1
<?php
2
declare(strict_types=1);
3
4
namespace kosuha606\HtmlUniParser;
5
6
use kosuha606\HtmlUniParser\exceptions\ParserInvalidConfigException;
7
use PhantomInstaller\PhantomBinary;
0 ignored issues
show
Bug introduced by
The type PhantomInstaller\PhantomBinary was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
8
use Zend\Dom\Query;
9
10
/**
11
 * Parser helper based on zend query
12
 * @package kosuha606\HtmlUniParser
13
 */
14
class ZendBasedParser extends BaseObject
15
{
16
    /**
17
     * @var string
18
     */
19
    private $url;
20
21
    /**
22
     * @var string|null
23
     */
24
    private $lastUrl;
25
26
    /**
27
     * @var string|null
28
     */
29
    private $htmlBuffer;
30
31
    /**
32
     * @var int
33
     */
34
    private $sleepAfterRequest = 0;
35
36
    /**
37
     * @var string
38
     */
39
    private $userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
40
41
    /**
     * Stores the address of the page that the DOM loaders should fetch next.
     *
     * @param string $url Value later handed back by getUrl(), which declares a string return type
     */
    public function setUrl($url)
    {
        $this->url = $url;
    }
48
49
    /**
     * Sets the pause applied after a page has been downloaded.
     *
     * The stored value is passed to sleep() by the DOM loaders, so it is
     * expressed in seconds.
     *
     * @param int $sleepAfterRequest Number of seconds to wait after each download
     */
    public function setSleepAfterRequest($sleepAfterRequest)
    {
        $this->sleepAfterRequest = $sleepAfterRequest;
    }
56
57
    /**
     * Runs a selector against the document of the current URL.
     *
     * The document is obtained from dom() with its default arguments and the
     * selector is forwarded unchanged to Query::execute().
     *
     * @param mixed $search Selector forwarded to Query::execute()
     * @return mixed Whatever Query::execute() returns
     * @throws ParserInvalidConfigException
     */
    public function query($search)
    {
        $document = $this->dom();

        return $document->execute($search);
    }
66
67
    /**
     * Builds a DOM query from the page fetched with file_get_contents().
     *
     * The page is downloaded only when the current URL differs from the last
     * loaded one; the response is prefixed with a UTF-8 meta tag, stored in the
     * HTML buffer, and the configured pause is applied. Otherwise the buffered
     * HTML is reused as is.
     *
     * @return Query
     */
    public function filegetcontentsDom(): Query
    {
        if ($this->getLastUrl() !== $this->getUrl()) {
            $address = $this->getUrl();
            $markup = '<meta charset="UTF-8" />' . file_get_contents($address);

            $this->setHtmlBuffer($markup);
            $this->setLastUrl($address);

            sleep($this->sleepAfterRequest);
        }

        return new Query($this->htmlBuffer);
    }
82
83
    /**
     * Builds a DOM query from the page as produced by the PhantomJS script.
     *
     * When the current URL differs from the last loaded one, the bundled
     * nodejs/loadspeed.js script is run through the PhantomJS binary. Its
     * output, prefixed with a UTF-8 meta tag, replaces the HTML buffer and the
     * configured pause is applied. Otherwise the buffered HTML is reused.
     *
     * The script path and the URL are passed through escapeshellarg() so that
     * characters with a meaning to the shell (for example "&" in a query
     * string, spaces or quotes) reach the script as part of one argument
     * instead of being interpreted as shell syntax.
     *
     * @return Query
     */
    public function phantomjsDom(): Query
    {
        if ($this->getLastUrl() !== $this->getUrl()) {
            $bin = PhantomBinary::BIN;
            $command = $bin
                . ' ' . escapeshellarg(__DIR__ . '/nodejs/loadspeed.js')
                . ' ' . escapeshellarg($this->getUrl());

            // shell_exec() gives null when the command fails or prints nothing;
            // concatenation turns that into an empty document body.
            $result = shell_exec($command);

            $this
                ->setHtmlBuffer('<meta charset="UTF-8" />' . $result)
                ->setLastUrl($this->getUrl())
            ;
            sleep($this->sleepAfterRequest);
        }

        return new Query($this->htmlBuffer);
    }
101
102
    /**
     * Builds a DOM query from the page downloaded with the wget command.
     *
     * When the current URL differs from the last loaded one, wget is run
     * quietly with certificate checks disabled and writes the page to stdout.
     * That output, prefixed with a UTF-8 meta tag, replaces the HTML buffer
     * and the configured pause is applied. Otherwise the buffered HTML is
     * reused.
     *
     * The URL is passed through escapeshellarg() so that characters with a
     * meaning to the shell (for example "&" in a query string, ";" or quotes)
     * stay part of the URL instead of splitting or extending the command.
     *
     * @return Query
     */
    public function wgetDom(): Query
    {
        if ($this->getLastUrl() !== $this->getUrl()) {
            $command = 'wget -qO- ' . escapeshellarg($this->getUrl()) . ' --no-check-certificate';

            // shell_exec() gives null when the command fails or prints nothing;
            // concatenation turns that into an empty document body.
            $result = shell_exec($command);

            $this
                ->setHtmlBuffer('<meta charset="UTF-8" />' . $result)
                ->setLastUrl($this->getUrl())
            ;
            sleep($this->sleepAfterRequest);
        }

        return new Query($this->htmlBuffer);
    }
119
120
    /**
     * Returns a DOM query object for the current URL.
     *
     * With the default "curl" type the page is downloaded through the curl
     * extension, sending the configured user agent and following redirects,
     * but only when the URL differs from the last one loaded. The response is
     * prefixed with a UTF-8 meta tag and buffered, then the configured pause
     * is applied. An empty buffer is replaced by a minimal HTML skeleton.
     *
     * Any other type is dispatched to the method named "<type>Dom" (for
     * example "wget" calls wgetDom()); $encoding is not forwarded in that case.
     *
     * The response is captured with CURLOPT_RETURNTRANSFER rather than output
     * buffering, so the script-wide implicit-flush setting is left untouched.
     *
     * @param string $encoding Encoding handed to the Query constructor (curl type only)
     * @param string $type     Loader to use: "curl" or the prefix of a "...Dom" method
     * @return Query
     * @throws ParserInvalidConfigException When the curl extension is not loaded
     *                                      or a curl handle cannot be created
     */
    public function dom($encoding = 'UTF-8', $type = 'curl'): Query
    {
        if ($type !== 'curl') {
            $method = $type . 'Dom';

            return $this->$method();
        }

        if (!\extension_loaded('curl')) {
            throw new ParserInvalidConfigException('The curl extension in not loaded in system');
        }

        if ($this->getLastUrl() !== $this->getUrl()) {
            $ch = \curl_init($this->getUrl());
            if ($ch === false) {
                // Without this guard the false value would be passed on to
                // curl_setopt()/curl_exec()/curl_close().
                throw new ParserInvalidConfigException(
                    'Unable to initialise a curl session for ' . $this->getUrl()
                );
            }

            \curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);
            \curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
            \curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            $response = \curl_exec($ch);
            \curl_close($ch);

            // A failed transfer yields false; keep only the meta tag in that case.
            $htmlBuffer = '<meta charset="UTF-8" />';
            if (\is_string($response)) {
                $htmlBuffer .= $response;
            }

            $this->setHtmlBuffer($htmlBuffer);
            $this->setLastUrl($this->getUrl());
            \sleep($this->getSleepAfterRequest());
        }

        $html = $this->getHtmlBuffer();
        if (!$html) {
            $html = '<html><head></head><body></body></html>';
        }

        return new Query($html, $encoding);
    }
156
157
    /**
     * Returns the buffered HTML, or an empty string when the buffer is unset
     * or otherwise falsy.
     *
     * @return string
     */
    public function getRawHtml(): string
    {
        if (!$this->htmlBuffer) {
            return '';
        }

        return $this->htmlBuffer;
    }
164
165
    /**
     * Replaces the HTML buffer with the given markup.
     *
     * Only the buffer is written; the remembered last URL is left unchanged.
     *
     * @param string $html Markup to store in the buffer
     * @return ZendBasedParser The same instance, for chaining
     */
    public function setRawHtml($html): self
    {
        $this->htmlBuffer = $html;

        return $this;
    }
174
175
    /**
     * Returns the URL recorded by the most recent load, or an empty string
     * when none has been recorded (or the recorded value is falsy).
     *
     * @return string
     */
    public function getLastUrl(): string
    {
        $previous = $this->lastUrl;

        return $previous ? $previous : '';
    }
182
183
    /**
     * Returns the URL stored by setUrl().
     *
     * The property has no default, so this is expected to be called only
     * after setUrl(); a null value does not satisfy the string return type.
     *
     * @return string
     */
    public function getUrl(): string
    {
        return $this->url;
    }
190
191
    /**
     * Returns the buffered HTML.
     *
     * The buffer property has no default and is therefore null until a loader
     * or setter fills it. Null is mapped to an empty string so the declared
     * string return type always holds and callers can apply their own
     * empty-document fallback.
     *
     * @return string Buffered markup, or '' when nothing has been buffered yet
     */
    public function getHtmlBuffer(): string
    {
        return $this->htmlBuffer ?? '';
    }
198
199
    /**
     * Returns the pause applied after a page download; the property
     * defaults to 0.
     *
     * @return int Value that dom() passes to sleep()
     */
    public function getSleepAfterRequest(): int
    {
        return $this->sleepAfterRequest;
    }
206
207
    /**
     * Returns the User-Agent string that dom() sends with curl requests.
     *
     * @return string
     */
    public function getUserAgent(): string
    {
        return $this->userAgent;
    }
214
215
    /**
     * Records the URL whose content is currently held in the HTML buffer.
     *
     * The loaders compare this value with the current URL to decide whether
     * a new download is needed.
     *
     * @param string $lastUrl URL to remember
     * @return ZendBasedParser The same instance, for chaining
     */
    public function setLastUrl($lastUrl): self
    {
        $this->lastUrl = $lastUrl;

        return $this;
    }
224
225
    /**
     * Stores markup in the HTML buffer that the DOM loaders read from.
     *
     * @param string $htmlBuffer Markup to store
     * @return ZendBasedParser The same instance, for chaining
     */
    public function setHtmlBuffer($htmlBuffer): self
    {
        $this->htmlBuffer = $htmlBuffer;

        return $this;
    }
234
235
    /**
     * Overrides the User-Agent string sent by dom() with curl requests.
     *
     * @param string $userAgent User-Agent header value
     * @return ZendBasedParser The same instance, for chaining
     */
    public function setUserAgent($userAgent): self
    {
        $this->userAgent = $userAgent;

        return $this;
    }
244
}