1 | <?php |
||||
2 | declare(strict_types=1); |
||||
3 | |||||
4 | namespace kosuha606\HtmlUniParser; |
||||
5 | |||||
6 | use kosuha606\HtmlUniParser\exceptions\ParserInvalidConfigException; |
||||
7 | use PhantomInstaller\PhantomBinary; |
||||
0 ignored issues
–
show
|
|||||
8 | use Zend\Dom\Query; |
||||
9 | |||||
/**
 * HTML download-and-query helper built on top of Zend\Dom\Query.
 *
 * The parser downloads the document behind the configured URL with one of
 * several transports (curl, file_get_contents, wget or PhantomJS), stores the
 * markup in an internal buffer and exposes it as a Query object. The buffer is
 * refreshed only when the configured URL differs from the URL of the previous
 * download, so repeated queries against the same URL reuse the same markup.
 *
 * NOTE(review): static analysis reports Zend\Dom\Query as deprecated; a
 * migration to its successor should be planned separately.
 *
 * @package kosuha606\HtmlUniParser
 */
class ZendBasedParser extends BaseObject
{
    /**
     * @var string|null URL of the document to download
     */
    private $url;

    /**
     * @var string|null URL the current buffer was downloaded from
     */
    private $lastUrl;

    /**
     * @var string|null Markup of the last downloaded (or manually assigned) document
     */
    private $htmlBuffer;

    /**
     * @var int Seconds to pause after every download
     */
    private $sleepAfterRequest = 0;

    /**
     * @var string User-Agent header sent by the curl transport
     */
    private $userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";

    /**
     * Sets the URL of the document to work with.
     *
     * @param string $url
     * @return $this
     */
    public function setUrl($url)
    {
        $this->url = $url;

        return $this;
    }

    /**
     * Sets the pause, in seconds, that follows every download.
     *
     * The value is normalised to a non-negative integer because sleep()
     * rejects strings under strict types and negative values on PHP 8.
     *
     * @param int $sleepAfterRequest
     * @return $this
     */
    public function setSleepAfterRequest($sleepAfterRequest)
    {
        $this->sleepAfterRequest = max(0, (int) $sleepAfterRequest);

        return $this;
    }

    /**
     * Runs a query against the document, downloading it with curl if needed.
     *
     * @param string $search Query expression handed to Query::execute()
     * @return mixed Whatever Query::execute() returns
     * @throws ParserInvalidConfigException
     */
    public function query($search)
    {
        return $this->dom()->execute($search);
    }

    /**
     * Builds the document using file_get_contents() as transport.
     *
     * @return Query
     */
    public function filegetcontentsDom(): Query
    {
        if ($this->needsDownload()) {
            $this->storeDownload(file_get_contents($this->getUrl()));
        }

        return new Query($this->htmlBuffer);
    }

    /**
     * Builds the document using the bundled PhantomJS script as transport.
     *
     * @return Query
     */
    public function phantomjsDom(): Query
    {
        if ($this->needsDownload()) {
            // Every argument is escaped: the URL is external input, and the
            // binary/script paths may legitimately contain spaces.
            $command = implode(' ', [
                escapeshellarg(PhantomBinary::BIN),
                escapeshellarg(__DIR__ . '/nodejs/loadspeed.js'),
                escapeshellarg($this->getUrl()),
            ]);
            $this->storeDownload(shell_exec($command));
        }

        return new Query($this->htmlBuffer);
    }

    /**
     * Builds the document using the wget command line tool as transport.
     *
     * @return Query
     */
    public function wgetDom(): Query
    {
        if ($this->needsDownload()) {
            // Escaping keeps "&", ";", quotes etc. inside the URL from being
            // interpreted by the shell.
            $command = 'wget -qO- ' . escapeshellarg($this->getUrl()) . ' --no-check-certificate';
            $this->storeDownload(shell_exec($command));
        }

        return new Query($this->htmlBuffer);
    }

    /**
     * Builds the document with the requested transport.
     *
     * With $type "curl" (the default) the document is downloaded through the
     * curl extension and parsed with the given encoding; an empty buffer is
     * replaced by a minimal empty HTML skeleton. Any other $type is dispatched
     * to the method named "<type>Dom", which uses its own Query defaults.
     *
     * @param string $encoding Encoding passed to Query (curl transport only)
     * @param string $type     Transport name: "curl", "filegetcontents", "wget", "phantomjs"
     * @return Query
     * @throws ParserInvalidConfigException When curl is missing or the transport is unknown
     */
    public function dom($encoding = 'UTF-8', $type = 'curl'): Query
    {
        if ($type !== 'curl') {
            $method = $type . 'Dom';
            // is_callable() (rather than method_exists()) keeps magic-method
            // based transports of parent/child classes working.
            if (!is_callable([$this, $method])) {
                throw new ParserInvalidConfigException(
                    sprintf('Unknown parser type "%s"', $type)
                );
            }

            return $this->$method();
        }

        if (!extension_loaded('curl')) {
            throw new ParserInvalidConfigException('The curl extension in not loaded in system');
        }

        if ($this->needsDownload()) {
            $this->storeDownload($this->downloadWithCurl());
        }

        $html = $this->getHtmlBuffer();
        if ($html === '') {
            $html = '<html><head></head><body></body></html>';
        }

        return new Query($html, $encoding);
    }

    /**
     * Returns the buffered markup, or an empty string when nothing is buffered.
     *
     * @return string
     */
    public function getRawHtml(): string
    {
        return $this->htmlBuffer ?: '';
    }

    /**
     * Replaces the buffered markup without touching the URL bookkeeping.
     *
     * @param string $html
     * @return ZendBasedParser
     */
    public function setRawHtml($html): self
    {
        $this->htmlBuffer = $html;

        return $this;
    }

    /**
     * Returns the URL of the last download, or an empty string before the first one.
     *
     * @return string
     */
    public function getLastUrl(): string
    {
        return $this->lastUrl ?: '';
    }

    /**
     * Returns the configured URL, or an empty string when none has been set.
     *
     * The fallback mirrors getLastUrl(); without it the declared string return
     * type raised a TypeError for a parser that never received a URL.
     *
     * @return string
     */
    public function getUrl(): string
    {
        return $this->url ?: '';
    }

    /**
     * Returns the buffered markup, or an empty string when nothing is buffered.
     *
     * The fallback keeps the declared string return type valid for a parser
     * that has not downloaded anything yet.
     *
     * @return string
     */
    public function getHtmlBuffer(): string
    {
        return $this->htmlBuffer ?: '';
    }

    /**
     * Returns the pause, in seconds, that follows every download.
     *
     * @return int
     */
    public function getSleepAfterRequest(): int
    {
        return $this->sleepAfterRequest;
    }

    /**
     * Returns the User-Agent header sent by the curl transport.
     *
     * @return string
     */
    public function getUserAgent(): string
    {
        return $this->userAgent;
    }

    /**
     * Records the URL the current buffer belongs to.
     *
     * @param string $lastUrl
     * @return ZendBasedParser
     */
    public function setLastUrl($lastUrl): self
    {
        $this->lastUrl = $lastUrl;

        return $this;
    }

    /**
     * Replaces the buffered markup.
     *
     * @param string $htmlBuffer
     * @return ZendBasedParser
     */
    public function setHtmlBuffer($htmlBuffer): self
    {
        $this->htmlBuffer = $htmlBuffer;

        return $this;
    }

    /**
     * Sets the User-Agent header sent by the curl transport.
     *
     * @param string $userAgent
     * @return ZendBasedParser
     */
    public function setUserAgent($userAgent): self
    {
        $this->userAgent = $userAgent;

        return $this;
    }

    /**
     * Tells whether the configured URL differs from the last downloaded one.
     *
     * @return bool
     */
    private function needsDownload(): bool
    {
        return $this->getLastUrl() !== $this->getUrl();
    }

    /**
     * Buffers freshly downloaded markup and performs the post-download bookkeeping.
     *
     * The markup is prefixed with a UTF-8 charset meta tag, the configured URL
     * is remembered as the last downloaded one and the configured pause is
     * applied. A failed download (anything that is not a string) is buffered
     * as an empty body, which is what the transports' false/null results
     * amounted to when concatenated.
     *
     * @param string|false|null $html Raw transport result
     * @return $this
     */
    private function storeDownload($html)
    {
        $this->setHtmlBuffer('<meta charset="UTF-8" />' . (is_string($html) ? $html : ''));
        $this->setLastUrl($this->getUrl());
        sleep($this->getSleepAfterRequest());

        return $this;
    }

    /**
     * Downloads the configured URL through the curl extension.
     *
     * Redirects are followed and the configured User-Agent is sent. The body
     * is obtained with CURLOPT_RETURNTRANSFER instead of capturing curl's
     * direct output, so no global output-buffering state is modified.
     *
     * @return string Response body, or an empty string when the transfer failed
     */
    private function downloadWithCurl(): string
    {
        $handle = \curl_init($this->getUrl());
        if ($handle === false) {
            return '';
        }

        \curl_setopt($handle, CURLOPT_USERAGENT, $this->userAgent);
        \curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
        \curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        $body = \curl_exec($handle);
        \curl_close($handle);

        return is_string($body) ? $body : '';
    }
}
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list instead. For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths