This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
0 ignored issues
–
show
|
|||
2 | |||
3 | namespace Error; |
||
4 | |||
5 | require_once (__DIR__ . '/vendor/autoload.php'); |
||
6 | |||
/**
 * Link checker that flags error pages and "soft 404" pages.
 *
 * Given a start URL it can extract the links found on that page and then
 * request every URL, classifying each one as "white" (looks healthy) or
 * "black" (error status code, suspiciously small body, or body containing
 * a known error-page phrase).
 *
 * @author bootjp
 */
class Checker
{
    /** @var \GuzzleHttp\Client HTTP client used for every request. */
    protected $client;

    /** @var int Size threshold compared against Content-Length and the body size. */
    protected $contentsSize = 500;

    /** @var bool Whether the body is additionally scanned for error-page words. */
    protected $doubleCheck = true;

    /** @var bool Whether the fetched link list is flattened right after fetching. */
    protected $recursion = false;

    /** @var array Links found on the page that were not turned into checkable URLs. */
    protected $garbage = [];

    /** @var bool Whether start() first fetches the page and extracts its links. */
    protected $isContentsFetch = true;

    /**
     * Build the HTTP client and apply the optional settings.
     *
     * Recognised keys in $args: 'contentSize' (int), 'doubleCheck' (bool),
     * 'isContentsFetch' (bool), 'recursion' (bool) and 'auth'
     * ("username:password").
     *
     * @param array $args
     */
    public function __construct(array $args)
    {
        $this->client = new \GuzzleHttp\Client([
            'defaults' => [
                // Error status codes are inspected by this class, not thrown.
                'exceptions' => false,
                'headers' => [
                    'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) ' .
                        'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36'
                ]
            ]
        ]);

        // NOTE: the option key is 'contentSize' while the property is
        // 'contentsSize'; the key is kept as-is for existing callers.
        if (array_key_exists('contentSize', $args)) {
            $this->contentsSize = (int) $args['contentSize'];
        }

        if (array_key_exists('doubleCheck', $args)) {
            $this->doubleCheck = (bool) $args['doubleCheck'];
        }

        if (array_key_exists('isContentsFetch', $args)) {
            $this->isContentsFetch = (bool) $args['isContentsFetch'];
        }

        if (array_key_exists('recursion', $args)) {
            $this->recursion = (bool) $args['recursion'];
        }

        if (array_key_exists('auth', $args)) {
            // Pad so that a value without ':' yields an empty password
            // instead of an undefined-offset notice.
            list($username, $password) = array_pad(explode(':', $args['auth'], 2), 2, '');
            $this->client->setDefaultOption('auth', [$username, $password]);
        }
    }

    /**
     * Check one URL, a list of URLs, or every link found on a page.
     *
     * Progress is echoed to standard output while running.
     *
     * @param mixed $url [require] string, array of strings, or an object castable to string
     * @return array ['white' => [...], 'black' => [...], 'UnknownLinks' => [...]];
     *               white/black entries are ['url' => string, 'status' => string]
     * @throws \ErrorException       when link extraction fails
     * @throws \ReflectionException  when no start URL is given
     */
    public function start($url)
    {
        $urlList = [];
        $result = [
            'white' => [],
            'black' => [],
        ];

        if ($this->isContentsFetch) {
            echo 'Contents fetching..';
            $url = $this->fetchByContents($url);

            if ($this->recursion) {
                $url = $this->urlFilter($url);
            }
        }

        if (is_null($url)) {
            throw new \ReflectionException('Start URL must not be null.');
        } elseif (is_array($url)) {
            $urlList = $this->urlFilter($url);
        } elseif (is_string($url)) {
            $urlList[] = $url;
        } elseif (is_object($url)) {
            $urlList[] = (string) $url;
        }

        echo "\n";
        echo 'Cheking..';

        foreach ($urlList as $key => $target) {
            try {
                $metaData = $this->client->get($target);
            } catch (\Exception $e) {
                echo "\n {$target}\t {$e->getMessage()}";
                // No response was received, so there is nothing to inspect.
                // Record the URL as failed rather than evaluating an
                // undefined (or the previous URL's) response.
                $result['black'][$key] = [
                    'url' => $target,
                    'status' => 'NG : ' . $e->getMessage(),
                ];
                continue;
            }

            $hardCheck = $this->hardCheckByHeader($metaData);
            $softCheck = $this->softCheckByContents($metaData);

            if ($hardCheck['result'] && $softCheck['result']) {
                $result['white'][$key] = [
                    'url' => $target,
                    'status' => 'OK',
                ];
            } else {
                // The header check's reason takes precedence when present.
                $result['black'][$key] = [
                    'url' => $target,
                    'status' => array_key_exists('status', $hardCheck)
                        ? $hardCheck['status']
                        : $softCheck['status'],
                ];
            }

            // Throttle between requests.
            usleep(500000);
            echo '.';
        }

        $result['UnknownLinks'] = $this->garbage;

        return $result;
    }

    /**
     * Fetch a page and collect the URLs of its anchor tags.
     *
     * Absolute http(s) links are kept as-is; other links are appended to
     * $baseUrl. Fragment-only and javascript: links are stored in
     * $this->garbage instead of being returned.
     *
     * @param mixed $baseUrl
     * @return array unique URL list; the base URL itself is stored under the 'baseUrl' key
     * @throws \ErrorException when the link pattern produced no 'url' group
     */
    private function fetchByContents($baseUrl)
    {
        $urlList = ['baseUrl' => (string) $baseUrl];
        $matches = [];
        // Stays empty when the request fails, so extraction finds no links.
        $contents = '';

        try {
            $contents = $this->client->get($baseUrl)->getBody()->getContents();
        } catch (\Exception $e) {
            echo "\n {$baseUrl}\t {$e->getMessage()}";
        }

        // Only quote characters delimit the href value.
        preg_match_all('{<a.+?href=["\'](?<url>.+?)["\'].*?>}is', $contents, $matches);

        if (!array_key_exists('url', $matches)) {
            throw new \ErrorException('Not match contents on url.');
        }

        $absoluteUrl = '{https?://[\w/:%#\$&\?\(\)~\.=\+\-]+}i';

        foreach ($matches['url'] as $link) {
            if (preg_match($absoluteUrl, $link)) {
                $urlList[] = $link;
            } elseif (!preg_match($absoluteUrl, $baseUrl . $link)) {
                $this->garbage[] = $link;
            } elseif (preg_match("{(^#[A-Z0-9].+?$)}i", $link) || preg_match("#javascript.*#i", $link)) {
                // In-page anchors and javascript: pseudo links cannot be requested.
                $this->garbage[] = $link;
            } else {
                $urlList[] = $baseUrl . $link;
            }

            echo '.';
        }

        return array_unique($urlList);
    }

    /**
     * Error check by status code and Content-Length header.
     *
     * @param \GuzzleHttp\Message\Response $metaData
     * @return array ['result' => bool] plus 'status' (string) when the result is false
     */
    private function hardCheckByHeader(\GuzzleHttp\Message\Response $metaData)
    {
        $headers = array_change_key_case($metaData->getHeaders());
        $statusCode = (int) $metaData->getStatusCode();

        $isErrorPageCode = [
            '40x' => [401, 403, 404],
            '50x' => [500, 502, 503],
            '30x' => [301, 302, 308]
        ];

        foreach ($isErrorPageCode as $errorType => $statuses) {
            if (in_array($statusCode, $statuses, true)) {
                return [
                    'result' => false,
                    'status' => "NG : status code {$errorType}"
                ];
            }
        }

        // A status code cannot be 200 and 304 at once; either one means success.
        if (in_array($statusCode, [200, 304], true)) {
            return [
                'result' => true
            ];
        }

        if (array_key_exists('content-length', $headers)
            && $headers['content-length'][0] < $this->contentsSize
        ) {
            return [
                'result' => false,
                'status' => 'NG : contentsSize'
            ];
        }

        return [
            'result' => true
        ];
    }

    /**
     * Soft404 check by body size and, when doubleCheck is enabled, by error words.
     *
     * @param \GuzzleHttp\Message\Response $metaData
     * @return array ['result' => bool] plus 'status' (string) when the result is false
     */
    public function softCheckByContents(\GuzzleHttp\Message\Response $metaData)
    {
        if ($metaData->getBody()->getSize() <= $this->contentsSize) {
            return [
                'result' => false,
                'status' => 'NG : contentsSize'
            ];
        }

        if ($this->doubleCheck) {
            $wordCheck = $this->softCheckByContentsWords($metaData);
            if (!$wordCheck['result']) {
                return [
                    'result' => $wordCheck['result'],
                    'status' => $wordCheck['status']
                ];
            }
        }

        return [
            'result' => true
        ];
    }

    /**
     * Soft404 check: look for known error-page words in the body.
     *
     * @param \GuzzleHttp\Message\Response $metaData
     * @return array ['result' => bool] plus 'status' naming the matched word when false
     */
    private function softCheckByContentsWords(\GuzzleHttp\Message\Response $metaData)
    {
        // Read the body once. Calling getContents() per word would return
        // only the remaining stream data, i.e. an empty string after the
        // first read. Guzzle's stream-to-string conversion is documented
        // to read from the start of the stream.
        $contents = (string) $metaData->getBody();

        foreach (self::getSoftErrorWords() as $word) {
            if (mb_stripos($contents, $word) !== false) {
                return [
                    'result' => false,
                    'status' => 'NG WORD : ' . $word
                ];
            }
        }

        return [
            'result' => true
        ];
    }

    /**
     * Return the words that identify a soft404 page.
     *
     * Read from ErrorPageWords.txt next to this file, one entry per line.
     *
     * @return array list of words; empty when the file cannot be read
     */
    private static function getSoftErrorWords()
    {
        $words = file(__DIR__ . '/ErrorPageWords.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);

        // file() returns false on failure; callers iterate over the result.
        return $words === false ? [] : $words;
    }

    /**
     * Flatten a (possibly nested) array into a list of unique values.
     *
     * @param array $urlList
     * @return array re-indexed list of unique leaf values
     */
    private function urlFilter(array $urlList)
    {
        $result = [];
        array_walk_recursive($urlList, function ($v) use (&$result) {
            $result[] = $v;
        });

        return array_values(array_unique($result));
    }
}
||
292 |
The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.