These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
0 ignored issues
–
show
|
|||
2 | |||
3 | namespace Error; |
||
4 | |||
5 | require_once (__DIR__ . '/vendor/autoload.php'); |
||
6 | |||
/**
 * Link checker.
 *
 * Optionally collects the anchors of a start page, requests every resulting
 * URL and sorts it into a "white" (looks fine) or "black" (looks broken) list,
 * based on the HTTP status code, the response size and a list of soft-404
 * phrases read from ErrorPageWords.txt.
 *
 * @author bootjp
 */
class Checker
{
    /** @var \GuzzleHttp\Client HTTP client used for every request. */
    protected $client;

    /** @var int Size threshold used by the content-length and body-size checks. */
    protected $contentsSize = 500;

    /** @var bool When true, response bodies are also scanned for soft-error phrases. */
    protected $doubleCheck = true;

    /** @var bool When true, the fetched link list is flattened right after fetching. */
    protected $recursion = false;

    /** @var array Links found on the start page that were not turned into checkable URLs. */
    protected $garbage = [];

    /** @var bool When true, start() treats its argument as a page whose links are to be checked. */
    protected $isContentsFetch = true;

    /**
     * Builds the HTTP client and applies the optional settings.
     *
     * Recognised keys of $args: 'contentSize' (int), 'doubleCheck' (bool),
     * 'isContentsFetch' (bool), 'recursion' (bool) and 'auth' ("user:password").
     * Note that the size key is spelled 'contentSize', unlike the property name.
     *
     * @param array $args
     */
    public function __construct(array $args)
    {
        $this->client = new \GuzzleHttp\Client([
            'defaults' => [
                'exceptions' => false,
                'headers' => [
                    'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) ' .
                        'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36'
                ]
            ]
        ]);

        if (array_key_exists('contentSize', $args)) {
            $this->contentsSize = (int) $args['contentSize'];
        }

        if (array_key_exists('doubleCheck', $args)) {
            $this->doubleCheck = (bool) $args['doubleCheck'];
        }

        if (array_key_exists('isContentsFetch', $args)) {
            $this->isContentsFetch = (bool) $args['isContentsFetch'];
        }

        if (array_key_exists('recursion', $args)) {
            $this->recursion = (bool) $args['recursion'];
        }

        if (array_key_exists('auth', $args)) {
            // Pad to two elements so a value without ':' yields an empty
            // password instead of an undefined-offset notice.
            list($username, $password) = array_pad(explode(':', (string) $args['auth'], 2), 2, '');
            $this->client->setDefaultOption('auth', [$username, $password]);
        }
    }

    /**
     * Runs the check and returns the classified URLs.
     *
     * The result has three keys: 'white' and 'black' (each a list of
     * ['url' => ..., 'status' => ...] entries keyed by the URL's position in
     * the checked list) and 'UnknownLinks' (the links that were skipped while
     * parsing the start page). Progress is echoed while running.
     *
     * @param  mixed $url [require] a URL string, a stringable object or an array of URLs
     * @return array
     * @throws \ErrorException
     * @throws \ReflectionException when $url is null
     */
    public function start($url)
    {
        // Validate before the URL is used for fetching.
        if (is_null($url)) {
            throw new \ReflectionException('Start URL must not be null.');
        }

        $urlList = [];
        $result = [
            'white' => [],
            'black' => [],
        ];

        if ($this->isContentsFetch) {
            echo 'Contents fetching..';
            $url = $this->fetchByContents($url);

            if ($this->recursion) {
                $url = $this->urlFilter($url);
            }
        }

        if (is_array($url)) {
            $urlList = $this->urlFilter($url);
        } elseif (is_string($url)) {
            $urlList[] = $url;
        } elseif (is_object($url)) {
            $urlList[] = (string) $url;
        }

        echo "\n";
        echo 'Cheking..';

        foreach ($urlList as $key => $target) {
            try {
                $metaData = $this->client->get($target);
            } catch (\Exception $e) {
                echo "\n {$target}\t {$e->getMessage()}";
                // No response to inspect: report the URL as broken instead of
                // evaluating an undefined (or the previous URL's) response.
                $result['black'][$key]['url'] = $target;
                $result['black'][$key]['status'] = 'NG : ' . $e->getMessage();
                usleep(500000);
                echo '.';
                continue;
            }

            $verdict = $this->hardCheckByHeader($metaData);
            if ($verdict['result']) {
                // Only look at the body when the headers did not already fail.
                $verdict = $this->softCheckByContents($metaData);
            }

            if ($verdict['result']) {
                $result['white'][$key]['url'] = $target;
                $result['white'][$key]['status'] = 'OK';
            } else {
                $result['black'][$key]['url'] = $target;
                $result['black'][$key]['status'] = $verdict['status'];
            }

            // Pause between requests.
            usleep(500000);
            echo '.';
        }

        $result['UnknownLinks'] = $this->garbage;

        return $result;
    }

    /**
     * Fetches a page and extracts the targets of its anchors.
     *
     * Absolute http(s) links are kept as they are. Other links are appended
     * to $baseUrl by plain concatenation, except fragment-only and
     * "javascript:" links, which are stored in $this->garbage together with
     * anything that does not form an http(s) URL.
     *
     * @param  mixed $baseUrl
     * @return array unique URL list; the page itself is stored under the key 'baseUrl'
     * @throws \ErrorException when the link pattern produced no 'url' group
     */
    private function fetchByContents($baseUrl)
    {
        $urlList = [];
        $matches = [];
        $urlList['baseUrl'] = (string) $baseUrl;

        // Default to an empty page so a failed request yields no links
        // rather than an undefined variable.
        $contents = '';
        try {
            $contents = $this->client->get($baseUrl)->getBody()->getContents();
        } catch (\Exception $e) {
            echo "\n {$baseUrl}\t {$e->getMessage()}";
        }

        // The attribute value may be wrapped in single or double quotes.
        preg_match_all('{<a.+?href=["\'](?<url>.+?)["\'].*?>}is', $contents, $matches);

        if (!array_key_exists('url', $matches)) {
            throw new \ErrorException('Not match contents on url.');
        }

        $httpUrlPattern = '{https?://[\w/:%#\$&\?\(\)~\.=\+\-]+}i';

        foreach ($matches['url'] as $link) {
            if (preg_match($httpUrlPattern, $link)) {
                $urlList[] = $link;
            } elseif (preg_match($httpUrlPattern, $baseUrl . $link)) {
                $isFragment = strpos($link, '#') === 0;
                // Anchored, so that relative paths merely containing the word
                // "javascript" are still checked.
                $isScript = preg_match('{^\s*javascript:}i', $link) === 1;

                if ($isFragment || $isScript) {
                    $this->garbage[] = $link;
                } else {
                    $urlList[] = $baseUrl . $link;
                }
            } else {
                $this->garbage[] = $link;
            }

            // No request is made in this loop, so there is nothing to throttle.
            echo '.';
        }

        return array_unique($urlList);
    }

    /**
     * Error check by status code and Content-Length header.
     *
     * @param  \GuzzleHttp\Message\Response $metaData
     * @return array ['result' => bool] plus a 'status' message when the result is false
     */
    private function hardCheckByHeader(\GuzzleHttp\Message\Response $metaData)
    {
        $headers = array_change_key_case($metaData->getHeaders());
        // Cast so the strict comparisons below also hold if the client
        // hands the code back as a numeric string.
        $statusCode = (int) $metaData->getStatusCode();

        $isErrorPageCode = [
            '40x' => [401, 403, 404],
            '50x' => [500, 502, 503],
            '30x' => [301, 302, 308]
        ];

        foreach ($isErrorPageCode as $errorType => $statuses) {
            if (in_array($statusCode, $statuses, true)) {
                return [
                    'result' => false,
                    'status' => "NG : status code {$errorType}"
                ];
            }
        }

        // A status code cannot be 200 and 304 at once; either one is accepted.
        if ($statusCode === 200 || $statusCode === 304) {
            return [
                'result' => true
            ];
        }

        if (array_key_exists('content-length', $headers)
            && (int) $headers['content-length'][0] < $this->contentsSize
        ) {
            return [
                'result' => false,
                'status' => 'NG : contentsSize'
            ];
        }

        return [
            'result' => true
        ];
    }

    /**
     * Soft404 check by body size and, when doubleCheck is enabled, by phrases.
     *
     * @param  \GuzzleHttp\Message\Response $metaData
     * @return array ['result' => bool] plus a 'status' message when the result is false
     */
    public function softCheckByContents(\GuzzleHttp\Message\Response $metaData)
    {
        $body = $metaData->getBody();
        // A missing body is handled like an empty one.
        $size = $body === null ? 0 : $body->getSize();

        if ($size <= $this->contentsSize) {
            return [
                'result' => false,
                'status' => 'NG : contentsSize'
            ];
        }

        if ($this->doubleCheck) {
            $wordCheck = $this->softCheckByContentsWords($metaData);
            if (!$wordCheck['result']) {
                return [
                    'result' => $wordCheck['result'],
                    'status' => $wordCheck['status']
                ];
            }
        }

        return [
            'result' => true
        ];
    }

    /**
     * Soft404 error check: looks for any known error phrase in the body.
     *
     * @param  \GuzzleHttp\Message\Response $metaData
     * @return array ['result' => true], or ['result' => false, 'status' => ...] naming the phrase found
     */
    private function softCheckByContentsWords(\GuzzleHttp\Message\Response $metaData)
    {
        // Read the body once, up front. Calling getContents() per word only
        // returns what is left of the stream, i.e. nothing after the first word.
        $contents = (string) $metaData->getBody();

        foreach (self::getSoftErrorWords() as $word) {
            // A blank needle carries no information; skip it.
            if (trim($word) === '') {
                continue;
            }

            if (mb_stripos($contents, $word) !== false) {
                return [
                    'result' => false,
                    'status' => 'NG WORD : ' . $word
                ];
            }
        }

        return [
            'result' => true
        ];
    }

    /**
     * Returns the phrases that mark a soft-404 page, one per line of
     * ErrorPageWords.txt next to this file.
     *
     * @return array empty when the file cannot be read
     */
    private static function getSoftErrorWords()
    {
        $path = __DIR__ . '/ErrorPageWords.txt';
        if (!is_readable($path)) {
            return [];
        }

        $words = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);

        return $words === false ? [] : $words;
    }

    /**
     * Flattens a (possibly nested) array into a list of unique values.
     *
     * @param  array $urlList
     * @return array re-indexed list without duplicates
     */
    private function urlFilter(array $urlList)
    {
        $flat = [];
        array_walk_recursive($urlList, function ($value) use (&$flat) {
            $flat[] = $value;
        });

        return array_values(array_unique($flat));
    }
}
||
291 |
The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.