These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
0 ignored issues
–
show
|
|||
2 | |||
3 | require_once (__DIR__ . '/vendor/autoload.php'); |
||
4 | |||
/**
 * Link checker.
 *
 * Collects the links found on a page (or takes a URL / list of URLs as-is),
 * requests each one and sorts it into "white" (looks alive) or "black"
 * (error status, suspiciously small body, or a known error-page phrase).
 *
 * NOTE: PSR-1 recommends declaring every class inside a vendor namespace to
 * avoid collisions. The class is left in the global namespace so existing
 * callers that reference \Checker keep working.
 *
 * @author bootjp
 */
class Checker
{
    /** @var \GuzzleHttp\Client HTTP client used for every request. */
    protected $client;

    /** @var int Size threshold used by the Content-Length and body-size checks. */
    protected $contentsSize = 500;

    /** @var bool When true, softCheckByContents() also scans the body for error-page words. */
    protected $doubleCheck = true;

    /** @var bool When true, start() flattens the fetched link list right after fetching it. */
    protected $recursion = false;

    /** @var array Links that were skipped instead of checked; returned as 'UnknownLinks'. */
    protected $garbage = [];

    /** @var bool When true, start() treats its argument as a page to scrape for links. */
    protected $isContentsFetch = true;

    /**
     * Builds the HTTP client and applies the optional settings.
     *
     * Recognised keys of $args (all optional):
     *  - contentSize     (cast to int)  overrides $contentsSize
     *  - doubleCheck     (cast to bool) overrides $doubleCheck
     *  - isContentsFetch (cast to bool) overrides $isContentsFetch
     *  - recursion       (cast to bool) overrides $recursion
     *  - auth            "user:password" string used as the client's default auth
     *
     * @param array $args
     */
    public function __construct(array $args)
    {
        $this->client = new \GuzzleHttp\Client([
            'defaults' => [
                // Error statuses are inspected by hardCheckByHeader(), so they must not throw.
                'exceptions' => false,
                'headers' => [
                    'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) ' .
                        'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36'
                ]
            ]
        ]);

        if (array_key_exists('contentSize', $args)) {
            $this->contentsSize = (int) $args['contentSize'];
        }

        if (array_key_exists('doubleCheck', $args)) {
            $this->doubleCheck = (bool) $args['doubleCheck'];
        }

        if (array_key_exists('isContentsFetch', $args)) {
            $this->isContentsFetch = (bool) $args['isContentsFetch'];
        }

        if (array_key_exists('recursion', $args)) {
            $this->recursion = (bool) $args['recursion'];
        }

        if (array_key_exists('auth', $args)) {
            // Pad so that a value without ':' yields an empty password instead of an
            // undefined-offset notice.
            list($username, $password) = array_pad(explode(':', $args['auth'], 2), 2, '');
            $this->client->setDefaultOption('auth', [$username, $password]);
        }
    }

    /**
     * Runs the whole check and returns the classified URLs.
     *
     * @param mixed $url page URL to scrape, or (when contents fetching is off)
     *                   a URL string, a string-castable object, or a (nested) array of URLs
     * @return array ['white' => [...], 'black' => [...], 'UnknownLinks' => [...]];
     *               white/black entries are ['url' => ..., 'status' => ...] keyed by
     *               the URL's position in the checked list
     * @throws \ErrorException       propagated from fetchByContents()
     * @throws \ReflectionException  when there is no URL to check (null)
     */
    public function start($url)
    {
        $urlList = [];
        $result = [
            'white' => [],
            'black' => [],
        ];

        if ($this->isContentsFetch) {
            echo 'Contents fetching..';
            $url = $this->fetchByContents($url);

            if ($this->recursion) {
                $url = $this->urlFilter($url);
            }
        }

        if (is_null($url)) {
            throw new \ReflectionException('Start URL must not be null.');
        } elseif (is_array($url)) {
            $urlList = $this->urlFilter($url);
        } elseif (is_string($url)) {
            $urlList[] = $url;
        } elseif (is_object($url)) {
            $urlList[] = (string) $url;
        }

        echo "\n";
        echo 'Cheking..';

        foreach ($urlList as $key => $target) {
            $failure = null;
            $metaData = null;

            try {
                $metaData = $this->client->get($target);
            } catch (\Exception $e) {
                echo "\n {$target}\t {$e->getMessage()}";
                // No response exists for this URL: report it as failed rather than
                // running the checks against a missing (or the previous URL's) response.
                $failure = 'NG : ' . $e->getMessage();
            }

            if ($metaData !== null) {
                $hardCheck = $this->hardCheckByHeader($metaData);
                $softCheck = $this->softCheckByContents($metaData);

                if (!$hardCheck['result'] || !$softCheck['result']) {
                    // The header verdict takes precedence when both checks failed.
                    $failure = array_key_exists('status', $hardCheck)
                        ? $hardCheck['status']
                        : $softCheck['status'];
                }
            }

            if ($failure === null) {
                $result['white'][$key]['url'] = $target;
                $result['white'][$key]['status'] = 'OK';
            } else {
                $result['black'][$key]['url'] = $target;
                $result['black'][$key]['status'] = $failure;
            }

            // Pause between requests.
            usleep(500000);
            echo '.';
        }

        $result['UnknownLinks'] = $this->garbage;

        return $result;
    }

    /**
     * Fetches a page and extracts the href of every <a> tag.
     *
     * Absolute http(s) links are kept as-is, other links are prefixed with the
     * base URL, and fragment / javascript links are stored in $this->garbage.
     * If the page cannot be fetched, only the base URL itself is returned.
     *
     * NOTE(review): relative links are resolved by plain concatenation
     * ($baseUrl . $href), which is only correct when $baseUrl ends at a
     * directory boundary — confirm whether proper URL resolution is wanted.
     *
     * @param mixed $baseUrl
     * @return array unique URL list; the base URL is stored under the 'baseUrl' key
     * @throws \ErrorException when the match result has no 'url' group
     */
    private function fetchByContents($baseUrl)
    {
        $urlList = ['baseUrl' => (string) $baseUrl];
        $matches = [];
        $contents = '';

        try {
            // The string cast also copes with a response that has no body.
            $contents = (string) $this->client->get($baseUrl)->getBody();
        } catch (\Exception $e) {
            echo "\n {$baseUrl}\t {$e->getMessage()}";
        }

        // Restrict the match to a single <a ...> tag and to real quote characters.
        preg_match_all('{<a\s[^>]*?href=["\'](?<url>.+?)["\'][^>]*>}is', $contents, $matches);

        if (!array_key_exists('url', $matches)) {
            throw new \ErrorException('Not match contents on url.');
        }

        foreach ($matches['url'] as $href) {
            if (preg_match('{https?://[\w/:%#\$&\?\(\)~\.=\+\-]+}i', $href)) {
                $urlList[] = $href;
            } elseif (preg_match('{https?:\/\/[\w/:%#\$&\?\(\)~\.=\+\-]+}i', $baseUrl . $href)) {
                if (preg_match("{(^#[A-Z0-9].+?$)}i", $href)) {
                    $this->garbage[] = $href;
                } elseif (preg_match("#javascript.*#i", $href)) {
                    $this->garbage[] = $href;
                } else {
                    $urlList[] = $baseUrl . $href;
                }
            } else {
                $this->garbage[] = $href;
            }

            // Progress marker only; no request is made here, so no delay is needed.
            echo '.';
        }

        return array_unique($urlList);
    }

    /**
     * Error check based on the status code and the Content-Length header.
     *
     * @param \GuzzleHttp\Message\Response $metaData
     * @return array ['result' => bool] plus 'status' (string) when the result is false
     */
    private function hardCheckByHeader(\GuzzleHttp\Message\Response $metaData)
    {
        $statusCode = (int) $metaData->getStatusCode();

        $isErrorPageCode = [
            '40x' => [401, 403, 404],
            '50x' => [500, 502, 503],
            '30x' => [301, 302, 308]
        ];

        foreach ($isErrorPageCode as $errorType => $statuses) {
            if (in_array($statusCode, $statuses, true)) {
                return [
                    'result' => false,
                    'status' => "NG : status code {$errorType}"
                ];
            }
        }

        // 200 and 304 pass the header check outright; the body size is still
        // verified by softCheckByContents().
        if ($statusCode === 200 || $statusCode === 304) {
            return [
                'result' => true
            ];
        }

        $headers = array_change_key_case($metaData->getHeaders());

        if (array_key_exists('content-length', $headers)
            && (int) $headers['content-length'][0] < $this->contentsSize
        ) {
            return [
                'result' => false,
                'status' => 'NG : contentsSize'
            ];
        }

        return [
            'result' => true
        ];
    }

    /**
     * Soft-404 check: a body no larger than $contentsSize fails, and when
     * $doubleCheck is on the body is also scanned for error-page words.
     *
     * @param \GuzzleHttp\Message\Response $metaData
     * @return array ['result' => bool] plus 'status' (string) when the result is false
     */
    public function softCheckByContents(\GuzzleHttp\Message\Response $metaData)
    {
        $body = $metaData->getBody();

        // A response may carry no body, and a stream may not know its size.
        $size = $body === null ? 0 : $body->getSize();
        if ($size === null) {
            $size = strlen((string) $body);
        }

        if ($size <= $this->contentsSize) {
            return [
                'result' => false,
                'status' => 'NG : contentsSize'
            ];
        }

        if ($this->doubleCheck) {
            $wordCheck = $this->softCheckByContentsWords($metaData);
            if (!$wordCheck['result']) {
                return [
                    'result' => $wordCheck['result'],
                    'status' => $wordCheck['status']
                ];
            }
        }

        return [
            'result' => true
        ];
    }

    /**
     * Soft-404 check: looks for any configured error-page word in the body
     * (case-insensitive).
     *
     * @param \GuzzleHttp\Message\Response $metaData
     * @return array ['result' => bool] plus 'status' naming the matched word on failure
     */
    private function softCheckByContentsWords(\GuzzleHttp\Message\Response $metaData)
    {
        // Read the body once, before the loop: getContents() only returns what is
        // left in the stream, so calling it per word would test every word after
        // the first against an empty string.
        $contents = (string) $metaData->getBody();

        foreach (self::getSoftErrorWords() as $word) {
            if (mb_stripos($contents, $word) !== false) {
                return [
                    'result' => false,
                    'status' => 'NG WORD : ' . $word
                ];
            }
        }

        return [
            'result' => true
        ];
    }

    /**
     * Returns the soft-404 phrases, one per non-empty line of
     * ErrorPageWords.txt located next to this file.
     *
     * @return array list of words; empty when the file cannot be read
     */
    private static function getSoftErrorWords()
    {
        $path = __DIR__ . '/ErrorPageWords.txt';
        $words = is_readable($path)
            ? file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
            : false;

        // file() yields false on failure; callers iterate the result.
        return $words === false ? [] : $words;
    }

    /**
     * Flattens a (possibly nested) array into a list of unique values,
     * re-indexed from 0.
     *
     * @param array $urlList
     * @return array
     */
    private function urlFilter(array $urlList)
    {
        $flat = [];
        array_walk_recursive($urlList, function ($value) use (&$flat) {
            $flat[] = $value;
        });

        return array_values(array_unique($flat));
    }
}
||
290 |
The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about PSR-1, please see the PHP-FIG site on PSR-1.