Passed
Push — main ( c4a646...dc1874 )
by Will
02:03
created

crawlers::get()   C

Complexity

Conditions 11
Paths 1

Size

Total Lines 315
Code Lines 246

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 246
c 1
b 0
f 0
dl 0
loc 315
rs 5.8533
cc 11
nc 1
nop 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
declare(strict_types = 1);
3
namespace hexydec\agentzero;
4
5
/**
 * Lookup table of user-agent tokens that identify robots (search engines, feed
 * fetchers, monitors, scrapers, ...), with the callbacks that turn a matched
 * token into a result array.
 */
class crawlers {

	/**
	 * Category for each bot name that the generic "spider"/"crawler"/"bot"
	 * rules can resolve. The lookup is case-sensitive, which is why some bots
	 * are listed with more than one capitalisation.
	 *
	 * @var array<string,string>
	 */
	private const CATEGORIES = [
		'yacybot' => 'search',
		'Googlebot' => 'search',
		'Googlebot-Mobile' => 'search',
		'Googlebot-Image' => 'search',
		'Googlebot-Video' => 'search',
		'Googlebot-News' => 'search',
		'Storebot-Google' => 'search',
		'AdsBot-Google' => 'ads',
		'AdsBot-Google-Mobile' => 'ads',
		'Bingbot' => 'search',
		'bingbot' => 'search',
		'adidxbot' => 'ads',
		'DuckDuckBot' => 'search',
		'DuckDuckGo-Favicons-Bot' => 'search',
		'coccocbot-image' => 'search',
		'coccocbot-web' => 'search',
		'Baiduspider' => 'search',
		'Applebot' => 'search',
		'YandexBot' => 'search',
		'MJ12bot' => 'search',
		'Mail.RU_Bot' => 'search',
		'HaosouSpider' => 'search',
		'360Spider' => 'search',
		'Exabot' => 'search',
		'Sogou web spider' => 'search',
		'UptimeRobot' => 'monitor',
		'PetalBot' => 'search',
		'Screaming Frog SEO Spider' => 'crawler',
		'Twitterbot' => 'feed',
		'Xbot' => 'feed',
		'Discordbot' => 'feed',
		'PRTGCloudBot' => 'monitor',
		'Bytespider' => 'search',
		'LinkedInBot' => 'feed',
		'PaperLiBot' => 'feed',
		'bitlybot' => 'feed',
		'TinEye-bot' => 'search',
		'Pinterestbot' => 'feed',
		'WebCrawler' => 'crawler',
		'webprosbot' => 'crawler',
		'GuzzleHttp' => 'scraper',
		'TelegramBot' => 'feed',
		'Ruby' => 'scraper',
		'SEMrushBot' => 'crawler',
		'Mediatoolkitbot' => 'crawler'
	];

	/**
	 * Splits a "name/version" token into robot application details.
	 *
	 * @param string $value The matched token, e.g. "Googlebot/2.1"
	 * @param array<string,mixed> $data Extra keys merged over the extracted ones (later keys win)
	 * @return array<string,mixed> The robot details, or an empty array when the token contains "://"
	 */
	public static function getApp(string $value, array $data = []) : array {
		if (\str_contains($value, '://')) { // bot will be in the URL
			return [];
		}
		$parts = \explode('/', $value, 2);

		// process version: drop a leading "v" and keep only the leading digits/dots
		$version = null;
		if (!empty($parts[1])) {
			$trimmed = \ltrim($parts[1], 'v');
			$trimmed = \substr($trimmed, 0, \strspn($trimmed, '0123456789.'));
			$version = empty($trimmed) ? null : $trimmed;
		}
		return \array_merge([
			'type' => 'robot',
			'app' => $parts[0],
			'appversion' => $version
		], $data);
	}

	/**
	 * Builds a callback that parses a token with getApp() and tags it with a fixed category.
	 *
	 * @param string $category The category to attach to every result
	 * @return \Closure A callback taking the matched token and returning the getApp() result
	 */
	private static function category(string $category) : \Closure {
		return fn (string $value) : array => self::getApp($value, ['category' => $category]);
	}

	/**
	 * Returns the crawler matching rules.
	 *
	 * Each key is the text to look for, and each value holds a 'match' mode
	 * ('exact', 'start' or 'any' - interpreted by the consumer of this table,
	 * which is not part of this class) and a 'categories' callback that
	 * converts the matched token into a result array.
	 *
	 * @return array<string,array{match:string,categories:\Closure}> The rules, in declaration order
	 */
	public static function get() : array {
		$fn = [
			'search' => self::category('search'),
			'ads' => self::category('ads'),
			'validator' => self::category('validator'),
			'feed' => self::category('feed'),
			'crawler' => self::category('crawler'),
			'monitor' => self::category('monitor'),
			'scraper' => self::category('scraper'),

			// generic handler: resolves the category from the bot name
			'map' => function (string $value, int $i, array $tokens) : ?array {
				if (\str_contains($value, '://')) { // bot will be in the URL
					return null;
				}
				$parts = \explode('/', $value, 2);

				// special case: a bare "Spider" token is the tail of a multi-word bot name
				if (\strcasecmp($parts[0], 'Spider') === 0) {
					$ua = \implode(' ', $tokens);
					foreach (['Screaming Frog SEO Spider', 'Sogou web spider'] AS $name) {
						if (\mb_stripos($ua, $name) === 0) {
							$parts[0] = $name;
							break;
						}
					}
				}
				return self::getApp($value, [
					'category' => self::CATEGORIES[$parts[0]] ?? null,
					'app' => $parts[0]
				]);
			}
		];
		return [
			'Yahoo! Slurp' => [
				'match' => 'exact',
				'categories' => $fn['search']
			],
			'facebookexternalhit/' => [
				'match' => 'start',
				'categories' => $fn['feed']
			],
			'Google-Site-Verification/' => [
				'match' => 'start',
				'categories' => $fn['validator']
			],
			'Google-InspectionTool/' => [
				'match' => 'start',
				'categories' => $fn['search']
			],
			'Mediapartners-Google' => [
				'match' => 'start',
				'categories' => $fn['search']
			],
			'FeedFetcher-Google' => [
				'match' => 'exact',
				'categories' => $fn['feed']
			],
			'GoogleProducer' => [
				'match' => 'exact',
				'categories' => $fn['feed']
			],
			'CFNetwork/' => [
				'match' => 'start',
				'categories' => $fn['feed']
			],
			'Siteimprove.com' => [
				'match' => 'any',
				'categories' => $fn['crawler']
			],
			'Google Page Speed Insights' => [
				'match' => 'exact',
				'categories' => $fn['validator']
			],
			'Qwantify' => [
				'match' => 'start',
				'categories' => $fn['search']
			],
			'okhttp' => [
				'match' => 'start',
				'categories' => $fn['scraper']
			],
			'python' => [
				'match' => 'start',
				'categories' => $fn['scraper']
			],
			'Nessus' => [
				'match' => 'start',
				'categories' => $fn['monitor']
			],
			'Chrome-Lighthouse' => [
				'match' => 'start',
				'categories' => $fn['validator']
			],
			'PingdomTMS/' => [
				'match' => 'start',
				'categories' => $fn['monitor']
			],
			'Pingdom.com' => [
				'match' => 'start',
				'categories' => function (string $value) : array {

					// the version is the last underscore-separated segment
					$version = \explode('_', \trim($value, '_'));
					return [
						'type' => 'robot',
						'category' => 'monitor',
						'app' => 'Pingdom.com Bot',
						'appversion' => \end($version)
					];
				}
			],
			'proximic' => [
				'match' => 'exact',
				'categories' => $fn['ads']
			],
			'WordPress' => [
				'match' => 'start',
				'categories' => $fn['monitor']
			],
			'PRTG Network Monitor' => [
				'match' => 'exact',
				'categories' => $fn['monitor']
			],
			'Site24x7' => [
				'match' => 'exact',
				'categories' => $fn['monitor']
			],
			'adbeat.com' => [
				'match' => 'start',
				'categories' => fn (string $value) : array => [
					'type' => 'robot',
					'category' => 'ads',
					'app' => 'Adbeat',
					'url' => $value
				]
			],
			'MicrosoftPreview/' => [
				'match' => 'start',
				'categories' => $fn['feed']
			],
			'Let\'s Encrypt validation server' => [
				'match' => 'exact',
				'categories' => $fn['validator']
			],
			'Expanse' => [
				'match' => 'start',
				'categories' => $fn['crawler']
			],

			// This key used to be declared twice ('start' here, 'any' further down).
			// PHP keeps only the last value for a repeated key, so 'any' was always
			// the effective mode; it is now declared once, at its original position.
			'WhatsApp/' => [
				'match' => 'any',
				'categories' => $fn['feed']
			],
			'Apache-HttpClient/' => [
				'match' => 'start',
				'categories' => $fn['scraper']
			],
			'eCairn-Grabber/' => [
				'match' => 'start',
				'categories' => $fn['scraper']
			],
			'SEOkicks' => [
				'match' => 'exact',
				'categories' => $fn['crawler']
			],
			'PostmanRuntime/' => [
				'match' => 'start',
				'categories' => $fn['scraper']
			],
			'axios/' => [
				'match' => 'start',
				'categories' => $fn['scraper']
			],
			'Rogerbot/' => [
				'match' => 'start',
				'categories' => $fn['crawler']
			],
			'Go-http-client/' => [
				'match' => 'start',
				'categories' => $fn['scraper']
			],
			'DashLinkPreviews/' => [
				'match' => 'start',
				'categories' => $fn['feed']
			],
			'Microsoft Office' => [
				'match' => 'start',
				'categories' => function (string $value, int $i, array $tokens) : array {
					$data = [
						'type' => 'robot',
						'category' => 'feed'
					];
					if (\str_contains($value, '/')) {

						// prefer a later "Microsoft <App> [<version>]" token when there is one
						foreach (\array_slice($tokens, $i + 1) AS $item) {
							if (\str_starts_with($item, 'Microsoft ')) {
								$parts = \explode(' ', $item);
								$data['app'] = $parts[0].' '.$parts[1];
								if (isset($parts[2])) {
									$data['appversion'] = $parts[2];
								}
								break;
							}
						}

						// otherwise fall back to the "name/version" token itself
						if (!isset($data['app'])) {
							$parts = \explode('/', $value, 2);
							$data['app'] = $parts[0];
							$data['appversion'] = $parts[1];
						}
					} else {

						// "Microsoft Office <App> <version>": the name is at most the first three
						// words, joined safely so a shorter token cannot read a missing index
						$parts = \explode(' ', $value);
						$data['app'] = \implode(' ', \array_slice($parts, 0, 3));
						$data['appversion'] = $parts[3] ?? null;
					}
					return $data;
				}
			],
			'PycURL/' => [
				'match' => 'start',
				'categories' => $fn['scraper']
			],
			'lua-resty-http/' => [
				'match' => 'start',
				'categories' => $fn['scraper']
			],
			'Snapchat/' => [
				'match' => 'start',
				'categories' => $fn['feed']
			],
			'HTTPClient/' => [
				'match' => 'start',
				'categories' => $fn['scraper']
			],
			'Hootsuite-Authoring/' => [
				'match' => 'start',
				'categories' => $fn['feed']
			],
			'ApacheBench/' => [
				'match' => 'start',
				'categories' => $fn['validator']
			],
			'Asana/' => [
				'match' => 'start',
				'categories' => $fn['feed']
			],
			'Java/' => [
				'match' => 'start',
				'categories' => $fn['scraper']
			],
			'curl/' => [
				'match' => 'start',
				'categories' => $fn['scraper']
			],
			'feed' => [
				'match' => 'any',
				'categories' => $fn['feed']
			],
			'spider' => [
				'match' => 'any',
				'categories' => $fn['map']
			],
			'crawler' => [
				'match' => 'any',
				'categories' => $fn['map']
			],
			'bot' => [
				'match' => 'any',
				'categories' => $fn['map']
			],
		];
	}
}