crawlers::getApp()   C
last analyzed

Complexity

Conditions 9
Paths 3

Size

Total Lines 173
Code Lines 164

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 168
CRAP Score 9

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 9
eloc 164
c 3
b 0
f 0
nc 3
nop 2
dl 0
loc 173
ccs 168
cts 168
cp 1
crap 9
rs 6.4444

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
declare(strict_types = 1);
3
namespace hexydec\agentzero;
4
5
class crawlers {

	/**
	 * Category reported for a crawler, keyed by the lowercased token name (the part before the first "/").
	 * A match here takes priority over any category supplied by the caller of getApp().
	 *
	 * @var array<string,string>
	 */
	private const CATEGORIES = [
		'yacybot' => 'search',
		'googlebot' => 'search',
		'googlebot-mobile' => 'search',
		'googlebot-image' => 'search',
		'googlebot-video' => 'search',
		'googlebot-news' => 'search',
		'storebot-google' => 'search',
		'adsbot-google' => 'ads',
		'adsbot-google-mobile' => 'ads',
		'mediapartners-google' => 'ads',
		'bingbot' => 'search',
		'adidxbot' => 'ads',
		'duckduckbot' => 'search',
		'duckduckgo-favicons-bot' => 'search',
		'coccocbot-image' => 'search',
		'coccocbot-web' => 'search',
		'yandexbot' => 'search',
		'mj12bot' => 'search',
		'mail.ru_bot' => 'search',
		'exabot' => 'search',
		'uptimerobot' => 'monitor',
		'petalbot' => 'search',
		'twitterbot' => 'feed',
		'xbot' => 'feed',
		'discordbot' => 'feed',
		'sematextsyntheticsrobot' => 'monitor',
		'linkedinbot' => 'feed',
		'paperlibot' => 'feed',
		'bitlybot' => 'feed',
		'tineye-bot' => 'search',
		'pinterestbot' => 'feed',
		'webcrawler' => 'crawler',
		'webprosbot' => 'crawler',
		'guzzlehttp' => 'scraper',
		'telegrambot' => 'feed',
		'semrushbot' => 'crawler',
		'mediatoolkitbot' => 'crawler',
		'iploggerbot' => 'monitor',
		'baiduspider' => 'search',
		'baiduspider+' => 'search',
		'baiduspider-image+' => 'search',
		'baiduspider-ads' => 'ads',
		'haosouspider' => 'search',
		'yisouspider' => 'search',
		'360spider' => 'search',
		'sogou web spider' => 'search',
		'bytespider' => 'ai',
		'claudebot' => 'ai',
		'gptbot' => 'ai',
		'diffbot' => 'ai',
		'amazonbot' => 'ai',
		'applebot' => 'ai',
		'perplexitybot' => 'ai',
		'youbot' => 'ai',
		'iaskbot' => 'ai',
		'ccbot' => 'crawler',
		'wpbot' => 'ai',
		'imagesiftbot' => 'ai'
	];

	/**
	 * Display name for a crawler, keyed by the lowercased token name (the part before the first "/").
	 * Names that are not listed are derived with normaliseAppname().
	 *
	 * NOTE(review): the keys containing "[email protected]" look like e-mail addresses that were
	 * obfuscated when this file was captured - confirm against the upstream source.
	 *
	 * @var array<string,string>
	 */
	private const APPS = [
		'googlebot' => 'Google Bot',
		'googlebot-mobile' => 'Google Bot',
		'googlebot-image' => 'Google Bot',
		'googlebot-video' => 'Google Bot',
		'googlebot-news' => 'Google Bot',
		'storebot-google' => 'Google Bot',
		'adsbot-google' => 'Google Bot',
		'google-adwords-instant' => 'Google Bot',
		'adsbot-google-mobile' => 'Google Bot',
		'mediapartners-google' => 'Google Bot',
		'google-safety' => 'Google Safety',
		'duckduckbot' => 'DuckDuck Bot',
		'duckduckbot-https' => 'DuckDuck Bot',
		'duckduckgo-favicons-bot' => 'DuckDuck Bot',
		'coccocbot-image' => 'Coccoc Bot',
		'coccocbot-web' => 'Coccoc Bot',
		'mj12bot' => 'Majestic 12 Bot',
		'exabot' => 'ExaBot',
		'twitterbot' => 'TwitterBot',
		'discordbot' => 'DiscordBot',
		'sematextsyntheticsrobot' => 'Sematext Synthetics Robot',
		'bitlybot' => 'Bit.ly Bot',
		'webprosbot' => 'WebprosBot',
		'mediatoolkitbot' => 'MediaToolkit Bot',
		'cfnetwork' => 'Apple Core Foundation Network',
		'ncsc web check [email protected]' => 'NCSC Web Check',
		'enhanced webcheck [email protected]' => 'NCSC Enhanced Web Check',
		'the national archives uk government web archive:' => 'UK Government National Archives',
		'google-inspectiontool' => 'Google Inspection Tool',
		'google-pagerenderer google' => 'Google Page Renderer',
		'pingdomtms' => 'Pingdom Bot',
		'facebookexternalhit' => 'Facebook URL Preview',
		'facebookcatalog' => 'Facebook',
		'meta-externalagent' => 'Meta External Agent',
		'meta-externalfetcher' => 'Meta External Fetcher',
		'phxbot' => 'ProtonMail Bot',
		'monitoring360bot' => 'Monitoring360 Bot',
		'cloudflare-healthchecks' => 'Cloudflare Health Checks',
		'cloudflare-alwaysonline' => 'Cloudflare Always Online',
		'cloudflare-traffic-manager' => 'Cloudflare-Traffic-Manager',
		'cloudflare-prefetch' => 'Cloudflare Prefetch',
		'cloudflare-ssldetector' => 'Cloudflare SSL Detector',
		'cloudflare-diagnostics' => 'Cloudflare Diagnostics',
		'ptst' => 'Cloudflare Speed Test',
		'citoid' => 'Wikimedia Citoid',
		'user-agent: seolyt' => 'SEOlyt',
		'bytespider' => 'ByteDance Spider',
		'[email protected]' => 'ByteDance Spider',
		'oai-searchbot' => 'OpenAI SearchBot',
		'semrushbot' => 'Semrush Bot',
		'semrushbot-si' => 'Semrush Bot',
		'semrushbot-ocob' => 'Semrush Bot',
		'semrushbot-swa' => 'Semrush Bot',
		'semrushbot-ba' => 'Semrush Bot',
		'siteauditbot' => 'Semrush Bot',
		'splitsignalbot' => 'Semrush Bot',
		'linkcheck by siteimprove.com' => 'SiteImprove Crawler',
		'sitecheck-sitecrawl by siteimprove.com' => 'SiteImprove Crawler',
		'image size by siteimprove.com' => 'SiteImprove Crawler',
		'probe by siteimprove.com' => 'SiteImprove Crawler',
		'by siteimprove.com' => 'SiteImprove Crawler',
		'magpie-crawler' => 'Brandwatch Magpie Crawler',
		'linkedinbot' => 'LinkedIn Bot',
		'dotbot' => 'Moz DotBot',
		'dataforseobot' => 'DataForSeo Bot',
		'wordpress' => 'WordPress',
		'prtg network monitor' => 'Paessler PRTG Bot',
		'prtgcloudbot' => 'Paessler PRTG Bot',
		'powershell' => 'PowerShell',
		'ccbot' => 'CommonCrawl Bot',
		'oncrawl' => 'OnCrawl Bot',
		'pycurl' => 'PycURL',
		'chatgpt-user' => 'ChatGPT User',
		'mail.ru_bot' => 'Mail.ru Bot',
		'wpbot' => 'Wpbot',
		'dnbcrawler-analytics' => 'DnB Crawler Analytics',
		'baiduspider-image+' => 'Baidu Spider',
		'baiduspider-render' => 'Baidu Spider',
		'baiduspider-ads' => 'Baidu Spider',
		'amazon-qbusiness' => 'Amazon Bot',
		'amazon cloudfront' => 'Amazon Bot',
		'amazonbot-video' => 'Amazon Bot',
		'hubspot crawler' => 'HubSpot Crawler',
		'wordpress.com mshots' => 'WordPress.com mShots',
		'wordpress.com' => 'WordPress',
		'p3p validator' => 'P3P Validator',
		'w3c-checklink' => 'W3C Checklink',
		'w3c_validator' => 'W3C Validator',
		'omgili' => 'Webz.io',
		'bluesky cardyb' => 'Bluesky'
	];

	/**
	 * Extracts application and version information from a token
	 *
	 * The token is split on the first "/" into a name and a version. The version has any leading "v"
	 * characters removed and is truncated at the first character that is not a digit or a dot.
	 *
	 * @param string $value The token to be processed
	 * @param array<string|null> $data An array containing existing data to merge, its values override the extracted ones except for 'category' when the crawler has a known category
	 * @return array<string|int|float|null> The $data array with the processed application and version added, or an empty array when the token is not treated as a crawler name
	 */
	public static function getApp(string $value, array $data = []) : array {

		// a token containing a URL is skipped (the bot name will be in the URL), as are Chrome tokens and the excluded names "Cubot" and "Power bot"
		if (\str_contains($value, '://') || \mb_stripos($value, 'Chrome/') === 0 || \strcasecmp('Cubot', $value) === 0 || \strcasecmp('Power bot', $value) === 0) {
			return [];
		}
		$parts = \explode('/', $value, 2);
		$name = $parts[0];

		// process version
		$version = null;
		if (!empty($parts[1])) {
			$stripped = \ltrim($parts[1], 'v');
			$stripped = \substr($stripped, 0, \strspn($stripped, '0123456789.'));
			$version = empty($stripped) ? null : $stripped;
		}

		// category falls back to the caller's value, then to a guess based on the token text
		$lower = \mb_strtolower($name);
		$guessed = \mb_stripos($value, 'crawl') !== false || \mb_stripos($value, 'bot') !== false ? 'crawler' : 'scraper';
		return \array_merge([
			'type' => 'robot',
			'app' => self::APPS[$lower] ?? self::normaliseAppname($name),
			'appname' => $name,
			'appversion' => $version
		], $data, [
			'category' => self::CATEGORIES[$lower] ?? $data['category'] ?? $guessed
		]);
	}

	/**
	 * Converts a raw crawler token name into a human readable application name
	 *
	 * Underscores and hyphens become spaces, "+" is removed, and a space is inserted before every ASCII
	 * uppercase letter. Runs of single character words are then joined back together, other words
	 * have their first letter uppercased, and "bot", "crawler" and "spider" are split out as
	 * separate capitalised words.
	 *
	 * @param string $name The raw application name
	 * @return string The normalised application name
	 */
	public static function normaliseAppname(string $name) : string {
		$upper = \range('A', 'Z');
		$find = \array_merge(['_', '-', '+'], $upper);
		$replace = \array_merge([' ', ' ', ''], \array_map(static fn (string $letter) : string => ' '.$letter, $upper));
		$name = \trim(\str_replace($find, $replace, $name));

		$output = '';
		$single = true; // whether the previous word was a single character
		foreach (\explode(' ', $name) AS $key => $item) {
			if ($item !== '') {
				$currsingle = \mb_strlen($item) === 1;

				// no separator between consecutive single characters, or before the word at index 1 when it follows a single character
				$output .= ($single && ($currsingle || $key === 1) ? '' : ' ').(!$currsingle ? \ucfirst($item) : $item);
				$single = $currsingle;
			}
		}

		// replace afterward for where it is preceded by an ACRONYM, then collapse double spaces and repair "Ro Bot"
		return \trim(\str_ireplace(['bot', 'crawler', 'spider', '  ', 'ro bot'], [' Bot', ' Crawler', ' Spider', ' ', 'Robot'], $output));
	}

	/**
	 * Generates a configuration array for matching crawlers
	 *
	 * @return array<string,props> An array with keys representing the string to match, and values a props object defining how to generate the match and which properties to set
	 */
	public static function get() : array {

		// builds a callback that runs getApp() with a fixed category
		$category = static fn (string $name) : \Closure => static fn (string $value) : array => self::getApp($value, ['category' => $name]);
		$fn = [
			'search' => $category('search'),
			'ads' => $category('ads'),
			'validator' => $category('validator'),
			'ai' => $category('ai'),
			'feed' => fn (string $value) : array => self::getApp($value, \array_merge(
				\str_contains($value, 'WhatsApp/') ? [
					'app' => 'WhatsApp'
				] : [],
				[
					'category' => 'feed'
				]
			)),
			'crawler' => $category('crawler'),
			'monitor' => $category('monitor'),
			'scraper' => $category('scraper'),
			'map' => fn (string $value) : array => self::getApp($value) // category comes from the lookup table or is guessed
		];
		return [
			'Mozlila/' => new props('start', [
				'type' => 'robot',
				'category' => 'scraper'
			]),
			'Moblie' => new props('exact', [ // some samsung devices misspelt it
				'type' => 'robot',
				'category' => 'scraper'
			]),
			'HeadlessChrome/' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'crawler',
				'browser' => 'HeadlessChrome',
				'browserversion' => \mb_substr($value, 15)
			]),
			'Yahoo! Slurp' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'search',
				'app' => 'Yahoo! Slurp',
				'appname' => $value
			]),
			'Google-Site-Verification/' => new props('start', $fn['validator']),
			'Google-InspectionTool/' => new props('start', $fn['validator']),
			'Google-Safety' => new props('exact', $fn['validator']),
			'Google-Read-Aloud' => new props('exact', $fn['feed']),
			'Google AppsViewer' => new props('exact', $fn['feed']),
			'Mediapartners-Google' => new props('start', $fn['search']),
			'FeedFetcher-Google' => new props('exact', $fn['feed']),
			'Google-PageRenderer' => new props('start', $fn['crawler']),
			'GoogleProducer' => new props('exact', $fn['feed']),
			'Google-adstxt' => new props('exact', $fn['ads']),
			'Google-Adwords-Instant' => new props('exact', $fn['ads']),
			'CFNetwork/' => new props('start', $fn['feed']),
			'Siteimprove.com' => new props('any', fn (string $value) : array => \array_merge([
				'url' => 'https://siteimprove.com'
			], $fn['crawler']($value))),
			'SEOlyt/' => new props('any', $fn['crawler']),
			'CyotekWebCopy' => new props('start', $fn['scraper']),
			'Yandex' => new props('start', function (string $value) : array {
				$parts = \explode('/', $value, 3);
				return [
					'type' => 'robot',
					'category' => 'search',
					'app' => 'Yandex Bot',
					'appname' => $parts[0],
					'appversion' => $parts[1] ?? null
				];
			}),
			'Google Page Speed Insights' => new props('exact', $fn['validator']),
			'Qwantify' => new props('start', function (string $value) : array {
				$parts = \explode('/', $value, 3);
				return [
					'type' => 'robot',
					'category' => 'search',
					'app' => 'Qwant Web Crawler',
					'appname' => $parts[0],
					'appversion' => $parts[1] ?? null
				];
			}),
			'amazon-kendra' => new props('start', fn () : array => [
				'type' => 'robot',
				'category' => 'crawler',
				'app' => 'Amazon Bot',
				'appname' => 'Amazon Kendra'
			]),
			'amazon-QBusiness' => new props('exact', $fn['ai']),
			'amazon CloudFront' => new props('exact', $fn['validator']),
			'Amazonbot-Video/' => new props('start', $fn['crawler']),
			'okhttp' => new props('start', $fn['scraper']),
			'python' => new props('start', $fn['scraper']),
			'grpc-python/' => new props('start', $fn['scraper']),
			'LWP::Simple/' => new props('start', $fn['scraper']),
			'jsdom/' => new props('start', $fn['scraper']),
			'Nessus' => new props('start', $fn['monitor']),
			'monitoring360bot' => new props('start', $fn['monitor']),
			'Cloudflare' => new props('start', $fn['validator']),
			'PTST/' => new props('start', $fn['validator']),
			'+https://developers.cloudflare.com/security-center/' => new props('exact', $fn['monitor']),
			'AppSignalBot/' => new props('start', $fn['monitor']),
			'Better Uptime Bot' => new props('start', [
				'type' => 'robot',
				'category' => 'monitor',
				'app' => 'Better Uptime Bot',
				'appname' => 'Better Uptime Bot'
			]),
			'Chrome-Lighthouse' => new props('start', $fn['validator']),
			'Siege/' => new props('start', $fn['validator']),
			'Microsoft Profiling/' => new props('any', $fn['validator']),
			'Bidtellect' => new props('start', $fn['crawler']),
			'magpie-crawler/' => new props('start', $fn['crawler']),
			'Web Measure/' => new props('start', $fn['crawler']),
			'Bluesky Cardyb/' => new props('start', $fn['crawler']),
			'PingdomTMS/' => new props('start', $fn['monitor']),
			'DynGate' => new props('exact', $fn['monitor']),
			'CensysInspect/' => new props('start', $fn['monitor']),
			'Datadog/Synthetics' => new props('exact', [
				'type' => 'robot',
				'category' => 'monitor',
				'app' => 'Datadog/Synthetics'
			]),
			'RuxitSynthetic/' => new props('start', $fn['monitor']),
			'Checkly/' => new props('start', $fn['monitor']),
			'Uptime/' => new props('start', $fn['monitor']),
			'HostTracker/' => new props('start', $fn['monitor']),
			'NCSC Web Check [email protected]' => new props('exact', $fn['monitor']),
			'Enhanced WebCheck [email protected]' => new props('exact', $fn['monitor']),
			'Pingdom.com' => new props('start', function (string $value) : array {
				$version = \explode('_', \trim($value, '_'));
				return [
					'type' => 'robot',
					'category' => 'monitor',
					'app' => 'Pingdom Bot',
					'appname' => \trim($value, '_'),
					'appversion' => \end($version) // the last underscore separated segment
				];
			}),
			'proximic' => new props('exact', $fn['ads']),
			'WordPress' => new props('start', $fn['feed']),
			'PRTG Network Monitor' => new props('exact', $fn['monitor']),
			'PRTGCloudBot/' => new props('start', $fn['monitor']),
			'Site24x7' => new props('exact', $fn['monitor']),
			'StatusCake' => new props('exact', $fn['monitor']),
			'AWS Network Health' => new props('start', $fn['monitor']),
			'adbeat.com' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'ads',
				'app' => 'Adbeat',
				'appname' => 'Adbeat',
				'url' => 'https://'.$value
			]),
			'MicrosoftPreview/' => new props('start', $fn['feed']),
			'YahooMailProxy' => new props('exact', $fn['feed']),
			'PhxBot/' => new props('start', $fn['feed']), // proton mail
			'Embedly/' => new props('start', $fn['feed']),
			'PayPal IPN' => new props('exact', $fn['feed']),
			'DropboxPreviewBot/' => new props('start', $fn['feed']),
			'Pleroma' => new props('start', fn (string $value) : array => [ // mastodon
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Mastodon',
				'appname' => 'Pleroma',
				'appversion' => \mb_substr($value, 8)
			]),
			'Outlook-Android/' => new props('start', fn (string $value) : array => [ // outlook on android
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Outlook',
				'appname' => 'Outlook-Android',
				'platform' => 'Android',
				'appversion' => \mb_substr($value, 16)
			]),
			'Outlook-iOS/' => new props('start', fn (string $value, int $i, array $tokens) : array => [ // outlook on ios, the version is read from the following token when there is one
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Outlook',
				'appname' => 'Outlook-iOS',
				'platform' => 'iOS',
				'appversion' => $tokens[$i + 1] ?? \mb_substr($value, 12)
			]),
			'OutlookMobileCloudService-Autodetect/' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Outlook',
				'appname' => 'OutlookMobileCloudService-Autodetect',
				'appversion' => \mb_substr($value, 37)
			]),
			'HubSpot Connect ' => new props('start', function (string $value, int $i, array $tokens) : array {
				$app = 'HubSpot Connect';
				$count = \count($tokens);
				for ($n = $i; $n < $count; $n++) {
					if (\str_starts_with($tokens[$n], 'namespace: ')) {

						// the namespace token may be the last one, so the following token is optional
						$app = \mb_substr($tokens[$n], 11).' - '.($tokens[$n + 1] ?? '');
						break;
					}
				}
				return [
					'type' => 'robot',
					'category' => 'feed',
					'app' => 'HubSpot Connect',
					'appname' => $app,
					'appversion' => \mb_substr($value, 16)
				];
			}),
			'Pro-Sitemaps/' => new props('start', $fn['crawler']),
			'Pandalytics/' => new props('start', $fn['crawler']),
			'omgili/' => new props('start', $fn['crawler']),
			// 'CCBot/' => new props('start', $fn['crawler']),
			'The National Archives UK Government Web Archive' => new props('start', $fn['crawler']),
			'Citoid' => new props('exact', $fn['crawler']),
			'trendictionbot' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'crawler',
				'app' => 'Trendicion Bot',
				'appname' => 'trendictionbot',
				'appversion' => \mb_substr($value, 14)
			]),
			'Chrome Privacy Preserving Prefetch Proxy' => new props('exact', $fn['feed']),
			'ViberUrlDownloader' => new props('exact', $fn['feed']),
			'GoogleDocs' => new props('exact', fn (string $value, int $i, array $tokens) : array => [
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Google Docs',
				'appname' => $value.'; '.($tokens[$i + 1] ?? '') // the following token is optional
			]),
			'Google-Lens' => new props('exact', $fn['feed']),
			'ManicTime/' => new props('start', $fn['feed']),
			'Yik Yak/' => new props('start', $fn['feed']),
			'HubSpot-Link-Resolver' => new props('exact', $fn['feed']),
			'AppleExchangeWebServices/' => new props('start', $fn['feed']),
			'The Lounge IRC Client' => new props('exact', $fn['feed']),
			'W3C-checklink/' => new props('start', $fn['validator']),
			'CSSCheck/' => new props('start', $fn['validator']),
			'Let\'s Encrypt validation server' => new props('exact', $fn['validator']),
			'SEO-Macroscope/' => new props('start', $fn['validator']),
			'Electronic Frontier Foundation\'s Do Not Track Verifier' => new props('exact', $fn['validator']),
			'Barracuda Sentinel' => new props('start', $fn['validator']),
			'Expanse' => new props('start', $fn['crawler']),
			'eCairn-Grabber/' => new props('start', $fn['scraper']),
			'SEOkicks' => new props('exact', $fn['crawler']),
			'PostmanRuntime/' => new props('start', $fn['scraper']),
			'axios/' => new props('start', $fn['scraper']),
			'Rogerbot/' => new props('start', $fn['crawler']),
			'DashLinkPreviews/' => new props('start', $fn['feed']),
			'Snapchat/' => new props('start', $fn['feed']),
			'HTTPClient/' => new props('start', $fn['scraper']),
			'WhatsApp/' => new props('any', $fn['feed']),
			'Hootsuite-Authoring/' => new props('start', $fn['feed']),
			'URL Preview' => new props('any', $fn['feed']),
			'Link Preview' => new props('any', $fn['feed']),
			'ApacheBench/' => new props('start', $fn['validator']),
			'Wheregoes.com Redirect Checker/' => new props('start', $fn['validator']),
			'Asana/' => new props('start', $fn['feed']),
			'Java/' => new props('any', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'scraper',
				'app' => 'Java',
				'appname' => $value,
				'appversion' => \explode('/', $value, 3)[1] ?? null
			]),
			'curl/' => new props('any', $fn['scraper']),
			'Wget/' => new props('start', $fn['scraper']),
			'rest-client/' => new props('start', $fn['scraper']),
			'ruby/' => new props('start', $fn['scraper']),
			'Bun/' => new props('start', $fn['scraper']),
			'CakePHP' => new props('start', $fn['scraper']),
			'cpp-httplib/' => new props('start', $fn['scraper']),
			'Dart/' => new props('start', $fn['scraper']),
			'Deno/' => new props('start', $fn['scraper']),
			'Datadog' => new props('start', $fn['scraper']),
			// 'libwww-perl/' => new props('start', $fn['scraper']),
			'http/' => new props('start', $fn['scraper']),
			'Cpanel-HTTP-Client/' => new props('start', $fn['scraper']),
			'http-client/' => new props('any', $fn['scraper']),
			'HttpClient/' => new props('any', $fn['scraper']),
			'PowerShell/' => new props('start', $fn['scraper']),
			'OAI-SearchBot/' => new props('start', $fn['search']),
			'Google-Extended' => new props('start', $fn['ai']),
			'ChatGPT-User/' => new props('start', $fn['ai']),
			'Cohere' => new props('start', $fn['ai']),
			'facebookexternalhit/' => new props('start', $fn['feed']),
			'facebookcatalog/' => new props('start', $fn['crawler']),
			'meta-externalagent' => new props('start', $fn['ai']),
			'meta-externalfetcher' => new props('start', $fn['feed']),
			'Validator' => new props('any', $fn['validator']),
			'feed' => new props('any', $fn['feed']),
			'bot/' => new props('any', $fn['map']),
			'bot-' => new props('any', $fn['map']),
			' bot ' => new props('any', $fn['map']),
			'bot' => new props('end', $fn['map']),
			'spider' => new props('any', $fn['crawler']),
			'crawler' => new props('any', $fn['map']),
		];
	}
}