Passed
Push — main ( 683f0d...4c4127 )
by Will
04:44 queued 53s
created

crawlers::normaliseAppname()   B

Complexity

Conditions 7
Paths 14

Size

Total Lines 14
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
eloc 11
c 0
b 0
f 0
nc 14
nop 1
dl 0
loc 14
ccs 12
cts 12
cp 1
crap 7
rs 8.8333
1
<?php
2
declare(strict_types = 1);
3
namespace hexydec\agentzero;
4
5
class crawlers {
6
7
	/**
	 * Extracts application and version information from a token
	 *
	 * The token is split on the first "/" into a name and a version. The version has any leading
	 * "v" characters stripped and is truncated to its leading run of digits and dots. The lowercased
	 * name is then looked up in the known application and category maps.
	 *
	 * Tokens containing "://", tokens starting with "Chrome/", and the exact tokens "Cubot" and
	 * "Power bot" (case-insensitive) are not treated as robots and produce an empty array.
	 *
	 * Precedence of the returned keys: the generated "type", "app", "appname" and "appversion" are
	 * overridden by $data, and "category" is taken from the built-in category map first, then
	 * $data['category'], then guessed from the token ("crawler" if it contains "crawl" or "bot",
	 * otherwise "scraper").
	 *
	 * @param string $value The token to be processed
	 * @param array<string|null> $data An array containing existing data to merge
	 * @return array<string|int|float|null> The $data array with the processed application and version added, or an empty array if the token is not a robot
	 */
	public static function getApp(string $value, array $data = []) : array {

		// a token containing a URL is not the bot name, the bot will be in the URL
		if (\str_contains($value, '://') || \mb_stripos($value, 'Chrome/') === 0 || \strcasecmp('Cubot', $value) === 0 || \strcasecmp('Power bot', $value) === 0) {
			return [];
		}
		$parts = \explode('/', $value, 2);

		// process version
		if (!empty($parts[1])) {
			$parts[1] = \ltrim($parts[1], 'v');
			$parts[1] = \substr($parts[1], 0, \strspn($parts[1], '0123456789.'));
		}

		// keys must be lowercase, as they are matched against the lowercased token name
		$category = [
			'yacybot' => 'search',
			'googlebot' => 'search',
			'googlebot-mobile' => 'search',
			'googlebot-image' => 'search',
			'googlebot-video' => 'search',
			'googlebot-news' => 'search',
			'storebot-google' => 'search',
			'adsbot-google' => 'ads',
			'adsbot-google-mobile' => 'ads',
			'mediapartners-google' => 'ads',
			'bingbot' => 'search',
			'adidxbot' => 'ads',
			'duckduckbot' => 'search',
			'duckduckgo-favicons-bot' => 'search',
			'coccocbot-image' => 'search',
			'coccocbot-web' => 'search',
			'yandexbot' => 'search',
			'mj12bot' => 'search',
			'mail.ru_bot' => 'search',
			'exabot' => 'search',
			'uptimerobot' => 'monitor',
			'petalbot' => 'search',
			'twitterbot' => 'feed',
			'xbot' => 'feed',
			'discordbot' => 'feed',
			'sematextsyntheticsrobot' => 'monitor',
			'linkedinbot' => 'feed',
			'paperlibot' => 'feed',
			'bitlybot' => 'feed',
			'tineye-bot' => 'search',
			'pinterestbot' => 'feed',
			'webcrawler' => 'crawler',
			'webprosbot' => 'crawler',
			'guzzlehttp' => 'scraper',
			'telegrambot' => 'feed',
			'semrushbot' => 'crawler',
			'mediatoolkitbot' => 'crawler',
			'iploggerbot' => 'monitor',
			'baiduspider' => 'search',
			'baiduspider+' => 'search',
			'baiduspider-image+' => 'search',
			'baiduspider-ads' => 'ads',
			'haosouspider' => 'search',
			'yisouspider' => 'search',
			'360spider' => 'search',
			'sogou web spider' => 'search',
			'bytespider' => 'ai',
			'claudebot' => 'ai',
			'gptbot' => 'ai',
			'diffbot' => 'ai',
			'amazonbot' => 'ai',
			'applebot' => 'ai',
			'perplexitybot' => 'ai',
			'youbot' => 'ai',
			'iaskbot' => 'ai',
			'ccbot' => 'crawler',
			'wpbot' => 'ai',
			'imagesiftbot' => 'ai',
			'aihitbot' => 'ai',
			'andibot' => 'ai',
			'bedrockbot' => 'ai',
			'addsearchbot' => 'ai',
			'ai2bot' => 'ai',
			'google-cloudvertexbot' => 'ai',
			'duckassistbot' => 'ai',
			'echobot bot' => 'ai',
			'echoboxbot' => 'ai',
			'factset_spyderbot' => 'ai',
			'kangaroo bot' => 'ai',
			'linerbot' => 'ai',
			'mycentralaiscraperbot' => 'ai',
			'omgilibot' => 'crawler', // webz.io
			'webzio' => 'crawler',
			'pangubot' => 'ai', // huawei
			'phindbot' => 'ai',
			'qualifiedbot' => 'ai',
			'quillbot' => 'ai',
			'sbintuitionsbot' => 'ai',
			'sidetradebot' => 'ai',
			'thinkbot' => 'ai',
			'timpibot' => 'ai',
			'wardbot' => 'monitor'
		];

		// display names for tokens where the generated name would be wrong, keyed by lowercased token name
		// NOTE(review): the keys containing "[email protected]" look like redacted e-mail addresses — confirm against the canonical source
		$apps = [
			'googlebot' => 'Google Bot',
			'googlebot-mobile' => 'Google Bot',
			'googlebot-image' => 'Google Bot',
			'googlebot-video' => 'Google Bot',
			'googlebot-news' => 'Google Bot',
			'storebot-google' => 'Google Bot',
			'adsbot-google' => 'Google Bot',
			'google-adwords-instant' => 'Google Bot',
			'adsbot-google-mobile' => 'Google Bot',
			'mediapartners-google' => 'Google Bot',
			'google-safety' => 'Google Safety',
			'duckduckbot' => 'DuckDuck Bot',
			'duckduckbot-https' => 'DuckDuck Bot',
			'duckduckgo-favicons-bot' => 'DuckDuck Bot',
			'coccocbot-image' => 'Coccoc Bot',
			'coccocbot-web' => 'Coccoc Bot',
			'mj12bot' => 'Majestic 12 Bot',
			'exabot' => 'ExaBot',
			'twitterbot' => 'TwitterBot',
			'discordbot' => 'DiscordBot',
			'sematextsyntheticsrobot' => 'Sematext Synthetics Robot',
			'bitlybot' => 'Bit.ly Bot',
			'webprosbot' => 'WebprosBot',
			'mediatoolkitbot' => 'MediaToolkit Bot',
			'cfnetwork' => 'Apple Core Foundation Network',
			'ncsc web check [email protected]' => 'NCSC Web Check',
			'enhanced webcheck [email protected]' => 'NCSC Enhanced Web Check',
			'the national archives uk government web archive:' => 'UK Government National Archives',
			'google-inspectiontool' => 'Google Inspection Tool',
			'google-pagerenderer google' => 'Google Page Renderer',
			'pingdomtms' => 'Pingdom Bot',
			'facebookexternalhit' => 'Facebook URL Preview',
			'facebookcatalog' => 'Facebook',
			'meta-externalagent' => 'Meta External Agent',
			'meta-externalfetcher' => 'Meta External Fetcher',
			'phxbot' => 'ProtonMail Bot',
			'monitoring360bot' => 'Monitoring360 Bot',
			'cloudflare-healthchecks' => 'Cloudflare Health Checks',
			'cloudflare-alwaysonline' => 'Cloudflare Always Online',
			'cloudflare-traffic-manager' => 'Cloudflare-Traffic-Manager',
			'cloudflare-prefetch' => 'Cloudflare Prefetch',
			'cloudflare-ssldetector' => 'Cloudflare SSL Detector',
			'cloudflare-diagnostics' => 'Cloudflare Diagnostics',
			'ptst' => 'Cloudflare Speed Test',
			'citoid' => 'Wikimedia Citoid',
			'user-agent: seolyt' => 'SEOlyt',
			'bytespider' => 'ByteDance Spider',
			'[email protected]' => 'ByteDance Spider',
			'oai-searchbot' => 'OpenAI SearchBot',
			'semrushbot' => 'Semrush Bot',
			'semrushbot-si' => 'Semrush Bot',
			'semrushbot-ocob' => 'Semrush Bot',
			'semrushbot-swa' => 'Semrush Bot',
			'semrushbot-ba' => 'Semrush Bot',
			'siteauditbot' => 'Semrush Bot',
			'splitsignalbot' => 'Semrush Bot',
			'linkcheck by siteimprove.com' => 'SiteImprove Crawler',
			'sitecheck-sitecrawl by siteimprove.com' => 'SiteImprove Crawler',
			'image size by siteimprove.com' => 'SiteImprove Crawler',
			'probe by siteimprove.com' => 'SiteImprove Crawler',
			'by siteimprove.com' => 'SiteImprove Crawler',
			'magpie-crawler' => 'Brandwatch Magpie Crawler',
			'linkedinbot' => 'LinkedIn Bot',
			'dotbot' => 'Moz DotBot',
			'dataforseobot' => 'DataForSeo Bot',
			'wordpress' => 'WordPress',
			'prtg network monitor' => 'Paessler PRTG Bot',
			'prtgcloudbot' => 'Paessler PRTG Bot',
			'powershell' => 'PowerShell',
			'ccbot' => 'CommonCrawl Bot',
			'oncrawl' => 'OnCrawl Bot',
			'pycurl' => 'PycURL',
			'chatgpt-user' => 'ChatGPT User',
			'mail.ru_bot' => 'Mail.ru Bot',
			'wpbot' => 'Wpbot',
			'dnbcrawler-analytics' => 'DnB Crawler Analytics',
			'baiduspider-image+' => 'Baidu Spider',
			'baiduspider-render' => 'Baidu Spider',
			'baiduspider-ads' => 'Baidu Spider',
			'amazon-qbusiness' => 'Amazon Bot',
			'amazon cloudfront' => 'Amazon Bot',
			'amazonbot-video' => 'Amazon Bot',
			'hubspot crawler' => 'HubSpot Crawler',
			'wordpress.com mshots' => 'WordPress.com mShots',
			'wordpress.com' => 'WordPress',
			'p3p validator' => 'P3P Validator',
			'w3c-checklink' => 'W3C Checklink',
			'w3c_validator' => 'W3C Validator',
			'omgili' => 'Webz.io',
			'bluesky cardyb' => 'Bluesky'
		];

		$lower = \mb_strtolower($parts[0]);

		// later arrays win in array_merge(): $data overrides the generated values, and the resolved category overrides both
		return \array_merge([
			'type' => 'robot',
			'app' => $apps[$lower] ?? self::normaliseAppname($parts[0]),
			'appname' => $parts[0],
			'appversion' => empty($parts[1]) ? null : $parts[1]
		], $data, [
			'category' => $category[$lower] ?? $data['category'] ?? (\mb_stripos($value, 'crawl') !== false || \mb_stripos($value, 'bot') !== false ? 'crawler' : 'scraper')
		]);
	}
213
214 16
	/**
	 * Converts a raw application token name into a human readable name
	 *
	 * Underscores and hyphens become spaces, plus signs are removed, and a space is inserted before
	 * every uppercase ASCII letter so camel-cased names are split into words. Consecutive single
	 * character words are joined back together so acronyms stay intact, and a single leading character
	 * is joined to the word that follows it. Words longer than one character have their first
	 * letter uppercased. Finally "bot", "crawler" and "spider" are split out as capitalised words,
	 * except that "ro bot" is rejoined as "Robot".
	 *
	 * @param string $name The raw application name to normalise
	 * @return string The normalised application name
	 */
	public static function normaliseAppname(string $name) : string {
		$letters = \range('A', 'Z');

		// separators first, then every capital letter gets a space in front of it
		$find = \array_merge(['_', '-', '+'], $letters);
		$replace = \array_merge([' ', ' ', ''], \array_map(fn (string $letter) : string => ' '.$letter, $letters));
		$words = \explode(' ', \trim(\str_replace($find, $replace, $name)));

		$output = '';
		$prevsingle = true; // starts true so that a leading run of single characters is not separated
		foreach ($words AS $index => $word) {
			if ($word === '') {
				continue;
			}
			$issingle = \mb_strlen($word) === 1;

			// no separator inside a run of single characters, or between a single first character and the second word
			$join = $prevsingle && ($issingle || $index === 1);
			$output .= ($join ? '' : ' ').($issingle ? $word : \ucfirst($word));
			$prevsingle = $issingle;
		}

		// replace afterward for where it is preceded by an acronym; the subject is a string, so a string is returned
		$output = \str_ireplace(['bot', 'crawler', 'spider', '  ', 'ro bot'], [' Bot', ' Crawler', ' Spider', ' ', 'Robot'], $output);
		return \trim($output);
	}
229
230
	/**
	 * Generates a configuration array for matching crawlers
	 *
	 * Each key is the string to look for in a user agent token. Each value is a props object built
	 * from a match mode ('start', 'exact', 'any' or 'end') and either a fixed array of properties or a
	 * callback that receives the matched token (and optionally its index and the full token list)
	 * and returns the properties to set.
	 * NOTE(review): the exact semantics of the match modes are defined by the props class, which is not visible here.
	 *
	 * Most entries delegate to getApp() with a preset category through the closures in $fn.
	 *
	 * @return array<string,props> An array with keys representing the string to match, and values a props object defining how to generate the match and which properties to set
	 */
	public static function get() : array {

		// category specific wrappers around getApp(), 'map' leaves the category to be resolved by getApp()
		$fn = [
			'search' => fn (string $value) : array => self::getApp($value, ['category' => 'search']),
			'ads' => fn (string $value) : array => self::getApp($value, ['category' => 'ads']),
			'validator' => fn (string $value) : array => self::getApp($value, ['category' => 'validator']),
			'ai' => fn (string $value) : array => self::getApp($value, ['category' => 'ai']),
			'feed' => fn (string $value) : array => self::getApp($value, \array_merge(
				\str_contains($value, 'WhatsApp/') ? [
					'app' => 'WhatsApp'
				] : [],
				[
					'category' => 'feed'
				]
			)),
			'crawler' => fn (string $value) : array => self::getApp($value, ['category' => 'crawler']),
			'monitor' => fn (string $value) : array => self::getApp($value, ['category' => 'monitor']),
			'scraper' => fn (string $value) : array => self::getApp($value, ['category' => 'scraper']),
			'map' => fn (string $value) : array => self::getApp($value)
		];
		return [
			'Mozlila/' => new props('start', [ // misspelling of "Mozilla/"
				'type' => 'robot',
				'category' => 'scraper'
			]),
			'Moblie' => new props('exact', [ // some samsung devices misspelt it
				'type' => 'robot',
				'category' => 'scraper'
			]),
			'HeadlessChrome/' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'crawler',
				'browser' => 'HeadlessChrome',
				'browserversion' => \mb_substr($value, 15)
			]),
			'Yahoo! Slurp' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'search',
				'app' => 'Yahoo! Slurp',
				'appname' => $value
			]),
			'Google-Site-Verification/' => new props('start', $fn['validator']),
			'Google-InspectionTool/' => new props('start', $fn['validator']),
			'Google-Safety' => new props('exact', $fn['validator']),
			'Google-Read-Aloud' => new props('exact', $fn['feed']),
			'Google AppsViewer' => new props('exact', $fn['feed']),
			'Mediapartners-Google' => new props('start', $fn['search']),
			'FeedFetcher-Google' => new props('exact', $fn['feed']),
			'Google-PageRenderer' => new props('start', $fn['crawler']),
			'GoogleProducer' => new props('exact', $fn['feed']),
			'Google-adstxt' => new props('exact', $fn['ads']),
			'Google-Adwords-Instant' => new props('exact', $fn['ads']),
			'Gemini-Deep-Research' => new props('exact', $fn['ai']),
			'GoogleAgent-Mariner' => new props('exact', $fn['ai']),
			'CFNetwork/' => new props('start', $fn['feed']),
			'Siteimprove.com' => new props('any', fn (string $value) : array => \array_merge([
				'url' => 'https://siteimprove.com'
			], $fn['crawler']($value))),
			'SEOlyt/' => new props('any', $fn['crawler']),
			'CyotekWebCopy' => new props('start', $fn['scraper']),
			'scrapy' => new props('start', $fn['scraper']),
			'Yandex' => new props('start', function (string $value) : array {
				$parts = \explode('/', $value, 3);
				return [
					'type' => 'robot',
					'category' => 'search',
					'app' => 'Yandex Bot',
					'appname' => $parts[0],
					'appversion' => $parts[1] ?? null
				];
			}),
			'Google Page Speed Insights' => new props('exact', $fn['validator']),
			'Qwantify' => new props('start', function (string $value) : array {
				$parts = \explode('/', $value, 3);
				return [
					'type' => 'robot',
					'category' => 'search',
					'app' => 'Qwant Web Crawler',
					'appname' => $parts[0],
					'appversion' => $parts[1] ?? null
				];
			}),
			'amazon-kendra' => new props('start', fn () : array => [
				'type' => 'robot',
				'category' => 'crawler',
				'app' => 'Amazon Bot',
				'appname' => 'Amazon Kendra'
			]),
			'amazon-QBusiness' => new props('exact', $fn['ai']),
			'amazon CloudFront' => new props('exact', $fn['validator']),
			'Amazonbot-Video/' => new props('start', $fn['crawler']),
			'okhttp' => new props('start', $fn['scraper']),
			'python' => new props('start', $fn['scraper']),
			'grpc-python/' => new props('start', $fn['scraper']),
			'LWP::Simple/' => new props('start', $fn['scraper']),
			'jsdom/' => new props('start', $fn['scraper']),
			'Nessus' => new props('start', $fn['monitor']),
			'monitoring360bot' => new props('start', $fn['monitor']),
			'Cloudflare' => new props('start', $fn['validator']),
			'PTST/' => new props('start', $fn['validator']),
			'+https://developers.cloudflare.com/security-center/' => new props('exact', $fn['monitor']),
			'AppSignalBot/' => new props('start', $fn['monitor']),
			'Better Uptime Bot' => new props('start', [
				'type' => 'robot',
				'category' => 'monitor',
				'app' => 'Better Uptime Bot',
				'appname' => 'Better Uptime Bot'
			]),
			'Chrome-Lighthouse' => new props('start', $fn['validator']),
			'Siege/' => new props('start', $fn['validator']),
			'Microsoft Profiling/' => new props('any', $fn['validator']),
			'Bidtellect' => new props('start', $fn['crawler']),
			'magpie-crawler/' => new props('start', $fn['crawler']),
			'Web Measure/' => new props('start', $fn['crawler']),
			'Bluesky Cardyb/' => new props('start', $fn['crawler']),
			'PingdomTMS/' => new props('start', $fn['monitor']),
			'DynGate' => new props('exact', $fn['monitor']),
			'CensysInspect/' => new props('start', $fn['monitor']),
			'Datadog/Synthetics' => new props('exact', [
				'type' => 'robot',
				'category' => 'monitor',
				'app' => 'Datadog/Synthetics'
			]),
			'RuxitSynthetic/' => new props('start', $fn['monitor']),
			'Checkly/' => new props('start', $fn['monitor']),
			'Uptime/' => new props('start', $fn['monitor']),
			'HostTracker/' => new props('start', $fn['monitor']),
			// NOTE(review): "[email protected]" in the next two keys looks like a redacted e-mail address — confirm against the canonical source
			'NCSC Web Check [email protected]' => new props('exact', $fn['monitor']),
			'Enhanced WebCheck [email protected]' => new props('exact', $fn['monitor']),
			'Pingdom.com' => new props('start', function (string $value) : array {

				// the version is the last underscore separated segment of the token
				$version = \explode('_', \trim($value, '_'));
				return [
					'type' => 'robot',
					'category' => 'monitor',
					'app' => 'Pingdom Bot',
					'appname' => \trim($value, '_'),
					'appversion' => \end($version)
				];
			}),
			'proximic' => new props('exact', $fn['ads']),
			'WordPress' => new props('start', $fn['feed']),
			'PRTG Network Monitor' => new props('exact', $fn['monitor']),
			'PRTGCloudBot/' => new props('start', $fn['monitor']),
			'Site24x7' => new props('exact', $fn['monitor']),
			'StatusCake' => new props('exact', $fn['monitor']),
			'AWS Network Health' => new props('start', $fn['monitor']),
			'adbeat.com' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'ads',
				'app' => 'Adbeat',
				'appname' => 'Adbeat',
				'url' => 'https://'.$value
			]),
			'MicrosoftPreview/' => new props('start', $fn['feed']),
			'YahooMailProxy' => new props('exact', $fn['feed']),
			'PhxBot/' => new props('start', $fn['feed']), // proton mail
			'Embedly/' => new props('start', $fn['feed']),
			'PayPal IPN' => new props('exact', $fn['feed']),
			'DropboxPreviewBot/' => new props('start', $fn['feed']),
			'Pleroma' => new props('start', fn (string $value) : array => [ // mastodon
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Mastodon',
				'appname' => 'Pleroma',
				'appversion' => \mb_substr($value, 8)
			]),
			'Outlook-Android/' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Outlook',
				'appname' => 'Outlook-Android',
				'platform' => 'Android',
				'appversion' => \mb_substr($value, 16)
			]),
			'Outlook-iOS/' => new props('start', fn (string $value, int $i, array $tokens) : array => [
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Outlook',
				'appname' => 'Outlook-iOS',
				'platform' => 'iOS',
				'appversion' => $tokens[$i+1] ?? \mb_substr($value, 12)
			]),
			'OutlookMobileCloudService-Autodetect/' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Outlook',
				'appname' => 'OutlookMobileCloudService-Autodetect',
				'appversion' => \mb_substr($value, 37)
			]),
			'HubSpot Connect ' => new props('start', function (string $value, int $i, array $tokens) : array {
				$app = 'HubSpot Connect';
				$count = \count($tokens);

				// look for a "namespace: " token from the matched token onwards, and name the app from it and the token after it
				for ($n = $i; $n < $count; $n++) {
					if (\str_starts_with($tokens[$n], 'namespace: ')) {

						// the namespace token may be the last one, so do not read past the end of the list
						$app = \mb_substr($tokens[$n], 11).' - '.($tokens[$n+1] ?? '');
						break;
					}
				}
				return [
					'type' => 'robot',
					'category' => 'feed',
					'app' => 'HubSpot Connect',
					'appname' => $app,
					'appversion' => \mb_substr($value, 16) ?: null
				];
			}),
			'TikTokSpider' => new props('start', $fn['feed']),
			'Pro-Sitemaps/' => new props('start', $fn['crawler']),
			'Pandalytics/' => new props('start', $fn['crawler']),
			'omgili/' => new props('start', $fn['crawler']),
			'AwarioBot/' => new props('start', $fn['crawler']),
			'AwarioSmartBot/' => new props('start', $fn['crawler']),
			'AwarioRssBot/' => new props('start', $fn['crawler']),
			'ICC-Crawler/' => new props('start', $fn['crawler']),
			'The National Archives UK Government Web Archive' => new props('start', $fn['crawler']),
			'Citoid' => new props('exact', $fn['crawler']),
			'trendictionbot' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'crawler',
				'app' => 'Trendicion Bot',
				'appname' => 'trendictionbot',
				'appversion' => \mb_substr($value, 14) ?: null
			]),
			'Chrome Privacy Preserving Prefetch Proxy' => new props('exact', $fn['feed']),
			'ViberUrlDownloader' => new props('exact', $fn['feed']),
			'GoogleDocs' => new props('exact', fn (string $value, int $i, array $tokens) : array => [
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Google Docs',
				'appname' => $value.'; '.($tokens[$i+1] ?? '') // the following token may not exist
			]),
			'Google-Lens' => new props('exact', $fn['feed']),
			'ManicTime/' => new props('start', $fn['feed']),
			'Yik Yak/' => new props('start', $fn['feed']),
			'HubSpot-Link-Resolver' => new props('exact', $fn['feed']),
			'AppleExchangeWebServices/' => new props('start', $fn['feed']),
			'The Lounge IRC Client' => new props('exact', $fn['feed']),
			'W3C-checklink/' => new props('start', $fn['validator']),
			'CSSCheck/' => new props('start', $fn['validator']),
			'Let\'s Encrypt validation server' => new props('exact', $fn['validator']),
			'SEO-Macroscope/' => new props('start', $fn['validator']),
			'Electronic Frontier Foundation\'s Do Not Track Verifier' => new props('exact', $fn['validator']),
			'Barracuda Sentinel' => new props('start', $fn['validator']),
			'Expanse' => new props('start', $fn['crawler']),
			'eCairn-Grabber/' => new props('start', $fn['scraper']),
			'SEOkicks' => new props('exact', $fn['crawler']),
			'PostmanRuntime/' => new props('start', $fn['scraper']),
			'axios/' => new props('start', $fn['scraper']),
			'Rogerbot/' => new props('start', $fn['crawler']),
			'DashLinkPreviews/' => new props('start', $fn['feed']),
			'Snapchat/' => new props('start', $fn['feed']),
			'WhatsApp/' => new props('any', $fn['feed']),
			'Hootsuite-Authoring/' => new props('start', $fn['feed']),
			'URL Preview' => new props('any', $fn['feed']),
			'Link Preview' => new props('any', $fn['feed']),
			'ApacheBench/' => new props('start', $fn['validator']),
			'Wheregoes.com Redirect Checker/' => new props('start', $fn['validator']),
			'Asana/' => new props('start', $fn['feed']),
			'Java/' => new props('any', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'scraper',
				'app' => 'Java',
				'appname' => $value,
				'appversion' => \explode('/', $value, 3)[1]
			]),
			'curl/' => new props('any', $fn['scraper']),
			'Wget/' => new props('start', $fn['scraper']),
			'rest-client/' => new props('start', $fn['scraper']),
			'ruby/' => new props('start', $fn['scraper']),
			'Bun/' => new props('start', $fn['scraper']),
			'CakePHP' => new props('start', $fn['scraper']),
			'cpp-httplib/' => new props('start', $fn['scraper']),
			'Dart/' => new props('start', $fn['scraper']),
			'Deno/' => new props('start', $fn['scraper']),
			'Datadog' => new props('start', $fn['scraper']),
			// 'libwww-perl/' => new props('start', $fn['scraper']),
			'http/' => new props('start', $fn['scraper']),
			'Cpanel-HTTP-Client/' => new props('start', $fn['scraper']),
			'http-client/' => new props('any', $fn['scraper']),
			'HttpClient/' => new props('any', $fn['scraper']),
			'PowerShell/' => new props('start', $fn['scraper']),
			'OAI-SearchBot/' => new props('start', $fn['search']),
			'iaskspider/' => new props('start', $fn['search']),
			'MeltwaterNews' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'crawler',
				'app' => 'Meltwater News',
				'appname' => 'MeltwaterNews',
				'url' => \mb_substr($value, 14) ?: null
			]),
			'Google-Extended' => new props('start', $fn['ai']),
			'ChatGPT-User/' => new props('start', $fn['feed']),
			'Cohere' => new props('start', $fn['ai']),
			'facebookexternalhit/' => new props('start', $fn['feed']),
			'facebookcatalog/' => new props('start', $fn['crawler']),
			'meta-externalagent' => new props('start', $fn['ai']),
			'meta-externalfetcher' => new props('start', $fn['feed']),
			'BrightBot ' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'ai',
				'app' => 'Bright Bot',
				'appname' => 'BrightBot',
				'appversion' => \mb_substr($value, 10) ?: null
			]),
			'anthropic-ai' => new props('start', $fn['ai']),
			'bigsur.ai' => new props('start', $fn['ai']),
			'Claude User' => new props('start', $fn['ai']),
			'Claude Web' => new props('start', $fn['ai']),
			'cohere-ai' => new props('start', $fn['ai']),
			'cohere-training-data-crawler' => new props('start', $fn['ai']),
			'Cotoyogi' => new props('start', $fn['ai']),
			'Crawlspace' => new props('start', $fn['ai']),
			'Datenbank Crawler' => new props('start', $fn['ai']),
			'Devin' => new props('start', $fn['ai']),
			'FirecrawlAgent' => new props('start', $fn['ai']),
			'FriendlyCrawler' => new props('start', $fn['ai']),
			'MistralAI-User' => new props('start', $fn['ai']),
			'NovaAct' => new props('start', $fn['ai']), // amazon
			'Panscient' => new props('start', $fn['ai']),
			'pantest' => new props('start', $fn['ai']),
			'Perplexity' => new props('start', $fn['ai']),
			'VelenPublicWebCrawler' => new props('start', $fn['ai']),

			// generic catch-all patterns
			'Validator' => new props('any', $fn['validator']),
			'feed' => new props('any', $fn['feed']),
			'bot/' => new props('any', $fn['map']),
			'bot-' => new props('any', $fn['map']),
			' bot ' => new props('any', $fn['map']),
			'bot' => new props('end', $fn['map']),
			'spider' => new props('any', $fn['crawler']),
			'crawler' => new props('any', $fn['map']),
		];
	}
567
}