Passed
Push — main ( b01898...c50189 )
by Will
13:15
created

crawlers::normaliseAppname()   B

Complexity

Conditions 7
Paths 14

Size

Total Lines 14
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
eloc 11
c 0
b 0
f 0
nc 14
nop 1
dl 0
loc 14
ccs 12
cts 12
cp 1
crap 7
rs 8.8333
1
<?php
2
declare(strict_types = 1);
3
namespace hexydec\agentzero;
4
5
class crawlers {
6
7
	/**
	 * Extracts application and version information from a token
	 *
	 * The token is split on the first '/' into an application name and an optional version.
	 * The version has any leading 'v' characters removed and is truncated at the first character
	 * that is not a digit or a dot. The lowercased name is looked up in the built-in category and
	 * display name maps; names missing from the display name map are passed to normaliseAppname().
	 *
	 * An empty array is returned when the token contains '://', starts with 'Chrome/'
	 * (case-insensitive), or equals 'Cubot' or 'Power bot' (case-insensitive).
	 *
	 * @param string $value The token to be processed
	 * @param array<string|null> $data An array containing existing data to merge, its values override the generated 'type', 'app', 'appname' and 'appversion'
	 * @return array<string|int|float|null> The $data array with the processed application and version added, or an empty array if the token was rejected
	 */
	public static function getApp(string $value, array $data = []) : array {

		// reject tokens that are not the crawler name itself - bot will be in the URL
		if (\str_contains($value, '://') || \mb_stripos($value, 'Chrome/') === 0 || \strcasecmp('Cubot', $value) === 0 || \strcasecmp('Power bot', $value) === 0) {
			return [];
		}
		$parts = \explode('/', $value, 2);

		// process version - compare against '' rather than using empty(), so that a literal "0" version is retained
		$version = $parts[1] ?? '';
		if ($version !== '') {
			$version = \ltrim($version, 'v');
			$version = \substr($version, 0, \strspn($version, '0123456789.'));
		}

		// categories keyed by lowercased application name, these take precedence over $data['category']
		$category = [
			'yacybot' => 'search',
			'googlebot' => 'search',
			'googlebot-mobile' => 'search',
			'googlebot-image' => 'search',
			'googlebot-video' => 'search',
			'googlebot-news' => 'search',
			'storebot-google' => 'search',
			'adsbot-google' => 'ads',
			'adsbot-google-mobile' => 'ads',
			'mediapartners-google' => 'ads',
			'bingbot' => 'search',
			'adidxbot' => 'ads',
			'duckduckbot' => 'search',
			'duckduckgo-favicons-bot' => 'search',
			'coccocbot-image' => 'search',
			'coccocbot-web' => 'search',
			'applebot' => 'ai',
			'yandexbot' => 'search',
			'mj12bot' => 'search',
			'mail.ru_bot' => 'search',
			'exabot' => 'search',
			'uptimerobot' => 'monitor',
			'petalbot' => 'search',
			'twitterbot' => 'feed',
			'xbot' => 'feed',
			'discordbot' => 'feed',
			'sematextsyntheticsrobot' => 'monitor',
			'linkedinbot' => 'feed',
			'paperlibot' => 'feed',
			'bitlybot' => 'feed',
			'tineye-bot' => 'search',
			'pinterestbot' => 'feed',
			'webcrawler' => 'crawler',
			'webprosbot' => 'crawler',
			'guzzlehttp' => 'scraper',
			'telegrambot' => 'feed',
			'semrushbot' => 'crawler',
			'mediatoolkitbot' => 'crawler',
			'iploggerbot' => 'monitor',
			'baiduspider' => 'search',
			'baiduspider+' => 'search',
			'baiduspider-image+' => 'search',
			'baiduspider-ads' => 'ads',
			'haosouspider' => 'search',
			'yisouspider' => 'search',
			'360spider' => 'search',
			'sogou web spider' => 'search',
			'bytespider' => 'crawler',
			'claudebot' => 'ai',
			'gptbot' => 'ai',
			'diffbot' => 'ai',
			'amazonbot' => 'ai',
			'perplexitybot' => 'ai',
			'youbot' => 'ai',
			'iaskbot' => 'ai',
			'ccbot' => 'crawler',
			'wpbot' => 'ai'
		];

		// display names keyed by lowercased application name
		// NOTE(review): the keys containing "[email protected]" look like e-mail addresses masked by an obfuscation filter - confirm against the repository copy
		$apps = [
			'googlebot' => 'Google Bot',
			'googlebot-mobile' => 'Google Bot',
			'googlebot-image' => 'Google Bot',
			'googlebot-video' => 'Google Bot',
			'googlebot-news' => 'Google Bot',
			'storebot-google' => 'Google Bot',
			'adsbot-google' => 'Google Bot',
			'google-adwords-instant' => 'Google Bot',
			'adsbot-google-mobile' => 'Google Bot',
			'mediapartners-google' => 'Google Bot',
			'google-safety' => 'Google Safety',
			'duckduckbot' => 'DuckDuck Bot',
			'duckduckbot-https' => 'DuckDuck Bot',
			'duckduckgo-favicons-bot' => 'DuckDuck Bot',
			'coccocbot-image' => 'Coccoc Bot',
			'coccocbot-web' => 'Coccoc Bot',
			'mj12bot' => 'Majestic 12 Bot',
			'exabot' => 'ExaBot',
			'twitterbot' => 'TwitterBot',
			'discordbot' => 'DiscordBot',
			'sematextsyntheticsrobot' => 'Sematext Synthetics Robot',
			'bitlybot' => 'Bit.ly Bot',
			'webprosbot' => 'WebprosBot',
			'mediatoolkitbot' => 'MediaToolkit Bot',
			'cfnetwork' => 'Apple Core Foundation Network',
			'ncsc web check [email protected]' => 'NCSC Web Check',
			'enhanced webcheck [email protected]' => 'NCSC Enhanced Web Check',
			'the national archives uk government web archive:' => 'UK Government National Archives',
			'google-inspectiontool' => 'Google Inspection Tool',
			'google-pagerenderer google' => 'Google Page Renderer',
			'pingdomtms' => 'Pingdom Bot',
			'facebookexternalhit' => 'Facebook URL Preview',
			'phxbot' => 'ProtonMail Bot',
			'monitoring360bot' => 'Monitoring360 Bot',
			'cloudflare-healthchecks' => 'Cloudflare Health Checks',
			'cloudflare-alwaysonline' => 'Cloudflare Always Online',
			'cloudflare-traffic-manager' => 'Cloudflare-Traffic-Manager',
			'cloudflare-prefetch' => 'Cloudflare Prefetch',
			'cloudflare-ssldetector' => 'Cloudflare SSL Detector',
			'cloudflare-diagnostics' => 'Cloudflare Diagnostics',
			'ptst' => 'Cloudflare Speed Test',
			'citoid' => 'Wikimedia Citoid',
			'user-agent: seolyt' => 'SEOlyt',
			'bytespider' => 'ByteDance Spider',
			'[email protected]' => 'ByteDance Spider',
			'oai-searchbot' => 'OpenAI SearchBot',
			'semrushbot' => 'Semrush Bot',
			'semrushbot-si' => 'Semrush Bot',
			'semrushbot-ocob' => 'Semrush Bot',
			'semrushbot-swa' => 'Semrush Bot',
			'semrushbot-ba' => 'Semrush Bot',
			'siteauditbot' => 'Semrush Bot',
			'splitsignalbot' => 'Semrush Bot',
			'linkcheck by siteimprove.com' => 'SiteImprove Crawler',
			'sitecheck-sitecrawl by siteimprove.com' => 'SiteImprove Crawler',
			'image size by siteimprove.com' => 'SiteImprove Crawler',
			'probe by siteimprove.com' => 'SiteImprove Crawler',
			'by siteimprove.com' => 'SiteImprove Crawler',
			'magpie-crawler' => 'Brandwatch Magpie Crawler',
			'linkedinbot' => 'LinkedIn Bot',
			'dotbot' => 'Moz DotBot',
			'dataforseobot' => 'DataForSeo Bot',
			'wordpress' => 'WordPress',
			'prtg network monitor' => 'Paessler PRTG Bot',
			'prtgcloudbot' => 'Paessler PRTG Bot',
			'powershell' => 'PowerShell',
			'ccbot' => 'CommonCrawl Bot',
			'oncrawl' => 'OnCrawl Bot',
			'pycurl' => 'PycURL',
			'chatgpt-user' => 'ChatGPT User',
			'mail.ru_bot' => 'Mail.ru Bot',
			'wpbot' => 'Wpbot',
			'dnbcrawler-analytics' => 'DnB Crawler Analytics',
			'baiduspider-image+' => 'Baidu Spider',
			'baiduspider-render' => 'Baidu Spider',
			'baiduspider-ads' => 'Baidu Spider',
			'amazon-kendra' => 'Amazon Bot',
			'amazon-qbusiness' => 'Amazon Bot',
			'amazon cloudfront' => 'Amazon Bot',
			'amazonbot-video' => 'Amazon Bot',
			'hubspot crawler' => 'HubSpot Crawler',
			'wordpress.com mshots' => 'WordPress.com mShots',
			'wordpress.com' => 'WordPress',
			'p3p validator' => 'P3P Validator',
			'w3c-checklink' => 'W3C Checklink',
			'w3c_validator' => 'W3C Validator'
		];

		$lower = \mb_strtolower($parts[0]);

		// merge order: generated defaults, then the caller's $data, then the resolved category which always wins
		return \array_merge([
			'type' => 'robot',
			'app' => $apps[$lower] ?? self::normaliseAppname($parts[0]),
			'appname' => $parts[0],
			'appversion' => $version === '' ? null : $version
		], $data, [
			'category' => $category[$lower] ?? $data['category'] ?? (\mb_stripos($value, 'crawl') !== false || \mb_stripos($value, 'bot') !== false ? 'crawler' : 'scraper')
		]);
	}
184
185 15
	/**
	 * Converts a raw application token into a human readable application name
	 *
	 * Underscores and hyphens are converted to spaces, plus signs are removed, and a space is inserted
	 * before every ASCII capital letter to split the name into words. The words are then rejoined so that
	 * runs of single characters stay together, each longer word has its first letter capitalised, and
	 * 'bot', 'crawler' and 'spider' are separated out as their own capitalised words.
	 *
	 * @param string $name The raw application name
	 * @return string The normalised application name
	 */
	protected static function normaliseAppname(string $name) : string {
		$capitals = \range('A', 'Z');
		$find = ['_', '-', '+', ...$capitals];
		$replace = [' ', ' ', '', ...\array_map(fn (string $letter) : string => ' '.$letter, $capitals)];
		$name = \trim(\str_replace($find, $replace, $name));

		$output = '';
		$single = true; // whether the previous word was a single character, starts as true so leading initials are joined
		foreach (\explode(' ', $name) AS $key => $item) {
			if ($item !== '') {
				$currsingle = \mb_strlen($item) === 1;

				// omit the separator to keep a run of single characters together, and to attach the second chunk to a leading single character
				$output .= ($single && ($currsingle || $key === 1) ? '' : ' ').(!$currsingle ? \ucfirst($item) : $item);
				$single = $currsingle;
			}
		}

		// replace afterward for where it is preceded by an ACRONYM, then collapse double spaces and repair 'Robot' which the 'bot' replacement splits
		return \trim(\str_ireplace(['bot', 'crawler', 'spider', '  ', 'ro bot'], [' Bot', ' Crawler', ' Spider', ' ', 'Robot'], $output));
	}
200
201
	/**
	 * Generates a configuration array for matching crawlers
	 *
	 * Each key is the string to look for, and each value is a props object built from a match mode
	 * ('start', 'exact', 'any' or 'end') and either a fixed array of properties or a callback that
	 * generates them from the matched token. Most callbacks delegate to getApp() with a preset category.
	 *
	 * @return array<string,props> An array with keys representing the string to match, and values a props object defining how to generate the match and which properties to set
	 */
	public static function get() : array {

		// reusable callbacks that pass the token to getApp() with a default category
		$fn = [
			'search' => fn (string $value) : array => self::getApp($value, ['category' => 'search']),
			'ads' => fn (string $value) : array => self::getApp($value, ['category' => 'ads']),
			'validator' => fn (string $value) : array => self::getApp($value, ['category' => 'validator']),
			'ai' => fn (string $value) : array => self::getApp($value, ['category' => 'ai']),
			'feed' => fn (string $value) : array => self::getApp($value, \array_merge(
				\str_contains($value, 'WhatsApp/') ? [
					'app' => 'WhatsApp'
				] : [],
				[
					'category' => 'feed'
				]
			)),
			'crawler' => fn (string $value) : array => self::getApp($value, ['category' => 'crawler']),
			'monitor' => fn (string $value) : array => self::getApp($value, ['category' => 'monitor']),
			'scraper' => fn (string $value) : array => self::getApp($value, ['category' => 'scraper']),
			'map' => fn (string $value) : array => self::getApp($value) // no default category, getApp() derives it
		];
		return [
			'Mozlila/' => new props('start', [ // misspelling of Mozilla
				'type' => 'robot',
				'category' => 'scraper'
			]),
			'Moblie' => new props('exact', [ // some samsung devices misspell it
				'type' => 'robot',
				'category' => 'scraper'
			]),
			'Yahoo! Slurp' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'search',
				'app' => 'Yahoo! Slurp',
				'appname' => $value
			]),
			'Google-Site-Verification/' => new props('start', $fn['validator']),
			'Google-InspectionTool/' => new props('start', $fn['validator']),
			'Google-Safety' => new props('exact', $fn['validator']),
			'Google-Read-Aloud' => new props('exact', $fn['feed']),
			'Google AppsViewer' => new props('exact', $fn['feed']),
			'Mediapartners-Google' => new props('start', $fn['search']),
			'FeedFetcher-Google' => new props('exact', $fn['feed']),
			'Google-PageRenderer' => new props('start', $fn['crawler']),
			'GoogleProducer' => new props('exact', $fn['feed']),
			'Google-adstxt' => new props('exact', $fn['ads']),
			'Google-Adwords-Instant' => new props('exact', $fn['ads']),
			'CFNetwork/' => new props('start', $fn['feed']),
			'Siteimprove.com' => new props('any', $fn['crawler']),
			'SEOlyt/' => new props('any', $fn['crawler']),
			'CyotekWebCopy' => new props('start', $fn['scraper']),
			'Yandex' => new props('start', function (string $value) : array {
				$parts = \explode('/', $value, 3);
				return [
					'type' => 'robot',
					'category' => 'search',
					'app' => 'Yandex Bot',
					'appname' => $parts[0],
					'appversion' => $parts[1] ?? null
				];
			}),
			'Google Page Speed Insights' => new props('exact', $fn['validator']),
			'Qwantify' => new props('start', function (string $value) : array {
				$parts = \explode('/', $value, 3);
				return [
					'type' => 'robot',
					'category' => 'search',
					'app' => 'Qwant Web Crawler',
					'appname' => $parts[0],
					'appversion' => $parts[1] ?? null
				];
			}),
			'amazon-kendra' => new props('exact', $fn['crawler']),
			'amazon-QBusiness' => new props('exact', $fn['ai']),
			'amazon CloudFront' => new props('exact', $fn['validator']),
			'Amazonbot-Video/' => new props('start', $fn['crawler']),
			'okhttp' => new props('start', $fn['scraper']),
			'python' => new props('start', $fn['scraper']),
			'grpc-python/' => new props('start', $fn['scraper']),
			'LWP::Simple/' => new props('start', $fn['scraper']),
			'jsdom/' => new props('start', $fn['scraper']),
			'Nessus' => new props('start', $fn['monitor']),
			'monitoring360bot' => new props('start', $fn['monitor']),
			'Cloudflare' => new props('start', $fn['validator']),
			'PTST/' => new props('start', $fn['validator']),
			'+https://developers.cloudflare.com/security-center/' => new props('exact', $fn['monitor']),
			'AppSignalBot/' => new props('start', $fn['monitor']),
			'Better Uptime Bot' => new props('start', [
				'type' => 'robot',
				'category' => 'monitor',
				'app' => 'Better Uptime Bot',
				'appname' => 'Better Uptime Bot'
			]),
			'Chrome-Lighthouse' => new props('start', $fn['validator']),
			'Siege/' => new props('start', $fn['validator']),
			'Microsoft Profiling/' => new props('any', $fn['validator']),
			'Bidtellect' => new props('start', $fn['crawler']),
			'magpie-crawler/' => new props('start', $fn['crawler']),
			'Web Measure/' => new props('start', $fn['crawler']),
			'PingdomTMS/' => new props('start', $fn['monitor']),
			'DynGate' => new props('exact', $fn['monitor']),
			'CensysInspect/' => new props('start', $fn['monitor']),
			'Datadog/Synthetics' => new props('exact', [
				'type' => 'robot',
				'category' => 'monitor',
				'app' => 'Datadog/Synthetics'
			]),
			'RuxitSynthetic/' => new props('start', $fn['monitor']),
			'Checkly/' => new props('start', $fn['monitor']),
			'Uptime/' => new props('start', $fn['monitor']),
			'HostTracker/' => new props('start', $fn['monitor']),
			// NOTE(review): the two keys containing "[email protected]" look like e-mail addresses masked by an obfuscation filter - confirm against the repository copy
			'NCSC Web Check [email protected]' => new props('exact', $fn['monitor']),
			'Enhanced WebCheck [email protected]' => new props('exact', $fn['monitor']),
			'Pingdom.com' => new props('start', function (string $value) : array {
				$version = \explode('_', \trim($value, '_'));
				return [
					'type' => 'robot',
					'category' => 'monitor',
					'app' => 'Pingdom Bot',
					'appname' => \trim($value, '_'),
					'appversion' => \end($version) // the version is the last underscore separated segment
				];
			}),
			'proximic' => new props('exact', $fn['ads']),
			'WordPress' => new props('start', $fn['feed']),
			'PRTG Network Monitor' => new props('exact', $fn['monitor']),
			'PRTGCloudBot/' => new props('start', $fn['monitor']),
			'Site24x7' => new props('exact', $fn['monitor']),
			'StatusCake' => new props('exact', $fn['monitor']),
			'AWS Network Health' => new props('start', $fn['monitor']),
			'adbeat.com' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'ads',
				'app' => 'Adbeat',
				'appname' => 'Adbeat',
				'url' => 'https://'.$value
			]),
			'MicrosoftPreview/' => new props('start', $fn['feed']),
			'YahooMailProxy' => new props('exact', $fn['feed']),
			'PhxBot/' => new props('start', $fn['feed']), // proton mail
			'Embedly/' => new props('start', $fn['feed']),
			'PayPal IPN' => new props('exact', $fn['feed']),
			'DropboxPreviewBot/' => new props('start', $fn['feed']),
			'Pleroma' => new props('start', fn (string $value) : array => [ // mastodon
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Mastodon',
				'appname' => 'Pleroma',
				'appversion' => \mb_substr($value, 8)
			]),
			'Outlook-Android/' => new props('start', fn (string $value) : array => [ // outlook on android
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Outlook',
				'appname' => 'Outlook-Android',
				'platform' => 'Android',
				'appversion' => \mb_substr($value, 16)
			]),
			'Outlook-iOS/' => new props('start', fn (string $value, int $i, array $tokens) : array => [ // outlook on ios
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Outlook',
				'appname' => 'Outlook-iOS',
				'platform' => 'iOS',
				'appversion' => $tokens[$i+1] ?? \mb_substr($value, 12) // prefer the following token, otherwise use the text after the prefix
			]),
			'OutlookMobileCloudService-Autodetect/' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Outlook',
				'appname' => 'OutlookMobileCloudService-Autodetect',
				'appversion' => \mb_substr($value, 37)
			]),
			'HubSpot Connect ' => new props('start', function (string $value, int $i, array $tokens) : array {
				$app = 'HubSpot Connect';

				// look from the matched token onward for a 'namespace: ' token and use it, plus the token after it, as the app name
				for ($n = $i, $total = \count($tokens); $n < $total; $n++) {
					if (\str_starts_with($tokens[$n], 'namespace: ')) {
						$app = \mb_substr($tokens[$n], 11).' - '.($tokens[$n+1] ?? ''); // the namespace may be the last token
						break;
					}
				}
				return [
					'type' => 'robot',
					'category' => 'feed',
					'app' => 'HubSpot Connect',
					'appname' => $app,
					'appversion' => \mb_substr($value, 16)
				];
			}),
			'Pro-Sitemaps/' => new props('start', $fn['crawler']),
			'Pandalytics/' => new props('start', $fn['crawler']),
			'omgili/' => new props('start', $fn['crawler']),
			// 'CCBot/' => new props('start', $fn['crawler']),
			'The National Archives UK Government Web Archive' => new props('start', $fn['crawler']),
			'Citoid' => new props('exact', $fn['crawler']),
			'trendictionbot' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'crawler',
				'app' => 'Trendicion Bot',
				'appname' => 'trendictionbot',
				'appversion' => \mb_substr($value, 14)
			]),
			'Chrome Privacy Preserving Prefetch Proxy' => new props('exact', $fn['feed']),
			'ViberUrlDownloader' => new props('exact', $fn['feed']),
			'GoogleDocs' => new props('exact', fn (string $value, int $i, array $tokens) : array => [
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Google Docs',
				'appname' => $value.'; '.($tokens[$i+1] ?? '') // there may be no following token
			]),
			'Google-Lens' => new props('exact', $fn['feed']),
			'ManicTime/' => new props('start', $fn['feed']),
			'Yik Yak/' => new props('start', $fn['feed']),
			'HubSpot-Link-Resolver' => new props('exact', $fn['feed']),
			'AppleExchangeWebServices/' => new props('start', $fn['feed']),
			'The Lounge IRC Client' => new props('exact', $fn['feed']),
			'W3C-checklink/' => new props('start', $fn['validator']),
			'CSSCheck/' => new props('start', $fn['validator']),
			'Let\'s Encrypt validation server' => new props('exact', $fn['validator']),
			'SEO-Macroscope/' => new props('start', $fn['validator']),
			'Electronic Frontier Foundation\'s Do Not Track Verifier' => new props('exact', $fn['validator']),
			'Expanse' => new props('start', $fn['crawler']),
			'eCairn-Grabber/' => new props('start', $fn['scraper']),
			'SEOkicks' => new props('exact', $fn['crawler']),
			'PostmanRuntime/' => new props('start', $fn['scraper']),
			'axios/' => new props('start', $fn['scraper']),
			'Rogerbot/' => new props('start', $fn['crawler']),
			'DashLinkPreviews/' => new props('start', $fn['feed']),
			'Snapchat/' => new props('start', $fn['feed']),
			'HTTPClient/' => new props('start', $fn['scraper']),
			'WhatsApp/' => new props('any', $fn['feed']),
			'Hootsuite-Authoring/' => new props('start', $fn['feed']),
			'URL Preview' => new props('any', $fn['feed']),
			'Link Preview' => new props('any', $fn['feed']),
			'ApacheBench/' => new props('start', $fn['validator']),
			'Asana/' => new props('start', $fn['feed']),
			'Java/' => new props('start', $fn['scraper']),
			'curl/' => new props('any', $fn['scraper']),
			'Wget/' => new props('start', $fn['scraper']),
			'rest-client/' => new props('start', $fn['scraper']),
			'ruby/' => new props('start', $fn['scraper']),
			'Bun/' => new props('start', $fn['scraper']),
			'CakePHP' => new props('start', $fn['scraper']),
			'cpp-httplib/' => new props('start', $fn['scraper']),
			'Dart/' => new props('start', $fn['scraper']),
			'Deno/' => new props('start', $fn['scraper']),
			'libwww-perl/' => new props('start', $fn['scraper']),
			'http/' => new props('start', $fn['scraper']),
			'Cpanel-HTTP-Client/' => new props('start', $fn['scraper']),
			'http-client/' => new props('any', $fn['scraper']),
			'HttpClient/' => new props('any', $fn['scraper']),
			'PowerShell/' => new props('start', $fn['scraper']),
			'OAI-SearchBot/' => new props('start', $fn['search']),
			'Google-Extended' => new props('start', $fn['ai']),
			'ChatGPT-User/' => new props('start', $fn['ai']),
			'facebookexternalhit/' => new props('start', $fn['feed']),
			'facebookcatalog/' => new props('start', $fn['feed']),
			'Validator' => new props('any', $fn['validator']),
			'feed' => new props('any', $fn['feed']),
			'bot/' => new props('any', $fn['map']),
			'bot-' => new props('any', $fn['map']),
			' bot ' => new props('any', $fn['map']),
			'bot' => new props('end', $fn['map']),
			'spider' => new props('any', $fn['crawler']),
			'crawler' => new props('any', $fn['map']),
		];
	}
473
}