<?php
declare(strict_types = 1);
namespace hexydec\agentzero;

class crawlers {

	/**
	 * Per-application overrides, keyed by the lowercased application name (the part of the token before the first "/")
	 *
	 * Each entry is merged last in getApp(), so its values take precedence over both the generated defaults and
	 * the $data passed by the caller
	 *
	 * @var array<string,array<string,string>>
	 */
	private const CATEGORIES = [
		'yacybot' => [
			'category' => 'search',
			'app' => 'YacyBot'
		],
		'googlebot' => [
			'category' => 'search',
			'app' => 'GoogleBot'
		],
		'googlebot-mobile' => [
			'category' => 'search',
			'app' => 'GoogleBot'
		],
		'googlebot-image' => [
			'category' => 'search',
			'app' => 'GoogleBot'
		],
		'googlebot-video' => [
			'category' => 'search',
			'app' => 'GoogleBot'
		],
		'googlebot-news' => [
			'category' => 'search',
			'app' => 'GoogleBot'
		],
		'storebot-google' => [
			'category' => 'search',
			'app' => 'GoogleBot'
		],
		'adsbot-google' => [
			'category' => 'ads',
			'app' => 'GoogleBot'
		],
		'adsbot-google-mobile' => [
			'category' => 'ads',
			'app' => 'GoogleBot'
		],
		'mediapartners-google' => [
			'category' => 'ads',
			'app' => 'GoogleBot'
		],
		'bingbot' => [
			'category' => 'search',
			'app' => 'BingBot'
		],
		'adidxbot' => [
			'category' => 'ads',
			'app' => 'AdidxBot'
		],
		'duckduckbot' => [
			'category' => 'search',
			'app' => 'DuckDuckBot'
		],
		'duckduckgo-favicons-bot' => [
			'category' => 'search',
			'app' => 'DuckDuckBot'
		],
		'coccocbot-image' => [
			'category' => 'search',
			'app' => 'CoccocBot'
		],
		'coccocbot-web' => [
			'category' => 'search',
			'app' => 'CoccocBot'
		],
		'applebot' => [
			'category' => 'search',
			'app' => 'AppleBot'
		],
		'yandexbot' => [
			'category' => 'search'
		],
		'mj12bot' => [
			'category' => 'search',
			'app' => 'Majestic 12 Bot'
		],
		'mail.ru_bot' => [
			'category' => 'search',
			'app' => 'Mail.ru Bot'
		],
		'exabot' => [
			'category' => 'search',
			'app' => 'ExaBot'
		],
		'uptimerobot' => [
			'category' => 'monitor'
		],
		'petalbot' => [
			'category' => 'search'
		],
		'twitterbot' => [
			'category' => 'feed',
			'app' => 'TwitterBot'
		],
		'xbot' => [
			'category' => 'feed'
		],
		'discordbot' => [
			'category' => 'feed',
			'app' => 'DiscordBot'
		],
		'sematextsyntheticsrobot' => [
			'category' => 'monitor',
			'app' => 'Sematext Synthetics Robot'
		],
		'linkedinbot' => [
			'category' => 'feed'
		],
		'paperlibot' => [
			'category' => 'feed'
		],
		'bitlybot' => [
			'category' => 'feed',
			'app' => 'Bit.ly Bot'
		],
		'tineye-bot' => [
			'category' => 'search',
			'app' => 'TinEye Bot'
		],
		'pinterestbot' => [
			'category' => 'feed',
			'app' => 'PinterestBot'
		],
		'webcrawler' => [
			'category' => 'crawler'
		],
		'webprosbot' => [
			'category' => 'crawler',
			'app' => 'WebprosBot'
		],
		'guzzlehttp' => [
			'category' => 'scraper'
		],
		'telegrambot' => [
			'category' => 'feed'
		],
		'semrushbot' => [
			'category' => 'crawler'
		],
		'mediatoolkitbot' => [
			'category' => 'crawler',
			'app' => 'MediaToolkitBot'
		],
		'iploggerbot' => [
			'category' => 'monitor'
		],
		'cfnetwork' => [
			'category' => 'feed',
			'app' => 'Apple Core Foundation Network'
		],
		// NOTE(review): "[email protected]" looks like an obfuscated e-mail placeholder rather than the real
		// user-agent text - confirm the intended address; it must stay the lowercase form of the key used in get()
		'ncsc web check [email protected]' => [
			'category' => 'monitor',
			'app' => 'NCSC Web Check'
		],
		'google-site-verification' => [
			'category' => 'validator',
			'app' => 'Google Site Verification'
		],
		'google-inspectiontool' => [
			'category' => 'validator',
			'app' => 'Google Inspection Tool'
		],
		'pingdomtms' => [
			'category' => 'monitor',
			'app' => 'Pingdom.com'
		],
		'facebookexternalhit' => [
			'category' => 'feed',
			'app' => 'Facebook URL Preview'
		],
		'phxbot' => [
			'app' => 'ProtonMail Bot'
		]
	];

	/**
	 * Extracts application and version information from a token
	 *
	 * The token is split on the first "/" into a name and a version. A leading "v" is stripped from the version,
	 * which is then truncated at the first character that is not a digit or a dot. Tokens containing "://",
	 * starting with "Chrome/", or equal (case-insensitively) to "Cubot" or "Power bot" are not treated as
	 * crawlers and produce an empty array.
	 *
	 * @param string $value The token to be processed
	 * @param array<string|null> $data An array containing existing data to merge, this overrides the generated defaults
	 * @return array<string|int|float|null> The $data array with the processed application and version added, or an empty array if the token was rejected
	 */
	public static function getApp(string $value, array $data = []) : array {

		// the bot name will be in the URL, and the other tokens are browsers/devices that only look like bots
		if (\str_contains($value, '://') || \str_starts_with($value, 'Chrome/') || \strcasecmp('Cubot', $value) === 0 || \strcasecmp('Power bot', $value) === 0) {
			return [];
		}
		$parts = \explode('/', $value, 2);

		// process version - compared against '' rather than using empty() so that a version of "0" is retained
		$version = $parts[1] ?? '';
		if ($version !== '') {
			$version = \ltrim($version, 'v');
			$version = \substr($version, 0, \strspn($version, '0123456789.'));
		}

		// later arrays win: generated defaults < caller supplied $data < known application overrides
		return \array_merge([
			'type' => 'robot',
			'category' => \mb_stripos($value, 'crawler') !== false || \mb_stripos($value, 'bot') !== false ? 'crawler' : 'scraper',
			'app' => $parts[0],
			'appname' => $parts[0],
			'appversion' => $version === '' ? null : $version
		], $data, self::CATEGORIES[\mb_strtolower($parts[0])] ?? []);
	}

	/**
	 * Generates a configuration array for matching crawlers
	 *
	 * @return array<string,props> An array with keys representing the string to match, and values a props object defining how to generate the match and which properties to set
	 */
	public static function get() : array {

		// reusable callbacks that pass the token to getApp() with a preset category
		$fn = [
			'search' => fn (string $value) : array => self::getApp($value, ['category' => 'search']),
			'ads' => fn (string $value) : array => self::getApp($value, ['category' => 'ads']),
			'validator' => fn (string $value) : array => self::getApp($value, ['category' => 'validator']),
			'feed' => fn (string $value) : array => self::getApp($value, \array_merge(
				\str_contains($value, 'WhatsApp/') ? [
					'app' => 'WhatsApp'
				] : [],
				[
					'category' => 'feed'
				]
			)),
			'crawler' => function (string $value) : array {
				$parts = \explode('/', $value, 2);

				// spiders that are search engines rather than generic crawlers
				$map = [
					'baiduspider' => 'search',
					'haosouspider' => 'search',
					'yisouspider' => 'search',
					'360spider' => 'search',
					'sogou web spider' => 'search',
					'bytespider' => 'search',
				];
				return self::getApp($value, [
					'category' => $map[\mb_strtolower($parts[0])] ?? 'crawler'
				]);
			},
			'monitor' => fn (string $value) : array => self::getApp($value, ['category' => 'monitor']),
			'scraper' => fn (string $value) : array => self::getApp($value, ['category' => 'scraper']),

			// no preset category: getApp() derives it from the token or its known application overrides
			'map' => fn (string $value) : array => self::getApp($value)
		];
		return [
			'Yahoo! Slurp' => new props('exact', $fn['search']),
			'facebookexternalhit/' => new props('start', $fn['map']),
			'Google-Site-Verification/' => new props('start', $fn['map']),
			'Google-InspectionTool/' => new props('start', $fn['map']),
			'Mediapartners-Google' => new props('start', $fn['search']),
			'FeedFetcher-Google' => new props('exact', $fn['feed']),
			'GoogleProducer' => new props('exact', $fn['feed']),
			'Google-adstxt' => new props('exact', $fn['ads']),
			'CFNetwork/' => new props('start', $fn['map']),
			'Siteimprove.com' => new props('any', $fn['crawler']),
			'CyotekWebCopy' => new props('start', $fn['scraper']),
			'Google Page Speed Insights' => new props('exact', $fn['validator']),
			'Qwantify' => new props('start', function (string $value) : array {
				$parts = \explode('/', $value, 3);
				return [
					'type' => 'robot',
					'category' => 'search',
					'app' => 'Qwant Web Crawler',
					'appname' => $parts[0],
					'appversion' => $parts[1] ?? null
				];
			}),
			'okhttp' => new props('start', $fn['scraper']),
			'python' => new props('start', $fn['scraper']),
			'jsdom/' => new props('start', $fn['scraper']),
			'Nessus' => new props('start', $fn['monitor']),
			'Chrome-Lighthouse' => new props('start', $fn['validator']),
			'Siege/' => new props('start', $fn['validator']),
			'Microsoft Profiling/' => new props('any', $fn['validator']),
			'Bidtellect' => new props('start', $fn['crawler']),
			'magpie-crawler/' => new props('start', $fn['crawler']),
			'PingdomTMS/' => new props('start', $fn['map']),
			'DynGate' => new props('exact', $fn['monitor']),
			'Datadog/Synthetics' => new props('exact', [
				'type' => 'robot',
				'category' => 'monitor',
				'app' => 'Datadog/Synthetics'
			]),
			'RuxitSynthetic/' => new props('start', $fn['monitor']),
			'Checkly/' => new props('start', $fn['monitor']),
			'Uptime/' => new props('start', $fn['monitor']),
			'HostTracker/' => new props('start', $fn['monitor']),

			// NOTE(review): "[email protected]" looks like an obfuscated e-mail placeholder - confirm the intended address
			'NCSC Web Check [email protected]' => new props('exact', $fn['map']),
			'Pingdom.com' => new props('start', function (string $value) : array {
				$version = \explode('_', \trim($value, '_'));
				return [
					'type' => 'robot',
					'category' => 'monitor',
					'app' => 'Pingdom.com',
					'appname' => \trim($value, '_'),
					'appversion' => \end($version) // the segment after the last underscore
				];
			}),
			'proximic' => new props('exact', $fn['ads']),
			'WordPress' => new props('start', $fn['monitor']),
			'PRTG Network Monitor' => new props('exact', $fn['monitor']),
			'PRTGCloudBot/' => new props('start', $fn['monitor']),
			'Site24x7' => new props('exact', $fn['monitor']),
			'StatusCake' => new props('exact', $fn['monitor']),
			'adbeat.com' => new props('start', fn (string $value) : array => [
				'type' => 'robot',
				'category' => 'ads',
				'app' => 'Adbeat',
				'appname' => 'Adbeat',
				'url' => 'https://'.$value
			]),
			'MicrosoftPreview/' => new props('start', $fn['feed']),
			'YahooMailProxy' => new props('exact', $fn['feed']),
			'PhxBot/' => new props('start', $fn['feed']), // proton mail
			'Pleroma' => new props('start', fn (string $value) : array => [ // mastodon
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Mastodon',
				'appname' => 'Pleroma',
				'appversion' => \mb_substr($value, 8) // skips "Pleroma" plus one separator character
			]),
			'Outlook-Android/' => new props('start', fn (string $value) : array => [ // outlook link preview
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Outlook',
				'appname' => 'Outlook-Android',
				'platform' => 'Android',
				'appversion' => \mb_substr($value, 16) // everything after "Outlook-Android/"
			]),
			'Outlook-iOS/' => new props('start', fn (string $value, int $i, array $tokens) : array => [ // outlook link preview
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Outlook',
				'appname' => 'Outlook-iOS',
				'platform' => 'iOS',
				'appversion' => $tokens[$i+1] ?? \mb_substr($value, 12) // prefer the following token, else everything after "Outlook-iOS/"
			]),
			'OutlookMobileCloudService-Autodetect/' => new props('start', fn (string $value) : array => [ // outlook autodetect service
				'type' => 'robot',
				'category' => 'feed',
				'app' => 'Outlook',
				'appname' => 'OutlookMobileCloudService-Autodetect',
				'appversion' => \mb_substr($value, 37) // everything after "OutlookMobileCloudService-Autodetect/"
			]),
			'Chrome Privacy Preserving Prefetch Proxy' => new props('exact', $fn['feed']),
			'ViberUrlDownloader' => new props('exact', $fn['feed']),
			'Google-Lens' => new props('exact', $fn['feed']),
			'ManicTime/' => new props('start', $fn['feed']),
			'Yik Yak/' => new props('start', $fn['feed']),
			'HubSpot-Link-Resolver' => new props('exact', $fn['feed']),
			'W3C-checklink/' => new props('start', $fn['validator']),
			'CSSCheck/' => new props('start', $fn['validator']),
			'Let\'s Encrypt validation server' => new props('exact', $fn['validator']),

			// NOTE(review): was 'exact', which presumably could never match a versioned token such as
			// "SEO-Macroscope/1.0"; aligned with the other trailing-slash keys - confirm against props
			'SEO-Macroscope/' => new props('start', $fn['validator']),
			'Electronic Frontier Foundation\'s Do Not Track Verifier' => new props('exact', $fn['validator']),
			'Expanse' => new props('start', $fn['crawler']),
			'eCairn-Grabber/' => new props('start', $fn['scraper']),
			'SEOkicks' => new props('exact', $fn['crawler']),
			'PostmanRuntime/' => new props('start', $fn['scraper']),
			'axios/' => new props('start', $fn['scraper']),
			'Rogerbot/' => new props('start', $fn['crawler']),
			'DashLinkPreviews/' => new props('start', $fn['feed']),
			'Snapchat/' => new props('start', $fn['feed']),
			'HTTPClient/' => new props('start', $fn['scraper']),
			'WhatsApp/' => new props('any', $fn['feed']),
			'Hootsuite-Authoring/' => new props('start', $fn['feed']),
			'Snap URL Preview Service' => new props('exact', $fn['feed']),
			'ApacheBench/' => new props('start', $fn['validator']),
			'Asana/' => new props('start', $fn['feed']),
			'Java/' => new props('start', $fn['scraper']),
			'curl/' => new props('any', $fn['scraper']),
			'Wget/' => new props('start', $fn['scraper']),
			'rest-client/' => new props('start', $fn['scraper']),
			'ruby/' => new props('start', $fn['scraper']),
			'Bun/' => new props('start', $fn['scraper']),
			'CakePHP' => new props('start', $fn['scraper']),
			'cpp-httplib/' => new props('start', $fn['scraper']),
			'Dart/' => new props('start', $fn['scraper']),
			'Deno/' => new props('start', $fn['scraper']),
			'libwww-perl/' => new props('start', $fn['scraper']),
			'http/' => new props('start', $fn['scraper']),
			'Cpanel-HTTP-Client/' => new props('start', $fn['scraper']),
			'http-client/' => new props('any', $fn['scraper']),
			'HttpClient/' => new props('any', $fn['scraper']),

			// generic catch-alls
			'Validator' => new props('any', $fn['validator']),
			'feed' => new props('any', $fn['feed']),
			'spider' => new props('any', $fn['crawler']),
			'crawler' => new props('any', $fn['map']),
			'bot/' => new props('any', $fn['map']),
			'bot-' => new props('any', $fn['map']),
			' bot ' => new props('any', $fn['map']),
			'bot' => new props('end', $fn['map'])
		];
	}
}