1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Sinergi\BrowserDetector; |
4
|
|
|
|
5
|
|
|
/**
 * Identifies scripted agents (spiders, preview bots, tools, exploits...) from
 * the user agent string.
 *
 * detect() walks $robotsList in order and calls the matching
 * "checkRobot<Name>" method for every entry; the first method that returns
 * true wins and has already written name, type and info URL to the
 * ScriptedAgent instance.
 */
class ScriptedAgentDetector implements DetectorInterface
{
    /** Prefix prepended to each $robotsList entry to build the check method name. */
    const FUNC_PREFIX = 'checkRobot';

    /**
     * User agent string under inspection; populated by detect().
     */
    protected static $userAgentString;

    /**
     * @var ScriptedAgent
     */
    protected static $scriptedAgent;

    /**
     * Robots to test for, in priority order. Every entry needs a public static
     * method named FUNC_PREFIX . entry (PHP method names are case-insensitive).
     */
    protected static $robotsList = array(
        'Google',
        'Baidu',
        'Facebook',
        'Bing',
        'Slurp',
        'Twitter',
        'Skype',
        'W3CValidator',
        'wkHTMLtoPDF',
        'Yandex',
        'Apple',
        'Paperli',
        'Ahrefs',
        'MJ12',
        'LiveLap',
        'Webdav',
        'MetaURI',
        'TLSProbe',
        'ScoopIt',
        'Netcraft',
        'Curl',
        'Python',
        'GoLang',
        'Perl',
        'Wget',
        'ZGrab',
        'Java',
        'Shellshock',
        'Browershots',
        'Whois',
        'MageReport',
        'Adbeat',
        'Ubermetrics',
        'Socialrank',
        'GlutenFree',
        'ICQ',
        'Proximic',
        'Verisign'
    );

    /**
     * Routine to determine the scripted agent type.
     *
     * Resets name, type and info URL to ScriptedAgent::UNKNOWN, then runs the
     * checks listed in $robotsList until one of them matches.
     *
     * @param ScriptedAgent $scriptedAgent Object that receives the detection result.
     * @param UserAgent     $userAgent     Optional; when null the user agent is
     *                                     taken from $scriptedAgent->getUserAgent().
     *
     * @return bool True when a known scripted agent was recognised.
     */
    public static function detect(ScriptedAgent $scriptedAgent, UserAgent $userAgent = null)
    {
        self::$scriptedAgent = $scriptedAgent;
        if (is_null($userAgent)) {
            $userAgent = self::$scriptedAgent->getUserAgent();
        }
        self::$userAgentString = $userAgent->getUserAgentString();

        self::$scriptedAgent->setName(ScriptedAgent::UNKNOWN);
        self::$scriptedAgent->setType(ScriptedAgent::UNKNOWN);
        self::$scriptedAgent->setInfoURL(ScriptedAgent::UNKNOWN);

        foreach (self::$robotsList as $robotName) {
            $funcName = self::FUNC_PREFIX . $robotName;

            if (self::$funcName()) {
                return true;
            }
        }

        return false;
    }

    /**
     * Shared matcher behind the checkRobot* methods.
     *
     * Performs a case-insensitive substring search for each needle in the
     * current user agent string. On the first hit the given name, type and
     * info URL are stored on the current ScriptedAgent.
     *
     * @param array  $needles Substrings, any one of which identifies the agent.
     * @param string $name    Value passed to ScriptedAgent::setName().
     * @param string $type    Value passed to ScriptedAgent::setType().
     * @param string $infoUrl Value passed to ScriptedAgent::setInfoURL().
     *
     * @return bool True when one of the needles was found.
     */
    private static function matchSignature(array $needles, $name, $type, $infoUrl)
    {
        foreach ($needles as $needle) {
            if (stripos(self::$userAgentString, $needle) !== false) {
                self::$scriptedAgent->setName($name);
                self::$scriptedAgent->setType($type);
                self::$scriptedAgent->setInfoURL($infoUrl);

                return true;
            }
        }

        return false;
    }

    /**
     * Determine if the browser is wkHTMLtoPDF
     *
     * @return bool
     */
    public static function checkRobotwkHTMLtoPDF()
    {
        return self::matchSignature(
            array('wkhtmltopdf'),
            ScriptedAgent::WKHTMLTOPDF,
            ScriptedAgent::TOOL,
            'https://wkhtmltopdf.org/'
        );
    }

    /**
     * Determine if the browser is the ICQ preview.
     *
     * Unlike the other checks this one also looks at request headers in
     * $_SERVER, because the user agent alone is a regular Chrome 51 string.
     *
     * @return bool
     */
    public static function checkRobotICQ()
    {
        if (stripos(self::$userAgentString, 'Chrome/51.0.2704.103') === false) {
            return false;
        }

        // Chrome 51 always provides the Upgrade-Insecure-Requests header. ICQ does not.
        if (isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS'])) {
            return false;
        }

        // To be extra safe, also check for the russian language which the ICQ bot sets.
        // The header is optional, so guard the lookup: a request without
        // Accept-Language simply is not the ICQ bot (and must not raise a notice).
        if (!isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])
            || stripos($_SERVER['HTTP_ACCEPT_LANGUAGE'], 'ru-RU') === false
        ) {
            return false;
        }

        self::$scriptedAgent->setName(ScriptedAgent::ICQ);
        self::$scriptedAgent->setType(ScriptedAgent::PREVIEW);
        self::$scriptedAgent->setInfoURL('https://icq.com');

        return true;
    }

    /**
     * Determine if the agent is GoogleBot, or a google ads bot.
     *
     * Also recognises the Google favicon fetcher and Google Web Preview.
     * Rules are tried in the order written; the first match wins.
     *
     * @return bool
     */
    public static function checkRobotGoogle()
    {
        $googleHelp = 'https://support.google.com/webmasters/answer/1061943?hl=en';

        return self::matchSignature(
            array('Googlebot'),
            ScriptedAgent::GOOGLEBOT,
            ScriptedAgent::SPIDER,
            $googleHelp
        ) || self::matchSignature(
            array('AdsBot-Google', 'Mediapartners-Google', 'Google-Adwords', 'AdXVastFetcher-Google'),
            ScriptedAgent::GOOGLEADS,
            ScriptedAgent::ADVERTISING,
            $googleHelp
        ) || self::matchSignature(
            array('Google Favicon'),
            ScriptedAgent::GOOGLEFAVICON,
            ScriptedAgent::GENERIC,
            'https://www.webmasterworld.com/search_engine_spiders/4626518.htm'
        ) || self::matchSignature(
            array('Google Web Preview'),
            ScriptedAgent::GOOGLEPREVIEW,
            ScriptedAgent::PREVIEW,
            'https://www.distilnetworks.com/bot-directory/bot/google-web-preview/'
        );
    }

    /**
     * Determine if the agent is the Baidu spider.
     *
     * @return bool
     */
    public static function checkRobotBaidu()
    {
        // The info URL used to be Google's webmaster help page (copy-paste
        // slip); it now points at the page Baiduspider names in its own UA.
        return self::matchSignature(
            array('Baiduspider'),
            ScriptedAgent::BAIDU,
            ScriptedAgent::SPIDER,
            'http://www.baidu.com/search/spider.html'
        );
    }

    /**
     * Determine if the agent is the Facebook preview bot.
     *
     * @return bool
     */
    public static function checkRobotFacebook()
    {
        return self::matchSignature(
            array('facebookexternalhit'),
            ScriptedAgent::FACEBOOK,
            ScriptedAgent::PREVIEW,
            'https://www.facebook.com/externalhit_uatext.php'
        );
    }

    /**
     * Determine if the agent is the bing spider, bing preview bot, or MSN bot
     *
     * Rules are tried in the order written; the first match wins.
     *
     * @return bool
     */
    public static function checkRobotBing()
    {
        $bingHelp = 'https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0';

        return self::matchSignature(
            array('adidxbot/'),
            ScriptedAgent::BING,
            ScriptedAgent::ADVERTISING,
            $bingHelp
        ) || self::matchSignature(
            array('/bingbot.htm'),
            ScriptedAgent::BING,
            ScriptedAgent::SPIDER,
            $bingHelp
        ) || self::matchSignature(
            array('/msnbot.htm'),
            ScriptedAgent::MSNBOT,
            ScriptedAgent::SPIDER,
            $bingHelp
        ) || self::matchSignature(
            array('BingPreview/'),
            ScriptedAgent::BING_PREVIEW,
            ScriptedAgent::PREVIEW,
            $bingHelp
        );
    }

    /**
     * Determine if the agent is the Yahoo Slurp! Spider.
     *
     * @return bool
     */
    public static function checkRobotSlurp()
    {
        return self::matchSignature(
            array('Yahoo! Slurp'),
            ScriptedAgent::SLURP,
            ScriptedAgent::SPIDER,
            'https://help.yahoo.com/kb/SLN22600.html'
        );
    }

    /**
     * Determine if the agent is the twitter preview bot.
     *
     * @return bool
     */
    public static function checkRobotTwitter()
    {
        return self::matchSignature(
            array('Twitterbot/'),
            ScriptedAgent::TWITTER,
            ScriptedAgent::PREVIEW,
            'http://stackoverflow.com/questions/22362215/twitter-user-agent-on-sharing'
        );
    }

    /**
     * Determine if the agent is the skype preview bot.
     *
     * @return bool
     */
    public static function checkRobotSkype()
    {
        return self::matchSignature(
            array('SkypeUriPreview'),
            ScriptedAgent::SKYPE,
            ScriptedAgent::PREVIEW,
            'http://www.skype.com'
        );
    }

    /**
     * Determine if the agent is the W3C Validator tool.
     *
     * The first group of signatures is reported as a TOOL, the second
     * (NING, W3C-checklink) as a SPIDER.
     *
     * @return bool
     */
    public static function checkRobotW3CValidator()
    {
        $w3cServices = 'https://validator.w3.org/services';

        return self::matchSignature(
            array(
                'W3C_Validator/',
                'Validator.nu/',
                'W3C-mobileOK/DDC-',
                'W3C_I18n-Checker/',
                'FeedValidator/',
                'Jigsaw/',
                // Was "JW3C_Unicorn/" (stray "J"), which the real
                // "W3C_Unicorn/..." agent could never match. The corrected
                // needle still matches anything the old one did.
                'W3C_Unicorn/',
            ),
            ScriptedAgent::W3CVALIDATOR,
            ScriptedAgent::TOOL,
            $w3cServices
        ) || self::matchSignature(
            array('NING/', 'W3C-checklink'),
            ScriptedAgent::W3CVALIDATOR,
            ScriptedAgent::SPIDER,
            $w3cServices
        );
    }

    /**
     * Determine if the agent is the Yandex spider.
     *
     * @return bool
     */
    public static function checkRobotYandex()
    {
        return self::matchSignature(
            array('YandexBot/'),
            ScriptedAgent::YANDEX,
            ScriptedAgent::SPIDER,
            'http://yandex.com/bots'
        );
    }

    /**
     * Determine if the agent is the AppleBot
     *
     * @return bool
     */
    public static function checkRobotApple()
    {
        return self::matchSignature(
            array('AppleBot/'),
            ScriptedAgent::APPLEBOT,
            ScriptedAgent::SPIDER,
            'https://support.apple.com/en-gb/HT204683'
        );
    }

    /**
     * Determine if the agent is the Paper.li bot.
     *
     * @return bool
     */
    public static function checkRobotPaperli()
    {
        return self::matchSignature(
            array('PaperLiBot/'),
            ScriptedAgent::PAPERLI,
            ScriptedAgent::SPIDER,
            'https://support.paper.li/hc/en-us/articles/204105253-What-is-Paper-li-'
        );
    }

    /**
     * Determine if the agent is the Ahrefs survey.
     *
     * @return bool
     */
    public static function checkRobotAhrefs()
    {
        return self::matchSignature(
            array('AhrefsBot/'),
            ScriptedAgent::AHREFS,
            ScriptedAgent::SURVEY,
            'https://ahrefs.com/robot'
        );
    }

    /**
     * Determine if the agent is the Majestic 12 spider.
     *
     * @return bool
     */
    public static function checkRobotMJ12()
    {
        return self::matchSignature(
            array('MJ12Bot/'),
            ScriptedAgent::MJ12,
            ScriptedAgent::SPIDER,
            'http://www.majestic12.co.uk/projects/dsearch/mj12bot.php'
        );
    }

    /**
     * Determine if the agent is the LiveLap spider.
     *
     * @return bool
     */
    public static function checkRobotLiveLap()
    {
        return self::matchSignature(
            array('LivelapBot/'),
            ScriptedAgent::LIVELAP,
            ScriptedAgent::SPIDER,
            'http://site.livelap.com/crawler.html'
        );
    }

    /**
     * Determine if the agent is a Web Distributed Authoring and Versioning client.
     * Usually unexpected WebDAV requests are hack attempts.
     *
     * @return bool
     */
    public static function checkRobotWebdav()
    {
        return self::matchSignature(
            // The second signature is the Office WebDAV probe.
            array('WEBDAV Client', 'Microsoft Office Existence Discovery'),
            ScriptedAgent::WEBDAV,
            ScriptedAgent::TOOL,
            'https://en.wikipedia.org/wiki/WebDAV'
        );
    }

    /**
     * Determine if the agent is the MetaURI scraper.
     *
     * @return bool
     */
    public static function checkRobotMetaURI()
    {
        return self::matchSignature(
            array('MetaURI API/'),
            ScriptedAgent::METAURI,
            ScriptedAgent::SURVEY,
            'https://github.com/stateless-systems/uri-meta'
        );
    }

    /**
     * Determine if the agent is the TLSProbe tool.
     *
     * @return bool
     */
    public static function checkRobotTLSProbe()
    {
        return self::matchSignature(
            array('TLSProbe/'),
            ScriptedAgent::TLSPROBE,
            ScriptedAgent::TOOL,
            'https://bitbucket.org/marco-bellaccini/tlsprobe'
        );
    }

    /**
     * Determine if the agent is the scoop.it bots.
     *
     * @return bool
     */
    public static function checkRobotScoopIt()
    {
        return self::matchSignature(
            array('wpif Safari', 'imgsizer Safari'),
            ScriptedAgent::SCOOPIT,
            ScriptedAgent::SPIDER,
            'https://www.webmasterworld.com/search_engine_spiders/4785385.htm'
        );
    }

    /**
     * Determine if the agent is the Netcraft SSL Survey.
     *
     * @return bool
     */
    public static function checkRobotNetcraft()
    {
        return self::matchSignature(
            array('Netcraft SSL Server Survey'),
            ScriptedAgent::NETCRAFT,
            ScriptedAgent::SURVEY,
            'https://www.netcraft.com/internet-data-mining/ssl-survey/'
        );
    }

    /**
     * Determine if the agent is the curl library/cli tool.
     *
     * @return bool
     */
    public static function checkRobotCurl()
    {
        return self::matchSignature(
            array('curl/'),
            ScriptedAgent::CURL,
            ScriptedAgent::GENERIC,
            'https://curl.haxx.se/'
        );
    }

    /**
     * Determine if the agent is the python programming language.
     *
     * @return bool
     */
    public static function checkRobotPython()
    {
        return self::matchSignature(
            array('python-requests/', 'python-urllib/'),
            ScriptedAgent::PYTHON,
            ScriptedAgent::GENERIC,
            'https://www.python.org/'
        );
    }

    /**
     * Determine if the agent is the GoLang programming language.
     *
     * @return bool
     */
    public static function checkRobotGoLang()
    {
        return self::matchSignature(
            array('Go-http-client'),
            ScriptedAgent::GOLANG,
            ScriptedAgent::GENERIC,
            'https://golang.org/'
        );
    }

    /**
     * Determine if the agent is the perl programming language.
     *
     * @return bool
     */
    public static function checkRobotPerl()
    {
        return self::matchSignature(
            array('libwww-perl/'),
            ScriptedAgent::PERL,
            ScriptedAgent::GENERIC,
            'https://www.perl.org/'
        );
    }

    /**
     * Determine if the agent is the wget tool.
     *
     * @return bool
     */
    public static function checkRobotWget()
    {
        return self::matchSignature(
            array('Wget/'),
            ScriptedAgent::WGET,
            ScriptedAgent::TOOL,
            'https://www.gnu.org/software/wget/'
        );
    }

    /**
     * Determine if the agent is the zgrab TLS banner tool.
     *
     * @return bool
     */
    public static function checkRobotZGrab()
    {
        return self::matchSignature(
            array('zgrab/'),
            ScriptedAgent::ZGRAB,
            ScriptedAgent::TOOL,
            'https://github.com/zmap/zgrab'
        );
    }

    /**
     * Determine if the agent is the Java programming language.
     *
     * @return bool
     */
    public static function checkRobotJava()
    {
        return self::matchSignature(
            array('Java/'),
            ScriptedAgent::JAVA,
            ScriptedAgent::GENERIC,
            'https://www.java.com/en/'
        );
    }

    /**
     * Determine if the agent is the ShellShock exploit.
     *
     * @return bool
     */
    public static function checkRobotShellshock()
    {
        return self::matchSignature(
            array('() { :;}; /bin/bash -c'),
            ScriptedAgent::SHELLSHOCK,
            ScriptedAgent::EXPLOIT,
            'https://blog.cloudflare.com/inside-shellshock/'
        );
    }

    /**
     * Determine if the agent is the browsershots testing tool.
     *
     * The method name keeps its historical spelling ("Browershots") because
     * $robotsList and external callers refer to it by that name.
     *
     * @return bool
     */
    public static function checkRobotBrowershots()
    {
        return self::matchSignature(
            array('Browsershots'),
            ScriptedAgent::BROWSERSHOTS,
            ScriptedAgent::SURVEY,
            'http://browsershots.org/'
        );
    }

    /**
     * Determine if the agent is the who.is spider.
     *
     * @return bool
     */
    public static function checkRobotWhois()
    {
        return self::matchSignature(
            array('who.is bot'),
            ScriptedAgent::WHOIS,
            ScriptedAgent::SPIDER,
            'http://www.who.is/'
        );
    }

    /**
     * Determine if the agent is the MageReport exploit survey.
     *
     * @return bool
     */
    public static function checkRobotMageReport()
    {
        return self::matchSignature(
            array('MageReport'),
            ScriptedAgent::MAGEREPORT,
            ScriptedAgent::SURVEY,
            'https://www.magereport.com/'
        );
    }

    /**
     * Determine if the agent is the AdBeat advertising survey.
     *
     * @return bool
     */
    public static function checkRobotAdbeat()
    {
        return self::matchSignature(
            array('adbeat.com'),
            ScriptedAgent::ADBEAT,
            ScriptedAgent::ADVERTISING,
            'https://www.adbeat.com/operation_policy'
        );
    }

    /**
     * Determine if the agent is the SocialRankIO crawler.
     *
     * @return bool
     */
    public static function checkRobotSocialrank()
    {
        return self::matchSignature(
            array('SocialRankIOBot'),
            ScriptedAgent::SOCIALRANK,
            ScriptedAgent::SURVEY,
            'http://socialrank.io/about'
        );
    }

    /**
     * Determine if the agent is the Gluten Free crawler.
     *
     * @return bool
     */
    public static function checkRobotGlutenFree()
    {
        return self::matchSignature(
            array('Gluten Free Crawler/'),
            ScriptedAgent::GLUTENFREE,
            ScriptedAgent::SURVEY,
            'http://glutenfreepleasure.com/'
        );
    }

    /**
     * Determine if the agent is the Proximic spider.
     *
     * @return bool
     */
    public static function checkRobotProximic()
    {
        return self::matchSignature(
            array('proximic;'),
            ScriptedAgent::PROXIMIC,
            ScriptedAgent::SPIDER,
            'http://www.proximic.com/info/spider.php'
        );
    }

    /**
     * Determine if the agent is the Ubermetrics survey.
     *
     * @return bool
     */
    public static function checkRobotUbermetrics()
    {
        return self::matchSignature(
            array('@ubermetrics-technologies.com'),
            ScriptedAgent::UBERMETRICS,
            ScriptedAgent::SURVEY,
            'https://www.ubermetrics-technologies.com/'
        );
    }

    /**
     * Determine if the agent is the Verisign ips-agent.
     *
     * @return bool
     */
    public static function checkRobotVerisign()
    {
        return self::matchSignature(
            array('ips-agent'),
            ScriptedAgent::VERISIGN,
            ScriptedAgent::SURVEY,
            'http://www.spambotsecurity.com/forum/viewtopic.php?f=7&t=1453'
        );
    }
}
756
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.