Completed
Pull Request — master (#37)
by Romaric
03:21
created
src/CrawlerDetect.php 2 patches
Indentation   +499 added lines, -499 removed lines patch added patch discarded remove patch
@@ -4,518 +4,518 @@
 block discarded – undo
4 4
 
5 5
 class CrawlerDetect
6 6
 {
7
-    protected $userAgent = null;
7
+	protected $userAgent = null;
8 8
 
9
-    protected $httpHeaders = array();
9
+	protected $httpHeaders = array();
10 10
 
11
-    protected $matches = array();
11
+	protected $matches = array();
12 12
 
13
-    protected static $crawlers = array(
14
-        '007ac9 Crawler',
15
-        '008\\/',
16
-        '360Spider',
17
-        'A6-Indexer',
18
-        'ABACHOBot',
19
-        'AbiLogicBot',
20
-        'Aboundex',
21
-        'Accoona-AI-Agent',
22
-        'acoon',
23
-        'AddSugarSpiderBot',
24
-        'AddThis',
25
-        'Adidxbot',
26
-        'ADmantX',
27
-        'AdvBot',
28
-        'AHC',
29
-        'ahrefsbot',
30
-        'aihitbot',
31
-        'Airmail',
32
-        'AISearchBot',
33
-        'Anemone',
34
-        'antibot',
35
-        'AnyApexBot',
36
-        'Applebot',
37
-        'arabot',
38
-        'Arachmo',
39
-        'archive-com',
40
-        'archive.org_bot',
41
-        'B-l-i-t-z-B-O-T',
42
-        'backlinkcrawler',
43
-        'baiduspider',
44
-        'BecomeBot',
45
-        'BeslistBot',
46
-        'bibnum\.bnf',
47
-        'biglotron',
48
-        'BillyBobBot',
49
-        'Bimbot',
50
-        'bingbot',
51
-        'binlar',
52
-        'blekkobot',
53
-        'blexbot',
54
-        'BlitzBOT',
55
-        'bl\.uk_lddc_bot',
56
-        'bnf\.fr_bot',
57
-        'boitho\.com-dc',
58
-        'boitho\.com-robot',
59
-        'brainobot',
60
-        'btbot',
61
-        'BUbiNG',
62
-        'Butterfly\/',
63
-        'buzzbot',
64
-        'BuzzSumo',
65
-        'careerbot',
66
-        'CatchBot',
67
-        'CC Metadata Scaper',
68
-        'ccbot',
69
-        'Cerberian Drtrs',
70
-        'changedetection',
71
-        'Charlotte',
72
-        'clips\.ua\.ac\.be',
73
-        'CloudFlare-AlwaysOnline',
74
-        'citeseerxbot',
75
-        'coccoc',
76
-        'classbot',
77
-        'Commons-HttpClient',
78
-        'content crawler spider',
79
-        'Content Crawler',
80
-        'convera',
81
-        'ConveraCrawler',
82
-        'CoPubbot',
83
-        'cosmos',
84
-        'Covario-IDS',
85
-        'CrawlBot',
86
-        'crawler4j',
87
-        'CrystalSemanticsBot',
88
-        'curl',
89
-        'cXensebot',
90
-        'CyberPatrol',
91
-        'DataparkSearch',
92
-        'dataprovider',
93
-        'DiamondBot',
94
-        'Digg',
95
-        'discobot',
96
-        'DomainAppender',
97
-        'domaincrawler',
98
-        'Domain Re-Animator Bot',
99
-        'dotbot',
100
-        'drupact',
101
-        'DuckDuckBot',
102
-        'EARTHCOM',
103
-        'EasouSpider',
104
-        'ec2linkfinder',
105
-        'edisterbot',
106
-        'ElectricMonk',
107
-        'elisabot',
108
-        'emailmarketingrobot',
109
-        'Embedly',
110
-        'EmeraldShield\.com WebBot',
111
-        'envolk\[ITS\]spider',
112
-        'EsperanzaBot',
113
-        'europarchive\.org',
114
-        'EventMachine HttpClient',
115
-        'exabot',
116
-        'ezooms',
117
-        'facebookexternalhit',
118
-        'Facebot',
119
-        'FAST Enteprise Crawler',
120
-        'FAST Enterprise Crawler',
121
-        'FAST-WebCrawler',
122
-        'FDSE robot',
123
-        'Feedfetcher-Google',
124
-        'FindLinks',
125
-        'findlink',
126
-        'findthatfile',
127
-        'findxbot',
128
-        'Flamingo_SearchEngine',
129
-        'fluffy',
130
-        'fr-crawler',
131
-        'FRCrawler',
132
-        'FurlBot',
133
-        'FyberSpider',
134
-        'g00g1e\.net',
135
-        'GigablastOpenSource',
136
-        'grub-client',
137
-        'g2crawler',
138
-        'Gaisbot',
139
-        'GalaxyBot',
140
-        'genieBot',
141
-        'Genieo',
142
-        'GermCrawler',
143
-        'getprismatic.com',
144
-        'gigabot',
145
-        'GingerCrawler',
146
-        'Girafabot',
147
-        'Gluten Free Crawler',
148
-        'gnam gnam spider',
149
-        'Go-http-client',
150
-        'Googlebot-Image',
151
-        'Googlebot-Mobile',
152
-        'Googlebot',
153
-        'Google-HTTP-Java-Client',
154
-        'Google favicon',
155
-        'GrapeshotCrawler',
156
-        'gslfbot',
157
-        'GurujiBot',
158
-        'HappyFunBot',
159
-        'Healthbot',
160
-        'heritrix',
161
-        'hl_ftien_spider',
162
-        'Holmes',
163
-        'htdig',
164
-        'httpunit',
165
-        'httrack',
166
-        'ia_archiver',
167
-        'iaskspider',
168
-        'iCCrawler',
169
-        'ichiro',
170
-        'igdeSpyder',
171
-        'iisbot',
172
-        'InAGist',
173
-        'InfoWizards Reciprocal Link System PRO',
174
-        'Insitesbot',
175
-        'integromedb',
176
-        'intelium_bot',
177
-        'InterfaxScanBot',
178
-        'IODC',
179
-        'IOI',
180
-        'ip-web-crawler\.com',
181
-        'ips-agent',
182
-        'IRLbot',
183
-        'IssueCrawler',
184
-        'IstellaBot',
185
-        'it2media-domain-crawler',
186
-        'iZSearch',
187
-        'Jaxified Bot',
188
-        'JOC Web Spider',
189
-        'jyxobot',
190
-        'KoepaBot',
191
-        'L\.webis',
192
-        'LapozzBot',
193
-        'Larbin',
194
-        'lb-spider',
195
-        'LDSpider',
196
-        'LexxeBot',
197
-        'libwww',
198
-        'Linguee Bot',
199
-        'Link Valet',
200
-        'linkdex',
201
-        'LinkExaminer',
202
-        'LinksManager\.com_bot',
203
-        'LinkpadBot',
204
-        'LinksCrawler',
205
-        'LinkWalker',
206
-        'Lipperhey Link Explorer',
207
-        'Lipperhey SEO Service',
208
-        'Livelapbot',
209
-        'LongURL API',
210
-        'lmspider',
211
-        'lssbot',
212
-        'lssrocketcrawler',
213
-        'ltx71',
214
-        'lufsbot',
215
-        'lwp-trivial',
216
-        'Mail\.RU_Bot',
217
-        'MegaIndex\.ru',
218
-        'mabontland',
219
-        'magpie-crawler',
220
-        'MagpieRSS',
221
-        'Mediapartners-Google',
222
-        'memorybot',
223
-        'MetaURI',
224
-        'MJ12bot',
225
-        'mlbot',
226
-        'Mnogosearch',
227
-        'mogimogi',
228
-        'MojeekBot',
229
-        'Moreoverbot',
230
-        'Morning Paper',
231
-        'Mrcgiguy',
232
-        'MSIECrawler',
233
-        'msnbot',
234
-        'msrbot',
235
-        'MVAClient',
236
-        'mxbot',
237
-        'NerdByNature\.Bot',
238
-        'NerdyBot',
239
-        'netEstate NE Crawler',
240
-        'netresearchserver',
241
-        'NetSeer Crawler',
242
-        'NewsGator',
243
-        'newsme',
244
-        'NextGenSearchBot',
245
-        'NG-Search',
246
-        'ngbot',
247
-        'nicebot',
248
-        'niki-bot',
249
-        'Notifixious',
250
-        'noxtrumbot',
251
-        'Nusearch Spider',
252
-        'nutch',
253
-        'NutchCVS',
254
-        'Nymesis',
255
-        'obot',
256
-        'oegp',
257
-        'ocrawler',
258
-        'omgilibot',
259
-        'OmniExplorer_Bot',
260
-        'online link validator',
261
-        'Online Website Link Checker',
262
-        'OOZBOT',
263
-        'openindexspider',
264
-        'OpenWebSpider',
265
-        'OrangeBot',
266
-        'Orbiter',
267
-        'ow\.ly',
268
-        'PaperLiBot',
269
-        'Pingdom\.com_bot',
270
-        'Ploetz \+ Zeller',
271
-        'page2rss',
272
-        'PageBitesHyperBot',
273
-        'panscient',
274
-        'Peew',
275
-        'PercolateCrawler',
276
-        'phpcrawl',
277
-        'Pizilla',
278
-        'Plukkie',
279
-        'polybot',
280
-        'Pompos',
281
-        'postano',
282
-        'PostPost',
283
-        'postrank',
284
-        'proximic',
285
-        'psbot',
286
-        'purebot',
287
-        'PycURL',
288
-        'Python-httplib2',
289
-        'python-requests',
290
-        'Python-urllib',
291
-        'Qseero',
292
-        'QuerySeekerSpider',
293
-        'Qwantify',
294
-        'Radian6',
295
-        'RAMPyBot',
296
-        'RebelMouse',
297
-        'REL Link Checker',
298
-        'RetrevoPageAnalyzer',
299
-        'Riddler',
300
-        'Robosourcer',
301
-        'rogerbot',
302
-        'Ruby',
303
-        'RufusBot',
304
-        'SandCrawler',
305
-        'SBIder',
306
-        'ScoutJet',
307
-        'ScoutURLMonitor',
308
-        'Scrapy',
309
-        'ScreenerBot',
310
-        'scribdbot',
311
-        'Scrubby',
312
-        'SearchmetricsBot',
313
-        'SearchSight',
314
-        'seekbot',
315
-        'semanticdiscovery',
316
-        'SemrushBot',
317
-        'Sensis Web Crawler',
318
-        'SEOChat::Bot',
319
-        'seokicks-robot',
320
-        'SEOstats',
321
-        'Seznam screenshot-generator',
322
-        'seznambot',
323
-        'Shim-Crawler',
324
-        'ShopWiki',
325
-        'Shoula robot',
326
-        'ShowyouBot',
327
-        'SimpleCrawler',
328
-        'sistrix crawler',
329
-        'SiteBar',
330
-        'sitebot',
331
-        'siteexplorer\.info',
332
-        'SklikBot',
333
-        'slider\.com',
334
-        'slurp',
335
-        'smtbot',
336
-        'Snappy',
337
-        'sogou spider',
338
-        'sogou',
339
-        'Sosospider',
340
-        'spbot',
341
-        'Speedy Spider',
342
-        'speedy',
343
-        'SpiderMan',
344
-        'Sqworm',
345
-        'SSL-Crawler',
346
-        'StackRambler',
347
-        'suggybot',
348
-        'summify',
349
-        'SurdotlyBot',
350
-        'SurveyBot',
351
-        'SynooBot',
352
-        'tagoobot',
353
-        'teoma',
354
-        'TerrawizBot',
355
-        'TheSuBot',
356
-        'Thumbnail\.CZ robot',
357
-        'TinEye',
358
-        'toplistbot',
359
-        'trendictionbot',
360
-        'TrueBot',
361
-        'truwoGPS',
362
-        'turnitinbot',
363
-        'TweetedTimes Bot',
364
-        'tweetedtimes.com',
365
-        'TweetmemeBot',
366
-        'twengabot',
367
-        'Twikle',
368
-        'Twitterbot',
369
-        'uMBot',
370
-        'UnisterBot',
371
-        'UnwindFetchor',
372
-        'updated',
373
-        'urlappendbot',
374
-        'Urlfilebot',
375
-        'urlresolver',
376
-        'UsineNouvelleCrawler',
377
-        'Vagabondo',
378
-        'Vivante Link Checker',
379
-        'voilabot',
380
-        'Vortex',
381
-        'voyager\\/',
382
-        'VYU2',
383
-        'web-archive-net\.com\.bot',
384
-        'Websquash\.com',
385
-        'WeSEE:Ads\/PageBot',
386
-        'wbsearchbot',
387
-        'webcollage',
388
-        'webcompanycrawler',
389
-        'webcrawler',
390
-        'webmon ',
391
-        'WeSEE:Search',
392
-        'wf84',
393
-        'wget',
394
-        'wocbot',
395
-        'WoFindeIch Robot',
396
-        'WomlpeFactory',
397
-        'woriobot',
398
-        'wotbox',
399
-        'Xaldon_WebSpider',
400
-        'Xenu Link Sleuth',
401
-        'xintellibot',
402
-        'XML Sitemaps Generator',
403
-        'XoviBot',
404
-        'Y!J-ASR',
405
-        'yacy',
406
-        'yacybot',
407
-        'Yahoo Link Preview',
408
-        'Yahoo! Slurp China',
409
-        'Yahoo! Slurp',
410
-        'YahooSeeker',
411
-        'YahooSeeker-Testing',
412
-        'YandexBot',
413
-        'YandexImages',
414
-        'YandexMetrika',
415
-        'yandex',
416
-        'yanga',
417
-        'Yasaklibot',
418
-        'yeti',
419
-        'YioopBot',
420
-        'YisouSpider',
421
-        'YodaoBot',
422
-        'yoogliFetchAgent',
423
-        'yoozBot',
424
-        'YoudaoBot',
425
-        'Zao',
426
-        'Zealbot',
427
-        'zspider',
428
-        'ZyBorg',
429
-        '[a-z0-9\\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)',
430
-    );
13
+	protected static $crawlers = array(
14
+		'007ac9 Crawler',
15
+		'008\\/',
16
+		'360Spider',
17
+		'A6-Indexer',
18
+		'ABACHOBot',
19
+		'AbiLogicBot',
20
+		'Aboundex',
21
+		'Accoona-AI-Agent',
22
+		'acoon',
23
+		'AddSugarSpiderBot',
24
+		'AddThis',
25
+		'Adidxbot',
26
+		'ADmantX',
27
+		'AdvBot',
28
+		'AHC',
29
+		'ahrefsbot',
30
+		'aihitbot',
31
+		'Airmail',
32
+		'AISearchBot',
33
+		'Anemone',
34
+		'antibot',
35
+		'AnyApexBot',
36
+		'Applebot',
37
+		'arabot',
38
+		'Arachmo',
39
+		'archive-com',
40
+		'archive.org_bot',
41
+		'B-l-i-t-z-B-O-T',
42
+		'backlinkcrawler',
43
+		'baiduspider',
44
+		'BecomeBot',
45
+		'BeslistBot',
46
+		'bibnum\.bnf',
47
+		'biglotron',
48
+		'BillyBobBot',
49
+		'Bimbot',
50
+		'bingbot',
51
+		'binlar',
52
+		'blekkobot',
53
+		'blexbot',
54
+		'BlitzBOT',
55
+		'bl\.uk_lddc_bot',
56
+		'bnf\.fr_bot',
57
+		'boitho\.com-dc',
58
+		'boitho\.com-robot',
59
+		'brainobot',
60
+		'btbot',
61
+		'BUbiNG',
62
+		'Butterfly\/',
63
+		'buzzbot',
64
+		'BuzzSumo',
65
+		'careerbot',
66
+		'CatchBot',
67
+		'CC Metadata Scaper',
68
+		'ccbot',
69
+		'Cerberian Drtrs',
70
+		'changedetection',
71
+		'Charlotte',
72
+		'clips\.ua\.ac\.be',
73
+		'CloudFlare-AlwaysOnline',
74
+		'citeseerxbot',
75
+		'coccoc',
76
+		'classbot',
77
+		'Commons-HttpClient',
78
+		'content crawler spider',
79
+		'Content Crawler',
80
+		'convera',
81
+		'ConveraCrawler',
82
+		'CoPubbot',
83
+		'cosmos',
84
+		'Covario-IDS',
85
+		'CrawlBot',
86
+		'crawler4j',
87
+		'CrystalSemanticsBot',
88
+		'curl',
89
+		'cXensebot',
90
+		'CyberPatrol',
91
+		'DataparkSearch',
92
+		'dataprovider',
93
+		'DiamondBot',
94
+		'Digg',
95
+		'discobot',
96
+		'DomainAppender',
97
+		'domaincrawler',
98
+		'Domain Re-Animator Bot',
99
+		'dotbot',
100
+		'drupact',
101
+		'DuckDuckBot',
102
+		'EARTHCOM',
103
+		'EasouSpider',
104
+		'ec2linkfinder',
105
+		'edisterbot',
106
+		'ElectricMonk',
107
+		'elisabot',
108
+		'emailmarketingrobot',
109
+		'Embedly',
110
+		'EmeraldShield\.com WebBot',
111
+		'envolk\[ITS\]spider',
112
+		'EsperanzaBot',
113
+		'europarchive\.org',
114
+		'EventMachine HttpClient',
115
+		'exabot',
116
+		'ezooms',
117
+		'facebookexternalhit',
118
+		'Facebot',
119
+		'FAST Enteprise Crawler',
120
+		'FAST Enterprise Crawler',
121
+		'FAST-WebCrawler',
122
+		'FDSE robot',
123
+		'Feedfetcher-Google',
124
+		'FindLinks',
125
+		'findlink',
126
+		'findthatfile',
127
+		'findxbot',
128
+		'Flamingo_SearchEngine',
129
+		'fluffy',
130
+		'fr-crawler',
131
+		'FRCrawler',
132
+		'FurlBot',
133
+		'FyberSpider',
134
+		'g00g1e\.net',
135
+		'GigablastOpenSource',
136
+		'grub-client',
137
+		'g2crawler',
138
+		'Gaisbot',
139
+		'GalaxyBot',
140
+		'genieBot',
141
+		'Genieo',
142
+		'GermCrawler',
143
+		'getprismatic.com',
144
+		'gigabot',
145
+		'GingerCrawler',
146
+		'Girafabot',
147
+		'Gluten Free Crawler',
148
+		'gnam gnam spider',
149
+		'Go-http-client',
150
+		'Googlebot-Image',
151
+		'Googlebot-Mobile',
152
+		'Googlebot',
153
+		'Google-HTTP-Java-Client',
154
+		'Google favicon',
155
+		'GrapeshotCrawler',
156
+		'gslfbot',
157
+		'GurujiBot',
158
+		'HappyFunBot',
159
+		'Healthbot',
160
+		'heritrix',
161
+		'hl_ftien_spider',
162
+		'Holmes',
163
+		'htdig',
164
+		'httpunit',
165
+		'httrack',
166
+		'ia_archiver',
167
+		'iaskspider',
168
+		'iCCrawler',
169
+		'ichiro',
170
+		'igdeSpyder',
171
+		'iisbot',
172
+		'InAGist',
173
+		'InfoWizards Reciprocal Link System PRO',
174
+		'Insitesbot',
175
+		'integromedb',
176
+		'intelium_bot',
177
+		'InterfaxScanBot',
178
+		'IODC',
179
+		'IOI',
180
+		'ip-web-crawler\.com',
181
+		'ips-agent',
182
+		'IRLbot',
183
+		'IssueCrawler',
184
+		'IstellaBot',
185
+		'it2media-domain-crawler',
186
+		'iZSearch',
187
+		'Jaxified Bot',
188
+		'JOC Web Spider',
189
+		'jyxobot',
190
+		'KoepaBot',
191
+		'L\.webis',
192
+		'LapozzBot',
193
+		'Larbin',
194
+		'lb-spider',
195
+		'LDSpider',
196
+		'LexxeBot',
197
+		'libwww',
198
+		'Linguee Bot',
199
+		'Link Valet',
200
+		'linkdex',
201
+		'LinkExaminer',
202
+		'LinksManager\.com_bot',
203
+		'LinkpadBot',
204
+		'LinksCrawler',
205
+		'LinkWalker',
206
+		'Lipperhey Link Explorer',
207
+		'Lipperhey SEO Service',
208
+		'Livelapbot',
209
+		'LongURL API',
210
+		'lmspider',
211
+		'lssbot',
212
+		'lssrocketcrawler',
213
+		'ltx71',
214
+		'lufsbot',
215
+		'lwp-trivial',
216
+		'Mail\.RU_Bot',
217
+		'MegaIndex\.ru',
218
+		'mabontland',
219
+		'magpie-crawler',
220
+		'MagpieRSS',
221
+		'Mediapartners-Google',
222
+		'memorybot',
223
+		'MetaURI',
224
+		'MJ12bot',
225
+		'mlbot',
226
+		'Mnogosearch',
227
+		'mogimogi',
228
+		'MojeekBot',
229
+		'Moreoverbot',
230
+		'Morning Paper',
231
+		'Mrcgiguy',
232
+		'MSIECrawler',
233
+		'msnbot',
234
+		'msrbot',
235
+		'MVAClient',
236
+		'mxbot',
237
+		'NerdByNature\.Bot',
238
+		'NerdyBot',
239
+		'netEstate NE Crawler',
240
+		'netresearchserver',
241
+		'NetSeer Crawler',
242
+		'NewsGator',
243
+		'newsme',
244
+		'NextGenSearchBot',
245
+		'NG-Search',
246
+		'ngbot',
247
+		'nicebot',
248
+		'niki-bot',
249
+		'Notifixious',
250
+		'noxtrumbot',
251
+		'Nusearch Spider',
252
+		'nutch',
253
+		'NutchCVS',
254
+		'Nymesis',
255
+		'obot',
256
+		'oegp',
257
+		'ocrawler',
258
+		'omgilibot',
259
+		'OmniExplorer_Bot',
260
+		'online link validator',
261
+		'Online Website Link Checker',
262
+		'OOZBOT',
263
+		'openindexspider',
264
+		'OpenWebSpider',
265
+		'OrangeBot',
266
+		'Orbiter',
267
+		'ow\.ly',
268
+		'PaperLiBot',
269
+		'Pingdom\.com_bot',
270
+		'Ploetz \+ Zeller',
271
+		'page2rss',
272
+		'PageBitesHyperBot',
273
+		'panscient',
274
+		'Peew',
275
+		'PercolateCrawler',
276
+		'phpcrawl',
277
+		'Pizilla',
278
+		'Plukkie',
279
+		'polybot',
280
+		'Pompos',
281
+		'postano',
282
+		'PostPost',
283
+		'postrank',
284
+		'proximic',
285
+		'psbot',
286
+		'purebot',
287
+		'PycURL',
288
+		'Python-httplib2',
289
+		'python-requests',
290
+		'Python-urllib',
291
+		'Qseero',
292
+		'QuerySeekerSpider',
293
+		'Qwantify',
294
+		'Radian6',
295
+		'RAMPyBot',
296
+		'RebelMouse',
297
+		'REL Link Checker',
298
+		'RetrevoPageAnalyzer',
299
+		'Riddler',
300
+		'Robosourcer',
301
+		'rogerbot',
302
+		'Ruby',
303
+		'RufusBot',
304
+		'SandCrawler',
305
+		'SBIder',
306
+		'ScoutJet',
307
+		'ScoutURLMonitor',
308
+		'Scrapy',
309
+		'ScreenerBot',
310
+		'scribdbot',
311
+		'Scrubby',
312
+		'SearchmetricsBot',
313
+		'SearchSight',
314
+		'seekbot',
315
+		'semanticdiscovery',
316
+		'SemrushBot',
317
+		'Sensis Web Crawler',
318
+		'SEOChat::Bot',
319
+		'seokicks-robot',
320
+		'SEOstats',
321
+		'Seznam screenshot-generator',
322
+		'seznambot',
323
+		'Shim-Crawler',
324
+		'ShopWiki',
325
+		'Shoula robot',
326
+		'ShowyouBot',
327
+		'SimpleCrawler',
328
+		'sistrix crawler',
329
+		'SiteBar',
330
+		'sitebot',
331
+		'siteexplorer\.info',
332
+		'SklikBot',
333
+		'slider\.com',
334
+		'slurp',
335
+		'smtbot',
336
+		'Snappy',
337
+		'sogou spider',
338
+		'sogou',
339
+		'Sosospider',
340
+		'spbot',
341
+		'Speedy Spider',
342
+		'speedy',
343
+		'SpiderMan',
344
+		'Sqworm',
345
+		'SSL-Crawler',
346
+		'StackRambler',
347
+		'suggybot',
348
+		'summify',
349
+		'SurdotlyBot',
350
+		'SurveyBot',
351
+		'SynooBot',
352
+		'tagoobot',
353
+		'teoma',
354
+		'TerrawizBot',
355
+		'TheSuBot',
356
+		'Thumbnail\.CZ robot',
357
+		'TinEye',
358
+		'toplistbot',
359
+		'trendictionbot',
360
+		'TrueBot',
361
+		'truwoGPS',
362
+		'turnitinbot',
363
+		'TweetedTimes Bot',
364
+		'tweetedtimes.com',
365
+		'TweetmemeBot',
366
+		'twengabot',
367
+		'Twikle',
368
+		'Twitterbot',
369
+		'uMBot',
370
+		'UnisterBot',
371
+		'UnwindFetchor',
372
+		'updated',
373
+		'urlappendbot',
374
+		'Urlfilebot',
375
+		'urlresolver',
376
+		'UsineNouvelleCrawler',
377
+		'Vagabondo',
378
+		'Vivante Link Checker',
379
+		'voilabot',
380
+		'Vortex',
381
+		'voyager\\/',
382
+		'VYU2',
383
+		'web-archive-net\.com\.bot',
384
+		'Websquash\.com',
385
+		'WeSEE:Ads\/PageBot',
386
+		'wbsearchbot',
387
+		'webcollage',
388
+		'webcompanycrawler',
389
+		'webcrawler',
390
+		'webmon ',
391
+		'WeSEE:Search',
392
+		'wf84',
393
+		'wget',
394
+		'wocbot',
395
+		'WoFindeIch Robot',
396
+		'WomlpeFactory',
397
+		'woriobot',
398
+		'wotbox',
399
+		'Xaldon_WebSpider',
400
+		'Xenu Link Sleuth',
401
+		'xintellibot',
402
+		'XML Sitemaps Generator',
403
+		'XoviBot',
404
+		'Y!J-ASR',
405
+		'yacy',
406
+		'yacybot',
407
+		'Yahoo Link Preview',
408
+		'Yahoo! Slurp China',
409
+		'Yahoo! Slurp',
410
+		'YahooSeeker',
411
+		'YahooSeeker-Testing',
412
+		'YandexBot',
413
+		'YandexImages',
414
+		'YandexMetrika',
415
+		'yandex',
416
+		'yanga',
417
+		'Yasaklibot',
418
+		'yeti',
419
+		'YioopBot',
420
+		'YisouSpider',
421
+		'YodaoBot',
422
+		'yoogliFetchAgent',
423
+		'yoozBot',
424
+		'YoudaoBot',
425
+		'Zao',
426
+		'Zealbot',
427
+		'zspider',
428
+		'ZyBorg',
429
+		'[a-z0-9\\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)',
430
+	);
431 431
 
432
-    /**
433
-     * All possible HTTP headers that represent the
434
-     * User-Agent string.
435
-     *
436
-     * @var array
437
-     */
438
-    protected static $uaHttpHeaders = array(
439
-        // The default User-Agent string.
440
-        'HTTP_USER_AGENT',
441
-        // Header can occur on devices using Opera Mini.
442
-        'HTTP_X_OPERAMINI_PHONE_UA',
443
-        // Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
444
-        'HTTP_X_DEVICE_USER_AGENT',
445
-        'HTTP_X_ORIGINAL_USER_AGENT',
446
-        'HTTP_X_SKYFIRE_PHONE',
447
-        'HTTP_X_BOLT_PHONE_UA',
448
-        'HTTP_DEVICE_STOCK_UA',
449
-        'HTTP_X_UCBROWSER_DEVICE_UA',
450
-    );
432
+	/**
433
+	 * All possible HTTP headers that represent the
434
+	 * User-Agent string.
435
+	 *
436
+	 * @var array
437
+	 */
438
+	protected static $uaHttpHeaders = array(
439
+		// The default User-Agent string.
440
+		'HTTP_USER_AGENT',
441
+		// Header can occur on devices using Opera Mini.
442
+		'HTTP_X_OPERAMINI_PHONE_UA',
443
+		// Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
444
+		'HTTP_X_DEVICE_USER_AGENT',
445
+		'HTTP_X_ORIGINAL_USER_AGENT',
446
+		'HTTP_X_SKYFIRE_PHONE',
447
+		'HTTP_X_BOLT_PHONE_UA',
448
+		'HTTP_DEVICE_STOCK_UA',
449
+		'HTTP_X_UCBROWSER_DEVICE_UA',
450
+	);
451 451
 
452
-    /**
453
-     * Class constructor.
454
-     */
455
-    public function __construct(array $headers = null, $userAgent = null)
456
-    {
457
-        $this->setHttpHeaders($headers);
458
-        $this->setUserAgent($userAgent);
459
-    }
452
+	/**
453
+	 * Class constructor.
454
+	 */
455
+	public function __construct(array $headers = null, $userAgent = null)
456
+	{
457
+		$this->setHttpHeaders($headers);
458
+		$this->setUserAgent($userAgent);
459
+	}
460 460
 
461
-    public function setHttpHeaders($httpHeaders = null)
462
-    {
463
-        // use global _SERVER if $httpHeaders aren't defined
464
-        if (!is_array($httpHeaders) || !count($httpHeaders)) {
465
-            $httpHeaders = $_SERVER;
466
-        }
467
-        // clear existing headers
468
-        $this->httpHeaders = array();
469
-        // Only save HTTP headers. In PHP land, that means only _SERVER vars that
470
-        // start with HTTP_.
471
-        foreach ($httpHeaders as $key => $value) {
472
-            if (substr($key, 0, 5) === 'HTTP_') {
473
-                $this->httpHeaders[$key] = $value;
474
-            }
475
-        }
476
-    }
461
+	public function setHttpHeaders($httpHeaders = null)
462
+	{
463
+		// use global _SERVER if $httpHeaders aren't defined
464
+		if (!is_array($httpHeaders) || !count($httpHeaders)) {
465
+			$httpHeaders = $_SERVER;
466
+		}
467
+		// clear existing headers
468
+		$this->httpHeaders = array();
469
+		// Only save HTTP headers. In PHP land, that means only _SERVER vars that
470
+		// start with HTTP_.
471
+		foreach ($httpHeaders as $key => $value) {
472
+			if (substr($key, 0, 5) === 'HTTP_') {
473
+				$this->httpHeaders[$key] = $value;
474
+			}
475
+		}
476
+	}
477 477
 
478
-    public function getUaHttpHeaders()
479
-    {
480
-        return self::$uaHttpHeaders;
481
-    }
478
+	public function getUaHttpHeaders()
479
+	{
480
+		return self::$uaHttpHeaders;
481
+	}
482 482
 
483
-    public function setUserAgent($userAgent = null)
484
-    {
485
-        if (false === empty($userAgent)) {
486
-            return $this->userAgent = $userAgent;
487
-        } else {
488
-            $this->userAgent = null;
489
-            foreach ($this->getUaHttpHeaders() as $altHeader) {
490
-                if (false === empty($this->httpHeaders[$altHeader])) { // @todo: should use getHttpHeader(), but it would be slow. (Serban)
491
-                    $this->userAgent .= $this->httpHeaders[$altHeader].' ';
492
-                }
493
-            }
483
+	public function setUserAgent($userAgent = null)
484
+	{
485
+		if (false === empty($userAgent)) {
486
+			return $this->userAgent = $userAgent;
487
+		} else {
488
+			$this->userAgent = null;
489
+			foreach ($this->getUaHttpHeaders() as $altHeader) {
490
+				if (false === empty($this->httpHeaders[$altHeader])) { // @todo: should use getHttpHeader(), but it would be slow. (Serban)
491
+					$this->userAgent .= $this->httpHeaders[$altHeader].' ';
492
+				}
493
+			}
494 494
 
495
-            return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null);
496
-        }
497
-    }
495
+			return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null);
496
+		}
497
+	}
498 498
 
499
-    public function getRegex()
500
-    {
501
-        return '('.implode('|', self::$crawlers).')';
502
-    }
499
+	public function getRegex()
500
+	{
501
+		return '('.implode('|', self::$crawlers).')';
502
+	}
503 503
 
504
-    public function isCrawler($userAgent = null)
505
-    {
506
-        $agent = is_null($userAgent) ? $this->userAgent : $userAgent;
504
+	public function isCrawler($userAgent = null)
505
+	{
506
+		$agent = is_null($userAgent) ? $this->userAgent : $userAgent;
507 507
 
508
-        $result = preg_match('/'.$this->getRegex().'/i', $agent, $matches);
508
+		$result = preg_match('/'.$this->getRegex().'/i', $agent, $matches);
509 509
 
510
-        if ($matches) {
511
-            $this->matches = $matches;
512
-        }
510
+		if ($matches) {
511
+			$this->matches = $matches;
512
+		}
513 513
 
514
-        return (bool) $result;
515
-    }
514
+		return (bool) $result;
515
+	}
516 516
 
517
-    public function getMatches()
518
-    {
519
-        return $this->matches[0];
520
-    }
517
+	public function getMatches()
518
+	{
519
+		return $this->matches[0];
520
+	}
521 521
 }
Please login to merge, or discard this patch.
Spacing   +2 added lines, -2 removed lines patch added patch discarded remove patch
@@ -461,7 +461,7 @@  discard block
 block discarded – undo
461 461
     public function setHttpHeaders($httpHeaders = null)
462 462
     {
463 463
         // use global _SERVER if $httpHeaders aren't defined
464
-        if (!is_array($httpHeaders) || !count($httpHeaders)) {
464
+        if ( ! is_array($httpHeaders) || ! count($httpHeaders)) {
465 465
             $httpHeaders = $_SERVER;
466 466
         }
467 467
         // clear existing headers
@@ -492,7 +492,7 @@  discard block
 block discarded – undo
492 492
                 }
493 493
             }
494 494
 
495
-            return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null);
495
+            return $this->userAgent = ( ! empty($this->userAgent) ? trim($this->userAgent) : null);
496 496
         }
497 497
     }
498 498
 
Please login to merge, or discard this patch.
tests/UATests.php 1 patch
Indentation   +21 added lines, -21 removed lines patch added patch discarded remove patch
@@ -2,30 +2,30 @@
 block discarded – undo
2 2
 
3 3
 class UserAgentTest extends PHPUnit_Framework_TestCase
4 4
 {
5
-    protected $CrawlerDetect;
5
+	protected $CrawlerDetect;
6 6
 
7
-    public function setUp()
8
-    {
9
-        $this->CrawlerDetect = new Jaybizzle\CrawlerDetect\CrawlerDetect();
10
-    }
7
+	public function setUp()
8
+	{
9
+		$this->CrawlerDetect = new Jaybizzle\CrawlerDetect\CrawlerDetect();
10
+	}
11 11
 
12
-    public function testBots()
13
-    {
14
-        $lines = file(__DIR__.'/crawlers.txt');
12
+	public function testBots()
13
+	{
14
+		$lines = file(__DIR__.'/crawlers.txt');
15 15
 
16
-        foreach ($lines as $line) {
17
-            $test = $this->CrawlerDetect->isCrawler($line);
18
-            $this->assertEquals($test, true, $line);
19
-        }
20
-    }
16
+		foreach ($lines as $line) {
17
+			$test = $this->CrawlerDetect->isCrawler($line);
18
+			$this->assertEquals($test, true, $line);
19
+		}
20
+	}
21 21
 
22
-    public function testDevices()
23
-    {
24
-        $lines = file(__DIR__.'/devices.txt');
22
+	public function testDevices()
23
+	{
24
+		$lines = file(__DIR__.'/devices.txt');
25 25
 
26
-        foreach ($lines as $line) {
27
-            $test = $this->CrawlerDetect->isCrawler($line);
28
-            $this->assertEquals($test, false, $line);
29
-        }
30
-    }
26
+		foreach ($lines as $line) {
27
+			$test = $this->CrawlerDetect->isCrawler($line);
28
+			$this->assertEquals($test, false, $line);
29
+		}
30
+	}
31 31
 }
Please login to merge, or discard this patch.