Completed
Pull Request — master (#89)
by Mark
02:08
created
tests/bootstrap.php 2 patches
Indentation   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -11,7 +11,7 @@
 block discarded – undo
11 11
 $dot = dirname(__FILE__);
12 12
 
13 13
 if (!file_exists($composer = dirname($dot).'/vendor/autoload.php')) {
14
-    throw new RuntimeException("Please run 'composer install' first to set up autoloading. $composer");
14
+	throw new RuntimeException("Please run 'composer install' first to set up autoloading. $composer");
15 15
 }
16 16
 /** @var \Composer\Autoload\ClassLoader $autoloader */
17 17
 $autoloader = include $composer;
Please login to merge, or discard this patch.
Spacing   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -10,7 +10,7 @@
 block discarded – undo
10 10
  */
11 11
 $dot = dirname(__FILE__);
12 12
 
13
-if (!file_exists($composer = dirname($dot).'/vendor/autoload.php')) {
13
+if ( ! file_exists($composer = dirname($dot).'/vendor/autoload.php')) {
14 14
     throw new RuntimeException("Please run 'composer install' first to set up autoloading. $composer");
15 15
 }
16 16
 /** @var \Composer\Autoload\ClassLoader $autoloader */
Please login to merge, or discard this patch.
src/Fixtures/Exclusions.php 1 patch
Indentation   +49 added lines, -49 removed lines patch added patch discarded remove patch
@@ -13,53 +13,53 @@
 block discarded – undo
13 13
 
14 14
 class Exclusions extends AbstractProvider
15 15
 {
16
-    /**
17
-     * List of strings to remove from the user agent before running the crawler regex
18
-     * Over a large list of user agents, this gives us about a 55% speed increase!
19
-     *
20
-     * @var array
21
-     */
22
-    protected $data = array(
23
-        'Safari.[\d\.]*',
24
-        'Firefox.[\d\.]*',
25
-        'Chrome.[\d\.]*',
26
-        'Chromium.[\d\.]*',
27
-        'MSIE.[\d\.]',
28
-        'Opera\/[\d\.]*',
29
-        'Mozilla.[\d\.]*',
30
-        'AppleWebKit.[\d\.]*',
31
-        'Trident.[\d\.]*',
32
-        'Windows NT.[\d\.]*',
33
-        'Android.[\d\.]*',
34
-        'Macintosh.',
35
-        'Ubuntu',
36
-        'Linux',
37
-        '[ ]Intel',
38
-        'Mac OS X [\d_]*',
39
-        '(like )?Gecko(.[\d\.]*)?',
40
-        'KHTML',
41
-        'CriOS.[\d\.]*',
42
-        'CPU iPhone OS ([0-9_])* like Mac OS X',
43
-        'CPU OS ([0-9_])* like Mac OS X',
44
-        'iPod',
45
-        'compatible',
46
-        'x86_..',
47
-        'i686',
48
-        'x64',
49
-        'X11',
50
-        'rv:[\d\.]*',
51
-        'Version.[\d\.]*',
52
-        'WOW64',
53
-        'Win64',
54
-        'Dalvik.[\d\.]*',
55
-        ' \.NET CLR [\d\.]*',
56
-        'Presto.[\d\.]*',
57
-        'Media Center PC',
58
-        'BlackBerry',
59
-        'Build',
60
-        'Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.',
61
-        'Opera',
62
-        ' \.NET[\d\.]*',
63
-        '\(|\)|;|,', // Remove the following characters: ( ) ; ,
64
-    );
16
+	/**
17
+	 * List of strings to remove from the user agent before running the crawler regex
18
+	 * Over a large list of user agents, this gives us about a 55% speed increase!
19
+	 *
20
+	 * @var array
21
+	 */
22
+	protected $data = array(
23
+		'Safari.[\d\.]*',
24
+		'Firefox.[\d\.]*',
25
+		'Chrome.[\d\.]*',
26
+		'Chromium.[\d\.]*',
27
+		'MSIE.[\d\.]',
28
+		'Opera\/[\d\.]*',
29
+		'Mozilla.[\d\.]*',
30
+		'AppleWebKit.[\d\.]*',
31
+		'Trident.[\d\.]*',
32
+		'Windows NT.[\d\.]*',
33
+		'Android.[\d\.]*',
34
+		'Macintosh.',
35
+		'Ubuntu',
36
+		'Linux',
37
+		'[ ]Intel',
38
+		'Mac OS X [\d_]*',
39
+		'(like )?Gecko(.[\d\.]*)?',
40
+		'KHTML',
41
+		'CriOS.[\d\.]*',
42
+		'CPU iPhone OS ([0-9_])* like Mac OS X',
43
+		'CPU OS ([0-9_])* like Mac OS X',
44
+		'iPod',
45
+		'compatible',
46
+		'x86_..',
47
+		'i686',
48
+		'x64',
49
+		'X11',
50
+		'rv:[\d\.]*',
51
+		'Version.[\d\.]*',
52
+		'WOW64',
53
+		'Win64',
54
+		'Dalvik.[\d\.]*',
55
+		' \.NET CLR [\d\.]*',
56
+		'Presto.[\d\.]*',
57
+		'Media Center PC',
58
+		'BlackBerry',
59
+		'Build',
60
+		'Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.',
61
+		'Opera',
62
+		' \.NET[\d\.]*',
63
+		'\(|\)|;|,', // Remove the following characters: ( ) ; ,
64
+	);
65 65
 }
Please login to merge, or discard this patch.
src/Fixtures/AbstractProvider.php 1 patch
Indentation   +4 added lines, -4 removed lines patch added patch discarded remove patch
@@ -13,8 +13,8 @@
 block discarded – undo
13 13
 
14 14
 abstract class AbstractProvider
15 15
 {
16
-    public function getAll()
17
-    {
18
-        return $this->data;
19
-    }
16
+	public function getAll()
17
+	{
18
+		return $this->data;
19
+	}
20 20
 }
Please login to merge, or discard this patch.
src/Fixtures/Crawlers.php 1 patch
Indentation   +320 added lines, -320 removed lines patch added patch discarded remove patch
@@ -13,324 +13,324 @@
 block discarded – undo
13 13
 
14 14
 class Crawlers extends AbstractProvider
15 15
 {
16
-    /**
17
-     * Array of regular expressions to match against the user agent.
18
-     *
19
-     * @var array
20
-     */
21
-    protected $data = array(
22
-        '.*Java.*outbrain',
23
-        '008\/',
24
-        '^NING\/',
25
-        'A6-Indexer',
26
-        'Aboundex',
27
-        'Accoona-AI-Agent',
28
-        'acoon',
29
-        'AddThis',
30
-        'ADmantX',
31
-        'AHC',
32
-        'Airmail',
33
-        'alexa site audit',
34
-        'Anemone',
35
-        'Apache-HttpClient\/',
36
-        'Arachmo',
37
-        'archive-com',
38
-        'B-l-i-t-z-B-O-T',
39
-        'Backlink-Ceck\.de',
40
-        'baidu\.com',
41
-        'BazQux',
42
-        'bibnum\.bnf',
43
-        'biglotron',
44
-        'BingLocalSearch',
45
-        'BingPreview',
46
-        'binlar',
47
-        'Bloglovin',
48
-        'Blogtrottr',
49
-        'boitho\.com-dc',
50
-        'Browsershots',
51
-        'BUbiNG',
52
-        'Butterfly\/',
53
-        'BuzzSumo',
54
-        'CapsuleChecker',
55
-        'CC Metadata Scaper',
56
-        'Cerberian Drtrs',
57
-        'changedetection',
58
-        'Charlotte',
59
-        'clips\.ua\.ac\.be',
60
-        'CloudFlare-AlwaysOnline',
61
-        'coccoc',
62
-        'CommaFeed',
63
-        'Commons-HttpClient',
64
-        'convera',
65
-        'cosmos',
66
-        'corporatetwitnews',
67
-        'Covario-IDS',
68
-        'cron-job\.org',
69
-        'Curious George',
70
-        'curl',
71
-        'CyberPatrol',
72
-        'DataparkSearch',
73
-        'dataprovider',
74
-        'Daum(oa)?[ \/][0-9]',
75
-        'developers\.google\.com\/\+\/web\/snippet\/',
76
-        'Digg',
77
-        'DomainAppender',
78
-        'Dragonfly File Reader',
79
-        'drupact',
80
-        'EARTHCOM',
81
-        'ec2linkfinder',
82
-        'ECCP',
83
-        'ElectricMonk',
84
-        'EMail Exractor',
85
-        'EmailWolf',
86
-        'Embed PHP Library',
87
-        'Embedly',
88
-        'europarchive\.org',
89
-        'EventMachine HttpClient',
90
-        'ExactSearch',
91
-        'ExaleadCloudview',
92
-        'ezooms',
93
-        'facebookexternalhit',
94
-        'facebookplatform',
95
-        'Feed Wrangler',
96
-        'Feedbin',
97
-        'FeedBurner',
98
-        'Feedfetcher-Google',
99
-        'Feedly',
100
-        'Feedspot',
101
-        'FeedValidator',
102
-        'Fever',
103
-        'findlink',
104
-        'findthatfile',
105
-        'Flamingo_SearchEngine',
106
-        'FlipboardProxy',
107
-        'fluffy',
108
-        'Funnelback',
109
-        'g00g1e\.net',
110
-        'Genieo',
111
-        'getprismatic\.com',
112
-        'GigablastOpenSource',
113
-        'Go-http-client',
114
-        'Google favicon',
115
-        'Google Keyword Suggestion',
116
-        'Google Page Speed Insights',
117
-        'Google Web Preview',
118
-        'Google-HTTP-Java-Client',
119
-        'Google-Site-Verification',
120
-        'google_partner_monitoring',
121
-        'GoogleProducer',
122
-        'Grammarly',
123
-        'grub-client',
124
-        'heritrix',
125
-        'Holmes',
126
-        'htdig',
127
-        'HTTPMon',
128
-        'http-kit',
129
-        'http_requester',
130
-        'httpunit',
131
-        'http_request2',
132
-        'httrack',
133
-        'HubPages.*crawlingpolicy',
134
-        'HubSpot Marketing Grader',
135
-        'ichiro',
136
-        'IDG Twitter Links Resolver',
137
-        'igdeSpyder',
138
-        'InAGist',
139
-        'infegy',
140
-        'InfoWizards Reciprocal Link System PRO',
141
-        'inpwrd\.com',
142
-        'integromedb',
143
-        'IODC',
144
-        'IOI',
145
-        'ips-agent',
146
-        'iZSearch',
147
-        '^Java\/',
148
-        'Jigsaw',
149
-        'Jobrapido',
150
-        'kouio',
151
-        'L\.webis',
152
-        'Larbin',
153
-        'libwww',
154
-        'Link Valet',
155
-        'linkCheck',
156
-        'linkdex',
157
-        'LinkExaminer',
158
-        'LinkWalker',
159
-        'Lipperhey',
160
-        'link checker',
161
-        'link validator',
162
-        'LongURL API',
163
-        'ltx71',
164
-        'lwp-trivial',
165
-        'lycos',
166
-        'mabontland',
167
-        'MagpieRSS',
168
-        'Mediapartners-Google',
169
-        'MegaIndex\.ru',
170
-        'MetaURI',
171
-        'MergeFlow-PageReader',
172
-        'Mnogosearch',
173
-        'mogimogi',
174
-        'Mojolicious (Perl)',
175
-        'Morning Paper',
176
-        'Mrcgiguy',
177
-        'MVAClient',
178
-        'Netcraft Web Server Survey',
179
-        'NetcraftSurveyAgent',
180
-        'NetLyzer FastProbe',
181
-        'netresearch',
182
-        'Netvibes',
183
-        'NewsBlur .*(Fetcher|Finder)',
184
-        'NewsGator',
185
-        'newsme',
186
-        'newspaper\/',
187
-        'NG-Search',
188
-        'nineconnections\.com',
189
-        'nominet\.org\.uk',
190
-        'Notifixious',
191
-        'nuhk',
192
-        'nutch',
193
-        'Nuzzel',
194
-        'Nymesis',
195
-        'oegp',
196
-        'Omea Reader',
197
-        'omgili',
198
-        'Orbiter',
199
-        'ow\.ly',
200
-        'Go [\d\.]* package http',
201
-        'page2rss',
202
-        'PagePeeker',
203
-        'panscient',
204
-        'Peew',
205
-        'PhantomJS\/',
206
-        'phpcrawl',
207
-        'phpservermon',
208
-        'Pingdom\.com',
209
-        'Pinterest',
210
-        'Pizilla',
211
-        'Ploetz \+ Zeller',
212
-        'Plukkie',
213
-        'PocketParser',
214
-        'Pompos',
215
-        'postano',
216
-        'PostPost',
217
-        'postrank',
218
-        'proximic',
219
-        'Pulsepoint XT3 web scraper',
220
-        'Python-httplib2',
221
-        'python-requests',
222
-        'Python-urllib',
223
-        'Qseero',
224
-        'Qwantify',
225
-        'Radian6',
226
-        'Readability',
227
-        'RebelMouse',
228
-        'RetrevoPageAnalyzer',
229
-        'Riddler',
230
-        'Robosourcer',
231
-        'ROI Hunter',
232
-        'Ruby',
233
-        'SalesIntelligent',
234
-        'SBIder',
235
-        'scooter',
236
-        'ScoutJet',
237
-        'ScoutURLMonitor',
238
-        'Scrapy',
239
-        'Scrubby',
240
-        'SearchSight',
241
-        'semanticdiscovery',
242
-        'SEOstats',
243
-        'Server Density Service Monitoring',
244
-        'servernfo\.com',
245
-        'Seznam screenshot-generator',
246
-        'ShopWiki',
247
-        'SilverReader',
248
-        'SimplePie',
249
-        'Site24x7',
250
-        'SiteBar',
251
-        'siteexplorer\.info',
252
-        'Siteimprove\.com',
253
-        'SkypeUriPreview',
254
-        'slider\.com',
255
-        'slurp',
256
-        'SMRF URL Expander',
257
-        'snapchat-proxy',
258
-        'Snappy',
259
-        'SNK Siteshooter B0t',
260
-        'sogou',
261
-        'SortSite',
262
-        'speedy',
263
-        'Spinn3r',
264
-        'Sqworm',
265
-        'StackRambler',
266
-        'Stratagems Kumo',
267
-        'summify',
268
-        'teoma',
269
-        'theoldreader\.com',
270
-        'TinEye',
271
-        'Tiny Tiny RSS',
272
-        'Traackr.com',
273
-        'truwoGPS',
274
-        'tweetedtimes\.com',
275
-        'Twikle',
276
-        'Typhoeus',
277
-        'ubermetrics-technologies',
278
-        'UdmSearch',
279
-        'UnwindFetchor',
280
-        'updated',
281
-        'URLChecker',
282
-        'urlresolver',
283
-        'Vagabondo',
284
-        'Validator\.nu\/LV',
285
-        'via ggpht\.com GoogleImageProxy',
286
-        'vkShare',
287
-        'Vortex',
288
-        'voyager\/',
289
-        'VYU2',
290
-        'W3C-checklink',
291
-        'W3C-mobileOK',
292
-        'W3C_CSS_Validator_JFouffa',
293
-        'W3C_I18n-Checker',
294
-        'W3C_Unicorn',
295
-        'W3C_Validator',
296
-        'Wappalyzer',
297
-        'WinHttpRequest',
298
-        'web-capture\.net',
299
-        'WebCapture',
300
-        'WebCorp',
301
-        'webcollage',
302
-        'WebIndex',
303
-        'WebFetch',
304
-        'webmon ',
305
-        'websitepulse[+ ]checker',
306
-        'Websquash\.com',
307
-        'WebThumbnail',
308
-        'WeSEE:Search',
309
-        'wf84',
310
-        'wget',
311
-        'WhatsApp',
312
-        'WomlpeFactory',
313
-        'WordPress\/',
314
-        'wotbox',
315
-        'wscheck',
316
-        'WWW-Mechanize',
317
-        'www\.monitor\.us',
318
-        'XaxisSemanticsClassifier',
319
-        'Xenu Link Sleuth',
320
-        'XML Sitemaps Generator',
321
-        'Y!J-ASR',
322
-        'yacy',
323
-        'Yahoo Ad monitoring',
324
-        'Yahoo Link Preview',
325
-        'YahooSeeker',
326
-        'yandex',
327
-        'yanga',
328
-        'yeti',
329
-        'yoogliFetchAgent',
330
-        'YottaaMonitor',
331
-        'Zao',
332
-        'zgrab',
333
-        'ZyBorg',
334
-        '[a-z0-9\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)',
335
-    );
16
+	/**
17
+	 * Array of regular expressions to match against the user agent.
18
+	 *
19
+	 * @var array
20
+	 */
21
+	protected $data = array(
22
+		'.*Java.*outbrain',
23
+		'008\/',
24
+		'^NING\/',
25
+		'A6-Indexer',
26
+		'Aboundex',
27
+		'Accoona-AI-Agent',
28
+		'acoon',
29
+		'AddThis',
30
+		'ADmantX',
31
+		'AHC',
32
+		'Airmail',
33
+		'alexa site audit',
34
+		'Anemone',
35
+		'Apache-HttpClient\/',
36
+		'Arachmo',
37
+		'archive-com',
38
+		'B-l-i-t-z-B-O-T',
39
+		'Backlink-Ceck\.de',
40
+		'baidu\.com',
41
+		'BazQux',
42
+		'bibnum\.bnf',
43
+		'biglotron',
44
+		'BingLocalSearch',
45
+		'BingPreview',
46
+		'binlar',
47
+		'Bloglovin',
48
+		'Blogtrottr',
49
+		'boitho\.com-dc',
50
+		'Browsershots',
51
+		'BUbiNG',
52
+		'Butterfly\/',
53
+		'BuzzSumo',
54
+		'CapsuleChecker',
55
+		'CC Metadata Scaper',
56
+		'Cerberian Drtrs',
57
+		'changedetection',
58
+		'Charlotte',
59
+		'clips\.ua\.ac\.be',
60
+		'CloudFlare-AlwaysOnline',
61
+		'coccoc',
62
+		'CommaFeed',
63
+		'Commons-HttpClient',
64
+		'convera',
65
+		'cosmos',
66
+		'corporatetwitnews',
67
+		'Covario-IDS',
68
+		'cron-job\.org',
69
+		'Curious George',
70
+		'curl',
71
+		'CyberPatrol',
72
+		'DataparkSearch',
73
+		'dataprovider',
74
+		'Daum(oa)?[ \/][0-9]',
75
+		'developers\.google\.com\/\+\/web\/snippet\/',
76
+		'Digg',
77
+		'DomainAppender',
78
+		'Dragonfly File Reader',
79
+		'drupact',
80
+		'EARTHCOM',
81
+		'ec2linkfinder',
82
+		'ECCP',
83
+		'ElectricMonk',
84
+		'EMail Exractor',
85
+		'EmailWolf',
86
+		'Embed PHP Library',
87
+		'Embedly',
88
+		'europarchive\.org',
89
+		'EventMachine HttpClient',
90
+		'ExactSearch',
91
+		'ExaleadCloudview',
92
+		'ezooms',
93
+		'facebookexternalhit',
94
+		'facebookplatform',
95
+		'Feed Wrangler',
96
+		'Feedbin',
97
+		'FeedBurner',
98
+		'Feedfetcher-Google',
99
+		'Feedly',
100
+		'Feedspot',
101
+		'FeedValidator',
102
+		'Fever',
103
+		'findlink',
104
+		'findthatfile',
105
+		'Flamingo_SearchEngine',
106
+		'FlipboardProxy',
107
+		'fluffy',
108
+		'Funnelback',
109
+		'g00g1e\.net',
110
+		'Genieo',
111
+		'getprismatic\.com',
112
+		'GigablastOpenSource',
113
+		'Go-http-client',
114
+		'Google favicon',
115
+		'Google Keyword Suggestion',
116
+		'Google Page Speed Insights',
117
+		'Google Web Preview',
118
+		'Google-HTTP-Java-Client',
119
+		'Google-Site-Verification',
120
+		'google_partner_monitoring',
121
+		'GoogleProducer',
122
+		'Grammarly',
123
+		'grub-client',
124
+		'heritrix',
125
+		'Holmes',
126
+		'htdig',
127
+		'HTTPMon',
128
+		'http-kit',
129
+		'http_requester',
130
+		'httpunit',
131
+		'http_request2',
132
+		'httrack',
133
+		'HubPages.*crawlingpolicy',
134
+		'HubSpot Marketing Grader',
135
+		'ichiro',
136
+		'IDG Twitter Links Resolver',
137
+		'igdeSpyder',
138
+		'InAGist',
139
+		'infegy',
140
+		'InfoWizards Reciprocal Link System PRO',
141
+		'inpwrd\.com',
142
+		'integromedb',
143
+		'IODC',
144
+		'IOI',
145
+		'ips-agent',
146
+		'iZSearch',
147
+		'^Java\/',
148
+		'Jigsaw',
149
+		'Jobrapido',
150
+		'kouio',
151
+		'L\.webis',
152
+		'Larbin',
153
+		'libwww',
154
+		'Link Valet',
155
+		'linkCheck',
156
+		'linkdex',
157
+		'LinkExaminer',
158
+		'LinkWalker',
159
+		'Lipperhey',
160
+		'link checker',
161
+		'link validator',
162
+		'LongURL API',
163
+		'ltx71',
164
+		'lwp-trivial',
165
+		'lycos',
166
+		'mabontland',
167
+		'MagpieRSS',
168
+		'Mediapartners-Google',
169
+		'MegaIndex\.ru',
170
+		'MetaURI',
171
+		'MergeFlow-PageReader',
172
+		'Mnogosearch',
173
+		'mogimogi',
174
+		'Mojolicious (Perl)',
175
+		'Morning Paper',
176
+		'Mrcgiguy',
177
+		'MVAClient',
178
+		'Netcraft Web Server Survey',
179
+		'NetcraftSurveyAgent',
180
+		'NetLyzer FastProbe',
181
+		'netresearch',
182
+		'Netvibes',
183
+		'NewsBlur .*(Fetcher|Finder)',
184
+		'NewsGator',
185
+		'newsme',
186
+		'newspaper\/',
187
+		'NG-Search',
188
+		'nineconnections\.com',
189
+		'nominet\.org\.uk',
190
+		'Notifixious',
191
+		'nuhk',
192
+		'nutch',
193
+		'Nuzzel',
194
+		'Nymesis',
195
+		'oegp',
196
+		'Omea Reader',
197
+		'omgili',
198
+		'Orbiter',
199
+		'ow\.ly',
200
+		'Go [\d\.]* package http',
201
+		'page2rss',
202
+		'PagePeeker',
203
+		'panscient',
204
+		'Peew',
205
+		'PhantomJS\/',
206
+		'phpcrawl',
207
+		'phpservermon',
208
+		'Pingdom\.com',
209
+		'Pinterest',
210
+		'Pizilla',
211
+		'Ploetz \+ Zeller',
212
+		'Plukkie',
213
+		'PocketParser',
214
+		'Pompos',
215
+		'postano',
216
+		'PostPost',
217
+		'postrank',
218
+		'proximic',
219
+		'Pulsepoint XT3 web scraper',
220
+		'Python-httplib2',
221
+		'python-requests',
222
+		'Python-urllib',
223
+		'Qseero',
224
+		'Qwantify',
225
+		'Radian6',
226
+		'Readability',
227
+		'RebelMouse',
228
+		'RetrevoPageAnalyzer',
229
+		'Riddler',
230
+		'Robosourcer',
231
+		'ROI Hunter',
232
+		'Ruby',
233
+		'SalesIntelligent',
234
+		'SBIder',
235
+		'scooter',
236
+		'ScoutJet',
237
+		'ScoutURLMonitor',
238
+		'Scrapy',
239
+		'Scrubby',
240
+		'SearchSight',
241
+		'semanticdiscovery',
242
+		'SEOstats',
243
+		'Server Density Service Monitoring',
244
+		'servernfo\.com',
245
+		'Seznam screenshot-generator',
246
+		'ShopWiki',
247
+		'SilverReader',
248
+		'SimplePie',
249
+		'Site24x7',
250
+		'SiteBar',
251
+		'siteexplorer\.info',
252
+		'Siteimprove\.com',
253
+		'SkypeUriPreview',
254
+		'slider\.com',
255
+		'slurp',
256
+		'SMRF URL Expander',
257
+		'snapchat-proxy',
258
+		'Snappy',
259
+		'SNK Siteshooter B0t',
260
+		'sogou',
261
+		'SortSite',
262
+		'speedy',
263
+		'Spinn3r',
264
+		'Sqworm',
265
+		'StackRambler',
266
+		'Stratagems Kumo',
267
+		'summify',
268
+		'teoma',
269
+		'theoldreader\.com',
270
+		'TinEye',
271
+		'Tiny Tiny RSS',
272
+		'Traackr.com',
273
+		'truwoGPS',
274
+		'tweetedtimes\.com',
275
+		'Twikle',
276
+		'Typhoeus',
277
+		'ubermetrics-technologies',
278
+		'UdmSearch',
279
+		'UnwindFetchor',
280
+		'updated',
281
+		'URLChecker',
282
+		'urlresolver',
283
+		'Vagabondo',
284
+		'Validator\.nu\/LV',
285
+		'via ggpht\.com GoogleImageProxy',
286
+		'vkShare',
287
+		'Vortex',
288
+		'voyager\/',
289
+		'VYU2',
290
+		'W3C-checklink',
291
+		'W3C-mobileOK',
292
+		'W3C_CSS_Validator_JFouffa',
293
+		'W3C_I18n-Checker',
294
+		'W3C_Unicorn',
295
+		'W3C_Validator',
296
+		'Wappalyzer',
297
+		'WinHttpRequest',
298
+		'web-capture\.net',
299
+		'WebCapture',
300
+		'WebCorp',
301
+		'webcollage',
302
+		'WebIndex',
303
+		'WebFetch',
304
+		'webmon ',
305
+		'websitepulse[+ ]checker',
306
+		'Websquash\.com',
307
+		'WebThumbnail',
308
+		'WeSEE:Search',
309
+		'wf84',
310
+		'wget',
311
+		'WhatsApp',
312
+		'WomlpeFactory',
313
+		'WordPress\/',
314
+		'wotbox',
315
+		'wscheck',
316
+		'WWW-Mechanize',
317
+		'www\.monitor\.us',
318
+		'XaxisSemanticsClassifier',
319
+		'Xenu Link Sleuth',
320
+		'XML Sitemaps Generator',
321
+		'Y!J-ASR',
322
+		'yacy',
323
+		'Yahoo Ad monitoring',
324
+		'Yahoo Link Preview',
325
+		'YahooSeeker',
326
+		'yandex',
327
+		'yanga',
328
+		'yeti',
329
+		'yoogliFetchAgent',
330
+		'YottaaMonitor',
331
+		'Zao',
332
+		'zgrab',
333
+		'ZyBorg',
334
+		'[a-z0-9\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)',
335
+	);
336 336
 }
Please login to merge, or discard this patch.
tests/UATests.php 1 patch
Indentation   +40 added lines, -40 removed lines patch added patch discarded remove patch
@@ -14,55 +14,55 @@
 block discarded – undo
14 14
 
15 15
 class UserAgentTest extends PHPUnit_Framework_TestCase
16 16
 {
17
-    protected $CrawlerDetect;
17
+	protected $CrawlerDetect;
18 18
 
19
-    public function setUp()
20
-    {
21
-        $this->CrawlerDetect = new CrawlerDetect();
22
-    }
19
+	public function setUp()
20
+	{
21
+		$this->CrawlerDetect = new CrawlerDetect();
22
+	}
23 23
 
24
-    public function testBots()
25
-    {
26
-        $lines = file(__DIR__.'/crawlers.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
24
+	public function testBots()
25
+	{
26
+		$lines = file(__DIR__.'/crawlers.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
27 27
 
28
-        foreach ($lines as $line) {
29
-            $test = $this->CrawlerDetect->isCrawler($line);
30
-            $this->assertEquals($test, true, $line);
31
-        }
32
-    }
28
+		foreach ($lines as $line) {
29
+			$test = $this->CrawlerDetect->isCrawler($line);
30
+			$this->assertEquals($test, true, $line);
31
+		}
32
+	}
33 33
 
34
-    public function testDevices()
35
-    {
36
-        $lines = file(__DIR__.'/devices.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
34
+	public function testDevices()
35
+	{
36
+		$lines = file(__DIR__.'/devices.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
37 37
 
38
-        foreach ($lines as $line) {
39
-            $test = $this->CrawlerDetect->isCrawler($line);
40
-            $this->assertEquals($test, false, $line);
41
-        }
42
-    }
38
+		foreach ($lines as $line) {
39
+			$test = $this->CrawlerDetect->isCrawler($line);
40
+			$this->assertEquals($test, false, $line);
41
+		}
42
+	}
43 43
 
44
-    public function testReturnsCorrectMatchedBotName()
45
-    {
46
-        $test = $this->CrawlerDetect->isCrawler('Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)');
44
+	public function testReturnsCorrectMatchedBotName()
45
+	{
46
+		$test = $this->CrawlerDetect->isCrawler('Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)');
47 47
 
48
-        $matches = $this->CrawlerDetect->getMatches();
48
+		$matches = $this->CrawlerDetect->getMatches();
49 49
 
50
-        $this->assertEquals($this->CrawlerDetect->getMatches(), 'Yahoo Ad monitoring', $matches);
51
-    }
50
+		$this->assertEquals($this->CrawlerDetect->getMatches(), 'Yahoo Ad monitoring', $matches);
51
+	}
52 52
 
53
-    public function testForRegexCollision()
54
-    {
55
-        $crawlers = new Crawlers();
53
+	public function testForRegexCollision()
54
+	{
55
+		$crawlers = new Crawlers();
56 56
 
57
-        foreach ($crawlers->getAll() as $key1 => $regex) {
58
-            foreach ($crawlers->getAll() as $key2 => $compare) {
59
-                // Dont check this regex against itself
60
-                if ($key1 != $key2) {
61
-                    preg_match('/'.$regex.'/i', stripslashes($compare), $matches);
57
+		foreach ($crawlers->getAll() as $key1 => $regex) {
58
+			foreach ($crawlers->getAll() as $key2 => $compare) {
59
+				// Dont check this regex against itself
60
+				if ($key1 != $key2) {
61
+					preg_match('/'.$regex.'/i', stripslashes($compare), $matches);
62 62
 
63
-                    $this->assertEmpty($matches, $regex.' collided with '.$compare);
64
-                }
65
-            }
66
-        }
67
-    }
63
+					$this->assertEmpty($matches, $regex.' collided with '.$compare);
64
+				}
65
+			}
66
+		}
67
+	}
68 68
 }
Please login to merge, or discard this patch.
src/CrawlerDetect.php 1 patch
Indentation   +174 added lines, -174 removed lines patch added patch discarded remove patch
@@ -16,178 +16,178 @@
 block discarded – undo
16 16
 
17 17
 class CrawlerDetect
18 18
 {
19
-    /**
20
-     * The user agent.
21
-     *
22
-     * @var null
23
-     */
24
-    protected $userAgent = null;
25
-
26
-    /**
27
-     * Headers that contain a user agent.
28
-     *
29
-     * @var array
30
-     */
31
-    protected $httpHeaders = array();
32
-
33
-    /**
34
-     * Store regex matches.
35
-     *
36
-     * @var array
37
-     */
38
-    protected $matches = array();
39
-
40
-    /**
41
-     * Crawlers object
42
-     * 
43
-     * @var Jaybizzle\CrawlerDetect\Fixtures\Crawlers
44
-     */
45
-    protected $crawlers;
46
-
47
-    /**
48
-     * Exclusions object
49
-     * 
50
-     * @var Jaybizzle\CrawlerDetect\Fixtures\Exclusions
51
-     */
52
-    protected $exclusions;
53
-
54
-    /**
55
-     * All possible HTTP headers that represent the
56
-     * User-Agent string.
57
-     *
58
-     * @var array
59
-     */
60
-    protected static $uaHttpHeaders = array(
61
-        // The default User-Agent string.
62
-        'HTTP_USER_AGENT',
63
-        // Header can occur on devices using Opera Mini.
64
-        'HTTP_X_OPERAMINI_PHONE_UA',
65
-        // Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
66
-        'HTTP_X_DEVICE_USER_AGENT',
67
-        'HTTP_X_ORIGINAL_USER_AGENT',
68
-        'HTTP_X_SKYFIRE_PHONE',
69
-        'HTTP_X_BOLT_PHONE_UA',
70
-        'HTTP_DEVICE_STOCK_UA',
71
-        'HTTP_X_UCBROWSER_DEVICE_UA',
72
-    );
73
-
74
-    /**
75
-     * Class constructor.
76
-     */
77
-    public function __construct(array $headers = null, $userAgent = null)
78
-    {
79
-        $this->setHttpHeaders($headers);
80
-        $this->setUserAgent($userAgent);
81
-        $this->crawlers = new Crawlers();
82
-        $this->exclusions = new Exclusions();
83
-    }
84
-
85
-    /**
86
-     * Set HTTP headers.
87
-     *
88
-     * @param array $httpHeaders
89
-     */
90
-    public function setHttpHeaders($httpHeaders = null)
91
-    {
92
-        // use global _SERVER if $httpHeaders aren't defined
93
-        if (!is_array($httpHeaders) || !count($httpHeaders)) {
94
-            $httpHeaders = $_SERVER;
95
-        }
96
-        // clear existing headers
97
-        $this->httpHeaders = array();
98
-        // Only save HTTP headers. In PHP land, that means only _SERVER vars that
99
-        // start with HTTP_.
100
-        foreach ($httpHeaders as $key => $value) {
101
-            if (substr($key, 0, 5) === 'HTTP_') {
102
-                $this->httpHeaders[$key] = $value;
103
-            }
104
-        }
105
-    }
106
-
107
-    /**
108
-     * Return user agent headers.
109
-     *
110
-     * @return array
111
-     */
112
-    public function getUaHttpHeaders()
113
-    {
114
-        return self::$uaHttpHeaders;
115
-    }
116
-
117
-    /**
118
-     * Set the user agent.
119
-     *
120
-     * @param string $userAgent
121
-     */
122
-    public function setUserAgent($userAgent = null)
123
-    {
124
-        if (false === empty($userAgent)) {
125
-            return $this->userAgent = $userAgent;
126
-        } else {
127
-            $this->userAgent = null;
128
-            foreach ($this->getUaHttpHeaders() as $altHeader) {
129
-                if (false === empty($this->httpHeaders[$altHeader])) { // @todo: should use getHttpHeader(), but it would be slow.
130
-                    $this->userAgent .= $this->httpHeaders[$altHeader].' ';
131
-                }
132
-            }
133
-
134
-            return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null);
135
-        }
136
-    }
137
-
138
-    /**
139
-     * Build the user agent regex.
140
-     *
141
-     * @return string
142
-     */
143
-    public function getRegex()
144
-    {
145
-        return '('.implode('|', $this->crawlers->getAll()).')';
146
-    }
147
-
148
-    /**
149
-     * Build the replacement regex.
150
-     *
151
-     * @return string
152
-     */
153
-    public function getExclusions()
154
-    {
155
-        return '('.implode('|', $this->exclusions->getAll()).')';
156
-    }
157
-
158
-    /**
159
-     * Check user agent string against the regex.
160
-     *
161
-     * @param string $userAgent
162
-     *
163
-     * @return bool
164
-     */
165
-    public function isCrawler($userAgent = null)
166
-    {
167
-        $agent = is_null($userAgent) ? $this->userAgent : $userAgent;
168
-
169
-        $agent = preg_replace('/'.$this->getExclusions().'/i', '', $agent);
170
-
171
-        if (trim($agent) === false) {
172
-            return false;
173
-        } else {
174
-            $result = preg_match('/'.$this->getRegex().'/i', trim($agent), $matches);
175
-        }
176
-
177
-        if ($matches) {
178
-            $this->matches = $matches;
179
-        }
180
-
181
-        return (bool) $result;
182
-    }
183
-
184
-    /**
185
-     * Return the matches.
186
-     *
187
-     * @return string
188
-     */
189
-    public function getMatches()
190
-    {
191
-        return $this->matches[0];
192
-    }
19
+	/**
20
+	 * The user agent.
21
+	 *
22
+	 * @var null
23
+	 */
24
+	protected $userAgent = null;
25
+
26
+	/**
27
+	 * Headers that contain a user agent.
28
+	 *
29
+	 * @var array
30
+	 */
31
+	protected $httpHeaders = array();
32
+
33
+	/**
34
+	 * Store regex matches.
35
+	 *
36
+	 * @var array
37
+	 */
38
+	protected $matches = array();
39
+
40
+	/**
41
+	 * Crawlers object
42
+	 * 
43
+	 * @var Jaybizzle\CrawlerDetect\Fixtures\Crawlers
44
+	 */
45
+	protected $crawlers;
46
+
47
+	/**
48
+	 * Exclusions object
49
+	 * 
50
+	 * @var Jaybizzle\CrawlerDetect\Fixtures\Exclusions
51
+	 */
52
+	protected $exclusions;
53
+
54
+	/**
55
+	 * All possible HTTP headers that represent the
56
+	 * User-Agent string.
57
+	 *
58
+	 * @var array
59
+	 */
60
+	protected static $uaHttpHeaders = array(
61
+		// The default User-Agent string.
62
+		'HTTP_USER_AGENT',
63
+		// Header can occur on devices using Opera Mini.
64
+		'HTTP_X_OPERAMINI_PHONE_UA',
65
+		// Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
66
+		'HTTP_X_DEVICE_USER_AGENT',
67
+		'HTTP_X_ORIGINAL_USER_AGENT',
68
+		'HTTP_X_SKYFIRE_PHONE',
69
+		'HTTP_X_BOLT_PHONE_UA',
70
+		'HTTP_DEVICE_STOCK_UA',
71
+		'HTTP_X_UCBROWSER_DEVICE_UA',
72
+	);
73
+
74
+	/**
75
+	 * Class constructor.
76
+	 */
77
+	public function __construct(array $headers = null, $userAgent = null)
78
+	{
79
+		$this->setHttpHeaders($headers);
80
+		$this->setUserAgent($userAgent);
81
+		$this->crawlers = new Crawlers();
82
+		$this->exclusions = new Exclusions();
83
+	}
84
+
85
+	/**
86
+	 * Set HTTP headers.
87
+	 *
88
+	 * @param array $httpHeaders
89
+	 */
90
+	public function setHttpHeaders($httpHeaders = null)
91
+	{
92
+		// use global _SERVER if $httpHeaders aren't defined
93
+		if (!is_array($httpHeaders) || !count($httpHeaders)) {
94
+			$httpHeaders = $_SERVER;
95
+		}
96
+		// clear existing headers
97
+		$this->httpHeaders = array();
98
+		// Only save HTTP headers. In PHP land, that means only _SERVER vars that
99
+		// start with HTTP_.
100
+		foreach ($httpHeaders as $key => $value) {
101
+			if (substr($key, 0, 5) === 'HTTP_') {
102
+				$this->httpHeaders[$key] = $value;
103
+			}
104
+		}
105
+	}
106
+
107
+	/**
108
+	 * Return user agent headers.
109
+	 *
110
+	 * @return array
111
+	 */
112
+	public function getUaHttpHeaders()
113
+	{
114
+		return self::$uaHttpHeaders;
115
+	}
116
+
117
+	/**
118
+	 * Set the user agent.
119
+	 *
120
+	 * @param string $userAgent
121
+	 */
122
+	public function setUserAgent($userAgent = null)
123
+	{
124
+		if (false === empty($userAgent)) {
125
+			return $this->userAgent = $userAgent;
126
+		} else {
127
+			$this->userAgent = null;
128
+			foreach ($this->getUaHttpHeaders() as $altHeader) {
129
+				if (false === empty($this->httpHeaders[$altHeader])) { // @todo: should use getHttpHeader(), but it would be slow.
130
+					$this->userAgent .= $this->httpHeaders[$altHeader].' ';
131
+				}
132
+			}
133
+
134
+			return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null);
135
+		}
136
+	}
137
+
138
+	/**
139
+	 * Build the user agent regex.
140
+	 *
141
+	 * @return string
142
+	 */
143
+	public function getRegex()
144
+	{
145
+		return '('.implode('|', $this->crawlers->getAll()).')';
146
+	}
147
+
148
+	/**
149
+	 * Build the replacement regex.
150
+	 *
151
+	 * @return string
152
+	 */
153
+	public function getExclusions()
154
+	{
155
+		return '('.implode('|', $this->exclusions->getAll()).')';
156
+	}
157
+
158
+	/**
159
+	 * Check user agent string against the regex.
160
+	 *
161
+	 * @param string $userAgent
162
+	 *
163
+	 * @return bool
164
+	 */
165
+	public function isCrawler($userAgent = null)
166
+	{
167
+		$agent = is_null($userAgent) ? $this->userAgent : $userAgent;
168
+
169
+		$agent = preg_replace('/'.$this->getExclusions().'/i', '', $agent);
170
+
171
+		if (trim($agent) === false) {
172
+			return false;
173
+		} else {
174
+			$result = preg_match('/'.$this->getRegex().'/i', trim($agent), $matches);
175
+		}
176
+
177
+		if ($matches) {
178
+			$this->matches = $matches;
179
+		}
180
+
181
+		return (bool) $result;
182
+	}
183
+
184
+	/**
185
+	 * Return the matches.
186
+	 *
187
+	 * @return string
188
+	 */
189
+	public function getMatches()
190
+	{
191
+		return $this->matches[0];
192
+	}
193 193
 }
Please login to merge, or discard this patch.