@@ -461,7 +461,7 @@ discard block |
||
461 | 461 | public function setHttpHeaders($httpHeaders = null) |
462 | 462 | { |
463 | 463 | // use global _SERVER if $httpHeaders aren't defined |
464 | - if (!is_array($httpHeaders) || !count($httpHeaders)) { |
|
464 | + if ( ! is_array($httpHeaders) || ! count($httpHeaders)) { |
|
465 | 465 | $httpHeaders = $_SERVER; |
466 | 466 | } |
467 | 467 | // clear existing headers |
@@ -492,7 +492,7 @@ discard block |
||
492 | 492 | } |
493 | 493 | } |
494 | 494 | |
495 | - return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null); |
|
495 | + return $this->userAgent = ( ! empty($this->userAgent) ? trim($this->userAgent) : null); |
|
496 | 496 | } |
497 | 497 | } |
498 | 498 |
@@ -4,419 +4,419 @@ |
||
4 | 4 | |
5 | 5 | class CrawlerDetect |
6 | 6 | { |
7 | - /** |
|
8 | - * The user agent. |
|
9 | - * |
|
10 | - * @var string|null |
|
11 | - */ |
|
12 | - protected $userAgent = null; |
|
7 | + /** |
|
8 | + * The user agent. |
|
9 | + * |
|
10 | + * @var string|null |
|
11 | + */ |
|
12 | + protected $userAgent = null; |
|
13 | 13 | |
14 | - /** |
|
15 | - * Headers that contain the user agent. |
|
16 | - * |
|
17 | - * @var array |
|
18 | - */ |
|
19 | - protected $httpHeaders = array(); |
|
14 | + /** |
|
15 | + * Headers that contain the user agent. |
|
16 | + * |
|
17 | + * @var array |
|
18 | + */ |
|
19 | + protected $httpHeaders = array(); |
|
20 | 20 | |
21 | - /** |
|
22 | - * Store regex matches. |
|
23 | - * |
|
24 | - * @var array |
|
25 | - */ |
|
26 | - protected $matches = array(); |
|
21 | + /** |
|
22 | + * Store regex matches. |
|
23 | + * |
|
24 | + * @var array |
|
25 | + */ |
|
26 | + protected $matches = array(); |
|
27 | 27 | |
28 | - /** |
|
29 | - * List of strings to remove from the user agent before running the crawler regex |
|
30 | - * Over a large list of user agents, this gives us about a 55% speed increase! |
|
31 | - * |
|
32 | - * @var array |
|
33 | - */ |
|
34 | - protected static $ignore = array( |
|
35 | - 'Safari.[\d\.]*', |
|
36 | - 'Firefox.[\d\.]*', |
|
37 | - 'Chrome.[\d\.]*', |
|
38 | - 'Chromium.[\d\.]*', |
|
39 | - 'MSIE.[\d\.]', |
|
40 | - 'Opera\/[\d\.]*', |
|
41 | - 'Mozilla.[\d\.]*', |
|
42 | - 'AppleWebKit.[\d\.]*', |
|
43 | - 'Trident.[\d\.]*', |
|
44 | - 'Windows NT.[\d\.]*', |
|
45 | - 'Android.[\d\.]*', |
|
46 | - 'Macintosh.', |
|
47 | - 'Ubuntu', |
|
48 | - 'Linux', |
|
49 | - 'Intel', |
|
50 | - 'Mac OS X', |
|
51 | - 'Gecko.[\d\.]*', |
|
52 | - 'KHTML', |
|
53 | - 'CriOS.[\d\.]*', |
|
54 | - 'CPU iPhone OS ([0-9_])* like Mac OS X', |
|
55 | - 'CPU OS ([0-9_])* like Mac OS X', |
|
56 | - 'iPod', |
|
57 | - 'like Gecko', |
|
58 | - 'compatible', |
|
59 | - 'x86_..', |
|
60 | - 'i686', |
|
61 | - 'x64', |
|
62 | - 'X11', |
|
63 | - 'rv:[\d\.]*', |
|
64 | - 'Version.[\d\.]*', |
|
65 | - 'WOW64', |
|
66 | - 'Win64', |
|
67 | - 'Dalvik.[\d\.]*', |
|
68 | - '\.NET CLR [\d\.]*', |
|
69 | - 'Presto.[\d\.]*', |
|
70 | - 'Media Center PC', |
|
71 | - ); |
|
28 | + /** |
|
29 | + * List of strings to remove from the user agent before running the crawler regex |
|
30 | + * Over a large list of user agents, this gives us about a 55% speed increase! |
|
31 | + * |
|
32 | + * @var array |
|
33 | + */ |
|
34 | + protected static $ignore = array( |
|
35 | + 'Safari.[\d\.]*', |
|
36 | + 'Firefox.[\d\.]*', |
|
37 | + 'Chrome.[\d\.]*', |
|
38 | + 'Chromium.[\d\.]*', |
|
39 | + 'MSIE.[\d\.]', |
|
40 | + 'Opera\/[\d\.]*', |
|
41 | + 'Mozilla.[\d\.]*', |
|
42 | + 'AppleWebKit.[\d\.]*', |
|
43 | + 'Trident.[\d\.]*', |
|
44 | + 'Windows NT.[\d\.]*', |
|
45 | + 'Android.[\d\.]*', |
|
46 | + 'Macintosh.', |
|
47 | + 'Ubuntu', |
|
48 | + 'Linux', |
|
49 | + 'Intel', |
|
50 | + 'Mac OS X', |
|
51 | + 'Gecko.[\d\.]*', |
|
52 | + 'KHTML', |
|
53 | + 'CriOS.[\d\.]*', |
|
54 | + 'CPU iPhone OS ([0-9_])* like Mac OS X', |
|
55 | + 'CPU OS ([0-9_])* like Mac OS X', |
|
56 | + 'iPod', |
|
57 | + 'like Gecko', |
|
58 | + 'compatible', |
|
59 | + 'x86_..', |
|
60 | + 'i686', |
|
61 | + 'x64', |
|
62 | + 'X11', |
|
63 | + 'rv:[\d\.]*', |
|
64 | + 'Version.[\d\.]*', |
|
65 | + 'WOW64', |
|
66 | + 'Win64', |
|
67 | + 'Dalvik.[\d\.]*', |
|
68 | + '\.NET CLR [\d\.]*', |
|
69 | + 'Presto.[\d\.]*', |
|
70 | + 'Media Center PC', |
|
71 | + ); |
|
72 | 72 | |
73 | - /** |
|
74 | - * Array of regular expressions to match against the user agent. |
|
75 | - * |
|
76 | - * @var array |
|
77 | - */ |
|
78 | - protected static $crawlers = array( |
|
79 | - '008\\/', |
|
80 | - 'A6-Indexer', |
|
81 | - 'Aboundex', |
|
82 | - 'Accoona-AI-Agent', |
|
83 | - 'acoon', |
|
84 | - 'AddThis', |
|
85 | - 'ADmantX', |
|
86 | - 'AHC', |
|
87 | - 'Airmail', |
|
88 | - 'Anemone', |
|
89 | - 'Arachmo', |
|
90 | - 'archive-com', |
|
91 | - 'B-l-i-t-z-B-O-T', |
|
92 | - 'bibnum\.bnf', |
|
93 | - 'biglotron', |
|
94 | - 'binlar', |
|
95 | - 'boitho\.com-dc', |
|
96 | - 'BUbiNG', |
|
97 | - 'Butterfly\\/', |
|
98 | - 'BuzzSumo', |
|
99 | - 'CC Metadata Scaper', |
|
100 | - 'Cerberian Drtrs', |
|
101 | - 'changedetection', |
|
102 | - 'Charlotte', |
|
103 | - 'clips\.ua\.ac\.be', |
|
104 | - 'CloudFlare-AlwaysOnline', |
|
105 | - 'coccoc', |
|
106 | - 'Commons-HttpClient', |
|
107 | - 'convera', |
|
108 | - 'cosmos', |
|
109 | - 'Covario-IDS', |
|
110 | - 'curl', |
|
111 | - 'CyberPatrol', |
|
112 | - 'DataparkSearch', |
|
113 | - 'dataprovider', |
|
114 | - 'Digg', |
|
115 | - 'DomainAppender', |
|
116 | - 'drupact', |
|
117 | - 'EARTHCOM', |
|
118 | - 'ec2linkfinder', |
|
119 | - 'ElectricMonk', |
|
120 | - 'Embedly', |
|
121 | - 'europarchive\.org', |
|
122 | - 'EventMachine HttpClient', |
|
123 | - 'ezooms', |
|
124 | - 'eZ Publish Link Validator', |
|
125 | - 'facebookexternalhit', |
|
126 | - 'Feedfetcher-Google', |
|
127 | - 'FeedValidator', |
|
128 | - 'FindLinks', |
|
129 | - 'findlink', |
|
130 | - 'findthatfile', |
|
131 | - 'Flamingo_SearchEngine', |
|
132 | - 'fluffy', |
|
133 | - 'getprismatic\.com', |
|
134 | - 'g00g1e\.net', |
|
135 | - 'GigablastOpenSource', |
|
136 | - 'grub-client', |
|
137 | - 'Genieo', |
|
138 | - 'Go-http-client', |
|
139 | - 'Google-HTTP-Java-Client', |
|
140 | - 'Google favicon', |
|
141 | - 'Google Keyword Suggestion', |
|
142 | - 'heritrix', |
|
143 | - 'Holmes', |
|
144 | - 'htdig', |
|
145 | - 'httpunit', |
|
146 | - 'httrack', |
|
147 | - 'ichiro', |
|
148 | - 'igdeSpyder', |
|
149 | - 'InAGist', |
|
150 | - 'InfoWizards Reciprocal Link System PRO', |
|
151 | - 'integromedb', |
|
152 | - 'IODC', |
|
153 | - 'IOI', |
|
154 | - 'ips-agent', |
|
155 | - 'iZSearch', |
|
156 | - 'L\.webis', |
|
157 | - 'Larbin', |
|
158 | - 'libwww', |
|
159 | - 'Link Valet', |
|
160 | - 'linkdex', |
|
161 | - 'LinkExaminer', |
|
162 | - 'LinkWalker', |
|
163 | - 'Lipperhey Link Explorer', |
|
164 | - 'Lipperhey SEO Service', |
|
165 | - 'LongURL API', |
|
166 | - 'ltx71', |
|
167 | - 'lwp-trivial', |
|
168 | - 'MegaIndex\.ru', |
|
169 | - 'mabontland', |
|
170 | - 'MagpieRSS', |
|
171 | - 'Mediapartners-Google', |
|
172 | - 'MetaURI', |
|
173 | - 'Mnogosearch', |
|
174 | - 'mogimogi', |
|
175 | - 'Morning Paper', |
|
176 | - 'Mrcgiguy', |
|
177 | - 'MVAClient', |
|
178 | - 'netresearchserver', |
|
179 | - 'NewsGator', |
|
180 | - 'newsme', |
|
181 | - 'NG-Search', |
|
182 | - '^NING\\/', |
|
183 | - 'Notifixious', |
|
184 | - 'nutch', |
|
185 | - 'NutchCVS', |
|
186 | - 'Nymesis', |
|
187 | - 'oegp', |
|
188 | - 'online link validator', |
|
189 | - 'Online Website Link Checker', |
|
190 | - 'Orbiter', |
|
191 | - 'ow\.ly', |
|
192 | - 'Ploetz \+ Zeller', |
|
193 | - 'page2rss', |
|
194 | - 'panscient', |
|
195 | - 'Peew', |
|
196 | - 'phpcrawl', |
|
197 | - 'Pizilla', |
|
198 | - 'Plukkie', |
|
199 | - 'Pompos', |
|
200 | - 'postano', |
|
201 | - 'PostPost', |
|
202 | - 'postrank', |
|
203 | - 'proximic', |
|
204 | - 'PycURL', |
|
205 | - 'Python-httplib2', |
|
206 | - 'python-requests', |
|
207 | - 'Python-urllib', |
|
208 | - 'Qseero', |
|
209 | - 'Qwantify', |
|
210 | - 'Radian6', |
|
211 | - 'RebelMouse', |
|
212 | - 'REL Link Checker', |
|
213 | - 'RetrevoPageAnalyzer', |
|
214 | - 'Riddler', |
|
215 | - 'Robosourcer', |
|
216 | - 'Ruby', |
|
217 | - 'SBIder', |
|
218 | - 'ScoutJet', |
|
219 | - 'ScoutURLMonitor', |
|
220 | - 'Scrapy', |
|
221 | - 'Scrubby', |
|
222 | - 'SearchSight', |
|
223 | - 'semanticdiscovery', |
|
224 | - 'SEOstats', |
|
225 | - 'Seznam screenshot-generator', |
|
226 | - 'ShopWiki', |
|
227 | - 'SiteBar', |
|
228 | - 'siteexplorer\.info', |
|
229 | - 'slider\.com', |
|
230 | - 'slurp', |
|
231 | - 'Snappy', |
|
232 | - 'sogou', |
|
233 | - 'speedy', |
|
234 | - 'Sqworm', |
|
235 | - 'StackRambler', |
|
236 | - 'Stratagems Kumo', |
|
237 | - 'summify', |
|
238 | - 'teoma', |
|
239 | - 'theoldreader\.com', |
|
240 | - 'TinEye', |
|
241 | - 'Traackr.com', |
|
242 | - 'truwoGPS', |
|
243 | - 'tweetedtimes\.com', |
|
244 | - 'Twikle', |
|
245 | - 'UnwindFetchor', |
|
246 | - 'updated', |
|
247 | - 'urlresolver', |
|
248 | - 'Validator\.nu\\/LV', |
|
249 | - 'Vagabondo', |
|
250 | - 'Vivante Link Checker', |
|
251 | - 'Vortex', |
|
252 | - 'voyager\\/', |
|
253 | - 'VYU2', |
|
254 | - 'W3C-checklink', |
|
255 | - 'W3C_CSS_Validator_JFouffa', |
|
256 | - 'W3C_I18n-Checker', |
|
257 | - 'W3C-mobileOK', |
|
258 | - 'W3C_Unicorn', |
|
259 | - 'W3C_Validator', |
|
260 | - 'WebIndex', |
|
261 | - 'Websquash\.com', |
|
262 | - 'webcollage', |
|
263 | - 'webmon ', |
|
264 | - 'WeSEE:Search', |
|
265 | - 'wf84', |
|
266 | - 'wget', |
|
267 | - 'WomlpeFactory', |
|
268 | - 'wotbox', |
|
269 | - 'Xenu Link Sleuth', |
|
270 | - 'XML Sitemaps Generator', |
|
271 | - 'Y!J-ASR', |
|
272 | - 'yacy', |
|
273 | - 'Yahoo Link Preview', |
|
274 | - 'Yahoo! Slurp China', |
|
275 | - 'Yahoo! Slurp', |
|
276 | - 'YahooSeeker', |
|
277 | - 'YahooSeeker-Testing', |
|
278 | - 'YandexImages', |
|
279 | - 'YandexMetrika', |
|
280 | - 'yandex', |
|
281 | - 'yanga', |
|
282 | - 'yeti', |
|
283 | - 'yoogliFetchAgent', |
|
284 | - 'Zao', |
|
285 | - 'ZyBorg', |
|
286 | - '[a-z0-9\\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)', |
|
287 | - ); |
|
73 | + /** |
|
74 | + * Array of regular expressions to match against the user agent. |
|
75 | + * |
|
76 | + * @var array |
|
77 | + */ |
|
78 | + protected static $crawlers = array( |
|
79 | + '008\\/', |
|
80 | + 'A6-Indexer', |
|
81 | + 'Aboundex', |
|
82 | + 'Accoona-AI-Agent', |
|
83 | + 'acoon', |
|
84 | + 'AddThis', |
|
85 | + 'ADmantX', |
|
86 | + 'AHC', |
|
87 | + 'Airmail', |
|
88 | + 'Anemone', |
|
89 | + 'Arachmo', |
|
90 | + 'archive-com', |
|
91 | + 'B-l-i-t-z-B-O-T', |
|
92 | + 'bibnum\.bnf', |
|
93 | + 'biglotron', |
|
94 | + 'binlar', |
|
95 | + 'boitho\.com-dc', |
|
96 | + 'BUbiNG', |
|
97 | + 'Butterfly\\/', |
|
98 | + 'BuzzSumo', |
|
99 | + 'CC Metadata Scaper', |
|
100 | + 'Cerberian Drtrs', |
|
101 | + 'changedetection', |
|
102 | + 'Charlotte', |
|
103 | + 'clips\.ua\.ac\.be', |
|
104 | + 'CloudFlare-AlwaysOnline', |
|
105 | + 'coccoc', |
|
106 | + 'Commons-HttpClient', |
|
107 | + 'convera', |
|
108 | + 'cosmos', |
|
109 | + 'Covario-IDS', |
|
110 | + 'curl', |
|
111 | + 'CyberPatrol', |
|
112 | + 'DataparkSearch', |
|
113 | + 'dataprovider', |
|
114 | + 'Digg', |
|
115 | + 'DomainAppender', |
|
116 | + 'drupact', |
|
117 | + 'EARTHCOM', |
|
118 | + 'ec2linkfinder', |
|
119 | + 'ElectricMonk', |
|
120 | + 'Embedly', |
|
121 | + 'europarchive\.org', |
|
122 | + 'EventMachine HttpClient', |
|
123 | + 'ezooms', |
|
124 | + 'eZ Publish Link Validator', |
|
125 | + 'facebookexternalhit', |
|
126 | + 'Feedfetcher-Google', |
|
127 | + 'FeedValidator', |
|
128 | + 'FindLinks', |
|
129 | + 'findlink', |
|
130 | + 'findthatfile', |
|
131 | + 'Flamingo_SearchEngine', |
|
132 | + 'fluffy', |
|
133 | + 'getprismatic\.com', |
|
134 | + 'g00g1e\.net', |
|
135 | + 'GigablastOpenSource', |
|
136 | + 'grub-client', |
|
137 | + 'Genieo', |
|
138 | + 'Go-http-client', |
|
139 | + 'Google-HTTP-Java-Client', |
|
140 | + 'Google favicon', |
|
141 | + 'Google Keyword Suggestion', |
|
142 | + 'heritrix', |
|
143 | + 'Holmes', |
|
144 | + 'htdig', |
|
145 | + 'httpunit', |
|
146 | + 'httrack', |
|
147 | + 'ichiro', |
|
148 | + 'igdeSpyder', |
|
149 | + 'InAGist', |
|
150 | + 'InfoWizards Reciprocal Link System PRO', |
|
151 | + 'integromedb', |
|
152 | + 'IODC', |
|
153 | + 'IOI', |
|
154 | + 'ips-agent', |
|
155 | + 'iZSearch', |
|
156 | + 'L\.webis', |
|
157 | + 'Larbin', |
|
158 | + 'libwww', |
|
159 | + 'Link Valet', |
|
160 | + 'linkdex', |
|
161 | + 'LinkExaminer', |
|
162 | + 'LinkWalker', |
|
163 | + 'Lipperhey Link Explorer', |
|
164 | + 'Lipperhey SEO Service', |
|
165 | + 'LongURL API', |
|
166 | + 'ltx71', |
|
167 | + 'lwp-trivial', |
|
168 | + 'MegaIndex\.ru', |
|
169 | + 'mabontland', |
|
170 | + 'MagpieRSS', |
|
171 | + 'Mediapartners-Google', |
|
172 | + 'MetaURI', |
|
173 | + 'Mnogosearch', |
|
174 | + 'mogimogi', |
|
175 | + 'Morning Paper', |
|
176 | + 'Mrcgiguy', |
|
177 | + 'MVAClient', |
|
178 | + 'netresearchserver', |
|
179 | + 'NewsGator', |
|
180 | + 'newsme', |
|
181 | + 'NG-Search', |
|
182 | + '^NING\\/', |
|
183 | + 'Notifixious', |
|
184 | + 'nutch', |
|
185 | + 'NutchCVS', |
|
186 | + 'Nymesis', |
|
187 | + 'oegp', |
|
188 | + 'online link validator', |
|
189 | + 'Online Website Link Checker', |
|
190 | + 'Orbiter', |
|
191 | + 'ow\.ly', |
|
192 | + 'Ploetz \+ Zeller', |
|
193 | + 'page2rss', |
|
194 | + 'panscient', |
|
195 | + 'Peew', |
|
196 | + 'phpcrawl', |
|
197 | + 'Pizilla', |
|
198 | + 'Plukkie', |
|
199 | + 'Pompos', |
|
200 | + 'postano', |
|
201 | + 'PostPost', |
|
202 | + 'postrank', |
|
203 | + 'proximic', |
|
204 | + 'PycURL', |
|
205 | + 'Python-httplib2', |
|
206 | + 'python-requests', |
|
207 | + 'Python-urllib', |
|
208 | + 'Qseero', |
|
209 | + 'Qwantify', |
|
210 | + 'Radian6', |
|
211 | + 'RebelMouse', |
|
212 | + 'REL Link Checker', |
|
213 | + 'RetrevoPageAnalyzer', |
|
214 | + 'Riddler', |
|
215 | + 'Robosourcer', |
|
216 | + 'Ruby', |
|
217 | + 'SBIder', |
|
218 | + 'ScoutJet', |
|
219 | + 'ScoutURLMonitor', |
|
220 | + 'Scrapy', |
|
221 | + 'Scrubby', |
|
222 | + 'SearchSight', |
|
223 | + 'semanticdiscovery', |
|
224 | + 'SEOstats', |
|
225 | + 'Seznam screenshot-generator', |
|
226 | + 'ShopWiki', |
|
227 | + 'SiteBar', |
|
228 | + 'siteexplorer\.info', |
|
229 | + 'slider\.com', |
|
230 | + 'slurp', |
|
231 | + 'Snappy', |
|
232 | + 'sogou', |
|
233 | + 'speedy', |
|
234 | + 'Sqworm', |
|
235 | + 'StackRambler', |
|
236 | + 'Stratagems Kumo', |
|
237 | + 'summify', |
|
238 | + 'teoma', |
|
239 | + 'theoldreader\.com', |
|
240 | + 'TinEye', |
|
241 | + 'Traackr.com', |
|
242 | + 'truwoGPS', |
|
243 | + 'tweetedtimes\.com', |
|
244 | + 'Twikle', |
|
245 | + 'UnwindFetchor', |
|
246 | + 'updated', |
|
247 | + 'urlresolver', |
|
248 | + 'Validator\.nu\\/LV', |
|
249 | + 'Vagabondo', |
|
250 | + 'Vivante Link Checker', |
|
251 | + 'Vortex', |
|
252 | + 'voyager\\/', |
|
253 | + 'VYU2', |
|
254 | + 'W3C-checklink', |
|
255 | + 'W3C_CSS_Validator_JFouffa', |
|
256 | + 'W3C_I18n-Checker', |
|
257 | + 'W3C-mobileOK', |
|
258 | + 'W3C_Unicorn', |
|
259 | + 'W3C_Validator', |
|
260 | + 'WebIndex', |
|
261 | + 'Websquash\.com', |
|
262 | + 'webcollage', |
|
263 | + 'webmon ', |
|
264 | + 'WeSEE:Search', |
|
265 | + 'wf84', |
|
266 | + 'wget', |
|
267 | + 'WomlpeFactory', |
|
268 | + 'wotbox', |
|
269 | + 'Xenu Link Sleuth', |
|
270 | + 'XML Sitemaps Generator', |
|
271 | + 'Y!J-ASR', |
|
272 | + 'yacy', |
|
273 | + 'Yahoo Link Preview', |
|
274 | + 'Yahoo! Slurp China', |
|
275 | + 'Yahoo! Slurp', |
|
276 | + 'YahooSeeker', |
|
277 | + 'YahooSeeker-Testing', |
|
278 | + 'YandexImages', |
|
279 | + 'YandexMetrika', |
|
280 | + 'yandex', |
|
281 | + 'yanga', |
|
282 | + 'yeti', |
|
283 | + 'yoogliFetchAgent', |
|
284 | + 'Zao', |
|
285 | + 'ZyBorg', |
|
286 | + '[a-z0-9\\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)', |
|
287 | + ); |
|
288 | 288 | |
289 | - /** |
|
290 | - * All possible HTTP headers that represent the |
|
291 | - * User-Agent string. |
|
292 | - * |
|
293 | - * @var array |
|
294 | - */ |
|
295 | - protected static $uaHttpHeaders = array( |
|
296 | - // The default User-Agent string. |
|
297 | - 'HTTP_USER_AGENT', |
|
298 | - // Header can occur on devices using Opera Mini. |
|
299 | - 'HTTP_X_OPERAMINI_PHONE_UA', |
|
300 | - // Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/ |
|
301 | - 'HTTP_X_DEVICE_USER_AGENT', |
|
302 | - 'HTTP_X_ORIGINAL_USER_AGENT', |
|
303 | - 'HTTP_X_SKYFIRE_PHONE', |
|
304 | - 'HTTP_X_BOLT_PHONE_UA', |
|
305 | - 'HTTP_DEVICE_STOCK_UA', |
|
306 | - 'HTTP_X_UCBROWSER_DEVICE_UA', |
|
307 | - ); |
|
289 | + /** |
|
290 | + * All possible HTTP headers that represent the |
|
291 | + * User-Agent string. |
|
292 | + * |
|
293 | + * @var array |
|
294 | + */ |
|
295 | + protected static $uaHttpHeaders = array( |
|
296 | + // The default User-Agent string. |
|
297 | + 'HTTP_USER_AGENT', |
|
298 | + // Header can occur on devices using Opera Mini. |
|
299 | + 'HTTP_X_OPERAMINI_PHONE_UA', |
|
300 | + // Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/ |
|
301 | + 'HTTP_X_DEVICE_USER_AGENT', |
|
302 | + 'HTTP_X_ORIGINAL_USER_AGENT', |
|
303 | + 'HTTP_X_SKYFIRE_PHONE', |
|
304 | + 'HTTP_X_BOLT_PHONE_UA', |
|
305 | + 'HTTP_DEVICE_STOCK_UA', |
|
306 | + 'HTTP_X_UCBROWSER_DEVICE_UA', |
|
307 | + ); |
|
308 | 308 | |
309 | - /** |
|
310 | - * Class constructor. |
|
311 | - */ |
|
312 | - public function __construct(array $headers = null, $userAgent = null) |
|
313 | - { |
|
314 | - $this->setHttpHeaders($headers); |
|
315 | - $this->setUserAgent($userAgent); |
|
316 | - } |
|
309 | + /** |
|
310 | + * Class constructor. |
|
311 | + */ |
|
312 | + public function __construct(array $headers = null, $userAgent = null) |
|
313 | + { |
|
314 | + $this->setHttpHeaders($headers); |
|
315 | + $this->setUserAgent($userAgent); |
|
316 | + } |
|
317 | 317 | |
318 | - /** |
|
319 | - * Set HTTP headers. |
|
320 | - * |
|
321 | - * @param array $httpHeaders |
|
322 | - */ |
|
323 | - public function setHttpHeaders($httpHeaders = null) |
|
324 | - { |
|
325 | - // use global _SERVER if $httpHeaders aren't defined |
|
326 | - if (!is_array($httpHeaders) || !count($httpHeaders)) { |
|
327 | - $httpHeaders = $_SERVER; |
|
328 | - } |
|
329 | - // clear existing headers |
|
330 | - $this->httpHeaders = array(); |
|
331 | - // Only save HTTP headers. In PHP land, that means only _SERVER vars that |
|
332 | - // start with HTTP_. |
|
333 | - foreach ($httpHeaders as $key => $value) { |
|
334 | - if (substr($key, 0, 5) === 'HTTP_') { |
|
335 | - $this->httpHeaders[$key] = $value; |
|
336 | - } |
|
337 | - } |
|
338 | - } |
|
318 | + /** |
|
319 | + * Set HTTP headers. |
|
320 | + * |
|
321 | + * @param array $httpHeaders |
|
322 | + */ |
|
323 | + public function setHttpHeaders($httpHeaders = null) |
|
324 | + { |
|
325 | + // use global _SERVER if $httpHeaders aren't defined |
|
326 | + if (!is_array($httpHeaders) || !count($httpHeaders)) { |
|
327 | + $httpHeaders = $_SERVER; |
|
328 | + } |
|
329 | + // clear existing headers |
|
330 | + $this->httpHeaders = array(); |
|
331 | + // Only save HTTP headers. In PHP land, that means only _SERVER vars that |
|
332 | + // start with HTTP_. |
|
333 | + foreach ($httpHeaders as $key => $value) { |
|
334 | + if (substr($key, 0, 5) === 'HTTP_') { |
|
335 | + $this->httpHeaders[$key] = $value; |
|
336 | + } |
|
337 | + } |
|
338 | + } |
|
339 | 339 | |
340 | - /** |
|
341 | - * Return user agent headers. |
|
342 | - * |
|
343 | - * @return array |
|
344 | - */ |
|
345 | - public function getUaHttpHeaders() |
|
346 | - { |
|
347 | - return self::$uaHttpHeaders; |
|
348 | - } |
|
340 | + /** |
|
341 | + * Return user agent headers. |
|
342 | + * |
|
343 | + * @return array |
|
344 | + */ |
|
345 | + public function getUaHttpHeaders() |
|
346 | + { |
|
347 | + return self::$uaHttpHeaders; |
|
348 | + } |
|
349 | 349 | |
350 | - /** |
|
351 | - * Set the user agent. |
|
352 | - * |
|
353 | - * @param string $userAgent |
|
354 | - */ |
|
355 | - public function setUserAgent($userAgent = null) |
|
356 | - { |
|
357 | - if (false === empty($userAgent)) { |
|
358 | - return $this->userAgent = $userAgent; |
|
359 | - } else { |
|
360 | - $this->userAgent = null; |
|
361 | - foreach ($this->getUaHttpHeaders() as $altHeader) { |
|
362 | - if (false === empty($this->httpHeaders[$altHeader])) { // @todo: should use getHttpHeader(), but it would be slow. |
|
363 | - $this->userAgent .= $this->httpHeaders[$altHeader].' '; |
|
364 | - } |
|
365 | - } |
|
350 | + /** |
|
351 | + * Set the user agent. |
|
352 | + * |
|
353 | + * @param string $userAgent |
|
354 | + */ |
|
355 | + public function setUserAgent($userAgent = null) |
|
356 | + { |
|
357 | + if (false === empty($userAgent)) { |
|
358 | + return $this->userAgent = $userAgent; |
|
359 | + } else { |
|
360 | + $this->userAgent = null; |
|
361 | + foreach ($this->getUaHttpHeaders() as $altHeader) { |
|
362 | + if (false === empty($this->httpHeaders[$altHeader])) { // @todo: should use getHttpHeader(), but it would be slow. |
|
363 | + $this->userAgent .= $this->httpHeaders[$altHeader].' '; |
|
364 | + } |
|
365 | + } |
|
366 | 366 | |
367 | - return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null); |
|
368 | - } |
|
369 | - } |
|
367 | + return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null); |
|
368 | + } |
|
369 | + } |
|
370 | 370 | |
371 | - /** |
|
372 | - * Build the user agent regex. |
|
373 | - * |
|
374 | - * @return string |
|
375 | - */ |
|
376 | - public function getRegex() |
|
377 | - { |
|
378 | - return '('.implode('|', self::$crawlers).')'; |
|
379 | - } |
|
371 | + /** |
|
372 | + * Build the user agent regex. |
|
373 | + * |
|
374 | + * @return string |
|
375 | + */ |
|
376 | + public function getRegex() |
|
377 | + { |
|
378 | + return '('.implode('|', self::$crawlers).')'; |
|
379 | + } |
|
380 | 380 | |
381 | - /** |
|
382 | - * Build the replacement regex. |
|
383 | - * |
|
384 | - * @return string |
|
385 | - */ |
|
386 | - public function getIgnored() |
|
387 | - { |
|
388 | - return '('.implode('|', self::$ignore).')'; |
|
389 | - } |
|
381 | + /** |
|
382 | + * Build the replacement regex. |
|
383 | + * |
|
384 | + * @return string |
|
385 | + */ |
|
386 | + public function getIgnored() |
|
387 | + { |
|
388 | + return '('.implode('|', self::$ignore).')'; |
|
389 | + } |
|
390 | 390 | |
391 | - /** |
|
392 | - * Check user agent string against the regex. |
|
393 | - * |
|
394 | - * @param string $userAgent |
|
395 | - * |
|
396 | - * @return bool |
|
397 | - */ |
|
398 | - public function isCrawler($userAgent = null) |
|
399 | - { |
|
400 | - $agent = is_null($userAgent) ? $this->userAgent : $userAgent; |
|
391 | + /** |
|
392 | + * Check user agent string against the regex. |
|
393 | + * |
|
394 | + * @param string $userAgent |
|
395 | + * |
|
396 | + * @return bool |
|
397 | + */ |
|
398 | + public function isCrawler($userAgent = null) |
|
399 | + { |
|
400 | + $agent = is_null($userAgent) ? $this->userAgent : $userAgent; |
|
401 | 401 | |
402 | - $agent = preg_replace('/'.$this->getIgnored().'/i', '', $agent); |
|
402 | + $agent = preg_replace('/'.$this->getIgnored().'/i', '', $agent); |
|
403 | 403 | |
404 | - $result = preg_match('/'.$this->getRegex().'/i', $agent, $matches); |
|
404 | + $result = preg_match('/'.$this->getRegex().'/i', $agent, $matches); |
|
405 | 405 | |
406 | - if ($matches) { |
|
407 | - $this->matches = $matches; |
|
408 | - } |
|
406 | + if ($matches) { |
|
407 | + $this->matches = $matches; |
|
408 | + } |
|
409 | 409 | |
410 | - return (bool) $result; |
|
411 | - } |
|
410 | + return (bool) $result; |
|
411 | + } |
|
412 | 412 | |
413 | - /** |
|
414 | - * Return the matches. |
|
415 | - * |
|
416 | - * @return string|null |
|
417 | - */ |
|
418 | - public function getMatches() |
|
419 | - { |
|
420 | - return $this->matches[0]; |
|
421 | - } |
|
413 | + /** |
|
414 | + * Return the matches. |
|
415 | + * |
|
416 | + * @return string|null |
|
417 | + */ |
|
418 | + public function getMatches() |
|
419 | + { |
|
420 | + return $this->matches[0]; |
|
421 | + } |
|
422 | 422 | } |
@@ -2,30 +2,30 @@ |
||
2 | 2 | |
3 | 3 | class UserAgentTest extends PHPUnit_Framework_TestCase |
4 | 4 | { |
5 | - protected $CrawlerDetect; |
|
5 | + protected $CrawlerDetect; |
|
6 | 6 | |
7 | - public function setUp() |
|
8 | - { |
|
9 | - $this->CrawlerDetect = new Jaybizzle\CrawlerDetect\CrawlerDetect(); |
|
10 | - } |
|
7 | + public function setUp() |
|
8 | + { |
|
9 | + $this->CrawlerDetect = new Jaybizzle\CrawlerDetect\CrawlerDetect(); |
|
10 | + } |
|
11 | 11 | |
12 | - public function testBots() |
|
13 | - { |
|
14 | - $lines = file(__DIR__.'/crawlers.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); |
|
12 | + public function testBots() |
|
13 | + { |
|
14 | + $lines = file(__DIR__.'/crawlers.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); |
|
15 | 15 | |
16 | - foreach ($lines as $line) { |
|
17 | - $test = $this->CrawlerDetect->isCrawler($line); |
|
18 | - $this->assertEquals($test, true, $line); |
|
19 | - } |
|
20 | - } |
|
16 | + foreach ($lines as $line) { |
|
17 | + $test = $this->CrawlerDetect->isCrawler($line); |
|
18 | + $this->assertEquals($test, true, $line); |
|
19 | + } |
|
20 | + } |
|
21 | 21 | |
22 | - public function testDevices() |
|
23 | - { |
|
24 | - $lines = file(__DIR__.'/devices.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); |
|
22 | + public function testDevices() |
|
23 | + { |
|
24 | + $lines = file(__DIR__.'/devices.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); |
|
25 | 25 | |
26 | - foreach ($lines as $line) { |
|
27 | - $test = $this->CrawlerDetect->isCrawler($line); |
|
28 | - $this->assertEquals($test, false, $line); |
|
29 | - } |
|
30 | - } |
|
26 | + foreach ($lines as $line) { |
|
27 | + $test = $this->CrawlerDetect->isCrawler($line); |
|
28 | + $this->assertEquals($test, false, $line); |
|
29 | + } |
|
30 | + } |
|
31 | 31 | } |