Inspection of "Separate fixtures into their own classes" - JayBizzle/Crawler-Detect - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#89)

by Mark

created 2016-04-23 19:30 UTC

Status

Indentation +1 added lines, -1 removed lines patch added patch discarded remove patch

@@ -11,7 +11,7 @@
 block discarded – undo
 $dot = dirname(__FILE__);
 
 if (!file_exists($composer = dirname($dot).'/vendor/autoload.php')) {
-    throw new RuntimeException("Please run 'composer install' first to set up autoloading. $composer");
+	throw new RuntimeException("Please run 'composer install' first to set up autoloading. $composer");
 }
 /** @var \Composer\Autoload\ClassLoader $autoloader */
 $autoloader = include $composer;

Please login to merge, or discard this patch.

Spacing +1 added lines, -1 removed lines patch added patch discarded remove patch

@@ -10,7 +10,7 @@
 block discarded – undo
  */
 $dot = dirname(__FILE__);
 
-if (!file_exists($composer = dirname($dot).'/vendor/autoload.php')) {
+if ( ! file_exists($composer = dirname($dot).'/vendor/autoload.php')) {
     throw new RuntimeException("Please run 'composer install' first to set up autoloading. $composer");
 }
 /** @var \Composer\Autoload\ClassLoader $autoloader */

Please login to merge, or discard this patch.

src/Fixtures/Exclusions.php 1 patch

Indentation +49 added lines, -49 removed lines patch added patch discarded remove patch

		@@ -13,53 +13,53 @@
		block discarded – undo
13	13
14	14	class Exclusions extends AbstractProvider
15	15	{
16		- /**
17		- * List of strings to remove from the user agent before running the crawler regex
18		- * Over a large list of user agents, this gives us about a 55% speed increase!
19		- *
20		- * @var array
21		- */
22		- protected $data = array(
23		- 'Safari.[\d\.]*',
24		- 'Firefox.[\d\.]*',
25		- 'Chrome.[\d\.]*',
26		- 'Chromium.[\d\.]*',
27		- 'MSIE.[\d\.]',
28		- 'Opera\/[\d\.]*',
29		- 'Mozilla.[\d\.]*',
30		- 'AppleWebKit.[\d\.]*',
31		- 'Trident.[\d\.]*',
32		- 'Windows NT.[\d\.]*',
33		- 'Android.[\d\.]*',
34		- 'Macintosh.',
35		- 'Ubuntu',
36		- 'Linux',
37		- '[ ]Intel',
38		- 'Mac OS X [\d_]*',
39		- '(like )?Gecko(.[\d\.]*)?',
40		- 'KHTML',
41		- 'CriOS.[\d\.]*',
42		- 'CPU iPhone OS ([0-9_])* like Mac OS X',
43		- 'CPU OS ([0-9_])* like Mac OS X',
44		- 'iPod',
45		- 'compatible',
46		- 'x86_..',
47		- 'i686',
48		- 'x64',
49		- 'X11',
50		- 'rv:[\d\.]*',
51		- 'Version.[\d\.]*',
52		- 'WOW64',
53		- 'Win64',
54		- 'Dalvik.[\d\.]*',
55		- ' \.NET CLR [\d\.]*',
56		- 'Presto.[\d\.]*',
57		- 'Media Center PC',
58		- 'BlackBerry',
59		- 'Build',
60		- 'Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.',
61		- 'Opera',
62		- ' \.NET[\d\.]*',
63		- '\(|\)|;|,', // Remove the following characters ( ) : ,
64		- );
	16	+ /**
	17	+ * List of strings to remove from the user agent before running the crawler regex
	18	+ * Over a large list of user agents, this gives us about a 55% speed increase!
	19	+ *
	20	+ * @var array
	21	+ */
	22	+ protected $data = array(
	23	+ 'Safari.[\d\.]*',
	24	+ 'Firefox.[\d\.]*',
	25	+ 'Chrome.[\d\.]*',
	26	+ 'Chromium.[\d\.]*',
	27	+ 'MSIE.[\d\.]',
	28	+ 'Opera\/[\d\.]*',
	29	+ 'Mozilla.[\d\.]*',
	30	+ 'AppleWebKit.[\d\.]*',
	31	+ 'Trident.[\d\.]*',
	32	+ 'Windows NT.[\d\.]*',
	33	+ 'Android.[\d\.]*',
	34	+ 'Macintosh.',
	35	+ 'Ubuntu',
	36	+ 'Linux',
	37	+ '[ ]Intel',
	38	+ 'Mac OS X [\d_]*',
	39	+ '(like )?Gecko(.[\d\.]*)?',
	40	+ 'KHTML',
	41	+ 'CriOS.[\d\.]*',
	42	+ 'CPU iPhone OS ([0-9_])* like Mac OS X',
	43	+ 'CPU OS ([0-9_])* like Mac OS X',
	44	+ 'iPod',
	45	+ 'compatible',
	46	+ 'x86_..',
	47	+ 'i686',
	48	+ 'x64',
	49	+ 'X11',
	50	+ 'rv:[\d\.]*',
	51	+ 'Version.[\d\.]*',
	52	+ 'WOW64',
	53	+ 'Win64',
	54	+ 'Dalvik.[\d\.]*',
	55	+ ' \.NET CLR [\d\.]*',
	56	+ 'Presto.[\d\.]*',
	57	+ 'Media Center PC',
	58	+ 'BlackBerry',
	59	+ 'Build',
	60	+ 'Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.',
	61	+ 'Opera',
	62	+ ' \.NET[\d\.]*',
	63	+ '\(|\)|;|,', // Remove the following characters ( ) : ,
	64	+ );
65	65	}

Please login to merge, or discard this patch.

src/Fixtures/AbstractProvider.php 1 patch

Indentation +4 added lines, -4 removed lines patch added patch discarded remove patch

@@ -13,8 +13,8 @@
 block discarded – undo
 
 abstract class AbstractProvider
 {
-    public function getAll()
-    {
-        return $this->data;
-    }
+	public function getAll()
+	{
+		return $this->data;
+	}
 }

Please login to merge, or discard this patch.

src/Fixtures/Crawlers.php 1 patch

Indentation +320 added lines, -320 removed lines patch added patch discarded remove patch

@@ -13,324 +13,324 @@
 block discarded – undo
 
 class Crawlers extends AbstractProvider
 {
-    /**
-     * Array of regular expressions to match against the user agent.
-     *
-     * @var array
-     */
-    protected $data = array(
-        '.*Java.*outbrain',
-        '008\/',
-        '^NING\/',
-        'A6-Indexer',
-        'Aboundex',
-        'Accoona-AI-Agent',
-        'acoon',
-        'AddThis',
-        'ADmantX',
-        'AHC',
-        'Airmail',
-        'alexa site audit',
-        'Anemone',
-        'Apache-HttpClient\/',
-        'Arachmo',
-        'archive-com',
-        'B-l-i-t-z-B-O-T',
-        'Backlink-Ceck\.de',
-        'baidu\.com',
-        'BazQux',
-        'bibnum\.bnf',
-        'biglotron',
-        'BingLocalSearch',
-        'BingPreview',
-        'binlar',
-        'Bloglovin',
-        'Blogtrottr',
-        'boitho\.com-dc',
-        'Browsershots',
-        'BUbiNG',
-        'Butterfly\/',
-        'BuzzSumo',
-        'CapsuleChecker',
-        'CC Metadata Scaper',
-        'Cerberian Drtrs',
-        'changedetection',
-        'Charlotte',
-        'clips\.ua\.ac\.be',
-        'CloudFlare-AlwaysOnline',
-        'coccoc',
-        'CommaFeed',
-        'Commons-HttpClient',
-        'convera',
-        'cosmos',
-        'corporatetwitnews',
-        'Covario-IDS',
-        'cron-job\.org',
-        'Curious George',
-        'curl',
-        'CyberPatrol',
-        'DataparkSearch',
-        'dataprovider',
-        'Daum(oa)?[ \/][0-9]',
-        'developers\.google\.com\/\+\/web\/snippet\/',
-        'Digg',
-        'DomainAppender',
-        'Dragonfly File Reader',
-        'drupact',
-        'EARTHCOM',
-        'ec2linkfinder',
-        'ECCP',
-        'ElectricMonk',
-        'EMail Exractor',
-        'EmailWolf',
-        'Embed PHP Library',
-        'Embedly',
-        'europarchive\.org',
-        'EventMachine HttpClient',
-        'ExactSearch',
-        'ExaleadCloudview',
-        'ezooms',
-        'facebookexternalhit',
-        'facebookplatform',
-        'Feed Wrangler',
-        'Feedbin',
-        'FeedBurner',
-        'Feedfetcher-Google',
-        'Feedly',
-        'Feedspot',
-        'FeedValidator',
-        'Fever',
-        'findlink',
-        'findthatfile',
-        'Flamingo_SearchEngine',
-        'FlipboardProxy',
-        'fluffy',
-        'Funnelback',
-        'g00g1e\.net',
-        'Genieo',
-        'getprismatic\.com',
-        'GigablastOpenSource',
-        'Go-http-client',
-        'Google favicon',
-        'Google Keyword Suggestion',
-        'Google Page Speed Insights',
-        'Google Web Preview',
-        'Google-HTTP-Java-Client',
-        'Google-Site-Verification',
-        'google_partner_monitoring',
-        'GoogleProducer',
-        'Grammarly',
-        'grub-client',
-        'heritrix',
-        'Holmes',
-        'htdig',
-        'HTTPMon',
-        'http-kit',
-        'http_requester',
-        'httpunit',
-        'http_request2',
-        'httrack',
-        'HubPages.*crawlingpolicy',
-        'HubSpot Marketing Grader',
-        'ichiro',
-        'IDG Twitter Links Resolver',
-        'igdeSpyder',
-        'InAGist',
-        'infegy',
-        'InfoWizards Reciprocal Link System PRO',
-        'inpwrd\.com',
-        'integromedb',
-        'IODC',
-        'IOI',
-        'ips-agent',
-        'iZSearch',
-        '^Java\/',
-        'Jigsaw',
-        'Jobrapido',
-        'kouio',
-        'L\.webis',
-        'Larbin',
-        'libwww',
-        'Link Valet',
-        'linkCheck',
-        'linkdex',
-        'LinkExaminer',
-        'LinkWalker',
-        'Lipperhey',
-        'link checker',
-        'link validator',
-        'LongURL API',
-        'ltx71',
-        'lwp-trivial',
-        'lycos',
-        'mabontland',
-        'MagpieRSS',
-        'Mediapartners-Google',
-        'MegaIndex\.ru',
-        'MetaURI',
-        'MergeFlow-PageReader',
-        'Mnogosearch',
-        'mogimogi',
-        'Mojolicious (Perl)',
-        'Morning Paper',
-        'Mrcgiguy',
-        'MVAClient',
-        'Netcraft Web Server Survey',
-        'NetcraftSurveyAgent',
-        'NetLyzer FastProbe',
-        'netresearch',
-        'Netvibes',
-        'NewsBlur .*(Fetcher|Finder)',
-        'NewsGator',
-        'newsme',
-        'newspaper\/',
-        'NG-Search',
-        'nineconnections\.com',
-        'nominet\.org\.uk',
-        'Notifixious',
-        'nuhk',
-        'nutch',
-        'Nuzzel',
-        'Nymesis',
-        'oegp',
-        'Omea Reader',
-        'omgili',
-        'Orbiter',
-        'ow\.ly',
-        'Go [\d\.]* package http',
-        'page2rss',
-        'PagePeeker',
-        'panscient',
-        'Peew',
-        'PhantomJS\/',
-        'phpcrawl',
-        'phpservermon',
-        'Pingdom\.com',
-        'Pinterest',
-        'Pizilla',
-        'Ploetz \+ Zeller',
-        'Plukkie',
-        'PocketParser',
-        'Pompos',
-        'postano',
-        'PostPost',
-        'postrank',
-        'proximic',
-        'Pulsepoint XT3 web scraper',
-        'Python-httplib2',
-        'python-requests',
-        'Python-urllib',
-        'Qseero',
-        'Qwantify',
-        'Radian6',
-        'Readability',
-        'RebelMouse',
-        'RetrevoPageAnalyzer',
-        'Riddler',
-        'Robosourcer',
-        'ROI Hunter',
-        'Ruby',
-        'SalesIntelligent',
-        'SBIder',
-        'scooter',
-        'ScoutJet',
-        'ScoutURLMonitor',
-        'Scrapy',
-        'Scrubby',
-        'SearchSight',
-        'semanticdiscovery',
-        'SEOstats',
-        'Server Density Service Monitoring',
-        'servernfo\.com',
-        'Seznam screenshot-generator',
-        'ShopWiki',
-        'SilverReader',
-        'SimplePie',
-        'Site24x7',
-        'SiteBar',
-        'siteexplorer\.info',
-        'Siteimprove\.com',
-        'SkypeUriPreview',
-        'slider\.com',
-        'slurp',
-        'SMRF URL Expander',
-        'snapchat-proxy',
-        'Snappy',
-        'SNK Siteshooter B0t',
-        'sogou',
-        'SortSite',
-        'speedy',
-        'Spinn3r',
-        'Sqworm',
-        'StackRambler',
-        'Stratagems Kumo',
-        'summify',
-        'teoma',
-        'theoldreader\.com',
-        'TinEye',
-        'Tiny Tiny RSS',
-        'Traackr.com',
-        'truwoGPS',
-        'tweetedtimes\.com',
-        'Twikle',
-        'Typhoeus',
-        'ubermetrics-technologies',
-        'UdmSearch',
-        'UnwindFetchor',
-        'updated',
-        'URLChecker',
-        'urlresolver',
-        'Vagabondo',
-        'Validator\.nu\/LV',
-        'via ggpht\.com GoogleImageProxy',
-        'vkShare',
-        'Vortex',
-        'voyager\/',
-        'VYU2',
-        'W3C-checklink',
-        'W3C-mobileOK',
-        'W3C_CSS_Validator_JFouffa',
-        'W3C_I18n-Checker',
-        'W3C_Unicorn',
-        'W3C_Validator',
-        'Wappalyzer',
-        'WinHttpRequest',
-        'web-capture\.net',
-        'WebCapture',
-        'WebCorp',
-        'webcollage',
-        'WebIndex',
-        'WebFetch',
-        'webmon ',
-        'websitepulse[+ ]checker',
-        'Websquash\.com',
-        'WebThumbnail',
-        'WeSEE:Search',
-        'wf84',
-        'wget',
-        'WhatsApp',
-        'WomlpeFactory',
-        'WordPress\/',
-        'wotbox',
-        'wscheck',
-        'WWW-Mechanize',
-        'www\.monitor\.us',
-        'XaxisSemanticsClassifier',
-        'Xenu Link Sleuth',
-        'XML Sitemaps Generator',
-        'Y!J-ASR',
-        'yacy',
-        'Yahoo Ad monitoring',
-        'Yahoo Link Preview',
-        'YahooSeeker',
-        'yandex',
-        'yanga',
-        'yeti',
-        'yoogliFetchAgent',
-        'YottaaMonitor',
-        'Zao',
-        'zgrab',
-        'ZyBorg',
-        '[a-z0-9\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)',
-    );
+	/**
+	 * Array of regular expressions to match against the user agent.
+	 *
+	 * @var array
+	 */
+	protected $data = array(
+		'.*Java.*outbrain',
+		'008\/',
+		'^NING\/',
+		'A6-Indexer',
+		'Aboundex',
+		'Accoona-AI-Agent',
+		'acoon',
+		'AddThis',
+		'ADmantX',
+		'AHC',
+		'Airmail',
+		'alexa site audit',
+		'Anemone',
+		'Apache-HttpClient\/',
+		'Arachmo',
+		'archive-com',
+		'B-l-i-t-z-B-O-T',
+		'Backlink-Ceck\.de',
+		'baidu\.com',
+		'BazQux',
+		'bibnum\.bnf',
+		'biglotron',
+		'BingLocalSearch',
+		'BingPreview',
+		'binlar',
+		'Bloglovin',
+		'Blogtrottr',
+		'boitho\.com-dc',
+		'Browsershots',
+		'BUbiNG',
+		'Butterfly\/',
+		'BuzzSumo',
+		'CapsuleChecker',
+		'CC Metadata Scaper',
+		'Cerberian Drtrs',
+		'changedetection',
+		'Charlotte',
+		'clips\.ua\.ac\.be',
+		'CloudFlare-AlwaysOnline',
+		'coccoc',
+		'CommaFeed',
+		'Commons-HttpClient',
+		'convera',
+		'cosmos',
+		'corporatetwitnews',
+		'Covario-IDS',
+		'cron-job\.org',
+		'Curious George',
+		'curl',
+		'CyberPatrol',
+		'DataparkSearch',
+		'dataprovider',
+		'Daum(oa)?[ \/][0-9]',
+		'developers\.google\.com\/\+\/web\/snippet\/',
+		'Digg',
+		'DomainAppender',
+		'Dragonfly File Reader',
+		'drupact',
+		'EARTHCOM',
+		'ec2linkfinder',
+		'ECCP',
+		'ElectricMonk',
+		'EMail Exractor',
+		'EmailWolf',
+		'Embed PHP Library',
+		'Embedly',
+		'europarchive\.org',
+		'EventMachine HttpClient',
+		'ExactSearch',
+		'ExaleadCloudview',
+		'ezooms',
+		'facebookexternalhit',
+		'facebookplatform',
+		'Feed Wrangler',
+		'Feedbin',
+		'FeedBurner',
+		'Feedfetcher-Google',
+		'Feedly',
+		'Feedspot',
+		'FeedValidator',
+		'Fever',
+		'findlink',
+		'findthatfile',
+		'Flamingo_SearchEngine',
+		'FlipboardProxy',
+		'fluffy',
+		'Funnelback',
+		'g00g1e\.net',
+		'Genieo',
+		'getprismatic\.com',
+		'GigablastOpenSource',
+		'Go-http-client',
+		'Google favicon',
+		'Google Keyword Suggestion',
+		'Google Page Speed Insights',
+		'Google Web Preview',
+		'Google-HTTP-Java-Client',
+		'Google-Site-Verification',
+		'google_partner_monitoring',
+		'GoogleProducer',
+		'Grammarly',
+		'grub-client',
+		'heritrix',
+		'Holmes',
+		'htdig',
+		'HTTPMon',
+		'http-kit',
+		'http_requester',
+		'httpunit',
+		'http_request2',
+		'httrack',
+		'HubPages.*crawlingpolicy',
+		'HubSpot Marketing Grader',
+		'ichiro',
+		'IDG Twitter Links Resolver',
+		'igdeSpyder',
+		'InAGist',
+		'infegy',
+		'InfoWizards Reciprocal Link System PRO',
+		'inpwrd\.com',
+		'integromedb',
+		'IODC',
+		'IOI',
+		'ips-agent',
+		'iZSearch',
+		'^Java\/',
+		'Jigsaw',
+		'Jobrapido',
+		'kouio',
+		'L\.webis',
+		'Larbin',
+		'libwww',
+		'Link Valet',
+		'linkCheck',
+		'linkdex',
+		'LinkExaminer',
+		'LinkWalker',
+		'Lipperhey',
+		'link checker',
+		'link validator',
+		'LongURL API',
+		'ltx71',
+		'lwp-trivial',
+		'lycos',
+		'mabontland',
+		'MagpieRSS',
+		'Mediapartners-Google',
+		'MegaIndex\.ru',
+		'MetaURI',
+		'MergeFlow-PageReader',
+		'Mnogosearch',
+		'mogimogi',
+		'Mojolicious (Perl)',
+		'Morning Paper',
+		'Mrcgiguy',
+		'MVAClient',
+		'Netcraft Web Server Survey',
+		'NetcraftSurveyAgent',
+		'NetLyzer FastProbe',
+		'netresearch',
+		'Netvibes',
+		'NewsBlur .*(Fetcher|Finder)',
+		'NewsGator',
+		'newsme',
+		'newspaper\/',
+		'NG-Search',
+		'nineconnections\.com',
+		'nominet\.org\.uk',
+		'Notifixious',
+		'nuhk',
+		'nutch',
+		'Nuzzel',
+		'Nymesis',
+		'oegp',
+		'Omea Reader',
+		'omgili',
+		'Orbiter',
+		'ow\.ly',
+		'Go [\d\.]* package http',
+		'page2rss',
+		'PagePeeker',
+		'panscient',
+		'Peew',
+		'PhantomJS\/',
+		'phpcrawl',
+		'phpservermon',
+		'Pingdom\.com',
+		'Pinterest',
+		'Pizilla',
+		'Ploetz \+ Zeller',
+		'Plukkie',
+		'PocketParser',
+		'Pompos',
+		'postano',
+		'PostPost',
+		'postrank',
+		'proximic',
+		'Pulsepoint XT3 web scraper',
+		'Python-httplib2',
+		'python-requests',
+		'Python-urllib',
+		'Qseero',
+		'Qwantify',
+		'Radian6',
+		'Readability',
+		'RebelMouse',
+		'RetrevoPageAnalyzer',
+		'Riddler',
+		'Robosourcer',
+		'ROI Hunter',
+		'Ruby',
+		'SalesIntelligent',
+		'SBIder',
+		'scooter',
+		'ScoutJet',
+		'ScoutURLMonitor',
+		'Scrapy',
+		'Scrubby',
+		'SearchSight',
+		'semanticdiscovery',
+		'SEOstats',
+		'Server Density Service Monitoring',
+		'servernfo\.com',
+		'Seznam screenshot-generator',
+		'ShopWiki',
+		'SilverReader',
+		'SimplePie',
+		'Site24x7',
+		'SiteBar',
+		'siteexplorer\.info',
+		'Siteimprove\.com',
+		'SkypeUriPreview',
+		'slider\.com',
+		'slurp',
+		'SMRF URL Expander',
+		'snapchat-proxy',
+		'Snappy',
+		'SNK Siteshooter B0t',
+		'sogou',
+		'SortSite',
+		'speedy',
+		'Spinn3r',
+		'Sqworm',
+		'StackRambler',
+		'Stratagems Kumo',
+		'summify',
+		'teoma',
+		'theoldreader\.com',
+		'TinEye',
+		'Tiny Tiny RSS',
+		'Traackr.com',
+		'truwoGPS',
+		'tweetedtimes\.com',
+		'Twikle',
+		'Typhoeus',
+		'ubermetrics-technologies',
+		'UdmSearch',
+		'UnwindFetchor',
+		'updated',
+		'URLChecker',
+		'urlresolver',
+		'Vagabondo',
+		'Validator\.nu\/LV',
+		'via ggpht\.com GoogleImageProxy',
+		'vkShare',
+		'Vortex',
+		'voyager\/',
+		'VYU2',
+		'W3C-checklink',
+		'W3C-mobileOK',
+		'W3C_CSS_Validator_JFouffa',
+		'W3C_I18n-Checker',
+		'W3C_Unicorn',
+		'W3C_Validator',
+		'Wappalyzer',
+		'WinHttpRequest',
+		'web-capture\.net',
+		'WebCapture',
+		'WebCorp',
+		'webcollage',
+		'WebIndex',
+		'WebFetch',
+		'webmon ',
+		'websitepulse[+ ]checker',
+		'Websquash\.com',
+		'WebThumbnail',
+		'WeSEE:Search',
+		'wf84',
+		'wget',
+		'WhatsApp',
+		'WomlpeFactory',
+		'WordPress\/',
+		'wotbox',
+		'wscheck',
+		'WWW-Mechanize',
+		'www\.monitor\.us',
+		'XaxisSemanticsClassifier',
+		'Xenu Link Sleuth',
+		'XML Sitemaps Generator',
+		'Y!J-ASR',
+		'yacy',
+		'Yahoo Ad monitoring',
+		'Yahoo Link Preview',
+		'YahooSeeker',
+		'yandex',
+		'yanga',
+		'yeti',
+		'yoogliFetchAgent',
+		'YottaaMonitor',
+		'Zao',
+		'zgrab',
+		'ZyBorg',
+		'[a-z0-9\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)',
+	);
 }

Please login to merge, or discard this patch.

tests/UATests.php 1 patch

Indentation +40 added lines, -40 removed lines patch added patch discarded remove patch

@@ -14,55 +14,55 @@
 block discarded – undo
 
 class UserAgentTest extends PHPUnit_Framework_TestCase
 {
-    protected $CrawlerDetect;
+	protected $CrawlerDetect;
 
-    public function setUp()
-    {
-        $this->CrawlerDetect = new CrawlerDetect();
-    }
+	public function setUp()
+	{
+		$this->CrawlerDetect = new CrawlerDetect();
+	}
 
-    public function testBots()
-    {
-        $lines = file(__DIR__.'/crawlers.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
+	public function testBots()
+	{
+		$lines = file(__DIR__.'/crawlers.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
 
-        foreach ($lines as $line) {
-            $test = $this->CrawlerDetect->isCrawler($line);
-            $this->assertEquals($test, true, $line);
-        }
-    }
+		foreach ($lines as $line) {
+			$test = $this->CrawlerDetect->isCrawler($line);
+			$this->assertEquals($test, true, $line);
+		}
+	}
 
-    public function testDevices()
-    {
-        $lines = file(__DIR__.'/devices.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
+	public function testDevices()
+	{
+		$lines = file(__DIR__.'/devices.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
 
-        foreach ($lines as $line) {
-            $test = $this->CrawlerDetect->isCrawler($line);
-            $this->assertEquals($test, false, $line);
-        }
-    }
+		foreach ($lines as $line) {
+			$test = $this->CrawlerDetect->isCrawler($line);
+			$this->assertEquals($test, false, $line);
+		}
+	}
 
-    public function testReturnsCorrectMatchedBotName()
-    {
-        $test = $this->CrawlerDetect->isCrawler('Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)');
+	public function testReturnsCorrectMatchedBotName()
+	{
+		$test = $this->CrawlerDetect->isCrawler('Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)');
 
-        $matches = $this->CrawlerDetect->getMatches();
+		$matches = $this->CrawlerDetect->getMatches();
 
-        $this->assertEquals($this->CrawlerDetect->getMatches(), 'Yahoo Ad monitoring', $matches);
-    }
+		$this->assertEquals($this->CrawlerDetect->getMatches(), 'Yahoo Ad monitoring', $matches);
+	}
 
-    public function testForRegexCollision()
-    {
-        $crawlers = new Crawlers();
+	public function testForRegexCollision()
+	{
+		$crawlers = new Crawlers();
 
-        foreach ($crawlers->getAll() as $key1 => $regex) {
-            foreach ($crawlers->getAll() as $key2 => $compare) {
-                // Dont check this regex against itself
-                if ($key1 != $key2) {
-                    preg_match('/'.$regex.'/i', stripslashes($compare), $matches);
+		foreach ($crawlers->getAll() as $key1 => $regex) {
+			foreach ($crawlers->getAll() as $key2 => $compare) {
+				// Dont check this regex against itself
+				if ($key1 != $key2) {
+					preg_match('/'.$regex.'/i', stripslashes($compare), $matches);
 
-                    $this->assertEmpty($matches, $regex.' collided with '.$compare);
-                }
-            }
-        }
-    }
+					$this->assertEmpty($matches, $regex.' collided with '.$compare);
+				}
+			}
+		}
+	}
 }

Please login to merge, or discard this patch.

src/CrawlerDetect.php 1 patch

Indentation +174 added lines, -174 removed lines patch added patch discarded remove patch

@@ -16,178 +16,178 @@
 block discarded – undo
 
 class CrawlerDetect
 {
-    /**
-     * The user agent.
-     *
-     * @var null
-     */
-    protected $userAgent = null;
-
-    /**
-     * Headers that contain a user agent.
-     *
-     * @var array
-     */
-    protected $httpHeaders = array();
-
-    /**
-     * Store regex matches.
-     *
-     * @var array
-     */
-    protected $matches = array();
-
-    /**
-     * Crawlers object
-     * 
-     * @var Jaybizzle\CrawlerDetect\Fixtures\Crawlers
-     */
-    protected $crawlers;
-
-    /**
-     * Exclusions object
-     * 
-     * @var Jaybizzle\CrawlerDetect\Fixtures\Exclusions
-     */
-    protected $exclusions;
-
-    /**
-     * All possible HTTP headers that represent the
-     * User-Agent string.
-     *
-     * @var array
-     */
-    protected static $uaHttpHeaders = array(
-        // The default User-Agent string.
-        'HTTP_USER_AGENT',
-        // Header can occur on devices using Opera Mini.
-        'HTTP_X_OPERAMINI_PHONE_UA',
-        // Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
-        'HTTP_X_DEVICE_USER_AGENT',
-        'HTTP_X_ORIGINAL_USER_AGENT',
-        'HTTP_X_SKYFIRE_PHONE',
-        'HTTP_X_BOLT_PHONE_UA',
-        'HTTP_DEVICE_STOCK_UA',
-        'HTTP_X_UCBROWSER_DEVICE_UA',
-    );
-
-    /**
-     * Class constructor.
-     */
-    public function __construct(array $headers = null, $userAgent = null)
-    {
-        $this->setHttpHeaders($headers);
-        $this->setUserAgent($userAgent);
-        $this->crawlers = new Crawlers();
-        $this->exclusions = new Exclusions();
-    }
-
-    /**
-     * Set HTTP headers.
-     *
-     * @param array $httpHeaders
-     */
-    public function setHttpHeaders($httpHeaders = null)
-    {
-        // use global _SERVER if $httpHeaders aren't defined
-        if (!is_array($httpHeaders) || !count($httpHeaders)) {
-            $httpHeaders = $_SERVER;
-        }
-        // clear existing headers
-        $this->httpHeaders = array();
-        // Only save HTTP headers. In PHP land, that means only _SERVER vars that
-        // start with HTTP_.
-        foreach ($httpHeaders as $key => $value) {
-            if (substr($key, 0, 5) === 'HTTP_') {
-                $this->httpHeaders[$key] = $value;
-            }
-        }
-    }
-
-    /**
-     * Return user agent headers.
-     *
-     * @return array
-     */
-    public function getUaHttpHeaders()
-    {
-        return self::$uaHttpHeaders;
-    }
-
-    /**
-     * Set the user agent.
-     *
-     * @param string $userAgent
-     */
-    public function setUserAgent($userAgent = null)
-    {
-        if (false === empty($userAgent)) {
-            return $this->userAgent = $userAgent;
-        } else {
-            $this->userAgent = null;
-            foreach ($this->getUaHttpHeaders() as $altHeader) {
-                if (false === empty($this->httpHeaders[$altHeader])) { // @todo: should use getHttpHeader(), but it would be slow.
-                    $this->userAgent .= $this->httpHeaders[$altHeader].' ';
-                }
-            }
-
-            return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null);
-        }
-    }
-
-    /**
-     * Build the user agent regex.
-     *
-     * @return string
-     */
-    public function getRegex()
-    {
-        return '('.implode('|', $this->crawlers->getAll()).')';
-    }
-
-    /**
-     * Build the replacement regex.
-     *
-     * @return string
-     */
-    public function getExclusions()
-    {
-        return '('.implode('|', $this->exclusions->getAll()).')';
-    }
-
-    /**
-     * Check user agent string against the regex.
-     *
-     * @param string $userAgent
-     *
-     * @return bool
-     */
-    public function isCrawler($userAgent = null)
-    {
-        $agent = is_null($userAgent) ? $this->userAgent : $userAgent;
-
-        $agent = preg_replace('/'.$this->getExclusions().'/i', '', $agent);
-
-        if (trim($agent) === false) {
-            return false;
-        } else {
-            $result = preg_match('/'.$this->getRegex().'/i', trim($agent), $matches);
-        }
-
-        if ($matches) {
-            $this->matches = $matches;
-        }
-
-        return (bool) $result;
-    }
-
-    /**
-     * Return the matches.
-     *
-     * @return string
-     */
-    public function getMatches()
-    {
-        return $this->matches[0];
-    }
+	/**
+	 * The user agent.
+	 *
+	 * @var null
+	 */
+	protected $userAgent = null;
+
+	/**
+	 * Headers that contain a user agent.
+	 *
+	 * @var array
+	 */
+	protected $httpHeaders = array();
+
+	/**
+	 * Store regex matches.
+	 *
+	 * @var array
+	 */
+	protected $matches = array();
+
+	/**
+	 * Crawlers object
+	 * 
+	 * @var Jaybizzle\CrawlerDetect\Fixtures\Crawlers
+	 */
+	protected $crawlers;
+
+	/**
+	 * Exclusions object
+	 * 
+	 * @var Jaybizzle\CrawlerDetect\Fixtures\Exclusions
+	 */
+	protected $exclusions;
+
+	/**
+	 * All possible HTTP headers that represent the
+	 * User-Agent string.
+	 *
+	 * @var array
+	 */
+	protected static $uaHttpHeaders = array(
+		// The default User-Agent string.
+		'HTTP_USER_AGENT',
+		// Header can occur on devices using Opera Mini.
+		'HTTP_X_OPERAMINI_PHONE_UA',
+		// Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
+		'HTTP_X_DEVICE_USER_AGENT',
+		'HTTP_X_ORIGINAL_USER_AGENT',
+		'HTTP_X_SKYFIRE_PHONE',
+		'HTTP_X_BOLT_PHONE_UA',
+		'HTTP_DEVICE_STOCK_UA',
+		'HTTP_X_UCBROWSER_DEVICE_UA',
+	);
+
+	/**
+	 * Class constructor.
+	 */
+	public function __construct(array $headers = null, $userAgent = null)
+	{
+		$this->setHttpHeaders($headers);
+		$this->setUserAgent($userAgent);
+		$this->crawlers = new Crawlers();
+		$this->exclusions = new Exclusions();
+	}
+
+	/**
+	 * Set HTTP headers.
+	 *
+	 * @param array $httpHeaders
+	 */
+	public function setHttpHeaders($httpHeaders = null)
+	{
+		// use global _SERVER if $httpHeaders aren't defined
+		if (!is_array($httpHeaders) || !count($httpHeaders)) {
+			$httpHeaders = $_SERVER;
+		}
+		// clear existing headers
+		$this->httpHeaders = array();
+		// Only save HTTP headers. In PHP land, that means only _SERVER vars that
+		// start with HTTP_.
+		foreach ($httpHeaders as $key => $value) {
+			if (substr($key, 0, 5) === 'HTTP_') {
+				$this->httpHeaders[$key] = $value;
+			}
+		}
+	}
+
+	/**
+	 * Return user agent headers.
+	 *
+	 * @return array
+	 */
+	public function getUaHttpHeaders()
+	{
+		return self::$uaHttpHeaders;
+	}
+
+	/**
+	 * Set the user agent.
+	 *
+	 * @param string $userAgent
+	 */
+	public function setUserAgent($userAgent = null)
+	{
+		if (false === empty($userAgent)) {
+			return $this->userAgent = $userAgent;
+		} else {
+			$this->userAgent = null;
+			foreach ($this->getUaHttpHeaders() as $altHeader) {
+				if (false === empty($this->httpHeaders[$altHeader])) { // @todo: should use getHttpHeader(), but it would be slow.
+					$this->userAgent .= $this->httpHeaders[$altHeader].' ';
+				}
+			}
+
+			return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null);
+		}
+	}
+
+	/**
+	 * Build the user agent regex.
+	 *
+	 * @return string
+	 */
+	public function getRegex()
+	{
+		return '('.implode('|', $this->crawlers->getAll()).')';
+	}
+
+	/**
+	 * Build the replacement regex.
+	 *
+	 * @return string
+	 */
+	public function getExclusions()
+	{
+		return '('.implode('|', $this->exclusions->getAll()).')';
+	}
+
+	/**
+	 * Check user agent string against the regex.
+	 *
+	 * @param string $userAgent
+	 *
+	 * @return bool
+	 */
+	public function isCrawler($userAgent = null)
+	{
+		$agent = is_null($userAgent) ? $this->userAgent : $userAgent;
+
+		$agent = preg_replace('/'.$this->getExclusions().'/i', '', $agent);
+
+		if (trim($agent) === false) {
+			return false;
+		} else {
+			$result = preg_match('/'.$this->getRegex().'/i', trim($agent), $matches);
+		}
+
+		if ($matches) {
+			$this->matches = $matches;
+		}
+
+		return (bool) $result;
+	}
+
+	/**
+	 * Return the matches.
+	 *
+	 * @return string
+	 */
+	public function getMatches()
+	{
+		return $this->matches[0];
+	}
 }

Please login to merge, or discard this patch.

		@@ -11,7 +11,7 @@
		block discarded – undo
11	11	$dot = dirname(__FILE__);
12	12
13	13	if (!file_exists($composer = dirname($dot).'/vendor/autoload.php')) {
14		- throw new RuntimeException("Please run 'composer install' first to set up autoloading. $composer");
	14	+ throw new RuntimeException("Please run 'composer install' first to set up autoloading. $composer");
15	15	}
16	16	/** @var \Composer\Autoload\ClassLoader $autoloader */
17	17	$autoloader = include $composer;

		@@ -10,7 +10,7 @@
		block discarded – undo
10	10	*/
11	11	$dot = dirname(__FILE__);
12	12
13		-if (!file_exists($composer = dirname($dot).'/vendor/autoload.php')) {
	13	+if ( ! file_exists($composer = dirname($dot).'/vendor/autoload.php')) {
14	14	throw new RuntimeException("Please run 'composer install' first to set up autoloading. $composer");
15	15	}
16	16	/** @var \Composer\Autoload\ClassLoader $autoloader */

		@@ -13,8 +13,8 @@
		block discarded – undo
13	13
14	14	abstract class AbstractProvider
15	15	{
16		- public function getAll()
17		- {
18		- return $this->data;
19		- }
	16	+ public function getAll()
	17	+ {
	18	+ return $this->data;
	19	+ }
20	20	}

		@@ -13,324 +13,324 @@
		block discarded – undo
13	13
14	14	class Crawlers extends AbstractProvider
15	15	{
16		- /**
17		- * Array of regular expressions to match against the user agent.
18		- *
19		- * @var array
20		- */
21		- protected $data = array(
22		- '.*Java.*outbrain',
23		- '008\/',
24		- '^NING\/',
25		- 'A6-Indexer',
26		- 'Aboundex',
27		- 'Accoona-AI-Agent',
28		- 'acoon',
29		- 'AddThis',
30		- 'ADmantX',
31		- 'AHC',
32		- 'Airmail',
33		- 'alexa site audit',
34		- 'Anemone',
35		- 'Apache-HttpClient\/',
36		- 'Arachmo',
37		- 'archive-com',
38		- 'B-l-i-t-z-B-O-T',
39		- 'Backlink-Ceck\.de',
40		- 'baidu\.com',
41		- 'BazQux',
42		- 'bibnum\.bnf',
43		- 'biglotron',
44		- 'BingLocalSearch',
45		- 'BingPreview',
46		- 'binlar',
47		- 'Bloglovin',
48		- 'Blogtrottr',
49		- 'boitho\.com-dc',
50		- 'Browsershots',
51		- 'BUbiNG',
52		- 'Butterfly\/',
53		- 'BuzzSumo',
54		- 'CapsuleChecker',
55		- 'CC Metadata Scaper',
56		- 'Cerberian Drtrs',
57		- 'changedetection',
58		- 'Charlotte',
59		- 'clips\.ua\.ac\.be',
60		- 'CloudFlare-AlwaysOnline',
61		- 'coccoc',
62		- 'CommaFeed',
63		- 'Commons-HttpClient',
64		- 'convera',
65		- 'cosmos',
66		- 'corporatetwitnews',
67		- 'Covario-IDS',
68		- 'cron-job\.org',
69		- 'Curious George',
70		- 'curl',
71		- 'CyberPatrol',
72		- 'DataparkSearch',
73		- 'dataprovider',
74		- 'Daum(oa)?[ \/][0-9]',
75		- 'developers\.google\.com\/\+\/web\/snippet\/',
76		- 'Digg',
77		- 'DomainAppender',
78		- 'Dragonfly File Reader',
79		- 'drupact',
80		- 'EARTHCOM',
81		- 'ec2linkfinder',
82		- 'ECCP',
83		- 'ElectricMonk',
84		- 'EMail Exractor',
85		- 'EmailWolf',
86		- 'Embed PHP Library',
87		- 'Embedly',
88		- 'europarchive\.org',
89		- 'EventMachine HttpClient',
90		- 'ExactSearch',
91		- 'ExaleadCloudview',
92		- 'ezooms',
93		- 'facebookexternalhit',
94		- 'facebookplatform',
95		- 'Feed Wrangler',
96		- 'Feedbin',
97		- 'FeedBurner',
98		- 'Feedfetcher-Google',
99		- 'Feedly',
100		- 'Feedspot',
101		- 'FeedValidator',
102		- 'Fever',
103		- 'findlink',
104		- 'findthatfile',
105		- 'Flamingo_SearchEngine',
106		- 'FlipboardProxy',
107		- 'fluffy',
108		- 'Funnelback',
109		- 'g00g1e\.net',
110		- 'Genieo',
111		- 'getprismatic\.com',
112		- 'GigablastOpenSource',
113		- 'Go-http-client',
114		- 'Google favicon',
115		- 'Google Keyword Suggestion',
116		- 'Google Page Speed Insights',
117		- 'Google Web Preview',
118		- 'Google-HTTP-Java-Client',
119		- 'Google-Site-Verification',
120		- 'google_partner_monitoring',
121		- 'GoogleProducer',
122		- 'Grammarly',
123		- 'grub-client',
124		- 'heritrix',
125		- 'Holmes',
126		- 'htdig',
127		- 'HTTPMon',
128		- 'http-kit',
129		- 'http_requester',
130		- 'httpunit',
131		- 'http_request2',
132		- 'httrack',
133		- 'HubPages.*crawlingpolicy',
134		- 'HubSpot Marketing Grader',
135		- 'ichiro',
136		- 'IDG Twitter Links Resolver',
137		- 'igdeSpyder',
138		- 'InAGist',
139		- 'infegy',
140		- 'InfoWizards Reciprocal Link System PRO',
141		- 'inpwrd\.com',
142		- 'integromedb',
143		- 'IODC',
144		- 'IOI',
145		- 'ips-agent',
146		- 'iZSearch',
147		- '^Java\/',
148		- 'Jigsaw',
149		- 'Jobrapido',
150		- 'kouio',
151		- 'L\.webis',
152		- 'Larbin',
153		- 'libwww',
154		- 'Link Valet',
155		- 'linkCheck',
156		- 'linkdex',
157		- 'LinkExaminer',
158		- 'LinkWalker',
159		- 'Lipperhey',
160		- 'link checker',
161		- 'link validator',
162		- 'LongURL API',
163		- 'ltx71',
164		- 'lwp-trivial',
165		- 'lycos',
166		- 'mabontland',
167		- 'MagpieRSS',
168		- 'Mediapartners-Google',
169		- 'MegaIndex\.ru',
170		- 'MetaURI',
171		- 'MergeFlow-PageReader',
172		- 'Mnogosearch',
173		- 'mogimogi',
174		- 'Mojolicious (Perl)',
175		- 'Morning Paper',
176		- 'Mrcgiguy',
177		- 'MVAClient',
178		- 'Netcraft Web Server Survey',
179		- 'NetcraftSurveyAgent',
180		- 'NetLyzer FastProbe',
181		- 'netresearch',
182		- 'Netvibes',
183		- 'NewsBlur .*(Fetcher\|Finder)',
184		- 'NewsGator',
185		- 'newsme',
186		- 'newspaper\/',
187		- 'NG-Search',
188		- 'nineconnections\.com',
189		- 'nominet\.org\.uk',
190		- 'Notifixious',
191		- 'nuhk',
192		- 'nutch',
193		- 'Nuzzel',
194		- 'Nymesis',
195		- 'oegp',
196		- 'Omea Reader',
197		- 'omgili',
198		- 'Orbiter',
199		- 'ow\.ly',
200		- 'Go [\d\.]* package http',
201		- 'page2rss',
202		- 'PagePeeker',
203		- 'panscient',
204		- 'Peew',
205		- 'PhantomJS\/',
206		- 'phpcrawl',
207		- 'phpservermon',
208		- 'Pingdom\.com',
209		- 'Pinterest',
210		- 'Pizilla',
211		- 'Ploetz \+ Zeller',
212		- 'Plukkie',
213		- 'PocketParser',
214		- 'Pompos',
215		- 'postano',
216		- 'PostPost',
217		- 'postrank',
218		- 'proximic',
219		- 'Pulsepoint XT3 web scraper',
220		- 'Python-httplib2',
221		- 'python-requests',
222		- 'Python-urllib',
223		- 'Qseero',
224		- 'Qwantify',
225		- 'Radian6',
226		- 'Readability',
227		- 'RebelMouse',
228		- 'RetrevoPageAnalyzer',
229		- 'Riddler',
230		- 'Robosourcer',
231		- 'ROI Hunter',
232		- 'Ruby',
233		- 'SalesIntelligent',
234		- 'SBIder',
235		- 'scooter',
236		- 'ScoutJet',
237		- 'ScoutURLMonitor',
238		- 'Scrapy',
239		- 'Scrubby',
240		- 'SearchSight',
241		- 'semanticdiscovery',
242		- 'SEOstats',
243		- 'Server Density Service Monitoring',
244		- 'servernfo\.com',
245		- 'Seznam screenshot-generator',
246		- 'ShopWiki',
247		- 'SilverReader',
248		- 'SimplePie',
249		- 'Site24x7',
250		- 'SiteBar',
251		- 'siteexplorer\.info',
252		- 'Siteimprove\.com',
253		- 'SkypeUriPreview',
254		- 'slider\.com',
255		- 'slurp',
256		- 'SMRF URL Expander',
257		- 'snapchat-proxy',
258		- 'Snappy',
259		- 'SNK Siteshooter B0t',
260		- 'sogou',
261		- 'SortSite',
262		- 'speedy',
263		- 'Spinn3r',
264		- 'Sqworm',
265		- 'StackRambler',
266		- 'Stratagems Kumo',
267		- 'summify',
268		- 'teoma',
269		- 'theoldreader\.com',
270		- 'TinEye',
271		- 'Tiny Tiny RSS',
272		- 'Traackr.com',
273		- 'truwoGPS',
274		- 'tweetedtimes\.com',
275		- 'Twikle',
276		- 'Typhoeus',
277		- 'ubermetrics-technologies',
278		- 'UdmSearch',
279		- 'UnwindFetchor',
280		- 'updated',
281		- 'URLChecker',
282		- 'urlresolver',
283		- 'Vagabondo',
284		- 'Validator\.nu\/LV',
285		- 'via ggpht\.com GoogleImageProxy',
286		- 'vkShare',
287		- 'Vortex',
288		- 'voyager\/',
289		- 'VYU2',
290		- 'W3C-checklink',
291		- 'W3C-mobileOK',
292		- 'W3C_CSS_Validator_JFouffa',
293		- 'W3C_I18n-Checker',
294		- 'W3C_Unicorn',
295		- 'W3C_Validator',
296		- 'Wappalyzer',
297		- 'WinHttpRequest',
298		- 'web-capture\.net',
299		- 'WebCapture',
300		- 'WebCorp',
301		- 'webcollage',
302		- 'WebIndex',
303		- 'WebFetch',
304		- 'webmon ',
305		- 'websitepulse[+ ]checker',
306		- 'Websquash\.com',
307		- 'WebThumbnail',
308		- 'WeSEE:Search',
309		- 'wf84',
310		- 'wget',
311		- 'WhatsApp',
312		- 'WomlpeFactory',
313		- 'WordPress\/',
314		- 'wotbox',
315		- 'wscheck',
316		- 'WWW-Mechanize',
317		- 'www\.monitor\.us',
318		- 'XaxisSemanticsClassifier',
319		- 'Xenu Link Sleuth',
320		- 'XML Sitemaps Generator',
321		- 'Y!J-ASR',
322		- 'yacy',
323		- 'Yahoo Ad monitoring',
324		- 'Yahoo Link Preview',
325		- 'YahooSeeker',
326		- 'yandex',
327		- 'yanga',
328		- 'yeti',
329		- 'yoogliFetchAgent',
330		- 'YottaaMonitor',
331		- 'Zao',
332		- 'zgrab',
333		- 'ZyBorg',
334		- '[a-z0-9\-_]*((?<!cu)bot\|crawler\|archiver\|transcoder\|spider)',
335		- );
	16	+ /**
	17	+ * Array of regular expressions to match against the user agent.
	18	+ *
	19	+ * @var array
	20	+ */
	21	+ protected $data = array(
	22	+ '.Java.outbrain',
	23	+ '008\/',
	24	+ '^NING\/',
	25	+ 'A6-Indexer',
	26	+ 'Aboundex',
	27	+ 'Accoona-AI-Agent',
	28	+ 'acoon',
	29	+ 'AddThis',
	30	+ 'ADmantX',
	31	+ 'AHC',
	32	+ 'Airmail',
	33	+ 'alexa site audit',
	34	+ 'Anemone',
	35	+ 'Apache-HttpClient\/',
	36	+ 'Arachmo',
	37	+ 'archive-com',
	38	+ 'B-l-i-t-z-B-O-T',
	39	+ 'Backlink-Ceck\.de',
	40	+ 'baidu\.com',
	41	+ 'BazQux',
	42	+ 'bibnum\.bnf',
	43	+ 'biglotron',
	44	+ 'BingLocalSearch',
	45	+ 'BingPreview',
	46	+ 'binlar',
	47	+ 'Bloglovin',
	48	+ 'Blogtrottr',
	49	+ 'boitho\.com-dc',
	50	+ 'Browsershots',
	51	+ 'BUbiNG',
	52	+ 'Butterfly\/',
	53	+ 'BuzzSumo',
	54	+ 'CapsuleChecker',
	55	+ 'CC Metadata Scaper',
	56	+ 'Cerberian Drtrs',
	57	+ 'changedetection',
	58	+ 'Charlotte',
	59	+ 'clips\.ua\.ac\.be',
	60	+ 'CloudFlare-AlwaysOnline',
	61	+ 'coccoc',
	62	+ 'CommaFeed',
	63	+ 'Commons-HttpClient',
	64	+ 'convera',
	65	+ 'cosmos',
	66	+ 'corporatetwitnews',
	67	+ 'Covario-IDS',
	68	+ 'cron-job\.org',
	69	+ 'Curious George',
	70	+ 'curl',
	71	+ 'CyberPatrol',
	72	+ 'DataparkSearch',
	73	+ 'dataprovider',
	74	+ 'Daum(oa)?[ \/][0-9]',
	75	+ 'developers\.google\.com\/\+\/web\/snippet\/',
	76	+ 'Digg',
	77	+ 'DomainAppender',
	78	+ 'Dragonfly File Reader',
	79	+ 'drupact',
	80	+ 'EARTHCOM',
	81	+ 'ec2linkfinder',
	82	+ 'ECCP',
	83	+ 'ElectricMonk',
	84	+ 'EMail Exractor',
	85	+ 'EmailWolf',
	86	+ 'Embed PHP Library',
	87	+ 'Embedly',
	88	+ 'europarchive\.org',
	89	+ 'EventMachine HttpClient',
	90	+ 'ExactSearch',
	91	+ 'ExaleadCloudview',
	92	+ 'ezooms',
	93	+ 'facebookexternalhit',
	94	+ 'facebookplatform',
	95	+ 'Feed Wrangler',
	96	+ 'Feedbin',
	97	+ 'FeedBurner',
	98	+ 'Feedfetcher-Google',
	99	+ 'Feedly',
	100	+ 'Feedspot',
	101	+ 'FeedValidator',
	102	+ 'Fever',
	103	+ 'findlink',
	104	+ 'findthatfile',
	105	+ 'Flamingo_SearchEngine',
	106	+ 'FlipboardProxy',
	107	+ 'fluffy',
	108	+ 'Funnelback',
	109	+ 'g00g1e\.net',
	110	+ 'Genieo',
	111	+ 'getprismatic\.com',
	112	+ 'GigablastOpenSource',
	113	+ 'Go-http-client',
	114	+ 'Google favicon',
	115	+ 'Google Keyword Suggestion',
	116	+ 'Google Page Speed Insights',
	117	+ 'Google Web Preview',
	118	+ 'Google-HTTP-Java-Client',
	119	+ 'Google-Site-Verification',
	120	+ 'google_partner_monitoring',
	121	+ 'GoogleProducer',
	122	+ 'Grammarly',
	123	+ 'grub-client',
	124	+ 'heritrix',
	125	+ 'Holmes',
	126	+ 'htdig',
	127	+ 'HTTPMon',
	128	+ 'http-kit',
	129	+ 'http_requester',
	130	+ 'httpunit',
	131	+ 'http_request2',
	132	+ 'httrack',
	133	+ 'HubPages.*crawlingpolicy',
	134	+ 'HubSpot Marketing Grader',
	135	+ 'ichiro',
	136	+ 'IDG Twitter Links Resolver',
	137	+ 'igdeSpyder',
	138	+ 'InAGist',
	139	+ 'infegy',
	140	+ 'InfoWizards Reciprocal Link System PRO',
	141	+ 'inpwrd\.com',
	142	+ 'integromedb',
	143	+ 'IODC',
	144	+ 'IOI',
	145	+ 'ips-agent',
	146	+ 'iZSearch',
	147	+ '^Java\/',
	148	+ 'Jigsaw',
	149	+ 'Jobrapido',
	150	+ 'kouio',
	151	+ 'L\.webis',
	152	+ 'Larbin',
	153	+ 'libwww',
	154	+ 'Link Valet',
	155	+ 'linkCheck',
	156	+ 'linkdex',
	157	+ 'LinkExaminer',
	158	+ 'LinkWalker',
	159	+ 'Lipperhey',
	160	+ 'link checker',
	161	+ 'link validator',
	162	+ 'LongURL API',
	163	+ 'ltx71',
	164	+ 'lwp-trivial',
	165	+ 'lycos',
	166	+ 'mabontland',
	167	+ 'MagpieRSS',
	168	+ 'Mediapartners-Google',
	169	+ 'MegaIndex\.ru',
	170	+ 'MetaURI',
	171	+ 'MergeFlow-PageReader',
	172	+ 'Mnogosearch',
	173	+ 'mogimogi',
	174	+ 'Mojolicious (Perl)',
	175	+ 'Morning Paper',
	176	+ 'Mrcgiguy',
	177	+ 'MVAClient',
	178	+ 'Netcraft Web Server Survey',
	179	+ 'NetcraftSurveyAgent',
	180	+ 'NetLyzer FastProbe',
	181	+ 'netresearch',
	182	+ 'Netvibes',
	183	+ 'NewsBlur .*(Fetcher\|Finder)',
	184	+ 'NewsGator',
	185	+ 'newsme',
	186	+ 'newspaper\/',
	187	+ 'NG-Search',
	188	+ 'nineconnections\.com',
	189	+ 'nominet\.org\.uk',
	190	+ 'Notifixious',
	191	+ 'nuhk',
	192	+ 'nutch',
	193	+ 'Nuzzel',
	194	+ 'Nymesis',
	195	+ 'oegp',
	196	+ 'Omea Reader',
	197	+ 'omgili',
	198	+ 'Orbiter',
	199	+ 'ow\.ly',
	200	+ 'Go [\d\.]* package http',
	201	+ 'page2rss',
	202	+ 'PagePeeker',
	203	+ 'panscient',
	204	+ 'Peew',
	205	+ 'PhantomJS\/',
	206	+ 'phpcrawl',
	207	+ 'phpservermon',
	208	+ 'Pingdom\.com',
	209	+ 'Pinterest',
	210	+ 'Pizilla',
	211	+ 'Ploetz \+ Zeller',
	212	+ 'Plukkie',
	213	+ 'PocketParser',
	214	+ 'Pompos',
	215	+ 'postano',
	216	+ 'PostPost',
	217	+ 'postrank',
	218	+ 'proximic',
	219	+ 'Pulsepoint XT3 web scraper',
	220	+ 'Python-httplib2',
	221	+ 'python-requests',
	222	+ 'Python-urllib',
	223	+ 'Qseero',
	224	+ 'Qwantify',
	225	+ 'Radian6',
	226	+ 'Readability',
	227	+ 'RebelMouse',
	228	+ 'RetrevoPageAnalyzer',
	229	+ 'Riddler',
	230	+ 'Robosourcer',
	231	+ 'ROI Hunter',
	232	+ 'Ruby',
	233	+ 'SalesIntelligent',
	234	+ 'SBIder',
	235	+ 'scooter',
	236	+ 'ScoutJet',
	237	+ 'ScoutURLMonitor',
	238	+ 'Scrapy',
	239	+ 'Scrubby',
	240	+ 'SearchSight',
	241	+ 'semanticdiscovery',
	242	+ 'SEOstats',
	243	+ 'Server Density Service Monitoring',
	244	+ 'servernfo\.com',
	245	+ 'Seznam screenshot-generator',
	246	+ 'ShopWiki',
	247	+ 'SilverReader',
	248	+ 'SimplePie',
	249	+ 'Site24x7',
	250	+ 'SiteBar',
	251	+ 'siteexplorer\.info',
	252	+ 'Siteimprove\.com',
	253	+ 'SkypeUriPreview',
	254	+ 'slider\.com',
	255	+ 'slurp',
	256	+ 'SMRF URL Expander',
	257	+ 'snapchat-proxy',
	258	+ 'Snappy',
	259	+ 'SNK Siteshooter B0t',
	260	+ 'sogou',
	261	+ 'SortSite',
	262	+ 'speedy',
	263	+ 'Spinn3r',
	264	+ 'Sqworm',
	265	+ 'StackRambler',
	266	+ 'Stratagems Kumo',
	267	+ 'summify',
	268	+ 'teoma',
	269	+ 'theoldreader\.com',
	270	+ 'TinEye',
	271	+ 'Tiny Tiny RSS',
	272	+ 'Traackr.com',
	273	+ 'truwoGPS',
	274	+ 'tweetedtimes\.com',
	275	+ 'Twikle',
	276	+ 'Typhoeus',
	277	+ 'ubermetrics-technologies',
	278	+ 'UdmSearch',
	279	+ 'UnwindFetchor',
	280	+ 'updated',
	281	+ 'URLChecker',
	282	+ 'urlresolver',
	283	+ 'Vagabondo',
	284	+ 'Validator\.nu\/LV',
	285	+ 'via ggpht\.com GoogleImageProxy',
	286	+ 'vkShare',
	287	+ 'Vortex',
	288	+ 'voyager\/',
	289	+ 'VYU2',
	290	+ 'W3C-checklink',
	291	+ 'W3C-mobileOK',
	292	+ 'W3C_CSS_Validator_JFouffa',
	293	+ 'W3C_I18n-Checker',
	294	+ 'W3C_Unicorn',
	295	+ 'W3C_Validator',
	296	+ 'Wappalyzer',
	297	+ 'WinHttpRequest',
	298	+ 'web-capture\.net',
	299	+ 'WebCapture',
	300	+ 'WebCorp',
	301	+ 'webcollage',
	302	+ 'WebIndex',
	303	+ 'WebFetch',
	304	+ 'webmon ',
	305	+ 'websitepulse[+ ]checker',
	306	+ 'Websquash\.com',
	307	+ 'WebThumbnail',
	308	+ 'WeSEE:Search',
	309	+ 'wf84',
	310	+ 'wget',
	311	+ 'WhatsApp',
	312	+ 'WomlpeFactory',
	313	+ 'WordPress\/',
	314	+ 'wotbox',
	315	+ 'wscheck',
	316	+ 'WWW-Mechanize',
	317	+ 'www\.monitor\.us',
	318	+ 'XaxisSemanticsClassifier',
	319	+ 'Xenu Link Sleuth',
	320	+ 'XML Sitemaps Generator',
	321	+ 'Y!J-ASR',
	322	+ 'yacy',
	323	+ 'Yahoo Ad monitoring',
	324	+ 'Yahoo Link Preview',
	325	+ 'YahooSeeker',
	326	+ 'yandex',
	327	+ 'yanga',
	328	+ 'yeti',
	329	+ 'yoogliFetchAgent',
	330	+ 'YottaaMonitor',
	331	+ 'Zao',
	332	+ 'zgrab',
	333	+ 'ZyBorg',
	334	+ '[a-z0-9\-_]*((?<!cu)bot\|crawler\|archiver\|transcoder\|spider)',
	335	+ );
336	336	}

		@@ -14,55 +14,55 @@
		block discarded – undo
14	14
15	15	class UserAgentTest extends PHPUnit_Framework_TestCase
16	16	{
17		- protected $CrawlerDetect;
	17	+ protected $CrawlerDetect;
18	18
19		- public function setUp()
20		- {
21		- $this->CrawlerDetect = new CrawlerDetect();
22		- }
	19	+ public function setUp()
	20	+ {
	21	+ $this->CrawlerDetect = new CrawlerDetect();
	22	+ }
23	23
24		- public function testBots()
25		- {
26		- $lines = file(__DIR__.'/crawlers.txt', FILE_IGNORE_NEW_LINES \| FILE_SKIP_EMPTY_LINES);
	24	+ public function testBots()
	25	+ {
	26	+ $lines = file(__DIR__.'/crawlers.txt', FILE_IGNORE_NEW_LINES \| FILE_SKIP_EMPTY_LINES);
27	27
28		- foreach ($lines as $line) {
29		- $test = $this->CrawlerDetect->isCrawler($line);
30		- $this->assertEquals($test, true, $line);
31		- }
32		- }
	28	+ foreach ($lines as $line) {
	29	+ $test = $this->CrawlerDetect->isCrawler($line);
	30	+ $this->assertEquals($test, true, $line);
	31	+ }
	32	+ }
33	33
34		- public function testDevices()
35		- {
36		- $lines = file(__DIR__.'/devices.txt', FILE_IGNORE_NEW_LINES \| FILE_SKIP_EMPTY_LINES);
	34	+ public function testDevices()
	35	+ {
	36	+ $lines = file(__DIR__.'/devices.txt', FILE_IGNORE_NEW_LINES \| FILE_SKIP_EMPTY_LINES);
37	37
38		- foreach ($lines as $line) {
39		- $test = $this->CrawlerDetect->isCrawler($line);
40		- $this->assertEquals($test, false, $line);
41		- }
42		- }
	38	+ foreach ($lines as $line) {
	39	+ $test = $this->CrawlerDetect->isCrawler($line);
	40	+ $this->assertEquals($test, false, $line);
	41	+ }
	42	+ }
43	43
44		- public function testReturnsCorrectMatchedBotName()
45		- {
46		- $test = $this->CrawlerDetect->isCrawler('Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)');
	44	+ public function testReturnsCorrectMatchedBotName()
	45	+ {
	46	+ $test = $this->CrawlerDetect->isCrawler('Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)');
47	47
48		- $matches = $this->CrawlerDetect->getMatches();
	48	+ $matches = $this->CrawlerDetect->getMatches();
49	49
50		- $this->assertEquals($this->CrawlerDetect->getMatches(), 'Yahoo Ad monitoring', $matches);
51		- }
	50	+ $this->assertEquals($this->CrawlerDetect->getMatches(), 'Yahoo Ad monitoring', $matches);
	51	+ }
52	52
53		- public function testForRegexCollision()
54		- {
55		- $crawlers = new Crawlers();
	53	+ public function testForRegexCollision()
	54	+ {
	55	+ $crawlers = new Crawlers();
56	56
57		- foreach ($crawlers->getAll() as $key1 => $regex) {
58		- foreach ($crawlers->getAll() as $key2 => $compare) {
59		- // Dont check this regex against itself
60		- if ($key1 != $key2) {
61		- preg_match('/'.$regex.'/i', stripslashes($compare), $matches);
	57	+ foreach ($crawlers->getAll() as $key1 => $regex) {
	58	+ foreach ($crawlers->getAll() as $key2 => $compare) {
	59	+ // Dont check this regex against itself
	60	+ if ($key1 != $key2) {
	61	+ preg_match('/'.$regex.'/i', stripslashes($compare), $matches);
62	62
63		- $this->assertEmpty($matches, $regex.' collided with '.$compare);
64		- }
65		- }
66		- }
67		- }
	63	+ $this->assertEmpty($matches, $regex.' collided with '.$compare);
	64	+ }
	65	+ }
	66	+ }
	67	+ }
68	68	}

		@@ -16,178 +16,178 @@
		block discarded – undo
16	16
17	17	class CrawlerDetect
18	18	{
19		- /**
20		- * The user agent.
21		- *
22		- * @var null
23		- */
24		- protected $userAgent = null;
25		-
26		- /**
27		- * Headers that contain a user agent.
28		- *
29		- * @var array
30		- */
31		- protected $httpHeaders = array();
32		-
33		- /**
34		- * Store regex matches.
35		- *
36		- * @var array
37		- */
38		- protected $matches = array();
39		-
40		- /**
41		- * Crawlers object
42		- *
43		- * @var Jaybizzle\CrawlerDetect\Fixtures\Crawlers
44		- */
45		- protected $crawlers;
46		-
47		- /**
48		- * Exclusions object
49		- *
50		- * @var Jaybizzle\CrawlerDetect\Fixtures\Exclusions
51		- */
52		- protected $exclusions;
53		-
54		- /**
55		- * All possible HTTP headers that represent the
56		- * User-Agent string.
57		- *
58		- * @var array
59		- */
60		- protected static $uaHttpHeaders = array(
61		- // The default User-Agent string.
62		- 'HTTP_USER_AGENT',
63		- // Header can occur on devices using Opera Mini.
64		- 'HTTP_X_OPERAMINI_PHONE_UA',
65		- // Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
66		- 'HTTP_X_DEVICE_USER_AGENT',
67		- 'HTTP_X_ORIGINAL_USER_AGENT',
68		- 'HTTP_X_SKYFIRE_PHONE',
69		- 'HTTP_X_BOLT_PHONE_UA',
70		- 'HTTP_DEVICE_STOCK_UA',
71		- 'HTTP_X_UCBROWSER_DEVICE_UA',
72		- );
73		-
74		- /**
75		- * Class constructor.
76		- */
77		- public function __construct(array $headers = null, $userAgent = null)
78		- {
79		- $this->setHttpHeaders($headers);
80		- $this->setUserAgent($userAgent);
81		- $this->crawlers = new Crawlers();
82		- $this->exclusions = new Exclusions();
83		- }
84		-
85		- /**
86		- * Set HTTP headers.
87		- *
88		- * @param array $httpHeaders
89		- */
90		- public function setHttpHeaders($httpHeaders = null)
91		- {
92		- // use global _SERVER if $httpHeaders aren't defined
93		- if (!is_array($httpHeaders) \|\| !count($httpHeaders)) {
94		- $httpHeaders = $_SERVER;
95		- }
96		- // clear existing headers
97		- $this->httpHeaders = array();
98		- // Only save HTTP headers. In PHP land, that means only _SERVER vars that
99		- // start with HTTP_.
100		- foreach ($httpHeaders as $key => $value) {
101		- if (substr($key, 0, 5) === 'HTTP_') {
102		- $this->httpHeaders[$key] = $value;
103		- }
104		- }
105		- }
106		-
107		- /**
108		- * Return user agent headers.
109		- *
110		- * @return array
111		- */
112		- public function getUaHttpHeaders()
113		- {
114		- return self::$uaHttpHeaders;
115		- }
116		-
117		- /**
118		- * Set the user agent.
119		- *
120		- * @param string $userAgent
121		- */
122		- public function setUserAgent($userAgent = null)
123		- {
124		- if (false === empty($userAgent)) {
125		- return $this->userAgent = $userAgent;
126		- } else {
127		- $this->userAgent = null;
128		- foreach ($this->getUaHttpHeaders() as $altHeader) {
129		- if (false === empty($this->httpHeaders[$altHeader])) { // @todo: should use getHttpHeader(), but it would be slow.
130		- $this->userAgent .= $this->httpHeaders[$altHeader].' ';
131		- }
132		- }
133		-
134		- return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null);
135		- }
136		- }
137		-
138		- /**
139		- * Build the user agent regex.
140		- *
141		- * @return string
142		- */
143		- public function getRegex()
144		- {
145		- return '('.implode('\|', $this->crawlers->getAll()).')';
146		- }
147		-
148		- /**
149		- * Build the replacement regex.
150		- *
151		- * @return string
152		- */
153		- public function getExclusions()
154		- {
155		- return '('.implode('\|', $this->exclusions->getAll()).')';
156		- }
157		-
158		- /**
159		- * Check user agent string against the regex.
160		- *
161		- * @param string $userAgent
162		- *
163		- * @return bool
164		- */
165		- public function isCrawler($userAgent = null)
166		- {
167		- $agent = is_null($userAgent) ? $this->userAgent : $userAgent;
168		-
169		- $agent = preg_replace('/'.$this->getExclusions().'/i', '', $agent);
170		-
171		- if (trim($agent) === false) {
172		- return false;
173		- } else {
174		- $result = preg_match('/'.$this->getRegex().'/i', trim($agent), $matches);
175		- }
176		-
177		- if ($matches) {
178		- $this->matches = $matches;
179		- }
180		-
181		- return (bool) $result;
182		- }
183		-
184		- /**
185		- * Return the matches.
186		- *
187		- * @return string
188		- */
189		- public function getMatches()
190		- {
191		- return $this->matches[0];
192		- }
	19	+ /**
	20	+ * The user agent.
	21	+ *
	22	+ * @var null
	23	+ */
	24	+ protected $userAgent = null;
	25	+
	26	+ /**
	27	+ * Headers that contain a user agent.
	28	+ *
	29	+ * @var array
	30	+ */
	31	+ protected $httpHeaders = array();
	32	+
	33	+ /**
	34	+ * Store regex matches.
	35	+ *
	36	+ * @var array
	37	+ */
	38	+ protected $matches = array();
	39	+
	40	+ /**
	41	+ * Crawlers object
	42	+ *
	43	+ * @var Jaybizzle\CrawlerDetect\Fixtures\Crawlers
	44	+ */
	45	+ protected $crawlers;
	46	+
	47	+ /**
	48	+ * Exclusions object
	49	+ *
	50	+ * @var Jaybizzle\CrawlerDetect\Fixtures\Exclusions
	51	+ */
	52	+ protected $exclusions;
	53	+
	54	+ /**
	55	+ * All possible HTTP headers that represent the
	56	+ * User-Agent string.
	57	+ *
	58	+ * @var array
	59	+ */
	60	+ protected static $uaHttpHeaders = array(
	61	+ // The default User-Agent string.
	62	+ 'HTTP_USER_AGENT',
	63	+ // Header can occur on devices using Opera Mini.
	64	+ 'HTTP_X_OPERAMINI_PHONE_UA',
	65	+ // Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
	66	+ 'HTTP_X_DEVICE_USER_AGENT',
	67	+ 'HTTP_X_ORIGINAL_USER_AGENT',
	68	+ 'HTTP_X_SKYFIRE_PHONE',
	69	+ 'HTTP_X_BOLT_PHONE_UA',
	70	+ 'HTTP_DEVICE_STOCK_UA',
	71	+ 'HTTP_X_UCBROWSER_DEVICE_UA',
	72	+ );
	73	+
	74	+ /**
	75	+ * Class constructor.
	76	+ */
	77	+ public function __construct(array $headers = null, $userAgent = null)
	78	+ {
	79	+ $this->setHttpHeaders($headers);
	80	+ $this->setUserAgent($userAgent);
	81	+ $this->crawlers = new Crawlers();
	82	+ $this->exclusions = new Exclusions();
	83	+ }
	84	+
	85	+ /**
	86	+ * Set HTTP headers.
	87	+ *
	88	+ * @param array $httpHeaders
	89	+ */
	90	+ public function setHttpHeaders($httpHeaders = null)
	91	+ {
	92	+ // use global _SERVER if $httpHeaders aren't defined
	93	+ if (!is_array($httpHeaders) \|\| !count($httpHeaders)) {
	94	+ $httpHeaders = $_SERVER;
	95	+ }
	96	+ // clear existing headers
	97	+ $this->httpHeaders = array();
	98	+ // Only save HTTP headers. In PHP land, that means only _SERVER vars that
	99	+ // start with HTTP_.
	100	+ foreach ($httpHeaders as $key => $value) {
	101	+ if (substr($key, 0, 5) === 'HTTP_') {
	102	+ $this->httpHeaders[$key] = $value;
	103	+ }
	104	+ }
	105	+ }
	106	+
	107	+ /**
	108	+ * Return user agent headers.
	109	+ *
	110	+ * @return array
	111	+ */
	112	+ public function getUaHttpHeaders()
	113	+ {
	114	+ return self::$uaHttpHeaders;
	115	+ }
	116	+
	117	+ /**
	118	+ * Set the user agent.
	119	+ *
	120	+ * @param string $userAgent
	121	+ */
	122	+ public function setUserAgent($userAgent = null)
	123	+ {
	124	+ if (false === empty($userAgent)) {
	125	+ return $this->userAgent = $userAgent;
	126	+ } else {
	127	+ $this->userAgent = null;
	128	+ foreach ($this->getUaHttpHeaders() as $altHeader) {
	129	+ if (false === empty($this->httpHeaders[$altHeader])) { // @todo: should use getHttpHeader(), but it would be slow.
	130	+ $this->userAgent .= $this->httpHeaders[$altHeader].' ';
	131	+ }
	132	+ }
	133	+
	134	+ return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null);
	135	+ }
	136	+ }
	137	+
	138	+ /**
	139	+ * Build the user agent regex.
	140	+ *
	141	+ * @return string
	142	+ */
	143	+ public function getRegex()
	144	+ {
	145	+ return '('.implode('\|', $this->crawlers->getAll()).')';
	146	+ }
	147	+
	148	+ /**
	149	+ * Build the replacement regex.
	150	+ *
	151	+ * @return string
	152	+ */
	153	+ public function getExclusions()
	154	+ {
	155	+ return '('.implode('\|', $this->exclusions->getAll()).')';
	156	+ }
	157	+
	158	+ /**
	159	+ * Check user agent string against the regex.
	160	+ *
	161	+ * @param string $userAgent
	162	+ *
	163	+ * @return bool
	164	+ */
	165	+ public function isCrawler($userAgent = null)
	166	+ {
	167	+ $agent = is_null($userAgent) ? $this->userAgent : $userAgent;
	168	+
	169	+ $agent = preg_replace('/'.$this->getExclusions().'/i', '', $agent);
	170	+
	171	+ if (trim($agent) === false) {
	172	+ return false;
	173	+ } else {
	174	+ $result = preg_match('/'.$this->getRegex().'/i', trim($agent), $matches);
	175	+ }
	176	+
	177	+ if ($matches) {
	178	+ $this->matches = $matches;
	179	+ }
	180	+
	181	+ return (bool) $result;
	182	+ }
	183	+
	184	+ /**
	185	+ * Return the matches.
	186	+ *
	187	+ * @return string
	188	+ */
	189	+ public function getMatches()
	190	+ {
	191	+ return $this->matches[0];
	192	+ }
193	193	}

JayBizzle / Crawler-Detect

Pull Request — master (#89)

Status

Category

Indentation +1 added lines, -1 removed lines patch added patch discarded remove patch

Spacing +1 added lines, -1 removed lines patch added patch discarded remove patch

Indentation +49 added lines, -49 removed lines patch added patch discarded remove patch

Indentation +4 added lines, -4 removed lines patch added patch discarded remove patch

Indentation +320 added lines, -320 removed lines patch added patch discarded remove patch

Indentation +40 added lines, -40 removed lines patch added patch discarded remove patch

Indentation +174 added lines, -174 removed lines patch added patch discarded remove patch