whallysson /
crawlerdetect
| 1 | <?php |
||
| 2 | |||
| 3 | namespace CodeBlog\CrawlerDetect; |
||
| 4 | |||
| 5 | /** |
||
| 6 | * Class CodeBlog CrawlerDetect |
||
| 7 | * |
||
| 8 | * @author Whallysson Avelino <https://github.com/whallysson> |
||
| 9 | * @package CodeBlog\CrawlerDetect |
||
| 10 | */ |
||
| 11 | |||
| 12 | use CodeBlog\CrawlerDetect\Fixtures\Headers; |
||
| 13 | use CodeBlog\CrawlerDetect\Fixtures\Crawlers; |
||
| 14 | use CodeBlog\CrawlerDetect\Fixtures\Exclusions; |
||
| 15 | |||
/**
 * Detects crawlers by matching a user agent string against two compiled
 * regular expressions: one built from the Crawlers fixture and one built
 * from the Exclusions fixture.
 */
class CrawlerDetect {

    /** @var string|null User agent tested when isCrawler() gets no argument. */
    protected $userAgent = null;

    /** @var array Headers kept by setHttpHeaders() (keys starting with HTTP_). */
    protected $httpHeaders = [];

    /** @var array Result of the most recent preg_match() run by isCrawler(). */
    protected $matches = [];

    /** @var \CodeBlog\CrawlerDetect\Fixtures\Crawlers */
    protected $crawlers;

    /** @var \CodeBlog\CrawlerDetect\Fixtures\Exclusions */
    protected $exclusions;

    /** @var \CodeBlog\CrawlerDetect\Fixtures\Headers */
    protected $uaHttpHeaders;

    /** @var string Alternation group compiled from the crawler patterns. */
    protected $compiledRegex;

    /** @var string Alternation group compiled from the exclusion patterns. */
    protected $compiledExclusions;

    /**
     * Class constructor.
     *
     * Loads the fixtures, compiles both regexes, then stores the headers
     * and the user agent.
     *
     * @param array|null  $headers   Headers to inspect; see setHttpHeaders() for the fallback.
     * @param string|null $userAgent User agent to test; null derives it from the headers.
     */
    public function __construct(array $headers = null, $userAgent = null) {
        $this->crawlers = new Crawlers();
        $this->exclusions = new Exclusions();
        $this->uaHttpHeaders = new Headers();

        $this->compiledRegex = $this->compileRegex($this->crawlers->getAll());
        $this->compiledExclusions = $this->compileRegex($this->exclusions->getAll());

        // Headers must be stored first: setUserAgent(null) reads them.
        $this->setHttpHeaders($headers);
        $this->setUserAgent($userAgent);
    }

    /**
     * Compile the regex patterns into one regex string.
     *
     * The patterns are joined with "|" and wrapped in a single group. No
     * delimiters are added and the patterns are not escaped here.
     *
     * @param array $patterns
     *
     * @return string
     */
    public function compileRegex(array $patterns) {
        return '(' . implode('|', $patterns) . ')';
    }

    /**
     * Set HTTP headers.
     *
     * Replaces any previously stored headers. Only entries whose key starts
     * with "HTTP_" are kept.
     *
     * @param array|null $httpHeaders Falls back to $_SERVER when not a non-empty array.
     */
    public function setHttpHeaders($httpHeaders = null) {
        // Use global _SERVER if $httpHeaders aren't defined.
        if (!is_array($httpHeaders) || !count($httpHeaders)) {
            $httpHeaders = $_SERVER;
        }

        // Clear existing headers.
        $this->httpHeaders = [];

        // Only save HTTP headers. In PHP land, that means
        // only _SERVER vars that start with HTTP_.
        foreach ($httpHeaders as $key => $value) {
            if (strpos($key, 'HTTP_') === 0) {
                $this->httpHeaders[$key] = $value;
            }
        }
    }

    /**
     * Return user agent headers.
     *
     * @return array Whatever the Headers fixture's getAll() returns.
     */
    public function getUaHttpHeaders() {
        return $this->uaHttpHeaders->getAll();
    }

    /**
     * Set the user agent.
     *
     * When null is given, the user agent is assembled from the stored HTTP
     * headers: the value of every header named by getUaHttpHeaders() that is
     * present is appended, each followed by a single space. If none of them
     * is present the user agent stays null.
     *
     * @param string|null $userAgent
     *
     * @return string|null The user agent that was stored.
     */
    public function setUserAgent($userAgent) {
        if (is_null($userAgent)) {
            foreach ($this->getUaHttpHeaders() as $altHeader) {
                if (isset($this->httpHeaders[$altHeader])) {
                    $userAgent .= $this->httpHeaders[$altHeader] . ' ';
                }
            }
        }

        return $this->userAgent = $userAgent;
    }

    /**
     * Check user agent string against the regex.
     *
     * Exclusion patterns are stripped from the agent first; what remains is
     * matched case-insensitively against the crawler patterns. The match (if
     * any) is available afterwards through getMatches().
     *
     * @param string|null $userAgent Agent to test; a falsy value uses the stored user agent.
     *
     * @return bool True when the remaining agent text matches a crawler pattern.
     */
    public function isCrawler(string $userAgent = null) {
        // Forget the previous result so getMatches() never reports a stale match.
        $this->matches = [];

        // The cast keeps a missing agent from reaching preg_replace() as null,
        // which is deprecated since PHP 8.1.
        $subject = (string) ($userAgent ?: $this->userAgent);

        // preg_replace() returns null on a PCRE error; treat that as an empty agent.
        $agent = trim((string) preg_replace("/{$this->compiledExclusions}/i", '', $subject));

        if ($agent === '') {
            return false;
        }

        return (bool) preg_match("/{$this->compiledRegex}/i", $agent, $this->matches);
    }

    /**
     * Return the matches.
     *
     * @return string|null Text matched by the last isCrawler() call, or null if it did not match.
     */
    public function getMatches() {
        return $this->matches[0] ?? null;
    }

}
||
| 154 |