Completed
Push — master ( 0b30d3...a2728a )
by Mark
12s
created

src/CrawlerDetect.php (1 issue)

Severity

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis. Consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
/*
4
 * This file is part of Crawler Detect - the web crawler detection library.
5
 *
6
 * (c) Mark Beech <[email protected]>
7
 *
8
 * This source file is subject to the MIT license that is bundled
9
 * with this source code in the file LICENSE.
10
 */
11
12
namespace Jaybizzle\CrawlerDetect;
13
14
use Jaybizzle\CrawlerDetect\Fixtures\Headers;
15
use Jaybizzle\CrawlerDetect\Fixtures\Crawlers;
16
use Jaybizzle\CrawlerDetect\Fixtures\Exclusions;
17
18
class CrawlerDetect
19
{
20
    /**
21
     * The user agent string that isCrawler() inspects when called without one.
22
     *
23
     * @var string|null
24
     */
25
    protected $userAgent = null;
26
27
    /**
28
     * The HTTP_* entries retained by setHttpHeaders(), keyed by header name.
29
     *
30
     * @var array
31
     */
32
    protected $httpHeaders = array();
33
34
    /**
35
     * Regex matches recorded by isCrawler(); getMatches() exposes element 0.
36
     *
37
     * @var array
38
     */
39
    protected $matches = array();
40
41
    /**
42
     * Crawlers fixture; its getAll() list is compiled into $compiledRegex.
43
     *
44
     * @var \Jaybizzle\CrawlerDetect\Fixtures\Crawlers
45
     */
46
    protected $crawlers;
47
48
    /**
49
     * Exclusions fixture; its getAll() list is compiled into $compiledExclusions.
50
     *
51
     * @var \Jaybizzle\CrawlerDetect\Fixtures\Exclusions
52
     */
53
    protected $exclusions;
54
55
    /**
56
     * Headers fixture; its getAll() list names the header keys that
     * setUserAgent() looks up in $httpHeaders.
57
     *
58
     * @var \Jaybizzle\CrawlerDetect\Fixtures\Headers
59
     */
60
    protected $uaHttpHeaders;
61
62
    /**
63
     * Crawler patterns joined by compileRegex() into one alternation group
     * (without delimiters; isCrawler() adds them).
64
     *
65
     * @var string
66
     */
67
    protected $compiledRegex;
68
69
    /**
70
     * Exclusion patterns joined by compileRegex() into one alternation group
     * (without delimiters; isCrawler() adds them).
71
     *
72
     * @var string
73
     */
74
    protected $compiledExclusions;
75
76
    /**
     * Build a detector: load the fixture lists, compile their patterns and
     * resolve the headers and user agent to inspect.
     *
     * NOTE(review): the "array $headers = null" form relies on implicit
     * nullability, which newer PHP releases deprecate; kept as-is so the
     * signature stays unchanged for existing callers.
     *
     * @param array|null  $headers   Server-style header map, handed to setHttpHeaders().
     * @param string|null $userAgent Explicit user agent, handed to setUserAgent().
     */
    public function __construct(array $headers = null, $userAgent = null)
    {
        // Fixture objects that supply the pattern and header-name lists.
        $this->crawlers = new Crawlers();
        $this->exclusions = new Exclusions();
        $this->uaHttpHeaders = new Headers();

        // Collapse each pattern list into a single alternation group up front.
        $crawlerPatterns = $this->crawlers->getAll();
        $this->compiledRegex = $this->compileRegex($crawlerPatterns);

        $exclusionPatterns = $this->exclusions->getAll();
        $this->compiledExclusions = $this->compileRegex($exclusionPatterns);

        // Headers go first: setUserAgent() reads them when no explicit
        // user agent was supplied.
        $this->setHttpHeaders($headers);

        $resolvedAgent = $this->setUserAgent($userAgent);
        $this->userAgent = $resolvedAgent;
    }
91
92
    /**
     * Join a list of regex fragments into one parenthesised alternation.
     *
     * The fragments are used verbatim (no quoting) and no delimiters are
     * added; the result is just "(" + fragments joined by "|" + ")".
     *
     * @param array $patterns Regex fragments to combine.
     *
     * @return string
     */
    public function compileRegex($patterns)
    {
        $alternation = implode('|', $patterns);

        return '('.$alternation.')';
    }
103
104
    /**
105
     * Replace the stored HTTP headers with the HTTP_* entries of the given map.
     *
     * When $httpHeaders is not an array, or is an empty array, the global
     * $_SERVER array is used as the source instead. Any previously stored
     * headers are discarded, and only entries whose key begins with "HTTP_"
     * are kept.
106
     *
107
     * @param array|null $httpHeaders Server-style map of header key => value.
108
     */
109
    public function setHttpHeaders($httpHeaders)
0 ignored issues
show
setHttpHeaders uses the super-global variable $_SERVER which is generally not recommended.

Instead of super-globals, we recommend to explicitly inject the dependencies of your class. This makes your code less dependent on global state and it becomes generally more testable:

// Bad
class Router
{
    public function generate($path)
    {
        return $_SERVER['HOST'].$path;
    }
}

// Better
class Router
{
    private $host;

    public function __construct($host)
    {
        $this->host = $host;
    }

    public function generate($path)
    {
        return $this->host.$path;
    }
}

class Controller
{
    public function myAction(Request $request)
    {
        // Instead of
        $page = isset($_GET['page']) ? intval($_GET['page']) : 1;

        // Better (assuming you use the Symfony2 request)
        $page = $request->query->get('page', 1);
    }
}
Loading history...
110
    {
111
        // Fall back to the global $_SERVER when $httpHeaders is not a non-empty array.
112
        if (! is_array($httpHeaders) || ! count($httpHeaders)) {
113
            $httpHeaders = $_SERVER;
114
        }
115
116
        // Start from a clean slate so headers from an earlier call never linger.
117
        $this->httpHeaders = array();
118
119
        // Only save HTTP headers. In PHP land, that means
120
        // only _SERVER vars that start with HTTP_ (prefix at offset 0).
121
        foreach ($httpHeaders as $key => $value) {
122
            if (strpos($key, 'HTTP_') === 0) {
123
                $this->httpHeaders[$key] = $value;
124
            }
125
        }
126
    }
127
128
    /**
     * List the header keys that may carry a user agent.
     *
     * This simply forwards to getAll() on the Headers fixture held in
     * $uaHttpHeaders.
     *
     * @return array
     */
    public function getUaHttpHeaders()
    {
        $headerFixture = $this->uaHttpHeaders;

        return $headerFixture->getAll();
    }
137
138
    /**
     * Set the user agent to inspect and return it.
     *
     * When $userAgent is null, the agent is assembled from the stored HTTP
     * headers: for every key reported by getUaHttpHeaders() that is present
     * in $httpHeaders, the header value plus a trailing space is appended.
     * If none of those headers is present the result stays null.
     *
     * The resolved value is stored on the instance, so calling this method
     * directly changes what isCrawler() inspects by default.
     *
     * @param string|null $userAgent Explicit user agent, or null to derive it from the headers.
     *
     * @return string|null The user agent that was stored.
     */
    public function setUserAgent($userAgent)
    {
        if (is_null($userAgent)) {
            foreach ($this->getUaHttpHeaders() as $altHeader) {
                if (isset($this->httpHeaders[$altHeader])) {
                    $userAgent .= $this->httpHeaders[$altHeader].' ';
                }
            }
        }

        // Persist the value as well as returning it: a "setter" that only
        // returns leaves the instance unchanged for callers that ignore the
        // return value.
        $this->userAgent = $userAgent;

        return $userAgent;
    }
155
156
    /**
     * Check a user agent string against the compiled crawler regex.
     *
     * The agent is $userAgent when it is truthy, otherwise the agent stored
     * on the instance. Text matching the compiled exclusions regex is removed
     * first; if nothing but whitespace remains the agent is not a crawler.
     * Both regexes are applied case-insensitively.
     *
     * The stored matches are reset on every call, so getMatches() reflects
     * this call only and never a hit left over from an earlier one.
     *
     * @param string|null $userAgent Agent to test, or null to use the stored one.
     *
     * @return bool True when the crawler regex matches the remaining text.
     */
    public function isCrawler($userAgent = null)
    {
        // Drop any hit recorded by a previous call before doing anything else.
        $this->matches = array();

        $agent = $userAgent ?: $this->userAgent;

        // The stored agent may be null (no user-agent header was found) and
        // preg_replace() yields null on a PCRE error; the casts turn both
        // into an empty string, which is reported as "not a crawler" below.
        $agent = (string) preg_replace('/'.$this->compiledExclusions.'/i', '', (string) $agent);
        $agent = trim($agent);

        if ($agent === '') {
            return false;
        }

        $result = preg_match('/'.$this->compiledRegex.'/i', $agent, $matches);

        if ($result) {
            $this->matches = $matches;
        }

        return (bool) $result;
    }
181
182
    /**
     * Return the matched crawler text.
     *
     * Only element 0 of the stored regex matches (the full matched text) is
     * exposed; null is returned when no match is stored.
     *
     * @return string|null
     */
    public function getMatches()
    {
        if (! isset($this->matches[0])) {
            return null;
        }

        return $this->matches[0];
    }
191
}
192