AccessLogParser   A
last analyzed

Complexity

Total Complexity 18

Size/Duplication

Total Lines 252
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 3

Test Coverage

Coverage 100%

Importance

Changes 48
Bugs 7 Features 22
Metric Value
wmc 18
c 48
b 7
f 22
lcom 1
cbo 3
dl 0
loc 252
rs 10
ccs 106
cts 106
cp 1

6 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
C prepareParsedData() 0 26 8
B getPattern() 0 25 3
A getQuotedFormatString() 0 19 3
A getSimplePatterns() 0 71 1
A getCallbackPatterns() 0 47 2
1
<?php
2
3
/*
4
 * (c) Mantas Varatiejus <[email protected]>
5
 *
6
 * For the full copyright and license information, please view the LICENSE
7
 * file that was distributed with this source code.
8
 */
9
10
namespace MVar\Apache2LogParser;
11
12
/**
13
 * Apache2 access log parser.
14
 */
15
class AccessLogParser extends AbstractLineParser
0 ignored issues
show
Deprecated Code introduced by
The class MVar\Apache2LogParser\AbstractLineParser has been deprecated with message: Will be removed in 3.0. Use \MVar\LogParser\AbstractLineParser instead.

This class, trait or interface has been deprecated. The supplier of the file has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the type will be removed and which other type to use instead.

Loading history...
16
{
17
    use TimeFormatTrait;
18
19
    // Copied from Apache 2.2.22 config
20
    const FORMAT_COMMON = '%h %l %u %t "%r" %>s %O';
21
    const FORMAT_COMBINED = '%h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i"';
22
    const FORMAT_VHOST_COMBINED = '%v:%p %h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i"';
23
    const FORMAT_REFERER = '%{Referer}i -> %U';
24
    const FORMAT_AGENT = '%{User-Agent}i';
25
26
    /**
27
     * @var string
28
     */
29
    private $format;
30
31
    /**
32
     * @var string
33
     */
34
    private $pattern;
35
36
    /**
37
     * @var KeyBag
38
     */
39
    private $keyBag;
40
41
    /**
     * Creates a parser for a given access log line format.
     *
     * @param string $format One of the FORMAT_* constants or a custom log line format
     */
    public function __construct($format)
    {
        // Kept as given; getPattern() turns it into a regular expression on first use
        $this->format = $format;
    }
50
51
    /**
     * Post-processes the data prepared by the parent parser: formats the time,
     * turns a "-" response body size into 0 and groups namespaced keys
     * ("<namespace>__<key>") into one nested array per namespace.
     *
     * {@inheritdoc}
     */
    protected function prepareParsedData(array $matches)
    {
        $parsed = parent::prepareParsedData($matches);

        // Pass the captured timestamp through formatTime()
        if (isset($parsed['time'])) {
            $parsed['time'] = $this->formatTime($parsed['time']);
        }

        // A "-" placeholder is reported as 0
        if (isset($parsed['response_body_size']) && '-' == $parsed['response_body_size']) {
            $parsed['response_body_size'] = 0;
        }

        foreach ($this->keyBag->getNamespaces() as $search) {
            $prefix = "{$search}__";
            $prefixLength = strlen($search) + 2;

            // Move every "<namespace>__<key>" entry into $parsed[<namespace>]
            foreach ($parsed as $key => $value) {
                if (strpos($key, $prefix) !== 0) {
                    continue;
                }

                $name = substr($key, $prefixLength);
                // Prefer the name stored in the key bag; fall back to the raw suffix
                $name = $this->keyBag->get($search, $name) ?: $name;

                $parsed[$search][$name] = $value;
                unset($parsed[$key]);
            }
        }

        return $parsed;
    }
80
81
    /**
     * Builds the regular expression for the configured log format and caches it,
     * so the format is compiled only once per parser instance.
     *
     * {@inheritdoc}
     */
    protected function getPattern()
    {
        // Reuse the pattern built by a previous call
        if ($this->pattern !== null) {
            return $this->pattern;
        }

        $this->keyBag = new KeyBag();
        $pattern = $this->getQuotedFormatString();

        // Fetch the table once: getSimplePatterns() also registers a namespace
        // in the key bag, which does not need to run twice.
        $simplePatterns = $this->getSimplePatterns();

        // Put simple patterns.
        // strtr() replaces all directives in a single pass and never rescans its
        // own output. With sequential str_replace() the "%" produced by "%%" could
        // join the following character and be expanded again (e.g. "%%A" ended up
        // as the local IP pattern instead of a literal "%A").
        if (!empty($simplePatterns)) {
            $pattern = strtr($pattern, $simplePatterns);
        }

        // Put regexp patterns
        foreach ($this->getCallbackPatterns() as $callbackPattern => $callback) {
            $pattern = preg_replace_callback($callbackPattern, $callback, $pattern);
        }

        // Anchor the expression so that it has to match the whole log line
        $this->pattern = "/^{$pattern}$/";

        return $this->pattern;
    }
109
110
    /**
     * Returns the format string with every character that does not belong to
     * a log format directive escaped for use inside a "/"-delimited regular expression.
     *
     * @return string
     */
    protected function getQuotedFormatString()
    {
        // Shape of a log format directive: "%", an optional (possibly negated) list
        // of status codes, an optional "<" or ">", an optional "{...}" argument
        // and a single letter
        $validPattern = '%(\!?[2-5]\d\d(\,[2-5]\d\d)*)?(\<|\>)?(\{[^\}]*\})?[a-z]';

        // Text to escape ("match"), optionally surrounded by directives
        $expression = '/(?<before>' . $validPattern . '?)?(?<match>.+?)(?<after>' . $validPattern . ')?/i';

        $quote = function (array $matches) {
            // Only the text between directives is escaped
            $quoted = preg_quote($matches['match'], '/');

            if (isset($matches['before'])) {
                $quoted = $matches['before'] . $quoted;
            }

            if (isset($matches['after'])) {
                $quoted .= $matches['after'];
            }

            return $quoted;
        };

        return preg_replace_callback($expression, $quote, $this->format);
    }
135
136
    /**
     * Returns the directives that can be replaced with plain string substitution,
     * mapped to the regular expression fragment that captures their value.
     * Note: This parser is not a validator, so in most cases the patterns
     * do not have to be exact.
     *
     * @return array
     */
    protected function getSimplePatterns()
    {
        // The "%r" pattern below captures "request__*" groups; registering the
        // "request" namespace in KeyBag allows them to be converted to an array
        $this->keyBag->registerNamespace('request');

        $patterns = [
            // Literal percent sign
            '%%' => '%',

            // IP addresses: local, client and underlying peer of the connection
            '%A' => '(?<local_ip>[\dA-Za-z\:\.]{3,39})',
            '%a' => '(?<client_ip>[\dA-Za-z\:\.]{3,39})',
            '%{c}a' => '(?<peer_ip>[\dA-Za-z\:\.]{3,39})',

            // Response size in bytes, excluding HTTP headers ("%b" is the CLF variant)
            '%B' => '(?<response_body_size>\d+)',
            '%b' => '(?<response_body_size>\d+|-)',

            // Time taken to serve the request, in microseconds
            '%D' => '(?<request_time_us>\d+)',

            // Filename
            '%f' => '(?<filename>.+)',

            // Request protocol
            '%H' => '(?<request_protocol>\S+)',

            // Remote hostname
            '%h' => '(?<remote_host>\S+)',

            // Bytes received, including request and headers
            '%I' => '(?<bytes_received>\d+)',

            // Number of keep-alive requests handled on this connection
            '%k' => '(?<keepalive_requests>\d+)',

            // Request log ID from the error log
            '%L' => '(?<log_id>\S+)',

            // Remote logname
            '%l' => '(?<identity>\S+)',

            // Request method
            '%m' => '(?<request_method>[A-Za-z]+)',

            // Bytes sent, including headers
            '%O' => '(?<bytes_sent>\d+|\-)',

            // Process ID of the child that serviced the request
            '%P' => '(?<process_id>\S+)',

            // Canonical port of the server serving the request
            '%p' => '(?<server_port>\d+)',

            // Query string
            '%q' => '(?<query_string>\?\S+|)',

            // Handler generating the response
            '%R' => '(?<response_handler>\S+)',

            // First line of request
            '%r' => '(?<request_line>(?<request__method>\w+) (?<request__path>\S+)( (?<request__protocol>\S+))?|-)',

            // Bytes transferred (received and sent), including request and headers
            '%S' => '(?<bytes_transferred>\d+)',

            // Status of the original request
            '%s' => '(?<original_status_code>[2-5]\d\d)',

            // Status of the final request
            // TODO: check after modifiers support implementation
            '%>s' => '(?<response_code>[2-5]\d\d)',

            // Time taken to serve the request, in seconds
            '%T' => '(?<request_time_s>\d+)',

            // Time the request was received
            '%t' => '\[(?<time>\d\d\/\w{3}\/\d{4}\:\d\d\:\d\d\:\d\d [+-]\d{4})\]',

            // URL path requested, not including any query string
            '%U' => '(?<request_path>\S+?)',

            // Remote user
            '%u' => '(?<remote_user>\S+)',

            // Server name according to the UseCanonicalName setting
            '%V' => '(?<server_name>\S+)',

            // Canonical ServerName of the server serving the request
            '%v' => '(?<canonical_server_name>\S+)',

            // Connection status when response is completed
            '%X' => '(?<connection_status>[Xx]|\+|\-)',
        ];

        return $patterns;
    }
213
214
    /**
     * Returns the directives that carry an argument ("%{...}x") and therefore have
     * to be put in place with preg_replace_callback(): each key is the expression
     * matching the directive, each value the callback producing its capture group.
     *
     * @return array
     */
    protected function getCallbackPatterns()
    {
        $keyBag = $this->keyBag;

        // Header lines in the request sent to the server (e.g., User-Agent, Referer)
        $requestHeader = function (array $found) use ($keyBag) {
            $index = $keyBag->add('request_headers', $found[1]);

            // The referer capture stops at a double quote; other headers take anything
            if (strcasecmp($found[1], 'referer') == 0) {
                $pattern = '[^\"]*';
            } else {
                $pattern = '.+';
            }

            return "(?<request_headers__{$index}>{$pattern})";
        };

        // The contents of cookies in the request sent to the server
        $cookie = function (array $found) use ($keyBag) {
            $index = $keyBag->add('cookies', $found[1]);

            return "(?<cookies__{$index}>.+)";
        };

        // The contents of the environment variable
        $envVar = function (array $found) use ($keyBag) {
            $index = $keyBag->add('env_vars', $found[1]);

            return "(?<env_vars__{$index}>.+)";
        };

        // The contents of notes from other modules
        $modVar = function (array $found) use ($keyBag) {
            $index = $keyBag->add('mod_vars', $found[1]);

            return "(?<mod_vars__{$index}>.+)";
        };

        // Header lines in the response sent from the server
        $responseHeader = function (array $found) use ($keyBag) {
            $index = $keyBag->add('response_headers', $found[1]);

            return "(?<response_headers__{$index}>.+)";
        };

        // The process ID or thread ID of the child that serviced the request;
        // the directive argument itself becomes the group name
        $processOrThread = function (array $found) {
            return '(?<' . $found[1] . '>\S+)';
        };

        // The canonical port of the server serving the request, or the server's
        // actual port, or the client's actual port
        $port = function (array $found) {
            return '(?<' . $found[1] . '_port>\d+)';
        };

        return [
            '/%\{([^\}]+)\}i/' => $requestHeader,
            '/%\{([^\}]+)\}C/' => $cookie,
            '/%\{([^\}]+)\}e/' => $envVar,
            '/%\{([^\}]+)\}n/' => $modVar,
            '/%\{([^\}]+)\}o/' => $responseHeader,
            '/%\{(pid|tid|hextid)\}P/' => $processOrThread,
            '/%\{(canonical|local|remote)\}p/' => $port,
        ];
    }
266
}
267