1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/* |
4
|
|
|
* (c) Mantas Varatiejus <[email protected]> |
5
|
|
|
* |
6
|
|
|
* For the full copyright and license information, please view the LICENSE |
7
|
|
|
* file that was distributed with this source code. |
8
|
|
|
*/ |
9
|
|
|
|
10
|
|
|
namespace MVar\Apache2LogParser; |
11
|
|
|
|
12
|
|
|
/**
 * Apache2 access log parser.
 *
 * Converts an Apache "LogFormat"-style string (for example one of the
 * FORMAT_* constants below) into a regular expression with named capture
 * groups, and post-processes the captured values of a matched line.
 */
class AccessLogParser extends AbstractLineParser
{
    use TimeFormatTrait;

    // Copied from Apache 2.2.22 config
    const FORMAT_COMMON = '%h %l %u %t "%r" %>s %O';
    const FORMAT_COMBINED = '%h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i"';
    const FORMAT_VHOST_COMBINED = '%v:%p %h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i"';
    const FORMAT_REFERER = '%{Referer}i -> %U';
    const FORMAT_AGENT = '%{User-Agent}i';

    /**
     * Log line format this parser was constructed with.
     *
     * @var string
     */
    private $format;

    /**
     * Regular expression built from the format; cached after the first build.
     *
     * @var string
     */
    private $pattern;

    /**
     * Registry of namespaces and keys collected while the pattern is built.
     * It stays null until getPattern() has run.
     *
     * @var KeyBag
     */
    private $keyBag;

    /**
     * Constructor.
     *
     * @param string $format One of defined constants or custom log line format
     */
    public function __construct($format)
    {
        $this->format = $format;
    }

    /**
     * {@inheritdoc}
     *
     * On top of the parent result this method:
     *  - passes the "time" value through formatTime();
     *  - replaces a "-" response body size with 0;
     *  - moves every "<namespace>__<key>" entry into a nested
     *    "<namespace>" array, for each namespace known to the key bag.
     */
    protected function prepareParsedData(array $matches)
    {
        $result = parent::prepareParsedData($matches);

        if (isset($result['time'])) {
            $result['time'] = $this->formatTime($result['time']);
        }

        // Strict comparison: loose "==" treats integer 0 as equal to '-' on PHP < 8
        if (isset($result['response_body_size']) && $result['response_body_size'] === '-') {
            $result['response_body_size'] = 0;
        }

        // The key bag is created in getPattern(); without it there are
        // no namespaces to group, so the flat result is returned as is.
        if ($this->keyBag === null) {
            return $result;
        }

        foreach ($this->keyBag->getNamespaces() as $search) {
            $prefix = "{$search}__";
            $prefixLength = strlen($prefix);

            // Put all variables to single array
            foreach ($result as $key => $data) {
                if (strpos($key, $prefix) === 0) {
                    $realKey = substr($key, $prefixLength);
                    // Fall back to the raw suffix when the key bag has no name for it
                    $realKey = $this->keyBag->get($search, $realKey) ?: $realKey;
                    $result[$search][$realKey] = $data;
                    unset($result[$key]);
                }
            }
        }

        return $result;
    }

    /**
     * {@inheritdoc}
     *
     * The pattern is built once from the configured format and then cached.
     */
    protected function getPattern()
    {
        if ($this->pattern !== null) {
            return $this->pattern;
        }

        $this->keyBag = new KeyBag();
        $pattern = $this->getQuotedFormatString();

        // Put simple patterns.
        // strtr() makes a single left-to-right pass, prefers the longest key
        // and never re-scans text it has already replaced. Sequential
        // replacement (str_replace() with arrays) would first turn "%%h" into
        // "%h" and then wrongly expand that as the remote host directive.
        $pattern = strtr($pattern, $this->getSimplePatterns());

        // Put regexp patterns
        foreach ($this->getCallbackPatterns() as $callbackPattern => $callback) {
            $pattern = preg_replace_callback($callbackPattern, $callback, $pattern);
        }

        $this->pattern = "/^{$pattern}$/";

        return $this->pattern;
    }

    /**
     * Quotes characters which are not included in log format directives
     * and returns quoted format string.
     *
     * @return string
     */
    protected function getQuotedFormatString()
    {
        // Valid pattern of log format directives
        $validPattern = '%(\!?[2-5]\d\d(\,[2-5]\d\d)*)?(\<|\>)?(\{[^\}]*\})?[a-z]';

        $pattern = preg_replace_callback(
            '/(?<before>' . $validPattern . '?)?(?<match>.+?)(?<after>' . $validPattern . ')?/i',
            function (array $matches) {
                // Directives around the literal text are kept untouched,
                // only the text between them is escaped for use in a regexp
                $before = isset($matches['before']) ? $matches['before'] : '';
                $after = isset($matches['after']) ? $matches['after'] : '';
                $match = preg_quote($matches['match'], '/');

                return "{$before}{$match}{$after}";
            },
            $this->format
        );

        return $pattern;
    }

    /**
     * Returns patterns that can be put in place by plain string replacement.
     * Note: This parser is not a validator, so in most cases patterns do not
     * have to be exact.
     *
     * Side effect: registers the "request" namespace in the key bag.
     *
     * @return array Map of format directive => regular expression fragment
     */
    protected function getSimplePatterns()
    {
        // Register "request" namespace in KeyBag
        // This allows to convert parsed variables to array
        $this->keyBag->registerNamespace('request');

        return [
            // The percent sign
            '%%' => '%',
            // Local IP address
            '%A' => '(?<local_ip>[\dA-Za-z\:\.]{3,39})',
            // Client IP address of the request
            '%a' => '(?<client_ip>[\dA-Za-z\:\.]{3,39})',
            // Underlying peer IP address of the connection
            '%{c}a' => '(?<peer_ip>[\dA-Za-z\:\.]{3,39})',
            // Size of response in bytes, excluding HTTP headers
            '%B' => '(?<response_body_size>\d+)',
            // Size of response in bytes, excluding HTTP headers. In CLF format
            '%b' => '(?<response_body_size>\d+|-)',
            // The time taken to serve the request, in microseconds
            '%D' => '(?<request_time_us>\d+)',
            // Filename
            '%f' => '(?<filename>.+)',
            // The request protocol
            '%H' => '(?<request_protocol>\S+)',
            // Remote hostname
            '%h' => '(?<remote_host>\S+)',
            // Bytes received, including request and headers
            '%I' => '(?<bytes_received>\d+)',
            // Number of keep-alive requests handled on this connection
            '%k' => '(?<keepalive_requests>\d+)',
            // The request log ID from the error log
            '%L' => '(?<log_id>\S+)',
            // Remote logname
            '%l' => '(?<identity>\S+)',
            // The request method
            '%m' => '(?<request_method>[A-Za-z]+)',
            // Bytes sent, including headers
            '%O' => '(?<bytes_sent>\d+|\-)',
            // The process ID of the child that serviced the request
            '%P' => '(?<process_id>\S+)',
            // The canonical port of the server serving the request
            '%p' => '(?<server_port>\d+)',
            // The query string
            '%q' => '(?<query_string>\?\S+|)',
            // The handler generating the response
            '%R' => '(?<response_handler>\S+)',
            // First line of request
            '%r' => '(?<request_line>(?<request__method>\w+) (?<request__path>\S+)( (?<request__protocol>\S+))?|-)',
            // Bytes transferred (received and sent), including request and headers
            '%S' => '(?<bytes_transferred>\d+)',
            // The status of the original request
            '%s' => '(?<original_status_code>[2-5]\d\d)',
            // Status of the final request
            '%>s' => '(?<response_code>[2-5]\d\d)', // TODO: check after modifiers support implementation
            // The time taken to serve the request, in seconds
            '%T' => '(?<request_time_s>\d+)',
            // Time the request was received
            '%t' => '\[(?<time>\d\d\/\w{3}\/\d{4}\:\d\d\:\d\d\:\d\d [+-]\d{4})\]',
            // The URL path requested, not including any query string
            '%U' => '(?<request_path>\S+?)',
            // Remote user
            '%u' => '(?<remote_user>\S+)',
            // The server name according to the UseCanonicalName setting
            '%V' => '(?<server_name>\S+)',
            // The canonical ServerName of the server serving the request
            '%v' => '(?<canonical_server_name>\S+)',
            // Connection status when response is completed
            '%X' => '(?<connection_status>[Xx]|\+|\-)',
        ];
    }

    /**
     * Patterns that require preg_replace_callback() to be put in place.
     *
     * Callbacks for the "%{name}x" directives add the captured name to the
     * key bag and build the capture group name from the value it returns.
     *
     * @return array Map of regular expression => replacement callback
     */
    protected function getCallbackPatterns()
    {
        $holder = $this->keyBag;

        return [
            // Header lines in the request sent to the server (e.g., User-Agent, Referer)
            '/%\{([^\}]+)\}i/' => function (array $matches) use ($holder) {
                $index = $holder->add('request_headers', $matches[1]);
                // Referer gets a pattern that cannot contain a double quote and may be empty
                $pattern = strcasecmp($matches[1], 'referer') === 0 ? '[^\"]*' : '.+';

                return "(?<request_headers__{$index}>{$pattern})";
            },
            // The contents of cookies in the request sent to the server
            '/%\{([^\}]+)\}C/' => function (array $matches) use ($holder) {
                $index = $holder->add('cookies', $matches[1]);

                return "(?<cookies__{$index}>.+)";
            },
            // The contents of the environment variable
            '/%\{([^\}]+)\}e/' => function (array $matches) use ($holder) {
                $index = $holder->add('env_vars', $matches[1]);

                return "(?<env_vars__{$index}>.+)";
            },
            // The contents of notes from other modules
            '/%\{([^\}]+)\}n/' => function (array $matches) use ($holder) {
                $index = $holder->add('mod_vars', $matches[1]);

                return "(?<mod_vars__{$index}>.+)";
            },
            // Header lines in the response sent from the server
            '/%\{([^\}]+)\}o/' => function (array $matches) use ($holder) {
                $index = $holder->add('response_headers', $matches[1]);

                return "(?<response_headers__{$index}>.+)";
            },
            // The process ID or thread ID of the child that serviced the request
            '/%\{(pid|tid|hextid)\}P/' => function (array $matches) {
                return '(?<' . $matches[1] . '>\S+)';
            },
            // The canonical port of the server serving the request, or the server's actual port,
            // or the client's actual port
            '/%\{(canonical|local|remote)\}p/' => function (array $matches) {
                return '(?<' . $matches[1] . '_port>\d+)';
            },
        ];
    }
}
267
|
|
|
|
This class, trait or interface has been deprecated. The supplier of the file has provided an explanatory message.
The explanatory message should indicate whether and when the type will be removed, and what to use instead.