1
|
|
|
<?php |
2
|
|
|
declare(strict_types = 1); |
3
|
|
|
namespace hexydec\agentzero; |
4
|
|
|
|
5
|
|
|
/**
 * Immutable value object holding the details extracted from a User-Agent string,
 * together with the static helpers used to tokenise and parse that string.
 *
 * Instances can only be created through agentzero::parse().
 */
class agentzero {

	// ua string
	public readonly string $string;

	// categories
	public readonly ?string $type;
	public readonly ?string $category;

	// device
	public readonly ?string $vendor;
	public readonly ?string $device;
	public readonly ?string $model;
	public readonly ?string $build;
	public readonly ?int $ram;

	// architecture
	public readonly ?string $processor;
	public readonly ?string $architecture;
	public readonly ?int $bits;
	public readonly ?string $cpu;
	public readonly ?int $cpuclock;

	// platform
	public readonly ?string $kernel;
	public readonly ?string $platform;
	public readonly ?string $platformversion;

	// browser
	public readonly ?string $engine;
	public readonly ?string $engineversion;
	public readonly ?string $browser;
	public readonly ?string $browserversion;
	public readonly ?string $browserstatus;
	public readonly ?string $browserreleased;
	public readonly ?string $browserlatest;
	public readonly ?string $language;

	// app
	public readonly ?string $app;
	public readonly ?string $appname;
	public readonly ?string $appversion;
	public readonly ?string $framework;
	public readonly ?string $frameworkversion;
	public readonly ?string $url;

	// network
	public readonly ?string $nettype;
	public readonly ?string $proxy;

	// screen
	public readonly ?int $width;
	public readonly ?int $height;
	public readonly ?int $dpi;
	public readonly ?float $density;
	public readonly ?bool $darkmode;

	/**
	 * Constructs a new AgentZero object, private because it can only be created internally
	 *
	 * Every property missing from $data is initialised to null, so all readonly
	 * properties are always initialised after construction.
	 *
	 * @param string $ua The user-agent string
	 * @param \stdClass $data A stdClass object containing the UA details
	 */
	private function __construct(string $ua, \stdClass $data) {
		$this->string = $ua;

		// categories
		$this->type = $data->type ?? null;
		$this->category = $data->category ?? null;

		// device
		$this->vendor = $data->vendor ?? null;
		$this->device = $data->device ?? null;
		$this->model = $data->model ?? null;
		$this->build = $data->build ?? null;
		$this->ram = $data->ram ?? null;

		// architecture
		$this->processor = $data->processor ?? null;
		$this->architecture = $data->architecture ?? null;
		$this->bits = $data->bits ?? null;
		$this->cpu = $data->cpu ?? null;
		$this->cpuclock = $data->cpuclock ?? null;

		// platform
		$this->kernel = $data->kernel ?? null;
		$this->platform = $data->platform ?? null;
		$this->platformversion = $data->platformversion ?? null;

		// browser
		$this->engine = $data->engine ?? null;
		$this->engineversion = $data->engineversion ?? null;
		$this->browser = $data->browser ?? null;
		$this->browserversion = $data->browserversion ?? null;
		$this->browserstatus = $data->browserstatus ?? null;

		// unlike the other fields, any empty value (e.g. '') is normalised to null here
		$this->browserreleased = !empty($data->browserreleased) ? $data->browserreleased : null;
		$this->browserlatest = $data->browserlatest ?? null;
		$this->language = $data->language ?? null;

		// app
		$this->app = $data->app ?? null;
		$this->appname = $data->appname ?? null;
		$this->appversion = $data->appversion ?? null;
		$this->framework = $data->framework ?? null;
		$this->frameworkversion = $data->frameworkversion ?? null;
		$this->url = $data->url ?? null;

		// network
		$this->nettype = $data->nettype ?? null;
		$this->proxy = $data->proxy ?? null;

		// screen
		$this->width = $data->width ?? null;
		$this->height = $data->height ?? null;
		$this->dpi = $data->dpi ?? null;
		$this->density = $data->density ?? null;
		$this->darkmode = $data->darkmode ?? null;
	}

	/**
	 * Retrieves calculated properties
	 *
	 * Supported calculated keys:
	 *  - 'host': the host part of $url with a leading 'www.' removed
	 *  - 'browsermajorversion', 'enginemajorversion', 'platformmajorversion', 'appmajorversion':
	 *    the leading run of digits of the matching '*version' property as an integer
	 *    (0 when the version does not start with a digit)
	 *
	 * Any other key is looked up as a regular property. The return type includes float and
	 * bool because $density and $darkmode can be reached when this method is called directly.
	 *
	 * @param string $key The name of the property to retrieve
	 * @return string|int|float|bool|null The requested property or null if it doesn't exist
	 */
	public function __get(string $key) : string|int|float|bool|null {
		switch ($key) {
			case 'host':
				if ($this->url !== null) {

					// parse_url() gives false for a malformed URL and null when there is no host
					$host = \parse_url($this->url, PHP_URL_HOST);
					if (\is_string($host)) {
						return \str_starts_with($host, 'www.') ? \substr($host, 4) : $host;
					}
				}
				return null;

			case 'browsermajorversion':
			case 'enginemajorversion':
			case 'platformmajorversion':
			case 'appmajorversion':

				// e.g. 'browsermajorversion' => 'browserversion'
				$item = \str_replace('major', '', $key);
				$value = $this->{$item} ?? null;
				return $value === null ? null : \intval(\substr($value, 0, \strspn($value, '0123456789')));
		}
		return $this->{$key} ?? null;
	}

	/**
	 * Fetch the client hints sent by the browser
	 *
	 * Reads the relevant headers from $_SERVER, headers that are missing or empty are left out.
	 *
	 * @return array<string,string> An array containing relevant client hints sent by the client, keyed by lowercase header name
	 */
	public static function getHints() : array {
		$hints = [
			'sec-ch-ua-mobile',
			'sec-ch-ua-full-version-list',
			'sec-ch-ua-platform',
			'sec-ch-ua-platform-version',
			'sec-ch-ua-model',
			'device-memory',
			'width',
			'ect'
		];
		$data = [];
		foreach ($hints AS $item) {

			// header names appear in $_SERVER as HTTP_<UPPER_SNAKE_CASE>
			$name = 'HTTP_'.\strtoupper(\str_replace('-', '_', $item));
			if (!empty($_SERVER[$name])) {
				$data[$item] = $_SERVER[$name];
			}
		}
		return $data;
	}

	/**
	 * Extracts tokens from a UA string
	 *
	 * @param string $ua The User Agent string to be tokenised
	 * @param array<string> $single An array of strings that can appear on their own, enables the tokens to be split correctly
	 * @param array<string> $ignore An array of tokens that can be ignored in the UA string, compared against the lowercased token
	 * @return false|array<int,string> An array of tokens, or false if no tokens could be extracted
	 */
	protected static function getTokens(string $ua, array $single, array $ignore) : array|false {

		// prepare regexp: a space only joins two words when it is not preceded by one of the
		// $single strings. With nothing to exclude the lookbehind must be left out, as an
		// empty "(?<!)" never matches and would stop any words from being joined
		$quoted = \implode('|', \array_map(static fn (string $item) : string => \preg_quote($item, '/'), $single));
		$lookbehind = $quoted === '' ? '' : '(?<!'.$quoted.')';
		$pattern = '/\{[^}]++\}|[^()\[\];,\/ _-](?:'.$lookbehind.' (?!https?:\/\/)|(?<=[a-z])\([^)]+\)|[^()\[\];,\/ ]*)*[^()\[\];,\/ _-](?:\/[^;,()\[\] ]++)?|[0-9]/i';

		// split up ua string, no match (0) and a regexp error (false) both mean no tokens
		if (!\preg_match_all($pattern, $ua, $match)) {
			return false;
		}

		// userland token processing
		$tokens = [];
		foreach ($match[0] AS $item) {
			$lower = \mb_strtolower($item);

			// special case for handling like
			if (\str_starts_with($lower, 'like ')) {

				// chop off words up to a useful token e.g. Platform/Version, otherwise drop the token
				if (\str_contains($item, '/') && ($pos = \mb_strrpos($item, ' ')) !== false) {
					$tokens[] = \mb_substr($item, $pos + 1);
				}

			// check token is not ignored
			} elseif (!\in_array($lower, $ignore, true)) {
				$tokens[] = $item;
			}
		}
		return $tokens;
	}

	/**
	 * Parses a User Agent string
	 *
	 * @param string $ua The User Agent string to be parsed
	 * @param array $hints An array of client hints, e.g. as returned by agentzero::getHints()
	 * @param array $config An array of configuration keys
	 * @return agentzero|false An agentzero object containing the parsed values of the input UA, or false if it could not be parsed
	 */
	public static function parse(string $ua, array $hints = [], array $config = []) : agentzero|false {

		// strip non-printable characters, trim, and collapse double spaces. preg_replace()
		// returns null on failure, in which case the input is used as it was passed
		$ua = \str_replace('  ', ' ', \trim(\preg_replace('/[^[:print:]]/', '', $ua) ?? $ua));

		// parse client hints
		// NOTE(review): a copy is handed over as hints::parse() presumably amends the string by reference - confirm
		$hinted = $ua;
		$browser = hints::parse($hinted, $hints);

		// get config
		$config = config::get($config);
		if ($config === null) {
			return false;
		}

		// get tokens
		$tokens = self::getTokens(\trim($hinted, ' "\''), $config['single'], $config['ignore']);
		if ($tokens === false) {
			return false;
		}

		// extract UA info
		$tokenslower = \array_map(\mb_strtolower(...), $tokens);
		foreach ($config['match'] AS $key => $item) {
			$item->match($browser, $key, $tokens, $tokenslower, $config);
		}

		// default information, used only when nothing at all was extracted
		if ((array) $browser === [] && $tokens !== []) {
			self::parseDefault($browser, $tokens);
		}

		// create agentzero object and return
		if ((array) $browser !== []) {
			return new agentzero($ua, $browser);
		}
		return false;
	}

	/**
	 * Parse the UA string when no other extractions were able to be made
	 *
	 * The agent is classed as a robot/scraper, and the app name and version are taken from,
	 * in order of preference: the first "name/version" token, the first token containing a
	 * standalone version number, or the first token as a whole.
	 *
	 * @param \stdClass $obj A standard class object to populate
	 * @param array<string> $tokens An array of tokens
	 * @return void
	 */
	protected static function parseDefault(\stdClass $obj, array $tokens) : void {
		$obj->type = 'robot';
		$obj->category = 'scraper';

		// find app names
		foreach ($tokens AS $item) {
			if (\str_contains($item, '/')) {
				$parts = \explode('/', $item);
				$obj->app = crawlers::normaliseAppname($parts[0]);
				$obj->appname = $parts[0];
				if (!empty($parts[1])) {
					$obj->appversion = \ltrim($parts[1], 'v');
				}
				return;
			}
		}

		// parse the string
		foreach ($tokens AS $token) {
			$name = [];
			foreach (\explode(' ', $token) AS $item) {
				$ver = \ltrim($item, 'v'); // strip 'v' off the front of version number

				// a version is made of digits and dots and must contain a digit, otherwise
				// an empty fragment or a lone "v" would be mistaken for a version
				if (\strpbrk($ver, '0123456789') !== false && \strspn($ver, '0123456789.') === \strlen($ver)) {
					$app = \implode(' ', $name);
					$obj->app = crawlers::normaliseAppname($app);
					$obj->appname = $app;
					$obj->appversion = $ver;
					return;
				}
				$name[] = $item;
			}
		}

		// just use the string, as long as there is one
		$first = \reset($tokens);
		if ($first !== false) {
			$obj->app = crawlers::normaliseAppname($first);
			$obj->appname = $first;
		}
	}
}