1 | <?php |
||
2 | declare(strict_types = 1); |
||
3 | namespace hexydec\agentzero; |
||
4 | |||
class agentzero {

	// ua string
	public readonly string $string;

	// categories
	public readonly ?string $type;
	public readonly ?string $category;

	// device
	public readonly ?string $vendor;
	public readonly ?string $device;
	public readonly ?string $model;
	public readonly ?string $build;
	public readonly ?int $ram;

	// architecture
	public readonly ?string $processor;
	public readonly ?string $architecture;
	public readonly ?int $bits;
	public readonly ?string $cpu;
	public readonly ?int $cpuclock;

	// platform
	public readonly ?string $kernel;
	public readonly ?string $platform;
	public readonly ?string $platformversion;

	// browser
	public readonly ?string $engine;
	public readonly ?string $engineversion;
	public readonly ?string $browser;
	public readonly ?string $browserversion;
	public readonly ?string $browserstatus;
	public readonly ?string $browserreleased;
	public readonly ?string $browserlatest;
	public readonly ?string $language;

	// app
	public readonly ?string $app;
	public readonly ?string $appname;
	public readonly ?string $appversion;
	public readonly ?string $framework;
	public readonly ?string $frameworkversion;
	public readonly ?string $url;

	// network
	public readonly ?string $nettype;
	public readonly ?string $proxy;

	// screen
	public readonly ?int $width;
	public readonly ?int $height;
	public readonly ?int $dpi;
	public readonly ?float $density;
	public readonly ?bool $darkmode;

	/**
	 * Constructs a new AgentZero object, private because it can only be created internally
	 *
	 * Every property is copied from $data, any property missing from $data is set to null
	 *
	 * @param string $ua The user-agent string
	 * @param \stdClass $data A stdClass object containing the UA details
	 */
	private function __construct(string $ua, \stdClass $data) {
		$this->string = $ua;

		// categories
		$this->type = $data->type ?? null;
		$this->category = $data->category ?? null;

		// device
		$this->vendor = $data->vendor ?? null;
		$this->device = $data->device ?? null;
		$this->model = $data->model ?? null;
		$this->build = $data->build ?? null;
		$this->ram = $data->ram ?? null;

		// architecture
		$this->processor = $data->processor ?? null;
		$this->architecture = $data->architecture ?? null;
		$this->bits = $data->bits ?? null;
		$this->cpu = $data->cpu ?? null;
		$this->cpuclock = $data->cpuclock ?? null;

		// platform
		$this->kernel = $data->kernel ?? null;
		$this->platform = $data->platform ?? null;
		$this->platformversion = $data->platformversion ?? null;

		// browser
		$this->engine = $data->engine ?? null;
		$this->engineversion = $data->engineversion ?? null;
		$this->browser = $data->browser ?? null;
		$this->browserversion = $data->browserversion ?? null;
		$this->browserstatus = $data->browserstatus ?? null;

		// any empty value (not just a missing one) is stored as null
		$this->browserreleased = !empty($data->browserreleased) ? $data->browserreleased : null;
		$this->browserlatest = $data->browserlatest ?? null;
		$this->language = $data->language ?? null;

		// app
		$this->app = $data->app ?? null;
		$this->appname = $data->appname ?? null;
		$this->appversion = $data->appversion ?? null;
		$this->framework = $data->framework ?? null;
		$this->frameworkversion = $data->frameworkversion ?? null;
		$this->url = $data->url ?? null;

		// network
		$this->nettype = $data->nettype ?? null;
		$this->proxy = $data->proxy ?? null;

		// screen
		$this->width = $data->width ?? null;
		$this->height = $data->height ?? null;
		$this->dpi = $data->dpi ?? null;
		$this->density = $data->density ?? null;
		$this->darkmode = $data->darkmode ?? null;
	}

	/**
	 * Retrieves calculated properties
	 *
	 * - host: the host part of $url, with a leading 'www.' removed
	 * - browsermajorversion, enginemajorversion, platformmajorversion, appmajorversion: the leading run of
	 *   digits of the matching *version property as an integer (0 when the version does not start with a digit)
	 *
	 * @param string $key The name of the property to retrieve
	 * @return string|int|null The requested property or null if it doesn't exist
	 */
	public function __get(string $key) : string|int|null {
		switch ($key) {
			case 'host':
				if ($this->url !== null && ($host = \parse_url($this->url, PHP_URL_HOST)) !== false && $host !== null) {
					return \str_starts_with($host, 'www.') ? \substr($host, 4) : $host;
				}
				return null;
			case 'browsermajorversion':
			case 'enginemajorversion':
			case 'platformmajorversion':
			case 'appmajorversion':

				// e.g. 'browsermajorversion' reads from 'browserversion'
				$item = \str_replace('major', '', $key);
				$value = $this->{$item} ?? null;
				return $value === null ? null : \intval(\substr($value, 0, \strspn($value, '0123456789')));
		}
		return $this->{$key} ?? null;
	}

	/**
	 * Reports whether a calculated property has a value
	 *
	 * Without this method isset() and the ?? operator always treat the properties served by __get() as unset
	 *
	 * @param string $key The name of the property to check
	 * @return bool Whether __get() returns a non-null value for the requested property
	 */
	public function __isset(string $key) : bool {
		return $this->__get($key) !== null;
	}

	/**
	 * Fetch the client hints sent by the browser
	 *
	 * Each hint is read from $_SERVER under its CGI name, e.g. 'sec-ch-ua-model' is read from
	 * 'HTTP_SEC_CH_UA_MODEL'. Hints that are missing or empty are left out of the result
	 *
	 * @return array<string,string> An array containing relevant client hints sent by the client
	 */
	public static function getHints() : array {
		$hints = [
			'sec-ch-ua-mobile',
			'sec-ch-ua-full-version-list',
			'sec-ch-ua-platform',
			'sec-ch-ua-platform-version',
			'sec-ch-ua-model',
			'device-memory',
			'width',
			'ect'
		];
		$data = [];
		foreach ($hints AS $item) {
			$name = 'HTTP_'.\strtoupper(\str_replace('-', '_', $item));
			if (!empty($_SERVER[$name])) {
				$data[$item] = $_SERVER[$name];
			}
		}
		return $data;
	}

	/**
	 * Extracts tokens from a UA string
	 *
	 * @param string $ua The User Agent string to be tokenised
	 * @param array<string> $single An array of strings that can appear on their own, enables the tokens to be split correctly
	 * @param array<string> $ignore An array of tokens that can be ignored in the UA string, compared against the lowercased token
	 * @return false|array<int,string> An array of tokens, or false if no tokens could be extracted
	 */
	protected static function getTokens(string $ua, array $single, array $ignore) : array|false {

		// a space may not continue a token when it directly follows one of the $single strings
		// with no $single strings the assertion is left out, an empty "(?<!)" never matches and would stop any token containing a space
		$lookbehind = '';
		if ($single !== []) {
			$quoted = \array_map(static fn (string $item) : string => \preg_quote($item, '/'), $single);
			$lookbehind = '(?<!'.\implode('|', $quoted).')';
		}

		// NOTE(review): the character classes below contain two whitespace characters in a row, one of them may
		// originally be a different whitespace character - confirm against the repository copy
		$pattern = '/\{[^}]++\}|[^()\[\];,\/  _-](?:'.$lookbehind.' (?!https?:\/\/)|(?<=[a-z])\([^)]+\)|[^()\[\];,\/ ]*)*[^()\[\];,\/  _-](?:\/[^;,()\[\]  ]++)?|[0-9]/i';

		// split up ua string
		if (!\preg_match_all($pattern, $ua, $match)) {
			return false;
		}

		// userland token processing
		$tokens = [];
		foreach ($match[0] AS $item) {
			$lower = \mb_strtolower($item);

			// special case for handling like
			if (\str_starts_with($lower, 'like ')) {

				// chop off words up to a useful token e.g. Platform/Version, otherwise the token is dropped
				if (\str_contains($item, '/') && ($pos = \mb_strrpos($item, ' ')) !== false) {
					$tokens[] = \mb_substr($item, $pos + 1);
				}

			// check token is not ignored
			} elseif (!\in_array($lower, $ignore, true)) {
				$tokens[] = $item;
			}
		}
		return $tokens;
	}

	/**
	 * Parses a User Agent string
	 *
	 * @param string $ua The User Agent string to be parsed
	 * @param array $hints An array of client hints, passed to hints::parse() along with the UA string
	 * @param array $config An array of configuration keys, passed to config::get(), the resulting 'single', 'ignore' and 'match' keys are used here
	 * @return agentzero|false An agentzero object containing the parsed values of the input UA, or false if it could not be parsed
	 */
	public static function parse(string $ua, array $hints = [], array $config = []) : agentzero|false {

		// strip non-printable characters and surrounding whitespace
		// NOTE(review): the search and replacement strings below look identical (a space for a space), which
		// would be a no-op - confirm the intended search string
		$ua = \str_replace(' ', ' ', \trim(\preg_replace( '/[^[:print:]]/', '', $ua)));

		// parse client hints
		$hinted = $ua; // presumably hints::parse() may rewrite this copy - confirm against hints::parse()
		$browser = hints::parse($hinted, $hints);

		// get config
		$config = config::get($config);
		if ($config === null) {
			return false;
		}

		// get tokens
		$tokens = self::getTokens(\trim($hinted, ' "\''), $config['single'], $config['ignore']);
		if ($tokens === false) {
			return false;
		}

		// extract UA info
		$tokenslower = \array_map('\\mb_strtolower', $tokens);
		foreach ($config['match'] AS $key => $item) {
			$item->match($browser, $key, $tokens, $tokenslower, $config);
		}

		// default information
		$arr = (array) $browser;
		if (empty($arr) && !empty($tokens)) {
			self::parseDefault($browser, $tokens);
		}

		// create agentzero object and return, the original (unhinted) UA string is stored on the object
		$arr = (array) $browser;
		if (!empty($arr)) {
			return new agentzero($ua, $browser);
		}
		return false;
	}

	/**
	 * Parse the UA string when no other extractions were able to be made
	 *
	 * Marks the UA as a robot/scraper, then fills the app details from the first of:
	 * a token containing a slash (name/version), a token containing a word that is a version number
	 * (the words before it become the name), or failing that the first token as the name
	 *
	 * @param \stdClass $obj A standard class object to populate
	 * @param array<string> $tokens An array of tokens
	 * @return void
	 */
	protected static function parseDefault(\stdClass $obj, array $tokens) : void {
		$obj->type = 'robot';
		$obj->category = 'scraper';

		// find app names
		foreach ($tokens AS $item) {
			if (\str_contains($item, '/')) {
				$parts = \explode('/', $item);
				$obj->app = crawlers::normaliseAppname($parts[0]);
				$obj->appname = $parts[0];
				if (!empty($parts[1])) {
					$obj->appversion = \ltrim($parts[1], 'v');
				}
				return;
			}
		}

		// parse the string
		foreach ($tokens AS $token) {
			$name = [];
			foreach (\explode(' ', $token) AS $item) {
				$ver = \ltrim($item, 'v'); // strip 'v' off the front of version number

				// an empty word (doubled space) or a bare 'v' is not a version, treat it as part of the name
				if ($ver !== '' && \strspn($ver, '0123456789.') === \strlen($ver)) {
					$app = \implode(' ', $name);
					$obj->app = crawlers::normaliseAppname($app);
					$obj->appname = $app;
					$obj->appversion = $ver;
					return;
				}
				$name[] = $item;
			}
		}

		// just use the string
		$obj->app = crawlers::normaliseAppname($tokens[0]);
		$obj->appname = $tokens[0];
	}
}
302 | } |