Completed
Push — min-php71 ( f5a25a )
by James
05:53
created

IniParser::compareBcStrings()   B

Complexity

Conditions 5
Paths 5

Size

Total Lines 26
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 30

Importance

Changes 0
Metric Value
dl 0
loc 26
ccs 0
cts 14
cp 0
rs 8.439
c 0
b 0
f 0
cc 5
eloc 14
nc 5
nop 2
crap 30
1
<?php
2
declare(strict_types=1);
3
4
namespace BrowscapPHP\IniParser;
5
6
use BrowscapPHP\Data\PropertyFormatter;
7
use BrowscapPHP\Data\PropertyHolder;
8
use BrowscapPHP\Helper\Quoter;
9
use BrowscapPHP\Parser\Helper\Pattern;
10
use BrowscapPHP\Parser\Helper\SubKey;
11
12
/**
 * Ini parser class (requires PHP 7.0+: uses strict types, scalar type hints and return types)
 *
 * Splits the content of a browscap ini file into the line based structures
 * that are handed out, grouped by cache subkey, through generators.
 */
final class IniParser
{
    /**
     * Options for regex patterns.
     *
     * REGEX_DELIMITER: Delimiter of all the regex patterns in the whole class.
     * REGEX_MODIFIERS: Regex modifiers.
     */
    const REGEX_DELIMITER               = '@';
    const REGEX_MODIFIERS               = 'i';
    const COMPRESSION_PATTERN_START     = '@';
    const COMPRESSION_PATTERN_DELIMITER = '|';

    /**
     * Number of patterns to combine for a faster regular expression search.
     *
     * @important The number of patterns that can be processed in one step
     *            is limited by the internal regular expression limits.
     * @var int
     */
    const COUNT_PATTERN = 50;

    /**
     * Creates the content of the ini part cache files.
     *
     * Every ini section is turned into one line of the form
     * "<pattern hash>\t<json encoded properties>" and the lines are grouped by the
     * subkey derived from the pattern hash.
     *
     * @param string $content the complete content of the ini file
     *
     * @throws \UnexpectedValueException if the content cannot be split into sections or a section cannot be parsed
     * @throws \OutOfRangeException      if no section body exists for a detected section header
     *
     * @return \Generator yields one single-entry array [subkey => lines] per subkey; subkeys without
     *                    any section are yielded with an empty list
     */
    public function createIniParts(string $content) : \Generator
    {
        // get all patterns from the ini file in the correct order,
        // so that we can calculate with the index number of the resulting array,
        // which part to use when the ini file is split into its sections.
        preg_match_all('/(?<=\[)(?:[^\r\n]+)(?=\])/m', $content, $patternPositions);
        $patternPositions = $patternPositions[0];

        // split the ini file into sections and save the data in one line with a hash of the belonging
        // pattern (filtered in the previous step)
        $iniParts = preg_split('/\[[^\r\n]+\]/', $content);

        if (false === $iniParts) {
            throw new \UnexpectedValueException('an error occurred while splitting the ini content into sections');
        }

        $contents          = [];
        $propertyFormatter = new PropertyFormatter(new PropertyHolder());

        foreach ($patternPositions as $position => $pattern) {
            $pattern     = strtolower($pattern);
            $patternhash = Pattern::getHashForParts($pattern);
            $subkey      = SubKey::getIniPartCacheSubKey($patternhash);

            if (!isset($contents[$subkey])) {
                $contents[$subkey] = [];
            }

            // the position has to be moved by one, because the header of the ini file
            // is also returned as a part
            $partIndex = $position + 1;

            if (!isset($iniParts[$partIndex])) {
                throw new \OutOfRangeException(
                    sprintf('no ini section content was found for the pattern "%s"', $pattern)
                );
            }

            $browserProperties = parse_ini_string($iniParts[$partIndex], false, INI_SCANNER_RAW);

            // parse_ini_string() signals a syntax error with false; fail loudly instead of
            // writing a broken entry into the cache
            if (false === $browserProperties) {
                throw new \UnexpectedValueException(
                    sprintf('an error occurred while parsing the ini section for the pattern "%s"', $pattern)
                );
            }

            foreach ($browserProperties as $property => $value) {
                $browserProperties[$property] = $propertyFormatter->formatPropertyValue($value, $property);
            }

            $contents[$subkey][] = $patternhash . "\t" . json_encode(
                $browserProperties,
                JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_QUOT | JSON_HEX_AMP
            );
        }

        // release the large intermediate arrays before the results are handed out
        unset($patternPositions);
        unset($iniParts);

        $subkeys = array_flip(SubKey::getAllIniPartCacheSubKeys());

        foreach ($contents as $subkey => $cacheContent) {
            $subkey = (string) $subkey;

            yield [$subkey => $cacheContent];

            unset($subkeys[$subkey]);
        }

        // every remaining subkey got no section, it is reported with an empty list
        foreach (array_keys($subkeys) as $subkey) {
            $subkey = (string) $subkey;

            yield [$subkey => []];
        }
    }

    /**
     * Creates the content of the pattern cache files.
     *
     * Every yielded line has the form "<pattern hash>\t<pattern length>\t<pattern>[\t<pattern>...]"
     * with at most COUNT_PATTERN patterns per line.
     *
     * @param string $content the complete content of the ini file
     *
     * @return \Generator yields one single-entry array [subkey => lines] per subkey, or a single
     *                    empty array if the content contains no relevant pattern
     */
    public function createPatterns(string $content) : \Generator
    {
        // get all relevant patterns from the INI file
        // - containing "*" or "?"
        // - not containing "*" or "?", but not having a comment
        preg_match_all(
            '/(?<=\[)(?:[^\r\n]*[?*][^\r\n]*)(?=\])|(?<=\[)(?:[^\r\n*?]+)(?=\])(?![^\[]*Comment=)/m',
            $content,
            $matches
        );

        if (empty($matches[0]) || !is_array($matches[0])) {
            yield [];

            return;
        }

        $quoterHelper = new Quoter();
        $matches      = $matches[0];
        usort($matches, [$this, 'compareBcStrings']);

        // build an array to structure the data. this requires some memory, but we need this step to be able to
        // sort the data in the way we need it (see below).
        $data = [];

        foreach ($matches as $pattern) {
            if ('GJK_Browscap_Version' === $pattern) {
                continue;
            }

            $pattern     = strtolower($pattern);
            $patternhash = Pattern::getHashForPattern($pattern, false);
            $tmpLength   = Pattern::getPatternLength($pattern);

            // special handling of default entry
            if (0 === $tmpLength) {
                $patternhash = str_repeat('z', 32);
            }

            if (!isset($data[$patternhash])) {
                $data[$patternhash] = [];
            }

            if (!isset($data[$patternhash][$tmpLength])) {
                $data[$patternhash][$tmpLength] = [];
            }

            $pattern = $quoterHelper->pregQuote($pattern);

            // Check if the pattern contains digits - in this case we replace them with a digit regular expression,
            // so that very similar patterns (e.g. only with different browser version numbers) can be compressed.
            // This helps to speed up the first (and most expensive) part of the pattern search a lot.
            if (false === strpbrk($pattern, '0123456789')) {
                $data[$patternhash][$tmpLength][] = $pattern;

                continue;
            }

            $compressedPattern = preg_replace('/\d/', '[\d]', $pattern);

            // strict comparison: the patterns are plain strings and must never be compared numerically
            if (!in_array($compressedPattern, $data[$patternhash][$tmpLength], true)) {
                $data[$patternhash][$tmpLength][] = $compressedPattern;
            }
        }

        unset($matches);

        // sorting of the data is important to check the patterns later in the correct order, because
        // we need to check the most specific (=longest) patterns first, and the least specific
        // (".*" for "Default Browser")  last.
        //
        // sort by pattern start to group them
        ksort($data);
        // and then by pattern length (longest first)
        foreach (array_keys($data) as $key) {
            krsort($data[$key]);
        }

        // write optimized file (grouped by the first character of the hash, generated from the pattern
        // start) with multiple patterns joined by tabs. this is to speed up loading of the data (small
        // array with pattern strings instead of a large array with single patterns) and also enables
        // us to search for multiple patterns in one preg_match call for a fast first search
        // (3-10 faster), followed by a detailed search for each single pattern.
        $contents = [];

        foreach ($data as $patternhash => $tmpEntries) {
            if (empty($tmpEntries)) {
                continue;
            }

            $subkey = SubKey::getPatternCacheSubkey($patternhash);

            if (!isset($contents[$subkey])) {
                $contents[$subkey] = [];
            }

            foreach ($tmpEntries as $tmpLength => $tmpPatterns) {
                if (empty($tmpPatterns)) {
                    continue;
                }

                foreach (array_chunk($tmpPatterns, self::COUNT_PATTERN) as $chunk) {
                    $contents[$subkey][] = $patternhash . "\t" . $tmpLength . "\t" . implode("\t", $chunk);
                }
            }
        }

        unset($data);

        // NOTE(review): unlike createIniParts() the list is not flipped here, so this presumably is
        // already keyed by subkey - confirm against SubKey::getAllPatternCacheSubkeys()
        $subkeys = SubKey::getAllPatternCacheSubkeys();

        // a dedicated loop variable is used, so that the $content parameter is not overwritten
        foreach ($contents as $subkey => $cacheContent) {
            $subkey = (string) $subkey;

            yield [$subkey => $cacheContent];

            unset($subkeys[$subkey]);
        }

        // every remaining subkey got no pattern, it is reported with an empty list
        foreach (array_keys($subkeys) as $subkey) {
            $subkey = (string) $subkey;

            yield [$subkey => []];
        }
    }

    /**
     * Comparison callback for usort(): orders longer strings first; strings of the same length are
     * ordered by the number of characters remaining after removing the wildcards "*" and "?"
     * (more remaining characters first).
     *
     * @param string $a
     * @param string $b
     *
     * @return int -1 if $a has to be sorted before $b, 1 if it has to be sorted after $b, 0 if both rank equal
     */
    private function compareBcStrings(string $a, string $b) : int
    {
        // the operands are swapped on purpose to get a descending order
        $byLength = strlen($b) <=> strlen($a);

        if (0 !== $byLength) {
            return $byLength;
        }

        $wildcards = ['*', '?'];

        return strlen(str_replace($wildcards, '', $b)) <=> strlen(str_replace($wildcards, '', $a));
    }
}
268