Completed
Push — master ( d46c0c...fc3370 )
by James
05:49
created

IniParser::compareBcStrings()   B

Complexity

Conditions 5
Paths 5

Size

Total Lines 26
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 30

Importance

Changes 0
Metric Value
dl 0
loc 26
ccs 0
cts 14
cp 0
rs 8.439
c 0
b 0
f 0
cc 5
eloc 14
nc 5
nop 2
crap 30
1
<?php
2
declare(strict_types = 1);
3
4
namespace BrowscapPHP\IniParser;
5
6
use BrowscapPHP\Data\PropertyFormatter;
7
use BrowscapPHP\Data\PropertyHolder;
8
use BrowscapPHP\Helper\Quoter;
9
use BrowscapPHP\Parser\Helper\Pattern;
10
use BrowscapPHP\Parser\Helper\SubKey;
11
12
/**
13
 * Ini parser class (requires PHP 7.0+, because it uses strict types, scalar type hints and return types)
14
 */
15
final class IniParser
16
{
17
    /**
18
     * Options for regex patterns.
19
     *
20
     * REGEX_DELIMITER: Delimiter of all the regex patterns in the whole class.
21
     * REGEX_MODIFIERS: Regex modifiers.
22
     */
23
    const REGEX_DELIMITER = '@';
24
    const REGEX_MODIFIERS = 'i';
25
    const COMPRESSION_PATTERN_START = '@';
26
    const COMPRESSION_PATTERN_DELIMITER = '|';
27
28
    /**
29
     * Number of patterns to combine for a faster regular expression search.
30
     *
31
     * @important The number of patterns that can be processed in one step
32
     *            is limited by the internal regular expression limits.
33
     * @var int
34
     */
35
    const COUNT_PATTERN = 50;
36
37
    /**
     * Splits the ini content into its sections and yields the ini part cache data.
     *
     * Every section header ("[pattern]") is lower-cased and hashed. The properties of the
     * section are parsed, formatted and JSON-encoded into one line ("<hash>\t<json>"), and
     * the lines are grouped by the cache subkey derived from the hash. After all groups with
     * content were yielded, every remaining subkey returned by
     * SubKey::getAllIniPartCacheSubKeys() is yielded with an empty list.
     *
     * @param string $content the content of the ini file
     *
     * @throws \UnexpectedValueException if the content can not be split into sections or a section can not be parsed
     * @throws \OutOfRangeException      if there is no section body for a found section header
     *
     * @return \Generator yields arrays of the form [subkey => list of lines]
     */
    public function createIniParts(string $content) : \Generator
    {
        // get all patterns from the ini file in the correct order,
        // so that we can calculate with the index number of the resulting array,
        // which part to use when the ini file is split into its sections.
        preg_match_all('/(?<=\[)(?:[^\r\n]+)(?=\])/m', $content, $patternPositions);
        $patternPositions = $patternPositions[0];

        // split the ini file into sections and save the data in one line with a hash of the belonging
        // pattern (filtered in the previous step)
        $iniParts = preg_split('/\[[^\r\n]+\]/', $content);

        if (false === $iniParts) {
            // preg_split() signals a regex engine failure with false
            throw new \UnexpectedValueException('an error occurred while splitting the ini content into its sections');
        }

        $contents = [];

        $propertyFormatter = new PropertyFormatter(new PropertyHolder());

        foreach ($patternPositions as $position => $pattern) {
            $pattern = strtolower($pattern);
            $patternhash = Pattern::getHashForParts($pattern);
            $subkey = SubKey::getIniPartCacheSubKey($patternhash);

            if (! isset($contents[$subkey])) {
                $contents[$subkey] = [];
            }

            // the position has to be moved by one, because the header of the ini file
            // is also returned as a part
            $partIndex = $position + 1;

            if (! isset($iniParts[$partIndex])) {
                throw new \OutOfRangeException(
                    sprintf('could not find the ini part for the pattern "%s"', $pattern)
                );
            }

            $browserProperties = parse_ini_string($iniParts[$partIndex], false, INI_SCANNER_RAW);

            if (false === $browserProperties) {
                // fail with a clear message instead of passing false on to array_keys()
                throw new \UnexpectedValueException(
                    sprintf('could not parse the ini part for the pattern "%s"', $pattern)
                );
            }

            foreach (array_keys($browserProperties) as $property) {
                $browserProperties[$property] = $propertyFormatter->formatPropertyValue(
                    $browserProperties[$property],
                    $property
                );
            }

            $contents[$subkey][] = $patternhash . "\t" . json_encode(
                $browserProperties,
                JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_QUOT | JSON_HEX_AMP
            );
        }

        unset($patternPositions, $iniParts);

        // flipped, so that the subkeys can be removed by key once they were yielded
        $subkeys = array_flip(SubKey::getAllIniPartCacheSubKeys());

        foreach ($contents as $subkey => $cacheContent) {
            $subkey = (string) $subkey;

            yield [$subkey => $cacheContent];

            unset($subkeys[$subkey]);
        }

        // every subkey without content is yielded with an empty list
        foreach (array_keys($subkeys) as $subkey) {
            $subkey = (string) $subkey;

            yield [$subkey => []];
        }
    }
102
103
    /**
     * Builds the pattern cache data from the ini content and yields it grouped by cache subkey.
     *
     * The section headers of the ini content are collected, sorted with compareBcStrings(),
     * lower-cased, quoted for usage in a regular expression and grouped by pattern hash and
     * pattern length. Digits are replaced with a digit character class, so that patterns which
     * differ only in digits collapse into one entry. Each yielded line has the form
     * "<hash>\t<length>\t<pattern>[\t<pattern>...]" with at most self::COUNT_PATTERN patterns.
     *
     * If no pattern is found, a single empty array is yielded. Otherwise, after all groups
     * with content were yielded, every remaining subkey is yielded with an empty list.
     *
     * @param string $content the content of the ini file
     *
     * @return \Generator yields arrays of the form [subkey => list of lines]
     */
    public function createPatterns(string $content) : \Generator
    {
        // get all relevant patterns from the INI file
        // - containing "*" or "?"
        // - not containing "*" or "?", but not having a comment
        preg_match_all(
            '/(?<=\[)(?:[^\r\n]*[?*][^\r\n]*)(?=\])|(?<=\[)(?:[^\r\n*?]+)(?=\])(?![^\[]*Comment=)/m',
            $content,
            $matches
        );

        if (empty($matches[0]) || ! is_array($matches[0])) {
            yield [];

            return;
        }

        $quoterHelper = new Quoter();
        $matches = $matches[0];
        // longest patterns first, see compareBcStrings()
        usort($matches, [$this, 'compareBcStrings']);

        // build an array to structure the data. this requires some memory, but we need this step to be able to
        // sort the data in the way we need it (see below).
        $data = [];

        foreach ($matches as $pattern) {
            if ('GJK_Browscap_Version' === $pattern) {
                continue;
            }

            $pattern = strtolower($pattern);
            $patternhash = Pattern::getHashForPattern($pattern, false);
            $tmpLength = Pattern::getPatternLength($pattern);

            // special handling of default entry
            if ($tmpLength === 0) {
                $patternhash = str_repeat('z', 32);
            }

            if (! isset($data[$patternhash])) {
                $data[$patternhash] = [];
            }

            if (! isset($data[$patternhash][$tmpLength])) {
                $data[$patternhash][$tmpLength] = [];
            }

            $pattern = $quoterHelper->pregQuote($pattern);

            // Check if the pattern contains digits - in this case we replace them with a digit regular expression,
            // so that very similar patterns (e.g. only with different browser version numbers) can be compressed.
            // This helps to speed up the first (and most expensive) part of the pattern search a lot.
            if (strpbrk($pattern, '0123456789') !== false) {
                $compressedPattern = preg_replace('/\d/', '[\d]', $pattern);

                // strict comparison: only an identical compressed pattern counts as a duplicate
                if (! in_array($compressedPattern, $data[$patternhash][$tmpLength], true)) {
                    $data[$patternhash][$tmpLength][] = $compressedPattern;
                }
            } else {
                $data[$patternhash][$tmpLength][] = $pattern;
            }
        }

        unset($matches);

        // sorting of the data is important to check the patterns later in the correct order, because
        // we need to check the most specific (=longest) patterns first, and the least specific
        // (".*" for "Default Browser")  last.
        //
        // sort by pattern start to group them
        ksort($data);
        // and then by pattern length (longest first)
        foreach (array_keys($data) as $key) {
            krsort($data[$key]);
        }

        // write optimized file (grouped by the first character of the hash, generated from the pattern
        // start) with multiple patterns joined by tabs. this is to speed up loading of the data (small
        // array with pattern strings instead of a large array with single patterns) and also enables
        // us to search for multiple patterns in one preg_match call for a fast first search
        // (3-10 times faster), followed by a detailed search for each single pattern.
        $contents = [];

        foreach ($data as $patternhash => $tmpEntries) {
            if (empty($tmpEntries)) {
                continue;
            }

            $subkey = SubKey::getPatternCacheSubkey($patternhash);

            if (! isset($contents[$subkey])) {
                $contents[$subkey] = [];
            }

            foreach ($tmpEntries as $tmpLength => $tmpPatterns) {
                if (empty($tmpPatterns)) {
                    continue;
                }

                // limit the number of patterns per line, see self::COUNT_PATTERN
                $chunks = array_chunk($tmpPatterns, self::COUNT_PATTERN);

                foreach ($chunks as $chunk) {
                    $contents[$subkey][] = $patternhash . "\t" . $tmpLength . "\t" . implode("\t", $chunk);
                }
            }
        }

        unset($data);

        // NOTE(review): the unset()/array_keys() calls below rely on getAllPatternCacheSubkeys()
        // returning an array keyed by subkey (it is not flipped like in createIniParts()) - confirm.
        $subkeys = SubKey::getAllPatternCacheSubkeys();

        // a dedicated loop variable is used, so that the $content parameter is not overwritten
        foreach ($contents as $subkey => $cacheContent) {
            $subkey = (string) $subkey;

            yield [$subkey => $cacheContent];

            unset($subkeys[$subkey]);
        }

        // every subkey without content is yielded with an empty list
        foreach (array_keys($subkeys) as $subkey) {
            $subkey = (string) $subkey;

            yield [$subkey => []];
        }
    }
233
234
    /**
     * Comparison callback which orders patterns from the most to the least specific one.
     *
     * The longer string comes first. If both strings have the same length, the one
     * with more characters besides the wildcards "*" and "?" comes first.
     *
     * @param string $a
     * @param string $b
     *
     * @return int -1 if $a has to be sorted before $b, 1 if it has to be sorted after $b, 0 otherwise
     */
    private function compareBcStrings(string $a, string $b) : int
    {
        // the operands are swapped, because the longer string has to come first
        $byLength = strlen($b) <=> strlen($a);

        if (0 !== $byLength) {
            return $byLength;
        }

        $wildcards = ['*', '?'];

        // same length: compare the number of characters which are not wildcards
        return strlen(str_replace($wildcards, '', $b)) <=> strlen(str_replace($wildcards, '', $a));
    }
266
}
267