Completed
Push — master ( a6622b...2986ac )
by James
23s
created

IniParser::createIniParts()   C

Complexity

Conditions 10
Paths 29

Size

Total Lines 72

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 110

Importance

Changes 0
Metric Value
dl 0
loc 72
ccs 0
cts 34
cp 0
rs 6.7442
c 0
b 0
f 0
cc 10
nc 29
nop 1
crap 110

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

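Commonly applied refactorings include Extract Method: move a commented or otherwise self-contained part of the body into its own well-named method.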

1
<?php
2
declare(strict_types = 1);
3
4
namespace BrowscapPHP\IniParser;
5
6
use BrowscapPHP\Data\PropertyFormatter;
7
use BrowscapPHP\Data\PropertyHolder;
8
use BrowscapPHP\Helper\Quoter;
9
use BrowscapPHP\Parser\Helper\Pattern;
10
use BrowscapPHP\Parser\Helper\SubKey;
11
use ExceptionalJSON\EncodeErrorException;
12
13
/**
14
 * Ini parser class (requires PHP 7.1+: uses strict types, return types and class constant visibility)
15
 */
16
final class IniParser implements ParserInterface
17
{
18
    /**
19
     * Number of patterns to combine for a faster regular expression search.
20
     *
21
     * @important The number of patterns that can be processed in one step
22
     *            is limited by the internal regular expression limits.
23
     *
24
     * @var int
25
     */
26
    private const COUNT_PATTERN = 50;
27
28
    /**
     * Creates new ini part cache files
     *
     * Splits the raw ini content into its sections, formats and JSON-encodes the
     * properties of every section and groups the resulting lines by cache subkey.
     * Every yielded entry maps a subkey to the list of lines
     * ("<pattern hash>\t<json encoded properties>") collected for it. Afterwards each
     * subkey returned by SubKey::getAllIniPartCacheSubKeys() that received no section
     * is yielded with an empty string.
     *
     * @param string $content
     *
     * @throws \OutOfRangeException      if a section header has no matching section body
     * @throws \UnexpectedValueException if the content can not be matched, split, parsed or json encoded
     *
     * @return \Generator
     */
    public function createIniParts(string $content) : \Generator
    {
        // get all patterns from the ini file in the correct order,
        // so that we can calculate with the index number of the resulting array,
        // which part to use when the ini file is split into its sections.
        if (false === preg_match_all('/(?<=\[)(?:[^\r\n]+)(?=\])/m', $content, $patternPositions)) {
            throw new \UnexpectedValueException('an error occurred while extracting the patterns from the content');
        }

        $patternPositions = $patternPositions[0];

        // split the ini file into sections and save the data in one line with a hash of the belonging
        // pattern (filtered in the previous step)
        $iniParts = preg_split('/\[[^\r\n]+\]/', $content);
        if (false === $iniParts) {
            throw new \UnexpectedValueException('an error occured while splitting content into parts');
        }

        $contents = [];

        $propertyFormatter = new PropertyFormatter(new PropertyHolder());

        foreach ($patternPositions as $position => $pattern) {
            $pattern = strtolower($pattern);
            $patternhash = Pattern::getHashForParts($pattern);
            $subkey = SubKey::getIniPartCacheSubKey($patternhash);

            // the position has to be moved by one, because the header of the ini file
            // (everything in front of the first section) is also returned as a part
            $partIndex = $position + 1;

            if (! array_key_exists($partIndex, $iniParts)) {
                throw new \OutOfRangeException(sprintf('could not find position %d inside iniparts', $partIndex));
            }

            $browserProperties = parse_ini_string($iniParts[$partIndex], false, INI_SCANNER_RAW);

            if (false === $browserProperties) {
                throw new \UnexpectedValueException(
                    sprintf('could not ini parse position %d inside iniparts', $partIndex)
                );
            }

            // normalise every raw ini value with the property formatter
            foreach ($browserProperties as $property => $value) {
                $browserProperties[$property] = $propertyFormatter->formatPropertyValue($value, $property);
            }

            try {
                $encodedProperties = \ExceptionalJSON\encode(
                    $browserProperties,
                    JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_QUOT | JSON_HEX_AMP
                );
            } catch (EncodeErrorException $e) {
                throw new \UnexpectedValueException('json encoding content failed', 0, $e);
            }

            // the list for a subkey is created on first use by the append
            $contents[$subkey][] = $patternhash . "\t" . $encodedProperties;
        }

        // free the (potentially large) intermediate arrays before yielding
        unset($patternPositions, $iniParts);

        // flipped, so that the subkeys which were already yielded can be removed by key
        $subkeys = array_flip(SubKey::getAllIniPartCacheSubKeys());
        foreach ($contents as $subkey => $cacheContent) {
            yield $subkey => $cacheContent;

            unset($subkeys[$subkey]);
        }

        // every remaining subkey got no content and is yielded with an empty string
        foreach (array_keys($subkeys) as $subkey) {
            $subkey = (string) $subkey;

            yield $subkey => '';
        }
    }
110
111
    /**
     * Creates new pattern cache files
     *
     * Collects the section names (patterns) of the ini content, groups them by
     * pattern hash and pattern length and yields them grouped by cache subkey.
     * Every yielded entry maps a subkey to a list of lines of the form
     * "<pattern hash>\t<pattern length>\t<pattern>[\t<pattern>...]" with at most
     * self::COUNT_PATTERN patterns per line. Afterwards the keys of
     * SubKey::getAllPatternCacheSubkeys() that received no pattern are yielded with
     * an empty string. If no pattern is found at all, a single '' => '' entry is yielded.
     *
     * NOTE(review): the parameter is left without a native type on purpose; adding
     * `string` is only possible if ParserInterface declares it as well - confirm.
     *
     * @param string $content
     *
     * @return \Generator
     */
    public function createPatterns($content) : \Generator
    {
        // get all relevant patterns from the INI file
        // - containing "*" or "?"
        // - not containing "*" or "?", but not having a comment
        preg_match_all(
            '/(?<=\[)(?:[^\r\n]*[?*][^\r\n]*)(?=\])|(?<=\[)(?:[^\r\n*?]+)(?=\])(?![^\[]*Comment=)/m',
            $content,
            $matches
        );

        if (empty($matches[0]) || ! is_array($matches[0])) {
            yield '' => '';

            return;
        }

        $quoterHelper = new Quoter();
        $matches = $matches[0];
        usort($matches, [$this, 'compareBcStrings']);

        // build an array to structure the data. this requires some memory, but we need this step to be able to
        // sort the data in the way we need it (see below).
        $data = [];

        foreach ($matches as $pattern) {
            // the version section is no browser pattern
            if ('GJK_Browscap_Version' === $pattern) {
                continue;
            }

            $pattern = strtolower($pattern);
            $patternhash = Pattern::getHashForPattern($pattern, false)[0];
            $tmpLength = Pattern::getPatternLength($pattern);

            // special handling of default entry
            if (0 === $tmpLength) {
                $patternhash = str_repeat('z', 32);
            }

            // creates the hash level implicitly when it does not exist yet
            if (! isset($data[$patternhash][$tmpLength])) {
                $data[$patternhash][$tmpLength] = [];
            }

            $pattern = $quoterHelper->pregQuote($pattern);

            // Check if the pattern contains digits - in this case we replace them with a digit regular expression,
            // so that very similar patterns (e.g. only with different browser version numbers) can be compressed.
            // This helps to speed up the first (and most expensive) part of the pattern search a lot.
            if (false !== strpbrk($pattern, '0123456789')) {
                $compressedPattern = preg_replace('/\d/', '[\d]', $pattern);

                // strict comparison: the list only contains strings and must never be type juggled
                if (! in_array($compressedPattern, $data[$patternhash][$tmpLength], true)) {
                    $data[$patternhash][$tmpLength][] = $compressedPattern;
                }
            } else {
                $data[$patternhash][$tmpLength][] = $pattern;
            }
        }

        unset($matches);

        // sorting of the data is important to check the patterns later in the correct order, because
        // we need to check the most specific (=longest) patterns first, and the least specific
        // (".*" for "Default Browser")  last.
        //
        // sort by pattern start to group them
        ksort($data);
        // and then by pattern length (longest first)
        foreach (array_keys($data) as $key) {
            krsort($data[$key]);
        }

        // write optimized file (grouped by the first character of the hash, generated from the pattern
        // start) with multiple patterns joined by tabs. this is to speed up loading of the data (small
        // array with pattern strings instead of a large array with single patterns) and also enables
        // us to search for multiple patterns in one preg_match call for a fast first search
        // (3-10 faster), followed by a detailed search for each single pattern.
        $contents = [];
        foreach ($data as $patternhash => $tmpEntries) {
            if (empty($tmpEntries)) {
                continue;
            }

            $subkey = SubKey::getPatternCacheSubkey($patternhash);

            if (! isset($contents[$subkey])) {
                $contents[$subkey] = [];
            }

            foreach ($tmpEntries as $tmpLength => $tmpPatterns) {
                if (empty($tmpPatterns)) {
                    continue;
                }

                // limit the number of patterns per line, see self::COUNT_PATTERN
                foreach (array_chunk($tmpPatterns, self::COUNT_PATTERN) as $chunk) {
                    $contents[$subkey][] = $patternhash . "\t" . $tmpLength . "\t" . implode("\t", $chunk);
                }
            }
        }

        unset($data);

        $subkeys = SubKey::getAllPatternCacheSubkeys();
        // a dedicated loop variable is used, so that the $content parameter is not overwritten
        foreach ($contents as $subkey => $cacheContent) {
            yield $subkey => $cacheContent;

            unset($subkeys[$subkey]);
        }

        // every remaining subkey got no pattern and is yielded with an empty string
        foreach (array_keys($subkeys) as $subkey) {
            $subkey = (string) $subkey;

            yield $subkey => '';
        }
    }
239
240
    /**
     * Sort callback for patterns: longer strings come first; for strings of the
     * same length the one with more characters besides the wildcards "*" and "?"
     * comes first.
     *
     * @param string $a
     * @param string $b
     *
     * @return int -1 if $a has to be sorted before $b, 1 if after, 0 if both rank equally
     */
    private function compareBcStrings(string $a, string $b) : int
    {
        // operands are swapped on purpose to get a descending order
        $byLength = strlen($b) <=> strlen($a);

        if (0 !== $byLength) {
            return $byLength;
        }

        $wildcards = ['*', '?'];
        $literalsInA = strlen(str_replace($wildcards, '', $a));
        $literalsInB = strlen(str_replace($wildcards, '', $b));

        return $literalsInB <=> $literalsInA;
    }
272
}
273