Passed
Push — master ( fc5965...22f863 )
by Vitaly
10:10
created

StringConditionTree::getLongestMatchingPrefix()   B

Complexity

Conditions 9
Paths 24

Size

Total Lines 54
Code Lines 24

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 54
rs 7.255
c 0
b 0
f 0
cc 9
eloc 24
nc 24
nop 2

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php declare(strict_types = 1);
2
/**
3
 * Created by Vitaly Iegorov <[email protected]>.
4
 * on 02.03.17 at 13:25
5
 */
6
namespace samsonframework\stringconditiontree;
7
use phpDocumentor\Reflection\Types\Integer;
8
9
/**
10
 * Class StringConditionTree
11
 *
12
 * @author Vitaly Egorov <[email protected]>
13
 */
14
class StringConditionTree
15
{
16
    /** Tree node root element identifier, needed for recursion */
17
    const ROOT_NAME = '';
18
19
    /** Final tree node branch identifier */
20
    const SELF_NAME = '@self';
21
22
    /** String parameter start marker */
23
    const PARAMETER_START = '{';
24
25
    /** String parameter end marker */
26
    const PARAMETER_END = '}';
27
28
    /** Parameter sorting length value for counting */
29
    const PARAMETER_COF = 2000;
30
31
    /** @var TreeNode Resulting collection for debugging */
32
    protected $debug;
33
34
    /** @var string Parametrized string start marker */
35
    protected $parameterStartMarker = self::PARAMETER_START;
36
37
    /** @var string Parametrized string end marker */
38
    protected $parameterEndMarker = self::PARAMETER_END;
39
40
    /**
41
     * StringConditionTree constructor.
42
     *
43
     * @param string $parameterStartMarker Parametrized string start marker
44
     * @param string $parameterEndMarker Parametrized string end marker
45
     */
46
    public function __construct(string $parameterStartMarker = self::PARAMETER_START, string $parameterEndMarker = self::PARAMETER_END)
47
    {
48
        $this->parameterStartMarker = $parameterStartMarker;
49
        $this->parameterEndMarker = $parameterEndMarker;
50
    }
51
52
    /**
53
     * Build similarity strings tree.
54
     *
55
     * @param array $input Collection of strings
56
     *
57
     * @return TreeNode Resulting similarity strings tree
58
     */
59
    public function process(array $input): TreeNode
60
    {
61
        /**
62
         * We need to find first matching character that present at least at one two string
63
         * to start building tree. Otherwise there is no reason to build tree.
64
         */
65
        $this->innerProcessor(self::ROOT_NAME, $input, $this->debug = new TreeNode());
66
67
        $this->debug = $this->debug->children[self::ROOT_NAME];
68
69
        return $this->debug;
70
    }
71
72
    /**
73
     * Buil string character group structure considering parametrized
74
     * and not parametrized characted groups and their length(PCG, NPCG).
75
     *
76
     * @param string $prefix Prefix string
77
     *
78
     * @return array String character groups structure
79
     */
80
    protected function getPrefixStructure(string $prefix): array
81
    {
82
        /** @var array $structureMatrix String PCG(0)/NPCG(1) structure matrix for comparison */
83
        $structureMatrix = [];
84
85
        // Flags for showing current string character group
86
        /** @var bool $isPCG Flags showing PCG started */
87
        $isPCG = false;
88
        /** @var bool $isNPCG Flags showing NPCG started */
89
        $isNPCG = true;
90
91
        // Pointer to current CG to count string NPCG length
92
        $currentCG = 0;
93
94
        /**
95
         * TODO: Try to find PCG filter :... pattern and process it also as
96
         * PCG with filters should be prioritized over PSG without filter
97
         * even if filter is .*
98
         */
99
100
        // Iterate string by characters
101
        for ($i = 0, $length = strlen($prefix); $i < $length; $i++) {
102
            if (!$isPCG && $prefix{$i} === $this->parameterStartMarker) {
103
                $isPCG = true;
104
                $isNPCG = false;
105
                $structureMatrix[] = [0,0,$prefix];
106
                $currentCG = &$structureMatrix[count($structureMatrix)-1][1];
107
            } elseif ($isPCG && $prefix{$i} === $this->parameterEndMarker) {
108
                $isPCG = false;
109
                $isNPCG = true;
110
            } elseif ($isNPCG) {
111
                $isNPCG = false;
112
                $structureMatrix[] = [1,0,$prefix];
113
                $currentCG = &$structureMatrix[count($structureMatrix)-1][1];
114
            }
115
116
            // Store current character group length
117
            $currentCG++;
118
        }
119
120
        return $structureMatrix;
121
    }
122
123
    /**
124
     * Compare string structures.
125
     *
126
     * @param array $initial Initial string structure
127
     * @param array $compared Compared string structure
128
     *
129
     * @return int Result of array elements comparison
130
     */
131
    protected function compareStringStructure(array $initial, array $compared): int
132
    {
133
        $maxStructureSize = max(count($initial), count($compared));
134
135
        // Make structures same size preserving previous existing structure value
136
        for ($i = 1; $i < $maxStructureSize; $i++) {
137
            if (!array_key_exists($i, $initial)) {
138
                $initial[$i] = $initial[$i-1];
139
            }
140
            if (!array_key_exists($i, $compared)) {
141
                $compared[$i] = $compared[$i-1];
142
            }
143
        }
144
145
        // Iterate every structure group
146
        for ($i = 0; $i < $maxStructureSize; $i++) {
147
            // If initial structure has NPCG than it has higher priority
148
            if ($initial[$i][0] > $compared[$i][0]) {
149
                return -1;
150
            }
151
152
            // If compared structure has NPCG than it has higher priority
153
            if ($initial[$i][0] < $compared[$i][0]) {
154
                return 1;
155
            }
156
157
            // Compare NOT starting NPCG length
158
            if ($i > 0 && $initial[$i][0] === 1 ) {
0 ignored issues
show
Coding Style introduced by
Expected 0 spaces before closing bracket; 1 found
Loading history...
159
                if ($initial[$i][1] > $compared[$i][1]) {
160
                    return -1;
161
                }
162
163
                if ($initial[$i][1] < $compared[$i][1]) {
164
                    return 1;
165
                }
166
            }
167
168
            // They are equal continue to next structure group comparison
169
        }
170
171
        // If both structures are equal compare lengths of NPCG
172 View Code Duplication
        for ($i = 0; $i < $maxStructureSize; $i++) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
173
            // If current CG is NPCG
174
            if ($initial[$i][0] === 1) {
175
                if ($initial[$i][1] > $compared[$i][1]) {
176
                    return 1;
177
                }
178
179
                if ($initial[$i][1] < $compared[$i][1]) {
180
                    return -1;
181
                }
182
            }
183
184
            // Current NPCG character groups have equal length - continue
185
        }
186
187
        // If both structures are equal and NPCG length are equal - compare lengths of PCG
188 View Code Duplication
        for ($i = 0; $i < $maxStructureSize; $i++) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
189
            // If current CG is PCG
190
            if ($initial[$i][0] === 0) {
191
                if ($initial[$i][1] > $compared[$i][1]) {
192
                    return -1;
193
                }
194
195
                if ($initial[$i][1] < $compared[$i][1]) {
196
                    return 1;
197
                }
198
            }
199
200
            // Current PCG character groups have equal length - continue
201
        }
202
203
        // Structures are absolutely equal
204
        return 0;
205
    }
206
207
    /**
208
     * Sort strings array considering PCG and NPCG string structure.
209
     *
210
     * @param array $input Input array for sorting
211
     * @return array Sorted array
212
     */
213
    protected function sortArrayByKeys(array &$input)
214
    {
215
        $prefixes = array_map([$this, 'getPrefixStructure'], array_keys($input));
216
217
        usort($prefixes, [$this, 'compareStringStructure']);
218
219
        $result = [];
220
        foreach ($prefixes as $sortingData) {
221
            $result[$sortingData[0][2]] = $input[$sortingData[0][2]];
222
        }
223
224
        return $result;
225
    }
226
227
    /**
228
     * Add only unique value to array.
229
     *
230
     * @param mixed $value  Unique value
231
     * @param array $array  Array for adding unique value
232
     * @param bool  $strict Strict uniqueness check
233
     *
234
     * @see in_array();
235
     *
236
     * @return bool True if unique value was added
237
     */
238
    protected function addUniqueToArray($value, &$array, bool $strict = true)
239
    {
240
        // Create array if not array is passed
241
        if (!is_array($array)) {
242
            $array = [];
243
        }
244
245
        // Add value to array if it is unique
246
        if (!in_array($value, $array, $strict)) {
247
            $array[] = $value;
248
249
            return true;
250
        } else { // Value is already in array
251
            return false;
252
        }
253
    }
254
255
    /**
256
     * Find longest matching prefix between two strings.
257
     *
258
     * @param string $initialString  Initial string
259
     * @param string $comparedString Compared string
260
     *
261
     * @return string Longest matching prefix
262
     */
263
    protected function getLongestMatchingPrefix(string $initialString, string $comparedString): string
264
    {
265
        // Iterate and compare how string matches
266
        $longestPrefix = '';
267
268
        // Define shortest & longest strings to avoid errors
269
        $initialLength = strlen($initialString);
270
        $comparedLength = strlen($comparedString);
271
272
        // Define shortest and longest strings to avoid character matching errors
273
        $shortestString = $initialLength < $comparedLength ? $initialString : $comparedString;
274
        $longestString = $initialLength >= $comparedLength ? $initialString : $comparedString;
275
276
        // Iterate initial string characters
277
        $isPattern = false;
278
        $parametrizedPrefix = '';
279
        for ($z = 0, $length = strlen($shortestString); $z < $length; $z++) {
280
            // Pattern support
281
            // TODO: Probably can be optimized
282
            if ($isPattern || $shortestString{$z} === $this->parameterStartMarker) {
283
                $isPattern = true;
284
285
                // Concatenate longest matching prefix
286
                $parametrizedPrefix .= $shortestString{$z};
287
288
                // Compare characters with compared string
289
                if ($shortestString{$z} !== $longestString{$z}) {
290
                    // Clear pattern as pattern characters mismatch
291
                    $parametrizedPrefix = '';
0 ignored issues
show
Unused Code introduced by
$parametrizedPrefix is not used, you could remove the assignment.

This check looks for variable assignements that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.

Loading history...
292
                    break;
293
                }
294
295
                // If pattern id closed unset flag fro special behaviour
296
                if ($shortestString{$z} === $this->parameterEndMarker) {
297
                    // If parametrized part ended append to longest matching prefix
298
                    $longestPrefix .= $parametrizedPrefix;
299
                    // Clear parametrized prefix as we can have other parametrized parts
300
                    $parametrizedPrefix = '';
301
                    // Reset parametrized flag
302
                    $isPattern = false;
303
                }
304
            } else {
305
                // Compare characters with compared string
306
                if ($shortestString{$z} !== $longestString{$z}) {
307
                    break; // Exit on first mismatching character
308
                }
309
310
                // Concatenate matching part of two strings
311
                $longestPrefix .= $initialString{$z};
312
            }
313
        }
314
315
        return $longestPrefix;
316
    }
317
318
    /**
319
     * Remove key string from the beginning of all sub-array strings.
320
     *
321
     * @param array  $array Input array of key => [keyStrings...]
322
     *
323
     * @param string $selfMarker Marker for storing self pointer
324
     *
325
     * @return array Processed array with removed keys from beginning of sub arrays
326
     */
327
    protected function removeKeyFromArrayStrings(array $array, string $selfMarker): array
328
    {
329
        $result = [];
330
        /** @var string[] $values */
331
        foreach ($array as $key => $values) {
332
            $lmpLength = strlen((string)$key);
333
            foreach ($values as $string) {
334
                $newString = substr($string, $lmpLength);
335
336
                if ($newString === false || $newString === '' || $string === $selfMarker) {
337
                    $result[$key][] = $selfMarker;
338
                } else {
339
                    $result[$key][] = $newString;
340
                }
341
            }
342
        }
343
344
        return $result;
345
    }
346
347
    /**
348
     * Find all duplication of source array values in compared array and remove them.
349
     *
350
     * @param array $source Source array
351
     * @param array $compared Compared array for filtering duplicates
352
     */
353
    protected function removeDuplicatesInSubArray(array $source, array &$compared)
354
    {
355
        foreach ($source as $value) {
356
            foreach ($compared as $key => &$subValue) {
357
                if ($subValue === $value) {
358
                    unset($compared[$key]);
359
                }
360
            }
361
        }
362
    }
363
364
    /**
365
     * Recursive string similarity tree builder.
366
     *
367
     * @param string   $prefix
368
     * @param array    $input
369
     * @param TreeNode $result
370
     * @param string   $selfMarker
371
     */
372
    protected function innerProcessor(string $prefix, array $input, TreeNode $result, $selfMarker = self::SELF_NAME)
373
    {
374
        // Create tree node
375
        $newChild = $result->append($prefix);
376
377
        /**
378
         * Iterate all combinations of strings and group by LMP
379
         */
380
        $longestPrefixes = [];
381
        for ($i = 0, $count = count($input); $i < $count; $i++) {
382
            for ($j = $i + 1; $j < $count; $j++) {
383
                $longestMatchedPrefix = $this->getLongestMatchingPrefix($input[$i], $input[$j]);
384
385
                // We have found at least one matching character between strings
386
                if ($longestMatchedPrefix !== '') {
387
                    $this->addUniqueToArray($input[$i], $longestPrefixes[$longestMatchedPrefix]);
388
                    $this->addUniqueToArray($input[$j], $longestPrefixes[$longestMatchedPrefix]);
389
                }
390
            }
391
        }
392
393
        /**
394
         * Sort LMPs(array keys) ascending by key length
395
         */
396
        $longestPrefixes = $this->sortArrayByKeys($longestPrefixes);
397
398
        /**
399
         * Iterate all sorted LMP strings and remove duplicates from LMP string ordered lower
400
         */
401
        $keys = array_keys($longestPrefixes);
402
        for ($i = 0, $length = count($keys); $i < $length; $i++) {
403
            for ($j = $i + 1; $j < $length; $j++) {
404
                $this->removeDuplicatesInSubArray($longestPrefixes[$keys[$i]], $longestPrefixes[$keys[$j]]);
405
            }
406
        }
407
408
        // Remove empty LMPs as they are included in smaller LMPs
409
        $longestPrefixes = array_filter($longestPrefixes);
410
411
        /**
412
         * Search for input string that do not have LMP, and add missing as LMP
413
         */
414
        foreach ($input as $string) {
415
            $found = false;
416
417
            if ($string !== $selfMarker) {
418
                foreach ($longestPrefixes as $strings) {
419
                    if (in_array($string, $strings, true)) {
420
                        $found = true;
421
                        break;
422
                    }
423
                }
424
425
                if (!$found) {
426
                    $longestPrefixes[$string] = [$selfMarker];
427
                }
428
            }
429
        }
430
431
        /**
432
         * After filtering LMPs remove LMP from matched string arrays
433
         */
434
        $longestPrefixes = $this->removeKeyFromArrayStrings($longestPrefixes, $selfMarker);
435
436
        /**
437
         * Sort LMPs(array keys) ascending by key length
438
         */
439
        $longestPrefixes = $this->sortArrayByKeys($longestPrefixes);
440
441
        /**
442
         * If we have self marker as an input string - create LMP for it
443
         */
444
        if (in_array($selfMarker, $input, true)) {
445
            $longestPrefixes = array_merge([$selfMarker => []], $longestPrefixes);
446
        }
447
448
        /**
449
         * Recursively iterate current level LMPs
450
         */
451
        foreach ($longestPrefixes as $longestPrefix => $strings) {
452
            $this->innerProcessor((string)$longestPrefix, $strings, $newChild);
453
        }
454
    }
455
}
456