Passed
Push — master ( e10342...fc5965 )
by Vitaly
08:52
created

StringConditionTree::compareStringStructure()   C

Complexity

Conditions 15
Paths 180

Size

Total Lines 64
Code Lines 25

Duplication

Lines 28
Ratio 43.75 %

Importance

Changes 0
Metric Value
dl 28
loc 64
rs 5.4882
c 0
b 0
f 0
cc 15
eloc 25
nc 180
nop 2

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php declare(strict_types = 1);
2
/**
3
 * Created by Vitaly Iegorov <[email protected]>.
4
 * on 02.03.17 at 13:25
5
 */
6
namespace samsonframework\stringconditiontree;
7
use phpDocumentor\Reflection\Types\Integer;
8
9
/**
10
 * Class StringConditionTree
11
 *
12
 * @author Vitaly Egorov <[email protected]>
13
 */
14
class StringConditionTree
15
{
16
    /** Tree node root element identifier, needed for recursion */
17
    const ROOT_NAME = '';
18
19
    /** Final tree node branch identifier */
20
    const SELF_NAME = '@self';
21
22
    /** String parameter start marker */
23
    const PARAMETER_START = '{';
24
25
    /** String parameter end marker */
26
    const PARAMETER_END = '}';
27
28
    /** Parameter sorting length value for counting */
29
    const PARAMETER_COF = 2000;
30
31
    /** @var TreeNode Resulting collection for debugging */
32
    protected $debug;
33
34
    /** @var string Parametrized string start marker */
35
    protected $parameterStartMarker = self::PARAMETER_START;
36
37
    /** @var string Parametrized string end marker */
38
    protected $parameterEndMarker = self::PARAMETER_END;
39
40
    /**
41
     * StringConditionTree constructor.
42
     *
43
     * @param string $parameterStartMarker Parametrized string start marker
44
     * @param string $parameterEndMarker Parametrized string end marker
45
     */
46
    public function __construct(string $parameterStartMarker = self::PARAMETER_START, string $parameterEndMarker = self::PARAMETER_END)
47
    {
48
        $this->parameterStartMarker = $parameterStartMarker;
49
        $this->parameterEndMarker = $parameterEndMarker;
50
    }
51
52
    /**
53
     * Build similarity strings tree.
54
     *
55
     * @param array $input Collection of strings
56
     *
57
     * @return TreeNode Resulting similarity strings tree
58
     */
59
    public function process(array $input): TreeNode
60
    {
61
        /**
62
         * We need to find first matching character that present at least at one two string
63
         * to start building tree. Otherwise there is no reason to build tree.
64
         */
65
        $this->innerProcessor(self::ROOT_NAME, $input, $this->debug = new TreeNode());
66
67
        $this->debug = $this->debug->children[self::ROOT_NAME];
68
69
        return $this->debug;
70
    }
71
72
    /**
73
     * Buil string character group structure considering parametrized
74
     * and not parametrized characted groups and their length(PCG, NPCG).
75
     *
76
     * @param string $prefix Prefix string
77
     *
78
     * @return array String character groups structure
79
     */
80
    protected function getPrefixStructure(string $prefix): array
81
    {
82
        /** @var array $structureMatrix String PCG(0)/NPCG(1) structure matrix for comparison */
83
        $structureMatrix = [];
84
85
        // Flags for showing current string character group
86
        /** @var bool $isPCG Flags showing PCG started */
87
        $isPCG = false;
88
        /** @var bool $isNPCG Flags showing NPCG started */
89
        $isNPCG = true;
90
91
        // Pointer to current CG to count string NPCG length
92
        $currentCG = 0;
93
94
        /**
95
         * TODO: Try to find PCG filter :... pattern and process it also as
96
         * PCG with filters should be prioritized over PSG without filter
97
         * even if filter is .*
98
         */
99
100
        // Iterate string by characters
101
        for ($i = 0, $length = strlen($prefix); $i < $length; $i++) {
102
            if (!$isPCG && $prefix{$i} === $this->parameterStartMarker) {
103
                $isPCG = true;
104
                $isNPCG = false;
105
                $structureMatrix[] = [0,0,$prefix];
106
                $currentCG = &$structureMatrix[count($structureMatrix)-1][1];
107
            } elseif ($isPCG && $prefix{$i} === $this->parameterEndMarker) {
108
                $isPCG = false;
109
                $isNPCG = true;
110
            } elseif ($isNPCG) {
111
                $isNPCG = false;
112
                $structureMatrix[] = [1,0,$prefix];
113
                $currentCG = &$structureMatrix[count($structureMatrix)-1][1];
114
            }
115
116
            // Store current character group length
117
            $currentCG++;
118
        }
119
120
        return $structureMatrix;
121
    }
122
123
    /**
124
     * Compare string structures.
125
     *
126
     * @param array $initial Initial string structure
127
     * @param array $compared Compared string structure
128
     *
129
     * @return int Result of array elements comparison
130
     */
131
    protected function compareStringStructure(array $initial, array $compared): int
132
    {
133
        $maxStructureSize = max(count($initial), count($compared));
134
135
        // Make structures same size preserving previous existing structure value
136
        for ($i = 1; $i < $maxStructureSize; $i++) {
137
            if (!array_key_exists($i, $initial)) {
138
                $initial[$i] = $initial[$i-1];
139
            }
140
            if (!array_key_exists($i, $compared)) {
141
                $compared[$i] = $compared[$i-1];
142
            }
143
        }
144
145
        // Iterate every structure group
146
        for ($i = 0; $i < $maxStructureSize; $i++) {
147
            // If initial structure has NPCG than it has higher priority
148
            if ($initial[$i][0] > $compared[$i][0]) {
149
                return -1;
150
            }
151
152
            // If compared structure has NPCG than it has higher priority
153
            if ($initial[$i][0] < $compared[$i][0]) {
154
                return 1;
155
            }
156
157
            // They are equal continue to next structure group comparison
158
        }
159
160
        // If both structures are equal compare lengths of NPCG
161 View Code Duplication
        for ($i = 0; $i < $maxStructureSize; $i++) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
162
            // If current CG is NPCG
163
            if ($initial[$i][0] === 1) {
164
                if ($initial[$i][1] > $compared[$i][1]) {
165
                    return 1;
166
                }
167
168
                if ($initial[$i][1] < $compared[$i][1]) {
169
                    return -1;
170
                }
171
            }
172
173
            // Current NPCG character groups have equal length - continue
174
        }
175
176
        // If both structures are equal and NPCG length are equal - compare lengths of PCG
177 View Code Duplication
        for ($i = 0; $i < $maxStructureSize; $i++) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
178
            // If current CG is PCG
179
            if ($initial[$i][0] === 0) {
180
                if ($initial[$i][1] > $compared[$i][1]) {
181
                    return -1;
182
                }
183
184
                if ($initial[$i][1] < $compared[$i][1]) {
185
                    return 1;
186
                }
187
            }
188
189
            // Current PCG character groups have equal length - continue
190
        }
191
192
        // Structures are absolutely equal
193
        return 0;
194
    }
195
196
    /**
197
     * Sort strings array considering PCG and NPCG string structure.
198
     *
199
     * @param array $input Input array for sorting
200
     * @return array Sorted array
201
     */
202
    protected function sortArrayByKeys(array &$input)
203
    {
204
        $prefixes = array_map([$this, 'getPrefixStructure'], array_keys($input));
205
206
        usort($prefixes, [$this, 'compareStringStructure']);
207
208
        $result = [];
209
        foreach ($prefixes as $sortingData) {
210
            $result[$sortingData[0][2]] = $input[$sortingData[0][2]];
211
        }
212
213
        return $result;
214
    }
215
216
    /**
217
     * Add only unique value to array.
218
     *
219
     * @param mixed $value  Unique value
220
     * @param array $array  Array for adding unique value
221
     * @param bool  $strict Strict uniqueness check
222
     *
223
     * @see in_array();
224
     *
225
     * @return bool True if unique value was added
226
     */
227
    protected function addUniqueToArray($value, &$array, bool $strict = true)
228
    {
229
        // Create array if not array is passed
230
        if (!is_array($array)) {
231
            $array = [];
232
        }
233
234
        // Add value to array if it is unique
235
        if (!in_array($value, $array, $strict)) {
236
            $array[] = $value;
237
238
            return true;
239
        } else { // Value is already in array
240
            return false;
241
        }
242
    }
243
244
    /**
245
     * Find longest matching prefix between two strings.
246
     *
247
     * @param string $initialString  Initial string
248
     * @param string $comparedString Compared string
249
     *
250
     * @return string Longest matching prefix
251
     */
252
    protected function getLongestMatchingPrefix(string $initialString, string $comparedString): string
253
    {
254
        // Iterate and compare how string matches
255
        $longestPrefix = '';
256
257
        // Define shortest & longest strings to avoid errors
258
        $initialLength = strlen($initialString);
259
        $comparedLength = strlen($comparedString);
260
261
        // Define shortest and longest strings to avoid character matching errors
262
        $shortestString = $initialLength < $comparedLength ? $initialString : $comparedString;
263
        $longestString = $initialLength >= $comparedLength ? $initialString : $comparedString;
264
265
        // Iterate initial string characters
266
        $isPattern = false;
267
        $parametrizedPrefix = '';
268
        for ($z = 0, $length = strlen($shortestString); $z < $length; $z++) {
269
            // Pattern support
270
            // TODO: Probably can be optimized
271
            if ($isPattern || $shortestString{$z} === $this->parameterStartMarker) {
272
                $isPattern = true;
273
274
                // Concatenate longest matching prefix
275
                $parametrizedPrefix .= $shortestString{$z};
276
277
                // Compare characters with compared string
278
                if ($shortestString{$z} !== $longestString{$z}) {
279
                    // Clear pattern as pattern characters mismatch
280
                    $parametrizedPrefix = '';
0 ignored issues
show
Unused Code introduced by
$parametrizedPrefix is not used, you could remove the assignment.

This check looks for variable assignements that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.

Loading history...
281
                    break;
282
                }
283
284
                // If pattern id closed unset flag fro special behaviour
285
                if ($shortestString{$z} === $this->parameterEndMarker) {
286
                    // If parametrized part ended append to longest matching prefix
287
                    $longestPrefix .= $parametrizedPrefix;
288
                    // Clear parametrized prefix as we can have other parametrized parts
289
                    $parametrizedPrefix = '';
290
                    // Reset parametrized flag
291
                    $isPattern = false;
292
                }
293
            } else {
294
                // Compare characters with compared string
295
                if ($shortestString{$z} !== $longestString{$z}) {
296
                    break; // Exit on first mismatching character
297
                }
298
299
                // Concatenate matching part of two strings
300
                $longestPrefix .= $initialString{$z};
301
            }
302
        }
303
304
        return $longestPrefix;
305
    }
306
307
    /**
308
     * Remove key string from the beginning of all sub-array strings.
309
     *
310
     * @param array  $array Input array of key => [keyStrings...]
311
     *
312
     * @param string $selfMarker Marker for storing self pointer
313
     *
314
     * @return array Processed array with removed keys from beginning of sub arrays
315
     */
316
    protected function removeKeyFromArrayStrings(array $array, string $selfMarker): array
317
    {
318
        $result = [];
319
        /** @var string[] $values */
320
        foreach ($array as $key => $values) {
321
            $lmpLength = strlen((string)$key);
322
            foreach ($values as $string) {
323
                $newString = substr($string, $lmpLength);
324
325
                if ($newString === false || $newString === '' || $string === $selfMarker) {
326
                    $result[$key][] = $selfMarker;
327
                } else {
328
                    $result[$key][] = $newString;
329
                }
330
            }
331
        }
332
333
        return $result;
334
    }
335
336
    /**
337
     * Find all duplication of source array values in compared array and remove them.
338
     *
339
     * @param array $source Source array
340
     * @param array $compared Compared array for filtering duplicates
341
     */
342
    protected function removeDuplicatesInSubArray(array $source, array &$compared)
343
    {
344
        foreach ($source as $value) {
345
            foreach ($compared as $key => &$subValue) {
346
                if ($subValue === $value) {
347
                    unset($compared[$key]);
348
                }
349
            }
350
        }
351
    }
352
353
    /**
354
     * Recursive string similarity tree builder.
355
     *
356
     * @param string   $prefix
357
     * @param array    $input
358
     * @param TreeNode $result
359
     * @param string   $selfMarker
360
     */
361
    protected function innerProcessor(string $prefix, array $input, TreeNode $result, $selfMarker = self::SELF_NAME)
362
    {
363
        // Create tree node
364
        $newChild = $result->append($prefix);
365
366
        /**
367
         * Iterate all combinations of strings and group by LMP
368
         */
369
        $longestPrefixes = [];
370
        for ($i = 0, $count = count($input); $i < $count; $i++) {
371
            for ($j = $i + 1; $j < $count; $j++) {
372
                $longestMatchedPrefix = $this->getLongestMatchingPrefix($input[$i], $input[$j]);
373
374
                // We have found at least one matching character between strings
375
                if ($longestMatchedPrefix !== '') {
376
                    $this->addUniqueToArray($input[$i], $longestPrefixes[$longestMatchedPrefix]);
377
                    $this->addUniqueToArray($input[$j], $longestPrefixes[$longestMatchedPrefix]);
378
                }
379
            }
380
        }
381
382
        /**
383
         * Sort LMPs(array keys) ascending by key length
384
         */
385
        $longestPrefixes = $this->sortArrayByKeys($longestPrefixes);
386
387
        /**
388
         * Iterate all sorted LMP strings and remove duplicates from LMP string ordered lower
389
         */
390
        $keys = array_keys($longestPrefixes);
391
        for ($i = 0, $length = count($keys); $i < $length; $i++) {
392
            for ($j = $i + 1; $j < $length; $j++) {
393
                $this->removeDuplicatesInSubArray($longestPrefixes[$keys[$i]], $longestPrefixes[$keys[$j]]);
394
            }
395
        }
396
397
        // Remove empty LMPs as they are included in smaller LMPs
398
        $longestPrefixes = array_filter($longestPrefixes);
399
400
        /**
401
         * Search for input string that do not have LMP, and add missing as LMP
402
         */
403
        foreach ($input as $string) {
404
            $found = false;
405
406
            if ($string !== $selfMarker) {
407
                foreach ($longestPrefixes as $strings) {
408
                    if (in_array($string, $strings, true)) {
409
                        $found = true;
410
                        break;
411
                    }
412
                }
413
414
                if (!$found) {
415
                    $longestPrefixes[$string] = [$selfMarker];
416
                }
417
            }
418
        }
419
420
        /**
421
         * After filtering LMPs remove LMP from matched string arrays
422
         */
423
        $longestPrefixes = $this->removeKeyFromArrayStrings($longestPrefixes, $selfMarker);
424
425
        /**
426
         * Sort LMPs(array keys) ascending by key length
427
         */
428
        $longestPrefixes = $this->sortArrayByKeys($longestPrefixes);
429
430
        /**
431
         * If we have self marker as an input string - create LMP for it
432
         */
433
        if (in_array($selfMarker, $input, true)) {
434
            $longestPrefixes = array_merge([$selfMarker => []], $longestPrefixes);
435
        }
436
437
        /**
438
         * Recursively iterate current level LMPs
439
         */
440
        foreach ($longestPrefixes as $longestPrefix => $strings) {
441
            $this->innerProcessor((string)$longestPrefix, $strings, $newChild);
442
        }
443
    }
444
}
445