Passed
Push — master ( 6dedb2...9b1dd9 )
by Vitaly
06:42
created

StringConditionTree::getLMPCollection()   C

Complexity

Conditions 11
Paths 63

Size

Total Lines 44
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 25
CRAP Score 11

Importance

Changes 0
Metric Value
dl 0
loc 44
ccs 25
cts 25
cp 1
rs 5.2653
c 0
b 0
f 0
nc 63
cc 11
eloc 25
nop 1
crap 11

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php declare(strict_types = 1);
2
/**
3
 * Created by Vitaly Iegorov <[email protected]>.
4
 * on 02.03.17 at 13:25
5
 */
6
namespace samsonframework\stringconditiontree;
7
8
use samsonframework\stringconditiontree\string\CommonPrefix;
9
use samsonframework\stringconditiontree\string\Structure;
10
11
/**
12
 * Class StringConditionTree
13
 *
14
 * TODO: Remove variable character group markers, move LMP search to string.
15
 *
16
 * @author Vitaly Egorov <[email protected]>
17
 */
18
class StringConditionTree
19
{
20
    /** Tree node root element identifier, needed for recursion */
21
    const ROOT_NAME = '';
22
23
    /** Final tree node branch identifier */
24
    const SELF_NAME = '@self';
25
26
    /** String parameter start marker */
27
    const PARAMETER_START = '{';
28
29
    /** String parameter end marker */
30
    const PARAMETER_END = '}';
31
32
    /** Parameter sorting length value for counting */
33
    const PARAMETER_COF = 2000;
34
35
    /** @var TreeNode Resulting collection for debugging */
36
    protected $debug;
37
38
    /** @var array Collection of input string => identifier */
39
    protected $source;
40
41
    /** @var string Parametrized string start marker */
42
    protected $parameterStartMarker = self::PARAMETER_START;
43
44
    /** @var string Parametrized string end marker */
45
    protected $parameterEndMarker = self::PARAMETER_END;
46
47
    /**
48
     * StringConditionTree constructor.
49
     *
50
     * @param string               $parameterStartMarker Parametrized string start marker
51
     * @param string               $parameterEndMarker   Parametrized string end marker
52
     */
53 1
    public function __construct(
54
        string $parameterStartMarker = self::PARAMETER_START,
55
        string $parameterEndMarker = self::PARAMETER_END
56
    ) {
57 1
        $this->parameterStartMarker = $parameterStartMarker;
58 1
        $this->parameterEndMarker = $parameterEndMarker;
59 1
    }
60
61
    /**
62
     * Build similarity strings tree.
63
     *
64
     * @param array $input Collection of strings
65
     *
66
     * @return TreeNode Resulting similarity strings tree
67
     */
68 1
    public function process(array $input): TreeNode
69
    {
70 1
        $this->source = $input;
71
72
        /**
73
         * We need to find first matching character that present at least at one two string
74
         * to start building tree. Otherwise there is no reason to build tree.
75
         */
76 1
        $this->innerProcessor(self::ROOT_NAME, array_keys($input), $this->debug = new TreeNode());
77
78 1
        return $this->debug->children[self::ROOT_NAME];
79
    }
80
81
    /**
82
     * Recursive string similarity tree builder.
83
     *
84
     * @param string   $prefix
85
     * @param array    $input
86
     * @param TreeNode $result
87
     */
88 1
    protected function innerProcessor(string $prefix, array $input, TreeNode $result)
89
    {
90
        // Create tree node. Pass string identifier if present
91 1
        $newChild = $result->append($prefix, $this->source[$result->fullValue . $prefix] ?? '');
92
93
        /**
94
         * Iterate all combinations of strings and group by LMP, also if no LMP is
95
         * found consider strings as LMP itself
96
         */
97 1
        $longestPrefixes = $this->getLMPCollection($input);
98
99
        /**
100
         * Sort LMPs(array keys) ascending by key length
101
         */
102 1
        $longestPrefixes = $this->sortArrayByKeys($longestPrefixes);
103
104
        /**
105
         * Iterate all sorted LMP strings and remove duplicates from LMP string ordered lower
106
         */
107 1
        $this->filterLMPStrings($longestPrefixes);
108
109
        /**
110
         * After filtering LMPs remove LMP from matched string arrays
111
         */
112 1
        $longestPrefixes = $this->removeKeyFromArrayStrings($longestPrefixes);
113
114
        /**
115
         * Recursively iterate current level LMPs
116
         */
117 1
        foreach ($longestPrefixes as $longestPrefix => $strings) {
118 1
            $this->innerProcessor((string)$longestPrefix, $strings, $newChild);
119
        }
120 1
    }
121
122
    /**
123
     * Get collection of grouped longest matching prefixes with strings sub-array.
124
     *
125
     * @param array $input Input strings array
126
     *
127
     * @return array Longest matching prefixes array
128
     */
129 1
    protected function getLMPCollection(array $input): array
130
    {
131 1
        $longestPrefixes = [];
132 1
        foreach ($input as $index => $string) {
133 1
            $initialStructure = new Structure($string);
134 1
            foreach ($input as $comparedString) {
135 1
                if ($string !== $comparedString) {
136 1
                    $comparedStructure = new Structure($comparedString);
137 1
                    if (($longestPrefix = $initialStructure->getCommonPrefix($comparedStructure)) !== '') {
138 1
                        $longestPrefixes[][] = $comparedString;
139
                    }
140
141 1
                    if ($longestPrefix === '//') {
142 1
                        var_dump(1);
0 ignored issues
show
Security Debugging Code introduced by
var_dump(1); looks like debug code. Are you sure you do not want to remove it? This might expose sensitive data.
Loading history...
143
                    }
144
                }
145
            }
146
        }
147
148 1
        $longestPrefixes = [];
149 1
        for ($i = 0, $length = count($input); $i < $length; $i++) {
150 1
            $initial = $input[$i];
151 1
            $foundLMP = false;
152 1
            for ($j = 0; $j < $length; $j++) {
153 1
                $compared = $input[$j];
154 1
                if ($compared !== $initial) {
155 1
                    $longestMatchedPrefix = $this->getLongestMatchingPrefix($initial, $compared);
156
157
                    // We have found at least one matching character between strings
158 1
                    if ($longestMatchedPrefix !== '') {
159 1
                        $foundLMP = true;
160 1
                        $this->addUniqueToArray($initial, $longestPrefixes[$longestMatchedPrefix]);
161
                    }
162
                }
163
            }
164
165
            // Add initial string as LMP
166 1
            if ($foundLMP === false) {
167 1
                $this->addUniqueToArray($initial, $longestPrefixes[$initial]);
168
            }
169
        }
170
171 1
        return $longestPrefixes;
172
    }
173
174
    /**
175
     * Find longest matching prefix between two strings.
176
     *
177
     * @param string $initialString  Initial string
178
     * @param string $comparedString Compared string
179
     *
180
     * TODO: Refactor this method
181
     *
182
     * @return string Longest matching prefix
183
     */
184 1
    protected function getLongestMatchingPrefix(string $initialString, string $comparedString): string
185
    {
186
        // Iterate and compare how string matches
187 1
        $longestPrefix = '';
188
189
        // Define shortest & longest strings to avoid errors
190 1
        $initialLength = strlen($initialString);
191 1
        $comparedLength = strlen($comparedString);
192
193
        // Define shortest and longest strings to avoid character matching errors
194 1
        $shortestString = $initialLength < $comparedLength ? $initialString : $comparedString;
195 1
        $longestString = $initialLength >= $comparedLength ? $initialString : $comparedString;
196
197
        // Iterate initial string characters
198 1
        $isPattern = false;
199 1
        $parametrizedPrefix = '';
200 1
        for ($z = 0, $length = strlen($shortestString); $z < $length; $z++) {
201
            // Pattern support
202 1
            if ($isPattern || $shortestString{$z} === $this->parameterStartMarker) {
203 1
                $isPattern = true;
204
205
                // Concatenate longest matching prefix
206 1
                $parametrizedPrefix .= $shortestString{$z};
207
208
                // Compare characters with compared string
209 1
                if ($shortestString{$z} !== $longestString{$z}) {
210 1
                    break;
211
                }
212
213
                // If pattern id closed unset flag for special behaviour
214 1
                if ($shortestString{$z} === $this->parameterEndMarker) {
215
                    // If parametrized part ended append to longest matching prefix
216 1
                    $longestPrefix .= $parametrizedPrefix;
217
                    // Clear parametrized prefix as we can have other parametrized parts
218 1
                    $parametrizedPrefix = '';
219
                    // Reset parametrized flag
220 1
                    $isPattern = false;
221
                }
222
            } else {
223
                // Compare characters with compared string
224 1
                if ($shortestString{$z} !== $longestString{$z}) {
225 1
                    break; // Exit on first mismatching character
226
                }
227
228
                // Concatenate matching part of two strings
229 1
                $longestPrefix .= $initialString{$z};
230
            }
231
        }
232
233 1
        return $longestPrefix;
234
    }
235
236
    /**
237
     * Add only unique value to array.
238
     *
239
     * @param mixed $value  Unique value
240
     * @param array $array  Array for adding unique value
241
     * @param bool  $strict Strict uniqueness check
242
     *
243
     * @return bool True if unique value was added
244
     * @see in_array();
245
     *
246
     */
247 1
    protected function addUniqueToArray($value, &$array, bool $strict = true)
248
    {
249
        // Create array if not array is passed
250 1
        if (!is_array($array)) {
251 1
            $array = [];
252
        }
253
254
        // Add value to array if it is unique
255 1
        if (!in_array($value, $array, $strict)) {
256 1
            $array[] = $value;
257
258 1
            return true;
259
        }
260
261
        // Value is already in array
262 1
        return false;
263
    }
264
265
    /**
266
     * Sort strings array considering PCG and NPCG string structure.
267
     *
268
     * @param array $input Input array for sorting
269
     *
270
     * @return array Sorted keys array
271
     */
272 1
    public function sortArrayByKeys(array $input): array
273
    {
274
        /** @var Structure[] $structures */
275 1
        $structures = [];
276 1
        foreach (array_keys($input) as $string) {
277 1
            $structures[] = new Structure($string);
278
        }
279
280 1
        usort($structures, function (Structure $initial, Structure $compared) {
281 1
            return $initial->compare($compared);
282 1
        });
283
284 1
        $structures = array_reverse($structures);
285
286
        // Restore initial strings sub-arrays
287 1
        $result = [];
288 1
        foreach ($structures as $structure) {
289 1
            $result[$structure->input] = $input[$structure->input];
290
        }
291
292 1
        return $result;
293
    }
294
295
    /**
296
     * Iterate LMP and remove duplicate strings in other LMPs.
297
     *
298
     * @param array $prefixes LMP collection, returning value
299
     */
300 1
    protected function filterLMPStrings(array &$prefixes)
301
    {
302 1
        $keys = array_keys($prefixes);
303 1
        for ($i = 0, $length = count($keys); $i < $length; $i++) {
304 1
            for ($j = $i + 1; $j < $length; $j++) {
305 1
                $this->removeDuplicatesInSubArray($prefixes[$keys[$i]], $prefixes[$keys[$j]]);
306
            }
307
        }
308 1
    }
309
310
    /**
311
     * Find all duplication of source array values in compared array and remove them.
312
     *
313
     * @param array $source   Source array
314
     * @param array $compared Compared array for filtering duplicates
315
     */
316 1
    protected function removeDuplicatesInSubArray(array $source, array &$compared)
317
    {
318 1
        foreach ($source as $value) {
319 1
            foreach ($compared as $key => &$subValue) {
320 1
                if ($subValue === $value) {
321 1
                    unset($compared[$key]);
322
                }
323
            }
324
        }
325 1
    }
326
327
    /**
328
     * Remove key string from the beginning of all sub-array strings.
329
     *
330
     * @param array $array Input array of key => [keyStrings...]
331
     *
332
     * @return array Processed array with removed keys from beginning of sub arrays
333
     */
334 1
    protected function removeKeyFromArrayStrings(array $array): array
335
    {
336 1
        $processed = [];
337
        /** @var string[] $stringsCollection */
338 1
        foreach ($array as $keyString => $stringsCollection) {
339 1
            $lmpLength = strlen((string)$keyString);
340 1
            foreach ($stringsCollection as $stringValue) {
341
                // Remove LMP from string
342 1
                $newString = substr($stringValue, $lmpLength);
343
344
                // This string has something left besides lmp
345 1
                if ($newString !== false && $newString !== '') {
346 1
                    $processed[$keyString][] = $newString;
347 1
                } elseif (array_key_exists($keyString, $processed) === false) {
348
                    // Add empty array to LMP if missing
349 1
                    $processed[$keyString] = [];
350
                }
351
            }
352
        }
353
354 1
        return $processed;
355
    }
356
}
357