Passed
Push — master ( 62f2ce...e10342 )
by Vitaly
13:06
created

StringConditionTree::prefixLength()   B

Complexity

Conditions 6
Paths 10

Size

Total Lines 43
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 43
rs 8.439
c 0
b 0
f 0
cc 6
eloc 17
nc 10
nop 1
1
<?php declare(strict_types = 1);
2
/**
3
 * Created by Vitaly Iegorov <[email protected]>.
4
 * on 02.03.17 at 13:25
5
 */
6
namespace samsonframework\stringconditiontree;
7
use phpDocumentor\Reflection\Types\Integer;
8
9
/**
10
 * Class StringConditionTree
11
 *
12
 * @author Vitaly Egorov <[email protected]>
13
 */
14
class StringConditionTree
15
{
16
    /** Tree node root element identifier, needed for recursion */
17
    const ROOT_NAME = '';
18
19
    /** Final tree node branch identifier */
20
    const SELF_NAME = '@self';
21
22
    /** String parameter start marker */
23
    const PARAMETER_START = '{';
24
25
    /** String parameter end marker */
26
    const PARAMETER_END = '}';
27
28
    /** Parameter sorting length value for counting */
29
    const PARAMETER_COF = 2000;
30
31
    /** @var TreeNode Resulting collection for debugging */
32
    protected $debug;
33
34
    /** @var string Parametrized string start marker */
35
    protected $parameterStartMarker = self::PARAMETER_START;
36
37
    /** @var string Parametrized string end marker */
38
    protected $parameterEndMarker = self::PARAMETER_END;
39
40
    /**
41
     * StringConditionTree constructor.
42
     *
43
     * @param string $parameterStartMarker Parametrized string start marker
44
     * @param string $parameterEndMarker Parametrized string end marker
45
     */
46
    public function __construct(string $parameterStartMarker = self::PARAMETER_START, string $parameterEndMarker = self::PARAMETER_END)
47
    {
48
        $this->parameterStartMarker = $parameterStartMarker;
49
        $this->parameterEndMarker = $parameterEndMarker;
50
    }
51
52
    /**
53
     * Build similarity strings tree.
54
     *
55
     * @param array $input Collection of strings
56
     *
57
     * @return TreeNode Resulting similarity strings tree
58
     */
59
    public function process(array $input): TreeNode
60
    {
61
        /**
62
         * We need to find first matching character that present at least at one two string
63
         * to start building tree. Otherwise there is no reason to build tree.
64
         */
65
        $this->innerProcessor(self::ROOT_NAME, $input, $this->debug = new TreeNode());
66
67
        $this->debug = $this->debug->children[self::ROOT_NAME];
68
69
        return $this->debug;
70
    }
71
72
    /**
73
     * Prefix length counter for array sorting callback to sort by prefix length and put
74
     * parametrized prefixed at the end.
75
     *
76
     * @param string $prefix Prefix string
77
     *
78
     * @return int Prefix length
79
     */
80
    protected function prefixLength(string $prefix): int
81
    {
82
        static $len = [];
83
84
        /**
85
         * Parametrized string comparison.
86
         *
87
         * We need to follow rules:
88
         *  1. Strings starting with not parametrized character(NPC)
89
         *  should have lower value than string starting with parametrized character groups(PCg).
90
         *  2. Strings having multiple PCG should be ordered according PCG count.
91
         *  3. Strings starting with NPC and having PCG should have more priority
92
         *  over only PCG strings.
93
         */
94
95
        $value = 0;
96
        $isParameter = false;
97
        $parametersCount = substr_count($prefix, $this->parameterStartMarker);
98
99
        for ($i = 0, $length = strlen($prefix); $i < $length; $i++) {
100
            if ($prefix{$i} === $this->parameterStartMarker) {
101
                $isParameter = true;
102
                $value += self::PARAMETER_COF / $parametersCount;
103
            } elseif ($prefix{$i} === $this->parameterEndMarker) {
104
                $isParameter = false;
105
            } elseif (!$isParameter) {
106
                $value += 2 * ord($prefix{$i});
107
            }
108
        }
109
110
        $len[$prefix] = (int)$value;
111
112
            /*strpos($prefix, $this->parameterStartMarker) !== false
113
            // Count length considering amount of parameters and total prefix length
114
            ? (substr_count($prefix, $this->parameterStartMarker) * self::PARAMETER_COF) - strlen($prefix)
115
            : strlen($prefix);*/
116
117
        if ($prefix === '{id}/{search}') {
118
            var_dump(1);
0 ignored issues
show
Security Debugging Code introduced by
var_dump(1); looks like debug code. Are you sure you do not want to remove it? This might expose sensitive data.
Loading history...
119
        }
120
121
        return $len[$prefix];
122
    }
123
124
    /**
125
     * Sort array by key string lengths.
126
     *
127
     * @param array $input Input array for sorting
128
     * @param int   $order Sorting order
129
     */
130
    protected function sortArrayByKeys(array &$input, int $order = SORT_ASC)
131
    {
132
        array_multisort(array_map([$this, 'prefixLength'], array_keys($input)), $order, $input);
0 ignored issues
show
Bug introduced by
array_map(array($this, '...'), array_keys($input)) cannot be passed to array_multisort() as the parameter $arr expects a reference.
Loading history...
133
    }
134
135
    /**
136
     * Add only unique value to array.
137
     *
138
     * @param mixed $value  Unique value
139
     * @param array $array  Array for adding unique value
140
     * @param bool  $strict Strict uniqueness check
141
     *
142
     * @see in_array();
143
     *
144
     * @return bool True if unique value was added
145
     */
146
    protected function addUniqueToArray($value, &$array, bool $strict = true)
147
    {
148
        // Create array if not array is passed
149
        if (!is_array($array)) {
150
            $array = [];
151
        }
152
153
        // Add value to array if it is unique
154
        if (!in_array($value, $array, $strict)) {
155
            $array[] = $value;
156
157
            return true;
158
        } else { // Value is already in array
159
            return false;
160
        }
161
    }
162
163
    /**
164
     * Find longest matching prefix between two strings.
165
     *
166
     * @param string $initialString  Initial string
167
     * @param string $comparedString Compared string
168
     *
169
     * @return string Longest matching prefix
170
     */
171
    protected function getLongestMatchingPrefix(string $initialString, string $comparedString): string
172
    {
173
        // Iterate and compare how string matches
174
        $longestPrefix = '';
175
176
        // Define shortest & longest strings to avoid errors
177
        $initialLength = strlen($initialString);
178
        $comparedLength = strlen($comparedString);
179
180
        // Define shortest and longest strings to avoid character matching errors
181
        $shortestString = $initialLength < $comparedLength ? $initialString : $comparedString;
182
        $longestString = $initialLength >= $comparedLength ? $initialString : $comparedString;
183
184
        // Iterate initial string characters
185
        $isPattern = false;
186
        $parametrizedPrefix = '';
187
        for ($z = 0, $length = strlen($shortestString); $z < $length; $z++) {
188
            // Pattern support
189
            // TODO: Probably can be optimized
190
            if ($isPattern || $shortestString{$z} === $this->parameterStartMarker) {
191
                $isPattern = true;
192
193
                // Concatenate longest matching prefix
194
                $parametrizedPrefix .= $shortestString{$z};
195
196
                // Compare characters with compared string
197
                if ($shortestString{$z} !== $longestString{$z}) {
198
                    // Clear pattern as pattern characters mismatch
199
                    $parametrizedPrefix = '';
0 ignored issues
show
Unused Code introduced by
$parametrizedPrefix is not used, you could remove the assignment.

This check looks for variable assignements that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.

Loading history...
200
                    break;
201
                }
202
203
                // If pattern id closed unset flag fro special behaviour
204
                if ($shortestString{$z} === $this->parameterEndMarker) {
205
                    // If parametrized part ended append to longest matching prefix
206
                    $longestPrefix .= $parametrizedPrefix;
207
                    // Clear parametrized prefix as we can have other parametrized parts
208
                    $parametrizedPrefix = '';
209
                    // Reset parametrized flag
210
                    $isPattern = false;
211
                }
212
            } else {
213
                // Compare characters with compared string
214
                if ($shortestString{$z} !== $longestString{$z}) {
215
                    break; // Exit on first mismatching character
216
                }
217
218
                // Concatenate matching part of two strings
219
                $longestPrefix .= $initialString{$z};
220
            }
221
        }
222
223
        return $longestPrefix;
224
    }
225
226
    /**
227
     * Remove key string from the beginning of all sub-array strings.
228
     *
229
     * @param array  $array Input array of key => [keyStrings...]
230
     *
231
     * @param string $selfMarker Marker for storing self pointer
232
     *
233
     * @return array Processed array with removed keys from beginning of sub arrays
234
     */
235
    protected function removeKeyFromArrayStrings(array $array, string $selfMarker): array
236
    {
237
        $result = [];
238
        /** @var string[] $values */
239
        foreach ($array as $key => $values) {
240
            $lmpLength = strlen((string)$key);
241
            foreach ($values as $string) {
242
                $newString = substr($string, $lmpLength);
243
244
                if ($newString === false || $newString === '' || $string === $selfMarker) {
245
                    $result[$key][] = $selfMarker;
246
                } else {
247
                    $result[$key][] = $newString;
248
                }
249
            }
250
        }
251
252
        return $result;
253
    }
254
255
    /**
256
     * Find all duplication of source array values in compared array and remove them.
257
     *
258
     * @param array $source Source array
259
     * @param array $compared Compared array for filtering duplicates
260
     */
261
    protected function removeDuplicatesInSubArray(array $source, array &$compared)
262
    {
263
        foreach ($source as $value) {
264
            foreach ($compared as $key => &$subValue) {
265
                if ($subValue === $value) {
266
                    unset($compared[$key]);
267
                }
268
            }
269
        }
270
    }
271
272
    /**
273
     * Recursive string similarity tree builder.
274
     *
275
     * @param string   $prefix
276
     * @param array    $input
277
     * @param TreeNode $result
278
     * @param string   $selfMarker
279
     */
280
    protected function innerProcessor(string $prefix, array $input, TreeNode $result, $selfMarker = self::SELF_NAME)
281
    {
282
        // Create tree node
283
        $newChild = $result->append($prefix);
284
285
        /**
286
         * Iterate all combinations of strings and group by LMP
287
         */
288
        $longestPrefixes = [];
289
        for ($i = 0, $count = count($input); $i < $count; $i++) {
290
            for ($j = $i + 1; $j < $count; $j++) {
291
                $longestMatchedPrefix = $this->getLongestMatchingPrefix($input[$i], $input[$j]);
292
293
                // We have found at least one matching character between strings
294
                if ($longestMatchedPrefix !== '') {
295
                    $this->addUniqueToArray($input[$i], $longestPrefixes[$longestMatchedPrefix]);
296
                    $this->addUniqueToArray($input[$j], $longestPrefixes[$longestMatchedPrefix]);
297
                }
298
            }
299
        }
300
301
        /**
302
         * Sort LMPs(array keys) ascending by key length
303
         */
304
        $this->sortArrayByKeys($longestPrefixes);
305
306
        /**
307
         * Iterate all sorted LMP strings and remove duplicates from LMP string ordered lower
308
         */
309
        $keys = array_keys($longestPrefixes);
310
        for ($i = 0, $length = count($keys); $i < $length; $i++) {
311
            for ($j = $i + 1; $j < $length; $j++) {
312
                $this->removeDuplicatesInSubArray($longestPrefixes[$keys[$i]], $longestPrefixes[$keys[$j]]);
313
            }
314
        }
315
316
        // Remove empty LMPs as they are included in smaller LMPs
317
        $longestPrefixes = array_filter($longestPrefixes);
318
319
        /**
320
         * Search for input string that do not have LMP, and add missing as LMP
321
         */
322
        foreach ($input as $string) {
323
            $found = false;
324
325
            if ($string !== $selfMarker) {
326
                foreach ($longestPrefixes as $strings) {
327
                    if (in_array($string, $strings, true)) {
328
                        $found = true;
329
                        break;
330
                    }
331
                }
332
333
                if (!$found) {
334
                    $longestPrefixes[$string] = [$selfMarker];
335
                }
336
            }
337
        }
338
339
        /**
340
         * After filtering LMPs remove LMP from matched string arrays
341
         */
342
        $longestPrefixes = $this->removeKeyFromArrayStrings($longestPrefixes, $selfMarker);
343
344
        /**
345
         * Sort LMPs(array keys) ascending by key length
346
         */
347
        $this->sortArrayByKeys($longestPrefixes);
348
349
        /**
350
         * If we have self marker as an input string - create LMP for it
351
         */
352
        if (in_array($selfMarker, $input, true)) {
353
            $longestPrefixes = array_merge([$selfMarker => []], $longestPrefixes);
354
        }
355
356
        /**
357
         * Recursively iterate current level LMPs
358
         */
359
        foreach ($longestPrefixes as $longestPrefix => $strings) {
360
            $this->innerProcessor((string)$longestPrefix, $strings, $newChild);
361
        }
362
    }
363
}
364