Passed
Push — master ( 9506ee...b51029 )
by Vitaly
18:31 queued 08:49
created

StringConditionTree::prefixLength()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 2
eloc 2
nc 2
nop 1
1
<?php declare(strict_types = 1);
2
/**
3
 * Created by Vitaly Iegorov <[email protected]>.
4
 * on 02.03.17 at 13:25
5
 */
6
namespace samsonframework\stringconditiontree;
7
use phpDocumentor\Reflection\Types\Integer;
8
9
/**
10
 * Class StringConditionTree
11
 *
12
 * @author Vitaly Egorov <[email protected]>
13
 */
14
class StringConditionTree
15
{
16
    /** Tree node root element identifier, needed for recursion */
17
    const ROOT_NAME = '';
18
19
    /** Final tree node branch identifier */
20
    const SELF_NAME = '@self';
21
22
    /** String parameter start marker */
23
    const PARAMETER_START = '{';
24
25
    /** String parameter end marker */
26
    const PARAMETER_END = '}';
27
28
    /** @var TreeNode Resulting collection for debugging */
29
    protected $debug;
30
31
    /**
32
     * Build similarity strings tree.
33
     *
34
     * @param array $input Collection of strings
35
     *
36
     * @return TreeNode Resulting similarity strings tree
37
     */
38
    public function process(array $input): TreeNode
39
    {
40
        /**
41
         * We need to find first matching character that present at least at one two string
42
         * to start building tree. Otherwise there is no reason to build tree.
43
         */
44
        $this->innerProcessor(self::ROOT_NAME, $input, $this->debug = new TreeNode());
45
46
        $this->debug = $this->debug->children[self::ROOT_NAME];
47
48
        return $this->debug;
49
    }
50
51
    /**
52
     * Prefix length counter for array sorting callback to sort by prefix length and put
53
     * parametrized prefixed at the end.
54
     *
55
     * @param string $prefix Prefix string
56
     *
57
     * @return int Prefix length
58
     */
59
    protected function prefixLength(string $prefix): int
60
    {
61
        return strpos($prefix, self::PARAMETER_START) ? PHP_INT_MAX : strlen($prefix);
62
    }
63
64
    /**
65
     * Sort array by key string lengths.
66
     *
67
     * @param array $input Input array for sorting
68
     * @param int   $order Sorting order
69
     */
70
    protected function sortArrayByKeys(array &$input, int $order = SORT_ASC)
71
    {
72
        array_multisort(array_map([$this, 'prefixLength'], array_keys($input)), $order, $input);
0 ignored issues
show
Bug introduced by
array_map(array($this, '...'), array_keys($input)) cannot be passed to array_multisort() as the parameter $arr expects a reference.
Loading history...
73
    }
74
75
    /**
76
     * Add only unique value to array.
77
     *
78
     * @param mixed $value  Unique value
79
     * @param array $array  Array for adding unique value
80
     * @param bool  $strict Strict uniqueness check
81
     *
82
     * @see in_array();
83
     *
84
     * @return bool True if unique value was added
85
     */
86
    protected function addUniqueToArray($value, &$array, bool $strict = true)
87
    {
88
        // Create array if not array is passed
89
        if (!is_array($array)) {
90
            $array = [];
91
        }
92
93
        // Add value to array if it is unique
94
        if (!in_array($value, $array, $strict)) {
95
            $array[] = $value;
96
97
            return true;
98
        } else { // Value is already in array
99
            return false;
100
        }
101
    }
102
103
    /**
104
     * Find longest matching prefix between two strings.
105
     *
106
     * @param string $initialString  Initial string
107
     * @param string $comparedString Compared string
108
     *
109
     * @return string Longest matching prefix
110
     */
111
    protected function getLongestMatchingPrefix(string $initialString, string $comparedString): string
112
    {
113
        // Iterate and compare how string matches
114
        $longestPrefix = '';
115
116
        // Define shortest & longest strings to avoid errors
117
        $initialLength = strlen($initialString);
118
        $comparedLength = strlen($comparedString);
119
120
        // Define shortest and longest strings to avoid character matching errors
121
        $shortestString = $initialLength < $comparedLength ? $initialString : $comparedString;
122
        $longestString = $initialLength >= $comparedLength ? $initialString : $comparedString;
123
124
        // Iterate initial string characters
125
        $isPattern = false;
126
        for ($z = 0, $length = strlen($shortestString); $z < $length; $z++) {
127
            // Pattern support
128
            // TODO: Probably can be optimized
129
            if ($isPattern || $shortestString{$z} === '{') {
130
                $isPattern = true;
131
132
                // Concatenate longest matching prefix
133
                $longestPrefix .= $shortestString{$z};
134
135
                // Compare characters with compared string
136
                if ($shortestString{$z} !== $longestString{$z}) {
137
                    // Clear pattern as pattern characters mismatch
138
                    $longestPrefix = '';
139
                    break;
140
                }
141
142
                // If pattern id closed unset flag fro special behaviour
143
                if ($shortestString{$z} === '}') {
144
                    $isPattern = false;
145
                }
146
            } else {
147
                // Compare characters with compared string
148
                if ($shortestString{$z} !== $longestString{$z}) {
149
                    break; // Exit on first mismatching character
150
                }
151
152
                // Concatenate matching part of two strings
153
                $longestPrefix .= $initialString{$z};
154
            }
155
        }
156
157
        return $longestPrefix;
158
    }
159
160
    /**
161
     * Remove key string from the beginning of all sub-array strings.
162
     *
163
     * @param array  $array Input array of key => [keyStrings...]
164
     *
165
     * @param string $selfMarker Marker for storing self pointer
166
     *
167
     * @return array Processed array with removed keys from beginning of sub arrays
168
     */
169
    protected function removeKeyFromArrayStrings(array $array, string $selfMarker): array
170
    {
171
        $result = [];
172
        /** @var string[] $values */
173
        foreach ($array as $key => $values) {
174
            $lmpLength = strlen((string)$key);
175
            foreach ($values as $string) {
176
                $newString = substr($string, $lmpLength);
177
178
                if ($newString === '' || $string === $selfMarker) {
179
                    $result[$key][] = $selfMarker;
180
                } else {
181
                    $result[$key][] = $newString;
182
                }
183
            }
184
        }
185
186
        return $result;
187
    }
188
189
    /**
190
     * Find all duplication of source array values in compared array and remove them.
191
     *
192
     * @param array $source Source array
193
     * @param array $compared Compared array for filtering duplicates
194
     */
195
    protected function removeDuplicatesInSubArray(array $source, array &$compared)
196
    {
197
        foreach ($source as $value) {
198
            foreach ($compared as $key => &$subValue) {
199
                if ($subValue === $value) {
200
                    unset($compared[$key]);
201
                }
202
            }
203
        }
204
    }
205
206
    /**
207
     * Recursive string similarity tree builder.
208
     *
209
     * @param string   $prefix
210
     * @param array    $input
211
     * @param TreeNode $result
212
     * @param string   $selfMarker
213
     */
214
    protected function innerProcessor(string $prefix, array $input, TreeNode $result, $selfMarker = self::SELF_NAME)
215
    {
216
        // Create tree node
217
        $newChild = $result->append($prefix);
218
219
        /**
220
         * Iterate all combinations of strings and group by LMP
221
         */
222
        $longestPrefixes = [];
223
        for ($i = 0, $count = count($input); $i < $count; $i++) {
224
            for ($j = $i + 1; $j < $count; $j++) {
225
                $longestMatchedPrefix = $this->getLongestMatchingPrefix($input[$i], $input[$j]);
226
227
                // We have found at least one matching character between strings
228
                if ($longestMatchedPrefix !== '') {
229
                    $this->addUniqueToArray($input[$i], $longestPrefixes[$longestMatchedPrefix]);
230
                    $this->addUniqueToArray($input[$j], $longestPrefixes[$longestMatchedPrefix]);
231
                }
232
            }
233
        }
234
235
        /**
236
         * Sort LMPs(array keys) ascending by key length
237
         */
238
        $this->sortArrayByKeys($longestPrefixes);
239
240
        /**
241
         * Iterate all sorted LMP strings and remove duplicates from LMP string ordered lower
242
         */
243
        $keys = array_keys($longestPrefixes);
244
        for ($i = 0, $length = count($keys); $i < $length; $i++) {
245
            for ($j = $i + 1; $j < $length; $j++) {
246
                $this->removeDuplicatesInSubArray($longestPrefixes[$keys[$i]], $longestPrefixes[$keys[$j]]);
247
            }
248
        }
249
250
        // Remove empty LMPs as they are included in smaller LMPs
251
        $longestPrefixes = array_filter($longestPrefixes);
252
253
        /**
254
         * Search for input string that do not have LMP, and add missing as LMP
255
         */
256
        foreach ($input as $string) {
257
            $found = false;
258
259
            if ($string !== $selfMarker) {
260
                foreach ($longestPrefixes as $strings) {
261
                    if (in_array($string, $strings, true)) {
262
                        $found = true;
263
                        break;
264
                    }
265
                }
266
267
                if (!$found) {
268
                    $longestPrefixes[$string] = [$selfMarker];
269
                }
270
            }
271
        }
272
273
        /**
274
         * After filtering LMPs remove LMP from matched string arrays
275
         */
276
        $longestPrefixes = $this->removeKeyFromArrayStrings($longestPrefixes, $selfMarker);
277
278
        /**
279
         * If we have self marker as an input string - create LMP for it
280
         */
281
        if (in_array($selfMarker, $input, true)) {
282
            $longestPrefixes = array_merge([$selfMarker => []], $longestPrefixes);
283
        }
284
285
        /**
286
         * Recursively iterate current level LMPs
287
         */
288
        foreach ($longestPrefixes as $longestPrefix => $strings) {
289
            $this->innerProcessor((string)$longestPrefix, $strings, $newChild);
290
        }
291
    }
292
}
293