Passed
Push — master ( e29dc7...4b66e1 )
by Vitaly
06:54
created

StringConditionTree::process()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 10
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 10
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 3
nc 1
nop 1
1
<?php declare(strict_types = 1);
2
/**
3
 * Created by Vitaly Iegorov <[email protected]>.
4
 * on 02.03.17 at 13:25
5
 */
6
namespace samsonframework\stringconditiontree;
7
8
/**
9
 * Class StringConditionTree
10
 *
11
 * @author Vitaly Egorov <[email protected]>
12
 */
13
class StringConditionTree
14
{
15
    const ROOT_NAME = '';
16
    const SELF_NAME = '@self';
17
18
    /** @var TreeNode Resulting collection for debugging */
19
    protected $debug;
20
21
    /** @var callable TreeNode value handler */
22
    protected $treeNodeValueHandler;
23
24
    /**
25
     * Set TreeNode value handler.
26
     *
27
     * @param callable $handler TreeNode value handler
28
     */
29
    public function setTreeNodeValueHandler(callable $handler)
30
    {
31
        $this->treeNodeValueHandler = $handler;
32
    }
33
34
    /**
35
     * Build similarity strings tree.
36
     *
37
     * @param array $input Collection of strings
38
     *
39
     * @return TreeNode Resulting similarity strings tree
40
     */
41
    public function process(array $input): TreeNode
42
    {
43
        /**
44
         * We need to find first matching character that present at least at one two string
45
         * to start building tree. Otherwise there is no reason to build tree.
46
         */
47
        $this->innerProcessor(self::ROOT_NAME, $input, $this->debug = new TreeNode());
48
49
        return $this->debug;
50
    }
51
52
    /**
53
     * Sort array by key string lengths.
54
     *
55
     * @param array $input Input array for sorting
56
     * @param int   $order Sorting order
57
     */
58
    protected function sortArrayByKeys(array &$input, int $order = SORT_ASC)
59
    {
60
        array_multisort(array_map('strlen', array_keys($input)), $order, $input);
0 ignored issues
show
Bug introduced by
array_map('strlen', array_keys($input)) cannot be passed to array_multisort() as the parameter $arr expects a reference.
Loading history...
61
    }
62
63
    /**
64
     * Add only unique value to array.
65
     *
66
     * @param mixed $value  Unique value
67
     * @param array $array  Array for adding unique value
68
     * @param bool  $strict Strict uniqueness check
69
     *
70
     * @see in_array();
71
     *
72
     * @return bool True if unique value was added
73
     */
74
    protected function addUniqueToArray($value, &$array, bool $strict = true)
75
    {
76
        // Create array if not array is passed
77
        if (!is_array($array)) {
78
            $array = [];
79
        }
80
81
        // Add value to array if it is unique
82
        if (!in_array($value, $array, $strict)) {
83
            $array[] = $value;
84
85
            return true;
86
        } else { // Value is already in array
87
            return false;
88
        }
89
    }
90
91
    /**
92
     * Find longest matching prefix between two strings.
93
     *
94
     * @param string $initialString  Initial string
95
     * @param string $comparedString Compared string
96
     *
97
     * @return string Longest matching prefix
98
     */
99
    protected function getLongestMatchingPrefix(string $initialString, string $comparedString): string
100
    {
101
        // Iterate and compare how string matches
102
        $longestPrefix = '';
103
104
        // Define shortest & longest strings to avoid errors
105
        $initialLength = strlen($initialString);
106
        $comparedLength = strlen($comparedString);
107
108
        // Define shortest and longest strings to avoid character matching errors
109
        $shortestString = $initialLength < $comparedLength ? $initialString : $comparedString;
110
        $longestString = $initialLength >= $comparedLength ? $initialString : $comparedString;
111
112
        // Iterate initial string characters
113
        $isPattern = false;
114
        for ($z = 0, $length = strlen($shortestString); $z < $length; $z++) {
115
            // Pattern support
116
            // TODO: Probably can be optimized
117
            if ($isPattern || $shortestString{$z} === '{') {
118
                $isPattern = true;
119
120
                // Concatenate longest matching prefix
121
                $longestPrefix .= $shortestString{$z};
122
123
                // Compare characters with compared string
124
                if ($shortestString{$z} !== $longestString{$z}) {
125
                    // Clear pattern as pattern characters mismatch
126
                    $longestPrefix = '';
127
                    break;
128
                }
129
130
                // If pattern id closed unset flag fro special behaviour
131
                if ($shortestString{$z} === '}') {
132
                    $isPattern = false;
133
                }
134
            } else {
135
                // Compare characters with compared string
136
                if ($shortestString{$z} !== $longestString{$z}) {
137
                    break; // Exit on first mismatching character
138
                }
139
140
                // Concatenate matching part of two strings
141
                $longestPrefix .= $initialString{$z};
142
            }
143
        }
144
145
        return $longestPrefix;
146
    }
147
148
    /**
149
     * Remove key string from the beginning of all sub-array strings.
150
     *
151
     * @param array  $array Input array of key => [keyStrings...]
152
     *
153
     * @param string $selfMarker Marker for storing self pointer
154
     *
155
     * @return array Processed array with removed keys from beginning of sub arrays
156
     */
157
    protected function removeKeyFromArrayStrings(array $array, string $selfMarker): array
158
    {
159
        $result = [];
160
        /** @var string[] $values */
161
        foreach ($array as $key => $values) {
162
            $lmpLength = strlen((string)$key);
163
            foreach ($values as $string) {
164
                $newString = substr($string, $lmpLength);
165
166
                if ($newString === '' || $string === $selfMarker) {
167
                    $result[$key][] = $selfMarker;
168
                } else {
169
                    $result[$key][] = $newString;
170
                }
171
            }
172
        }
173
174
        return $result;
175
    }
176
177
    /**
178
     * Find all duplication of source array values in compared array and remove them.
179
     *
180
     * @param array $source Source array
181
     * @param array $compared Compared array for filtering duplicates
182
     */
183
    protected function removeDuplicatesInSubArray(array $source, array &$compared)
184
    {
185
        foreach ($source as $value) {
186
            foreach ($compared as $key => &$subValue) {
187
                if ($subValue === $value) {
188
                    unset($compared[$key]);
189
                }
190
            }
191
        }
192
    }
193
194
    /**
195
     * Recursive string similarity tree builder.
196
     *
197
     * @param string   $prefix
198
     * @param array    $input
199
     * @param TreeNode $result
200
     * @param string   $selfMarker
201
     */
202
    protected function innerProcessor(string $prefix, array $input, TreeNode $result, $selfMarker = self::SELF_NAME)
203
    {
204
        // Rewrite prefix if TreeNode value handler is present
205
        $prefix = is_callable($this->treeNodeValueHandler) ? call_user_func($this->treeNodeValueHandler, $prefix) : $prefix;
206
207
        // Create tree node
208
        $newChild = $result->append($prefix);
209
210
        /**
211
         * Iterate all combinations of strings and group by LMP
212
         */
213
        $longestPrefixes = [];
214
        for ($i = 0, $count = count($input); $i < $count; $i++) {
215
            for ($j = $i + 1; $j < $count; $j++) {
216
                $longestMatchedPrefix = $this->getLongestMatchingPrefix($input[$i], $input[$j]);
217
218
                // We have found at least one matching character between strings
219
                if ($longestMatchedPrefix !== '') {
220
                    $this->addUniqueToArray($input[$i], $longestPrefixes[$longestMatchedPrefix]);
221
                    $this->addUniqueToArray($input[$j], $longestPrefixes[$longestMatchedPrefix]);
222
                }
223
            }
224
        }
225
226
        /**
227
         * Sort LMPs(array keys) ascending by key length
228
         */
229
        $this->sortArrayByKeys($longestPrefixes);
230
231
        /**
232
         * Iterate all sorted LMP strings and remove duplicates from LMP string ordered lower
233
         */
234
        $keys = array_keys($longestPrefixes);
235
        for ($i = 0, $length = count($keys); $i < $length; $i++) {
236
            for ($j = $i + 1; $j < $length; $j++) {
237
                $this->removeDuplicatesInSubArray($longestPrefixes[$keys[$i]], $longestPrefixes[$keys[$j]]);
238
            }
239
        }
240
241
        // Remove empty LMPs as they are included in smaller LMPs
242
        $longestPrefixes = array_filter($longestPrefixes);
243
244
        /**
245
         * Search for input string that do not have LMP, and add missing as LMP
246
         */
247
        foreach ($input as $string) {
248
            $found = false;
249
250
            if ($string !== $selfMarker) {
251
                foreach ($longestPrefixes as $strings) {
252
                    if (in_array($string, $strings, true)) {
253
                        $found = true;
254
                        break;
255
                    }
256
                }
257
258
                if (!$found) {
259
                    $longestPrefixes[$string] = [$selfMarker];
260
                }
261
            }
262
        }
263
264
        /**
265
         * After filtering LMPs remove LMP from matched string arrays
266
         */
267
        $longestPrefixes = $this->removeKeyFromArrayStrings($longestPrefixes, $selfMarker);
268
269
        /**
270
         * If we have self marker as an input string - create LMP for it
271
         */
272
        if (in_array($selfMarker, $input, true)) {
273
            $longestPrefixes = array_merge([$selfMarker => []], $longestPrefixes);
274
        }
275
276
        /**
277
         * Recursively iterate current level LMPs
278
         */
279
        foreach ($longestPrefixes as $longestPrefix => $strings) {
280
            $this->innerProcessor((string)$longestPrefix, $strings, $newChild);
281
        }
282
    }
283
}
284