Passed
Push — master ( 3a58a4...e29dc7 )
by Vitaly
09:32
created

StringConditionTree   A

Complexity

Total Complexity 34

Size/Duplication

Total Lines 249
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Importance

Changes 0
Metric Value
wmc 34
lcom 1
cbo 1
dl 0
loc 249
rs 9.2
c 0
b 0
f 0

8 Methods

Rating   Name   Duplication   Size   Complexity  
A setTreeNodeValueHandler() 0 4 1
A process() 0 10 1
A sortArrayByKeys() 0 4 1
A addUniqueToArray() 0 16 3
B getLongestMatchingPrefix() 0 26 5
B removeKeyFromArrayStrings() 0 19 5
A removeDuplicatesInSubArray() 0 10 4
F innerProcessor() 0 81 14
1
<?php declare(strict_types = 1);
2
/**
3
 * Created by Vitaly Iegorov <[email protected]>.
4
 * on 02.03.17 at 13:25
5
 */
6
namespace samsonframework\stringconditiontree;
7
8
/**
9
 * Class StringConditionTree
10
 *
11
 * @author Vitaly Egorov <[email protected]>
12
 */
13
class StringConditionTree
14
{
15
    const ROOT_NAME = '';
16
    const SELF_NAME = '@self';
17
18
    /** @var TreeNode Resulting collection for debugging */
19
    protected $debug;
20
21
    /** @var callable TreeNode value handler */
22
    protected $treeNodeValueHandler;
23
24
    /**
25
     * Set TreeNode value handler.
26
     *
27
     * @param callable $handler TreeNode value handler
28
     */
29
    public function setTreeNodeValueHandler(callable $handler)
30
    {
31
        $this->treeNodeValueHandler = $handler;
32
    }
33
34
    /**
35
     * Build similarity strings tree.
36
     *
37
     * @param array $input Collection of strings
38
     *
39
     * @return TreeNode Resulting similarity strings tree
40
     */
41
    public function process(array $input): TreeNode
42
    {
43
        /**
44
         * We need to find first matching character that present at least at one two string
45
         * to start building tree. Otherwise there is no reason to build tree.
46
         */
47
        $this->innerProcessor(self::ROOT_NAME, $input, $this->debug = new TreeNode());
48
49
        return $this->debug;
50
    }
51
52
    /**
53
     * Sort array by key string lengths.
54
     *
55
     * @param array $input Input array for sorting
56
     * @param int   $order Sorting order
57
     */
58
    protected function sortArrayByKeys(array &$input, int $order = SORT_ASC)
59
    {
60
        array_multisort(array_map('strlen', array_keys($input)), $order, $input);
0 ignored issues
show
Bug introduced by
array_map('strlen', array_keys($input)) cannot be passed to array_multisort() as the parameter $arr expects a reference.
Loading history...
61
    }
62
63
    /**
64
     * Add only unique value to array.
65
     *
66
     * @param mixed $value  Unique value
67
     * @param array $array  Array for adding unique value
68
     * @param bool  $strict Strict uniqueness check
69
     *
70
     * @see in_array();
71
     *
72
     * @return bool True if unique value was added
73
     */
74
    protected function addUniqueToArray($value, &$array, bool $strict = true)
75
    {
76
        // Create array if not array is passed
77
        if (!is_array($array)) {
78
            $array = [];
79
        }
80
81
        // Add value to array if it is unique
82
        if (!in_array($value, $array, $strict)) {
83
            $array[] = $value;
84
85
            return true;
86
        } else { // Value is already in array
87
            return false;
88
        }
89
    }
90
91
    /**
92
     * Find longest matching prefix between two strings.
93
     *
94
     * @param string $initialString  Initial string
95
     * @param string $comparedString Compared string
96
     *
97
     * @return string Longest matching prefix
98
     */
99
    protected function getLongestMatchingPrefix(string $initialString, string $comparedString): string
100
    {
101
        // Iterate and compare how string matches
102
        $longestPrefix = '';
103
104
        // Define shortest & longest strings to avoid errors
105
        $initialLength = strlen($initialString);
106
        $comparedLength = strlen($comparedString);
107
108
        // Define shortest and longest strings to avoid character matching errors
109
        $shortestString = $initialLength < $comparedLength ? $initialString : $comparedString;
110
        $longestString = $initialLength >= $comparedLength ? $initialString : $comparedString;
111
112
        // Iterate initial string characters
113
        for ($z = 0, $length = strlen($shortestString); $z < $length; $z++) {
114
            // Compare characters with compared string
115
            if ($shortestString{$z} !== $longestString{$z}) {
116
                break; // Exit on first mismatching character
117
            }
118
119
            // Concatenate matching part of two strings
120
            $longestPrefix .= $initialString{$z};
121
        }
122
123
        return $longestPrefix;
124
    }
125
126
    /**
127
     * Remove key string from the beginning of all sub-array strings.
128
     *
129
     * @param array  $array Input array of key => [keyStrings...]
130
     *
131
     * @param string $selfMarker Marker for storing self pointer
132
     *
133
     * @return array Processed array with removed keys from beginning of sub arrays
134
     */
135
    protected function removeKeyFromArrayStrings(array $array, string $selfMarker): array
136
    {
137
        $result = [];
138
        /** @var string[] $values */
139
        foreach ($array as $key => $values) {
140
            $lmpLength = strlen((string)$key);
141
            foreach ($values as $string) {
142
                $newString = substr($string, $lmpLength);
143
144
                if ($newString === '' || $string === $selfMarker) {
145
                    $result[$key][] = $selfMarker;
146
                } else {
147
                    $result[$key][] = $newString;
148
                }
149
            }
150
        }
151
152
        return $result;
153
    }
154
155
    /**
156
     * Find all duplication of source array values in compared array and remove them.
157
     *
158
     * @param array $source Source array
159
     * @param array $compared Compared array for filtering duplicates
160
     */
161
    protected function removeDuplicatesInSubArray(array $source, array &$compared)
162
    {
163
        foreach ($source as $value) {
164
            foreach ($compared as $key => &$subValue) {
165
                if ($subValue === $value) {
166
                    unset($compared[$key]);
167
                }
168
            }
169
        }
170
    }
171
172
    /**
173
     * Recursive string similarity tree builder.
174
     *
175
     * @param string   $prefix
176
     * @param array    $input
177
     * @param TreeNode $result
178
     * @param string   $selfMarker
179
     */
180
    protected function innerProcessor(string $prefix, array $input, TreeNode $result, $selfMarker = self::SELF_NAME)
181
    {
182
        // Rewrite prefix if TreeNode value handler is present
183
        $prefix = is_callable($this->treeNodeValueHandler) ? call_user_func($this->treeNodeValueHandler, $prefix) : $prefix;
184
185
        // Create tree node
186
        $newChild = $result->append($prefix);
187
188
        /**
189
         * Iterate all combinations of strings and group by LMP
190
         */
191
        $longestPrefixes = [];
192
        for ($i = 0, $count = count($input); $i < $count; $i++) {
193
            for ($j = $i + 1; $j < $count; $j++) {
194
                $longestMatchedPrefix = $this->getLongestMatchingPrefix($input[$i], $input[$j]);
195
196
                // We have found at least one matching character between strings
197
                if ($longestMatchedPrefix !== '') {
198
                    $this->addUniqueToArray($input[$i], $longestPrefixes[$longestMatchedPrefix]);
199
                    $this->addUniqueToArray($input[$j], $longestPrefixes[$longestMatchedPrefix]);
200
                }
201
            }
202
        }
203
204
        /**
205
         * Sort LMPs(array keys) ascending by key length
206
         */
207
        $this->sortArrayByKeys($longestPrefixes);
208
209
        /**
210
         * Iterate all sorted LMP strings and remove duplicates from LMP string ordered lower
211
         */
212
        $keys = array_keys($longestPrefixes);
213
        for ($i = 0, $length = count($keys); $i < $length; $i++) {
214
            for ($j = $i + 1; $j < $length; $j++) {
215
                $this->removeDuplicatesInSubArray($longestPrefixes[$keys[$i]], $longestPrefixes[$keys[$j]]);
216
            }
217
        }
218
219
        // Remove empty LMPs as they are included in smaller LMPs
220
        $longestPrefixes = array_filter($longestPrefixes);
221
222
        /**
223
         * Search for input string that do not have LMP, and add missing as LMP
224
         */
225
        foreach ($input as $string) {
226
            $found = false;
227
228
            if ($string !== $selfMarker) {
229
                foreach ($longestPrefixes as $strings) {
230
                    if (in_array($string, $strings, true)) {
231
                        $found = true;
232
                        break;
233
                    }
234
                }
235
236
                if (!$found) {
237
                    $longestPrefixes[$string] = [$selfMarker];
238
                }
239
            }
240
        }
241
242
        /**
243
         * After filtering LMPs remove LMP from matched string arrays
244
         */
245
        $longestPrefixes = $this->removeKeyFromArrayStrings($longestPrefixes, $selfMarker);
246
247
        /**
248
         * If we have self marker as an input string - create LMP for it
249
         */
250
        if (in_array($selfMarker, $input, true)) {
251
            $longestPrefixes = array_merge([$selfMarker => []], $longestPrefixes);
252
        }
253
254
        /**
255
         * Recursively iterate current level LMPs
256
         */
257
        foreach ($longestPrefixes as $longestPrefix => $strings) {
258
            $this->innerProcessor((string)$longestPrefix, $strings, $newChild);
259
        }
260
    }
261
}
262