Passed
Push — master ( 49ee33...01b225 )
by Vitaly
15:05 queued 05:00
created

StringConditionTree::setTreeNodeValueHandler()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 1
1
<?php declare(strict_types = 1);
2
/**
3
 * Created by Vitaly Iegorov <[email protected]>.
4
 * on 02.03.17 at 13:25
5
 */
6
namespace samsonframework\stringconditiontree;
7
8
/**
9
 * Class StringConditionTree
10
 *
11
 * @author Vitaly Egorov <[email protected]>
12
 */
13
class StringConditionTree
14
{
15
    const ROOT_NAME = '';
16
    const SELF_NAME = '@self';
17
18
    /** @var TreeNode Resulting collection for debugging */
19
    protected $debug;
20
21
    /**
22
     * Build similarity strings tree.
23
     *
24
     * @param array $input Collection of strings
25
     *
26
     * @return TreeNode Resulting similarity strings tree
27
     */
28
    public function process(array $input): TreeNode
29
    {
30
        /**
31
         * We need to find first matching character that present at least at one two string
32
         * to start building tree. Otherwise there is no reason to build tree.
33
         */
34
        $this->innerProcessor(self::ROOT_NAME, $input, $this->debug = new TreeNode());
35
36
        $this->debug = $this->debug->children[self::ROOT_NAME];
37
38
        return $this->debug;
39
    }
40
41
    /**
42
     * Sort array by key string lengths.
43
     *
44
     * @param array $input Input array for sorting
45
     * @param int   $order Sorting order
46
     */
47
    protected function sortArrayByKeys(array &$input, int $order = SORT_ASC)
48
    {
49
        array_multisort(array_map('strlen', array_keys($input)), $order, $input);
0 ignored issues
show
Bug introduced by
array_map('strlen', array_keys($input)) cannot be passed to array_multisort() as the parameter $arr expects a reference.
Loading history...
50
    }
51
52
    /**
53
     * Add only unique value to array.
54
     *
55
     * @param mixed $value  Unique value
56
     * @param array $array  Array for adding unique value
57
     * @param bool  $strict Strict uniqueness check
58
     *
59
     * @see in_array();
60
     *
61
     * @return bool True if unique value was added
62
     */
63
    protected function addUniqueToArray($value, &$array, bool $strict = true)
64
    {
65
        // Create array if not array is passed
66
        if (!is_array($array)) {
67
            $array = [];
68
        }
69
70
        // Add value to array if it is unique
71
        if (!in_array($value, $array, $strict)) {
72
            $array[] = $value;
73
74
            return true;
75
        } else { // Value is already in array
76
            return false;
77
        }
78
    }
79
80
    /**
81
     * Find longest matching prefix between two strings.
82
     *
83
     * @param string $initialString  Initial string
84
     * @param string $comparedString Compared string
85
     *
86
     * @return string Longest matching prefix
87
     */
88
    protected function getLongestMatchingPrefix(string $initialString, string $comparedString): string
89
    {
90
        // Iterate and compare how string matches
91
        $longestPrefix = '';
92
93
        // Define shortest & longest strings to avoid errors
94
        $initialLength = strlen($initialString);
95
        $comparedLength = strlen($comparedString);
96
97
        // Define shortest and longest strings to avoid character matching errors
98
        $shortestString = $initialLength < $comparedLength ? $initialString : $comparedString;
99
        $longestString = $initialLength >= $comparedLength ? $initialString : $comparedString;
100
101
        // Iterate initial string characters
102
        $isPattern = false;
103
        for ($z = 0, $length = strlen($shortestString); $z < $length; $z++) {
104
            // Pattern support
105
            // TODO: Probably can be optimized
106
            if ($isPattern || $shortestString{$z} === '{') {
107
                $isPattern = true;
108
109
                // Concatenate longest matching prefix
110
                $longestPrefix .= $shortestString{$z};
111
112
                // Compare characters with compared string
113
                if ($shortestString{$z} !== $longestString{$z}) {
114
                    // Clear pattern as pattern characters mismatch
115
                    $longestPrefix = '';
116
                    break;
117
                }
118
119
                // If pattern id closed unset flag fro special behaviour
120
                if ($shortestString{$z} === '}') {
121
                    $isPattern = false;
122
                }
123
            } else {
124
                // Compare characters with compared string
125
                if ($shortestString{$z} !== $longestString{$z}) {
126
                    break; // Exit on first mismatching character
127
                }
128
129
                // Concatenate matching part of two strings
130
                $longestPrefix .= $initialString{$z};
131
            }
132
        }
133
134
        return $longestPrefix;
135
    }
136
137
    /**
138
     * Remove key string from the beginning of all sub-array strings.
139
     *
140
     * @param array  $array Input array of key => [keyStrings...]
141
     *
142
     * @param string $selfMarker Marker for storing self pointer
143
     *
144
     * @return array Processed array with removed keys from beginning of sub arrays
145
     */
146
    protected function removeKeyFromArrayStrings(array $array, string $selfMarker): array
147
    {
148
        $result = [];
149
        /** @var string[] $values */
150
        foreach ($array as $key => $values) {
151
            $lmpLength = strlen((string)$key);
152
            foreach ($values as $string) {
153
                $newString = substr($string, $lmpLength);
154
155
                if ($newString === '' || $string === $selfMarker) {
156
                    $result[$key][] = $selfMarker;
157
                } else {
158
                    $result[$key][] = $newString;
159
                }
160
            }
161
        }
162
163
        return $result;
164
    }
165
166
    /**
167
     * Find all duplication of source array values in compared array and remove them.
168
     *
169
     * @param array $source Source array
170
     * @param array $compared Compared array for filtering duplicates
171
     */
172
    protected function removeDuplicatesInSubArray(array $source, array &$compared)
173
    {
174
        foreach ($source as $value) {
175
            foreach ($compared as $key => &$subValue) {
176
                if ($subValue === $value) {
177
                    unset($compared[$key]);
178
                }
179
            }
180
        }
181
    }
182
183
    /**
184
     * Recursive string similarity tree builder.
185
     *
186
     * @param string   $prefix
187
     * @param array    $input
188
     * @param TreeNode $result
189
     * @param string   $selfMarker
190
     */
191
    protected function innerProcessor(string $prefix, array $input, TreeNode $result, $selfMarker = self::SELF_NAME)
192
    {
193
        // Rewrite prefix if TreeNode value handler is present
194
        $prefix = is_callable($this->treeNodeValueHandler) ? call_user_func($this->treeNodeValueHandler, $prefix) : $prefix;
0 ignored issues
show
Bug introduced by
The property treeNodeValueHandler does not exist. Did you maybe forget to declare it?

In PHP it is possible to write to properties without declaring them. For example, the following is perfectly valid PHP code:

class MyClass { }

$x = new MyClass();
$x->foo = true;

Generally, it is a good practice to explictly declare properties to avoid accidental typos and provide IDE auto-completion:

class MyClass {
    public $foo;
}

$x = new MyClass();
$x->foo = true;
Loading history...
195
196
        // Create tree node
197
        $newChild = $result->append($prefix);
198
199
        /**
200
         * Iterate all combinations of strings and group by LMP
201
         */
202
        $longestPrefixes = [];
203
        for ($i = 0, $count = count($input); $i < $count; $i++) {
204
            for ($j = $i + 1; $j < $count; $j++) {
205
                $longestMatchedPrefix = $this->getLongestMatchingPrefix($input[$i], $input[$j]);
206
207
                // We have found at least one matching character between strings
208
                if ($longestMatchedPrefix !== '') {
209
                    $this->addUniqueToArray($input[$i], $longestPrefixes[$longestMatchedPrefix]);
210
                    $this->addUniqueToArray($input[$j], $longestPrefixes[$longestMatchedPrefix]);
211
                }
212
            }
213
        }
214
215
        /**
216
         * Sort LMPs(array keys) ascending by key length
217
         */
218
        $this->sortArrayByKeys($longestPrefixes);
219
220
        /**
221
         * Iterate all sorted LMP strings and remove duplicates from LMP string ordered lower
222
         */
223
        $keys = array_keys($longestPrefixes);
224
        for ($i = 0, $length = count($keys); $i < $length; $i++) {
225
            for ($j = $i + 1; $j < $length; $j++) {
226
                $this->removeDuplicatesInSubArray($longestPrefixes[$keys[$i]], $longestPrefixes[$keys[$j]]);
227
            }
228
        }
229
230
        // Remove empty LMPs as they are included in smaller LMPs
231
        $longestPrefixes = array_filter($longestPrefixes);
232
233
        /**
234
         * Search for input string that do not have LMP, and add missing as LMP
235
         */
236
        foreach ($input as $string) {
237
            $found = false;
238
239
            if ($string !== $selfMarker) {
240
                foreach ($longestPrefixes as $strings) {
241
                    if (in_array($string, $strings, true)) {
242
                        $found = true;
243
                        break;
244
                    }
245
                }
246
247
                if (!$found) {
248
                    $longestPrefixes[$string] = [$selfMarker];
249
                }
250
            }
251
        }
252
253
        /**
254
         * After filtering LMPs remove LMP from matched string arrays
255
         */
256
        $longestPrefixes = $this->removeKeyFromArrayStrings($longestPrefixes, $selfMarker);
257
258
        /**
259
         * If we have self marker as an input string - create LMP for it
260
         */
261
        if (in_array($selfMarker, $input, true)) {
262
            $longestPrefixes = array_merge([$selfMarker => []], $longestPrefixes);
263
        }
264
265
        /**
266
         * Recursively iterate current level LMPs
267
         */
268
        foreach ($longestPrefixes as $longestPrefix => $strings) {
269
            $this->innerProcessor((string)$longestPrefix, $strings, $newChild);
270
        }
271
    }
272
}
273