Passed
Push — master ( d0bef4...166358 )
by Vitaly
03:57
created

StringConditionTree::getPrefixStructure()   C

Complexity

Conditions 7
Paths 5

Size

Total Lines 42
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 42
rs 6.7272
c 0
b 0
f 0
cc 7
eloc 20
nc 5
nop 1
1
<?php declare(strict_types = 1);
2
/**
3
 * Created by Vitaly Iegorov <[email protected]>.
4
 * on 02.03.17 at 13:25
5
 */
6
namespace samsonframework\stringconditiontree;
7
8
/**
9
 * Class StringConditionTree
10
 *
11
 * @author Vitaly Egorov <[email protected]>
12
 */
13
class StringConditionTree
14
{
15
    /** Tree node root element identifier, needed for recursion */
16
    const ROOT_NAME = '';
17
18
    /** Final tree node branch identifier */
19
    const SELF_NAME = '@self';
20
21
    /** String parameter start marker */
22
    const PARAMETER_START = '{';
23
24
    /** String parameter end marker */
25
    const PARAMETER_END = '}';
26
27
    /** Parameter sorting length value for counting */
28
    const PARAMETER_COF = 2000;
29
30
    /** @var TreeNode Resulting collection for debugging */
31
    protected $debug;
32
33
    /** @var array Collection of input string => identifier */
34
    protected $source;
35
36
    /** @var string Parametrized string start marker */
37
    protected $parameterStartMarker = self::PARAMETER_START;
38
39
    /** @var string Parametrized string end marker */
40
    protected $parameterEndMarker = self::PARAMETER_END;
41
42
    /** @var StructureSorter */
43
    protected $structureSorter;
44
45
    /**
46
     * StringConditionTree constructor.
47
     *
48
     * @param StructureSorter|null $structureSorter Parametrized strings array structure sorter
49
     * @param string               $parameterStartMarker Parametrized string start marker
50
     * @param string               $parameterEndMarker   Parametrized string end marker
51
     */
52
    public function __construct(
53
        StructureSorter $structureSorter = null,
54
        string $parameterStartMarker = self::PARAMETER_START,
55
        string $parameterEndMarker = self::PARAMETER_END
56
    ) {
57
        $this->parameterStartMarker = $parameterStartMarker;
58
        $this->parameterEndMarker = $parameterEndMarker;
59
        $this->structureSorter = $structureSorter
60
            ?? new StructureSorter($this->parameterStartMarker, $this->parameterEndMarker);
61
    }
62
63
    /**
64
     * Build similarity strings tree.
65
     *
66
     * @param array $input Collection of strings
67
     *
68
     * @return TreeNode Resulting similarity strings tree
69
     */
70
    public function process(array $input): TreeNode
71
    {
72
        $this->source = $input;
73
74
        /**
75
         * We need to find first matching character that present at least at one two string
76
         * to start building tree. Otherwise there is no reason to build tree.
77
         */
78
        $this->innerProcessor(self::ROOT_NAME, array_keys($input), $this->debug = new TreeNode());
79
80
        return $this->debug->children[self::ROOT_NAME];
81
    }
82
83
    /**
84
     * Recursive string similarity tree builder.
85
     *
86
     * @param string   $prefix
87
     * @param array    $input
88
     * @param TreeNode $result
89
     */
90
    protected function innerProcessor(string $prefix, array $input, TreeNode $result)
91
    {
92
        // Create tree node. Pass string identifier if present
93
        $newChild = $result->append($prefix, $this->source[$result->fullValue . $prefix] ?? '');
94
95
        /**
96
         * Iterate all combinations of strings and group by LMP, also if no LMP is
97
         * found consider strings as LMP itself
98
         */
99
        $longestPrefixes = $this->getLMPCollection($input);
100
101
        /**
102
         * Sort LMPs(array keys) ascending by key length
103
         */
104
        $longestPrefixes = $this->structureSorter->sortArrayByKeys($longestPrefixes);
105
106
        /**
107
         * Iterate all sorted LMP strings and remove duplicates from LMP string ordered lower
108
         */
109
        $this->filterLMPStrings($longestPrefixes);
110
111
        /**
112
         * After filtering LMPs remove LMP from matched string arrays
113
         */
114
        $longestPrefixes = $this->removeKeyFromArrayStrings($longestPrefixes);
115
116
        /**
117
         * Recursively iterate current level LMPs
118
         */
119
        foreach ($longestPrefixes as $longestPrefix => $strings) {
120
            $this->innerProcessor((string)$longestPrefix, $strings, $newChild);
121
        }
122
    }
123
124
    /**
125
     * Get collection of grouped longest matching prefixes with strings sub-array.
126
     *
127
     * @param array $input Input strings array
128
     *
129
     * @return array Longest matching prefixes array
130
     */
131
    protected function getLMPCollection(array $input): array
132
    {
133
        $longestPrefixes = [];
134
        foreach ($input as $initial) {
135
            $foundLMP = false;
136
            foreach ($input as $compared) {
137
                if ($initial !== $compared) {
138
                    $longestMatchedPrefix = $this->getLongestMatchingPrefix($initial, $compared);
139
140
                    // We have found at least one matching character between strings
141
                    if ($longestMatchedPrefix !== '') {
142
                        $foundLMP = true;
143
                        $this->addUniqueToArray($initial, $longestPrefixes[$longestMatchedPrefix]);
144
                    }
145
                }
146
            }
147
148
            // Add initial string as LMP
149
            if ($foundLMP === false) {
150
                $this->addUniqueToArray($initial, $longestPrefixes[$initial]);
151
            }
152
        }
153
154
        return $longestPrefixes;
155
    }
156
157
    /**
158
     * Find longest matching prefix between two strings.
159
     *
160
     * @param string $initialString  Initial string
161
     * @param string $comparedString Compared string
162
     *
163
     * @return string Longest matching prefix
164
     */
165
    protected function getLongestMatchingPrefix(string $initialString, string $comparedString): string
166
    {
167
        // Iterate and compare how string matches
168
        $longestPrefix = '';
169
170
        // Define shortest & longest strings to avoid errors
171
        $initialLength = strlen($initialString);
172
        $comparedLength = strlen($comparedString);
173
174
        // Define shortest and longest strings to avoid character matching errors
175
        $shortestString = $initialLength < $comparedLength ? $initialString : $comparedString;
176
        $longestString = $initialLength >= $comparedLength ? $initialString : $comparedString;
177
178
        // Iterate initial string characters
179
        $isPattern = false;
180
        $parametrizedPrefix = '';
181
        for ($z = 0, $length = strlen($shortestString); $z < $length; $z++) {
182
            // Pattern support
183
            if ($isPattern || $shortestString{$z} === $this->parameterStartMarker) {
184
                $isPattern = true;
185
186
                // Concatenate longest matching prefix
187
                $parametrizedPrefix .= $shortestString{$z};
188
189
                // Compare characters with compared string
190
                if ($shortestString{$z} !== $longestString{$z}) {
191
                    break;
192
                }
193
194
                // If pattern id closed unset flag fro special behaviour
195
                if ($shortestString{$z} === $this->parameterEndMarker) {
196
                    // If parametrized part ended append to longest matching prefix
197
                    $longestPrefix .= $parametrizedPrefix;
198
                    // Clear parametrized prefix as we can have other parametrized parts
199
                    $parametrizedPrefix = '';
200
                    // Reset parametrized flag
201
                    $isPattern = false;
202
                }
203
            } else {
204
                // Compare characters with compared string
205
                if ($shortestString{$z} !== $longestString{$z}) {
206
                    break; // Exit on first mismatching character
207
                }
208
209
                // Concatenate matching part of two strings
210
                $longestPrefix .= $initialString{$z};
211
            }
212
        }
213
214
        return $longestPrefix;
215
    }
216
217
    /**
218
     * Add only unique value to array.
219
     *
220
     * @param mixed $value  Unique value
221
     * @param array $array  Array for adding unique value
222
     * @param bool  $strict Strict uniqueness check
223
     *
224
     * @return bool True if unique value was added
225
     * @see in_array();
226
     *
227
     */
228
    protected function addUniqueToArray($value, &$array, bool $strict = true)
229
    {
230
        // Create array if not array is passed
231
        if (!is_array($array)) {
232
            $array = [];
233
        }
234
235
        // Add value to array if it is unique
236
        if (!in_array($value, $array, $strict)) {
237
            $array[] = $value;
238
239
            return true;
240
        }
241
242
        // Value is already in array
243
        return false;
244
    }
245
246
    /**
247
     * Iterate LMP and remove duplicate strings in other LMPs.
248
     *
249
     * @param array $prefixes LMP collection, returning value
250
     */
251
    protected function filterLMPStrings(array &$prefixes)
252
    {
253
        $keys = array_keys($prefixes);
254
        for ($i = 0, $length = count($keys); $i < $length; $i++) {
255
            for ($j = $i + 1; $j < $length; $j++) {
256
                $this->removeDuplicatesInSubArray($prefixes[$keys[$i]], $prefixes[$keys[$j]]);
257
            }
258
        }
259
    }
260
261
    /**
262
     * Find all duplication of source array values in compared array and remove them.
263
     *
264
     * @param array $source   Source array
265
     * @param array $compared Compared array for filtering duplicates
266
     */
267
    protected function removeDuplicatesInSubArray(array $source, array &$compared)
268
    {
269
        foreach ($source as $value) {
270
            foreach ($compared as $key => &$subValue) {
271
                if ($subValue === $value) {
272
                    unset($compared[$key]);
273
                }
274
            }
275
        }
276
    }
277
278
    /**
279
     * Remove key string from the beginning of all sub-array strings.
280
     *
281
     * @param array $array Input array of key => [keyStrings...]
282
     *
283
     * @return array Processed array with removed keys from beginning of sub arrays
284
     */
285
    protected function removeKeyFromArrayStrings(array $array): array
286
    {
287
        $processed = [];
288
        /** @var string[] $stringsCollection */
289
        foreach ($array as $keyString => $stringsCollection) {
290
            $lmpLength = strlen((string)$keyString);
291
            foreach ($stringsCollection as $stringValue) {
292
                // Remove LMP from string
293
                $newString = substr($stringValue, $lmpLength);
294
295
                // This string has something left besides lmp
296
                if ($newString !== false && $newString !== '') {
297
                    $processed[$keyString][] = $newString;
298
                } elseif (array_key_exists($keyString, $processed) === false) {
299
                    // Add empty array to LMP if missing
300
                    $processed[$keyString] = [];
301
                }
302
            }
303
        }
304
305
        return $processed;
306
    }
307
}
308