Completed
Push — master ( edeffb...57b966 )
by Vitaly
14:57 queued 07:05
created

StringConditionTree::process()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 12
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 12
ccs 4
cts 4
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 4
nc 1
nop 1
crap 1
1
<?php declare(strict_types = 1);
2
/**
3
 * Created by Vitaly Iegorov <[email protected]>.
4
 * on 02.03.17 at 13:25
5
 */
6
namespace samsonframework\stringconditiontree;
7
use samsonframework\stringconditiontree\string\Structure;
8
9
/**
10
 * Class StringConditionTree
11
 *
12
 * @author Vitaly Egorov <[email protected]>
13
 */
14
class StringConditionTree
15
{
16
    /** Tree node root element identifier, needed for recursion */
17
    const ROOT_NAME = '';
18
19
    /** Final tree node branch identifier */
20
    const SELF_NAME = '@self';
21
22
    /** String parameter start marker */
23
    const PARAMETER_START = '{';
24
25
    /** String parameter end marker */
26
    const PARAMETER_END = '}';
27
28
    /** Parameter sorting length value for counting */
29
    const PARAMETER_COF = 2000;
30
31
    /** @var TreeNode Resulting collection for debugging */
32
    protected $debug;
33
34
    /** @var array Collection of input string => identifier */
35
    protected $source;
36
37
    /** @var string Parametrized string start marker */
38
    protected $parameterStartMarker = self::PARAMETER_START;
39
40
    /** @var string Parametrized string end marker */
41
    protected $parameterEndMarker = self::PARAMETER_END;
42
43
    /**
44
     * StringConditionTree constructor.
45
     *
46
     * @param string               $parameterStartMarker Parametrized string start marker
47
     * @param string               $parameterEndMarker   Parametrized string end marker
48
     */
49 1
    public function __construct(
50
        string $parameterStartMarker = self::PARAMETER_START,
51
        string $parameterEndMarker = self::PARAMETER_END
52
    ) {
53 1
        $this->parameterStartMarker = $parameterStartMarker;
54 1
        $this->parameterEndMarker = $parameterEndMarker;
55 1
    }
56
57
    /**
58
     * Build similarity strings tree.
59
     *
60
     * @param array $input Collection of strings
61
     *
62
     * @return TreeNode Resulting similarity strings tree
63
     */
64 1
    public function process(array $input): TreeNode
65
    {
66 1
        $this->source = $input;
67
68
        /**
69
         * We need to find first matching character that present at least at one two string
70
         * to start building tree. Otherwise there is no reason to build tree.
71
         */
72 1
        $this->innerProcessor(self::ROOT_NAME, array_keys($input), $this->debug = new TreeNode());
73
74 1
        return $this->debug->children[self::ROOT_NAME];
75
    }
76
77
    /**
78
     * Recursive string similarity tree builder.
79
     *
80
     * @param string   $prefix
81
     * @param array    $input
82
     * @param TreeNode $result
83
     */
84 1
    protected function innerProcessor(string $prefix, array $input, TreeNode $result)
85
    {
86
        // Create tree node. Pass string identifier if present
87 1
        $newChild = $result->append($prefix, $this->source[$result->fullValue . $prefix] ?? '');
88
89
        /**
90
         * Iterate all combinations of strings and group by LMP, also if no LMP is
91
         * found consider strings as LMP itself
92
         */
93 1
        $longestPrefixes = $this->getLMPCollection($input);
94
95
        /**
96
         * Sort LMPs(array keys) ascending by key length
97
         */
98 1
        $longestPrefixes = $this->sortArrayByKeys($longestPrefixes);
99
100
        /**
101
         * Iterate all sorted LMP strings and remove duplicates from LMP string ordered lower
102
         */
103 1
        $this->filterLMPStrings($longestPrefixes);
104
105
        /**
106
         * After filtering LMPs remove LMP from matched string arrays
107
         */
108 1
        $longestPrefixes = $this->removeKeyFromArrayStrings($longestPrefixes);
109
110
        /**
111
         * Recursively iterate current level LMPs
112
         */
113 1
        foreach ($longestPrefixes as $longestPrefix => $strings) {
114 1
            $this->innerProcessor((string)$longestPrefix, $strings, $newChild);
115
        }
116 1
    }
117
118
    /**
119
     * Get collection of grouped longest matching prefixes with strings sub-array.
120
     *
121
     * @param array $input Input strings array
122
     *
123
     * @return array Longest matching prefixes array
124
     */
125 1
    protected function getLMPCollection(array $input): array
126
    {
127 1
        $longestPrefixes = [];
128 1
        for ($i = 0, $length = count($input); $i < $length; $i++) {
129 1
            $initial = $input[$i];
130 1
            $foundLMP = false;
131 1
            for ($j = 0; $j < $length; $j++) {
132 1
                $compared = $input[$j];
133 1
                if ($compared !== $initial) {
134 1
                    $longestMatchedPrefix = $this->getLongestMatchingPrefix($initial, $compared);
135
136
                    // We have found at least one matching character between strings
137 1
                    if ($longestMatchedPrefix !== '') {
138 1
                        $foundLMP = true;
139 1
                        $this->addUniqueToArray($initial, $longestPrefixes[$longestMatchedPrefix]);
140
                    }
141
                }
142
            }
143
144
            // Add initial string as LMP
145 1
            if ($foundLMP === false) {
146 1
                $this->addUniqueToArray($initial, $longestPrefixes[$initial]);
147
            }
148
        }
149
150 1
        return $longestPrefixes;
151
    }
152
153
    /**
154
     * Find longest matching prefix between two strings.
155
     *
156
     * @param string $initialString  Initial string
157
     * @param string $comparedString Compared string
158
     *
159
     * @return string Longest matching prefix
160
     */
161 1
    protected function getLongestMatchingPrefix(string $initialString, string $comparedString): string
162
    {
163
        // Iterate and compare how string matches
164 1
        $longestPrefix = '';
165
166
        // Define shortest & longest strings to avoid errors
167 1
        $initialLength = strlen($initialString);
168 1
        $comparedLength = strlen($comparedString);
169
170
        // Define shortest and longest strings to avoid character matching errors
171 1
        $shortestString = $initialLength < $comparedLength ? $initialString : $comparedString;
172 1
        $longestString = $initialLength >= $comparedLength ? $initialString : $comparedString;
173
174
        // Iterate initial string characters
175 1
        $isPattern = false;
176 1
        $parametrizedPrefix = '';
177 1
        for ($z = 0, $length = strlen($shortestString); $z < $length; $z++) {
178
            // Pattern support
179 1
            if ($isPattern || $shortestString{$z} === $this->parameterStartMarker) {
180 1
                $isPattern = true;
181
182
                // Concatenate longest matching prefix
183 1
                $parametrizedPrefix .= $shortestString{$z};
184
185
                // Compare characters with compared string
186 1
                if ($shortestString{$z} !== $longestString{$z}) {
187 1
                    break;
188
                }
189
190
                // If pattern id closed unset flag for special behaviour
191 1
                if ($shortestString{$z} === $this->parameterEndMarker) {
192
                    // If parametrized part ended append to longest matching prefix
193 1
                    $longestPrefix .= $parametrizedPrefix;
194
                    // Clear parametrized prefix as we can have other parametrized parts
195 1
                    $parametrizedPrefix = '';
196
                    // Reset parametrized flag
197 1
                    $isPattern = false;
198
                }
199
            } else {
200
                // Compare characters with compared string
201 1
                if ($shortestString{$z} !== $longestString{$z}) {
202 1
                    break; // Exit on first mismatching character
203
                }
204
205
                // Concatenate matching part of two strings
206 1
                $longestPrefix .= $initialString{$z};
207
            }
208
        }
209
210 1
        return $longestPrefix;
211
    }
212
213
    /**
214
     * Add only unique value to array.
215
     *
216
     * @param mixed $value  Unique value
217
     * @param array $array  Array for adding unique value
218
     * @param bool  $strict Strict uniqueness check
219
     *
220
     * @return bool True if unique value was added
221
     * @see in_array();
222
     *
223
     */
224 1
    protected function addUniqueToArray($value, &$array, bool $strict = true)
225
    {
226
        // Create array if not array is passed
227 1
        if (!is_array($array)) {
228 1
            $array = [];
229
        }
230
231
        // Add value to array if it is unique
232 1
        if (!in_array($value, $array, $strict)) {
233 1
            $array[] = $value;
234
235 1
            return true;
236
        }
237
238
        // Value is already in array
239 1
        return false;
240
    }
241
242
    /**
243
     * Sort strings array considering PCG and NPCG string structure.
244
     *
245
     * @param array $input Input array for sorting
246
     *
247
     * @return array Sorted keys array
248
     */
249 1
    public function sortArrayByKeys(array $input): array
250
    {
251
        /** @var Structure[] $structures */
252 1
        $structures = [];
253 1
        foreach (array_keys($input) as $string) {
254 1
            $structures[$string] = new Structure($string);
255
        }
256
257 1
        usort($structures, function (Structure $initial, Structure $compared) {
258 1
            return $initial->compare($compared);
259 1
        });
260
261 1
        $structures = array_reverse($structures);
262
263
        // Restore initial strings sub-arrays
264 1
        $result = [];
265 1
        foreach ($structures as $structure) {
266 1
            $result[$structure->input] = $input[$structure->input];
267
        }
268
269 1
        return $result;
270
    }
271
272
    /**
273
     * Iterate LMP and remove duplicate strings in other LMPs.
274
     *
275
     * @param array $prefixes LMP collection, returning value
276
     */
277 1
    protected function filterLMPStrings(array &$prefixes)
278
    {
279 1
        $keys = array_keys($prefixes);
280 1
        for ($i = 0, $length = count($keys); $i < $length; $i++) {
281 1
            for ($j = $i + 1; $j < $length; $j++) {
282 1
                $this->removeDuplicatesInSubArray($prefixes[$keys[$i]], $prefixes[$keys[$j]]);
283
            }
284
        }
285 1
    }
286
287
    /**
288
     * Find all duplication of source array values in compared array and remove them.
289
     *
290
     * @param array $source   Source array
291
     * @param array $compared Compared array for filtering duplicates
292
     */
293 1
    protected function removeDuplicatesInSubArray(array $source, array &$compared)
294
    {
295 1
        foreach ($source as $value) {
296 1
            foreach ($compared as $key => &$subValue) {
297 1
                if ($subValue === $value) {
298 1
                    unset($compared[$key]);
299
                }
300
            }
301
        }
302 1
    }
303
304
    /**
305
     * Remove key string from the beginning of all sub-array strings.
306
     *
307
     * @param array $array Input array of key => [keyStrings...]
308
     *
309
     * @return array Processed array with removed keys from beginning of sub arrays
310
     */
311 1
    protected function removeKeyFromArrayStrings(array $array): array
312
    {
313 1
        $processed = [];
314
        /** @var string[] $stringsCollection */
315 1
        foreach ($array as $keyString => $stringsCollection) {
316 1
            $lmpLength = strlen((string)$keyString);
317 1
            foreach ($stringsCollection as $stringValue) {
318
                // Remove LMP from string
319 1
                $newString = substr($stringValue, $lmpLength);
320
321
                // This string has something left besides lmp
322 1
                if ($newString !== false && $newString !== '') {
323 1
                    $processed[$keyString][] = $newString;
324 1
                } elseif (array_key_exists($keyString, $processed) === false) {
325
                    // Add empty array to LMP if missing
326 1
                    $processed[$keyString] = [];
327
                }
328
            }
329
        }
330
331 1
        return $processed;
332
    }
333
}
334