Completed
Push — master ( 605ec5...e9cc16 )
by Vitaly
02:18
created

StringConditionTree::removeKeyFromArrayStrings()   B

Complexity

Conditions 6
Paths 5

Size

Total Lines 22
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 6

Importance

Changes 0
Metric Value
dl 0
loc 22
ccs 11
cts 11
cp 1
rs 8.6737
c 0
b 0
f 0
cc 6
eloc 11
nc 5
nop 1
crap 6
1
<?php declare(strict_types = 1);
2
/**
3
 * Created by Vitaly Iegorov <[email protected]>.
4
 * on 02.03.17 at 13:25
5
 */
6
namespace samsonframework\stringconditiontree;
7
8
use samsonframework\stringconditiontree\string\Structure;
9
use samsonframework\stringconditiontree\string\StructureCollection;
10
11
/**
12
 * Class StringConditionTree
13
 *
14
 * TODO: Remove variable character group markers, move LMP search to string.
15
 *
16
 * @author Vitaly Egorov <[email protected]>
17
 */
18
class StringConditionTree
19
{
20
    /** Tree node root element identifier, needed for recursion */
21
    const ROOT_NAME = '';
22
23
    /** Final tree node branch identifier */
24
    const SELF_NAME = '@self';
25
26
    /** String parameter start marker */
27
    const PARAMETER_START = '{';
28
29
    /** String parameter end marker */
30
    const PARAMETER_END = '}';
31
32
    /** Parameter sorting length value for counting */
33
    const PARAMETER_COF = 2000;
34
35
    /** @var TreeNode Resulting internalCollection for debugging */
36
    protected $debug;
37
38
    /** @var array Collection of string string => identifier */
39
    protected $source;
40
41
    /** @var string Parametrized string start marker */
42
    protected $parameterStartMarker = self::PARAMETER_START;
43
44
    /** @var string Parametrized string end marker */
45
    protected $parameterEndMarker = self::PARAMETER_END;
46
47
    /**
48
     * StringConditionTree constructor.
49
     *
50
     * @param string               $parameterStartMarker Parametrized string start marker
51
     * @param string               $parameterEndMarker   Parametrized string end marker
52
     */
53 1
    public function __construct(
54
        string $parameterStartMarker = self::PARAMETER_START,
55
        string $parameterEndMarker = self::PARAMETER_END
56
    ) {
57 1
        $this->parameterStartMarker = $parameterStartMarker;
58 1
        $this->parameterEndMarker = $parameterEndMarker;
59 1
    }
60
61
    /**
62
     * Build similarity strings tree.
63
     *
64
     * @param array $input Collection of strings
65
     *
66
     * @return TreeNode Resulting similarity strings tree
67
     */
68 1
    public function process(array $input): TreeNode
69
    {
70 1
        $this->source = $input;
71
72
        /**
73
         * We need to find first matching character that present at least at one two string
74
         * to start building tree. Otherwise there is no reason to build tree.
75
         */
76 1
        $this->innerProcessor(self::ROOT_NAME, array_keys($input), $this->debug = new TreeNode());
77
78 1
        $sct = StructureCollection::fromStringsArray(array_keys($input));
79
80 1
        $collection = $sct->getCommonPrefixesCollection();
81 1
        $node = new TreeNode('');
82 1
        $this->processor($collection, $node);
83
84
        //return $node;
85
86 1
        return $this->debug->children[self::ROOT_NAME];
87
    }
88
89
    /**
90
     * Recursive string similarity tree builder.
91
     *
92
     * @param string   $prefix
93
     * @param array    $input
94
     * @param TreeNode $result
95
     */
96 1
    protected function innerProcessor(string $prefix, array $input, TreeNode $result)
97
    {
98
        // Create tree node. Pass string identifier if present
99 1
        $newChild = $result->append($prefix, $this->source[$result->fullValue . $prefix] ?? '');
100
101
        /**
102
         * Iterate all combinations of strings and group by LMP, also if no LMP is
103
         * found consider strings as LMP itself
104
         */
105 1
        $longestPrefixes = $this->getLMPCollection($input);
106
107
        /**
108
         * Sort LMPs(array keys) ascending by key length
109
         */
110 1
        $longestPrefixes = $this->sortArrayByKeys($longestPrefixes);
111
112
        /**
113
         * Iterate all sorted LMP strings and remove duplicates from LMP string ordered lower
114
         */
115 1
        $this->filterLMPStrings($longestPrefixes);
116
117
        /**
118
         * After filtering LMPs remove LMP from matched string arrays
119
         */
120 1
        $longestPrefixes = $this->removeKeyFromArrayStrings($longestPrefixes);
121
122
        /**
123
         * Recursively iterate current level LMPs
124
         */
125 1
        foreach ($longestPrefixes as $longestPrefix => $strings) {
126 1
            $this->innerProcessor((string)$longestPrefix, $strings, $newChild);
127
        }
128 1
    }
129
130
    /**
131
     * Get internalCollection of grouped longest matching prefixes with strings sub-array.
132
     *
133
     * @param array $input Input strings array
134
     *
135
     * @return array Longest matching prefixes array
136
     */
137 1
    protected function getLMPCollection(array $input): array
138
    {
139 1
        $structures = $this->sortStructures($input);
140
141 1
        $longestPrefixes = [];
142
143
        // Iterate sorted character group internalCollection
144 1
        foreach ($structures as $initialStructure) {
145 1
            $foundLMP = false;
146
            // Iterate all character group internalCollection again
147 1
            foreach ($structures as $comparedStructure) {
148
                // Ignore same internalCollection
149 1
                if ($initialStructure === $comparedStructure) {
150 1
                    continue;
151
                }
152
153
                // Get common longest prefix between internalCollection
154 1
                if (($longestPrefix = $initialStructure->getCommonPrefix($comparedStructure)) !== '') {
155 1
                    $foundLMP = true;
156 1
                    $foundExisting = false;
157 1
                    foreach ($longestPrefixes as $prefix => $strings) {
158 1
                        if ((new Structure($prefix))->getCommonPrefix(new Structure($longestPrefix)) === $prefix) {
159 1
                            $longestPrefixes[$prefix][] = $comparedStructure->string;
160 1
                            $foundExisting = true;
161 1
                            break;
162
                        }
163
                    }
164
165 1
                    if (!$foundExisting) {
166 1
                        $longestPrefixes[$longestPrefix][] = $comparedStructure->string;
167
                    }
168
                }
169
            }
170
171
            // We have not found longest common prefix with no strings - add as separate
172 1
            if (!$foundLMP) {
173 1
                $longestPrefixes[$initialStructure->string][] = $initialStructure->string;
174
            }
175
        }
176
177
        // Clear duplicates
178 1
        foreach ($longestPrefixes as $prefix => &$strings) {
179 1
            $longestPrefixes[$prefix] = array_unique($strings);
180
        }
181
182 1
        return $longestPrefixes;
183
    }
184
185
    /**
186
     * Sort internalCollection array.
187
     *
188
     * @param array $strings Input strings array
189
     *
190
     * @return Structure[] Sorted internalCollection array
191
     */
192 1
    protected function sortStructures(array $strings): array
193
    {
194
        // Create internalCollection
195 1
        $structures = [];
196 1
        foreach ($strings as $string) {
197 1
            $structures[] = new Structure($string);
198
        }
199
200
        // Sort internalCollection
201
        usort($structures, function (Structure $initial, Structure $compared) {
202 1
            return $initial->compare($compared);
203 1
        });
204
205
        // Sort descending
206 1
        return array_reverse($structures);
207
    }
208
209
    /**
210
     * Sort strings array considering PCG and NPCG string structure.
211
     *
212
     * @param array $input Input array for sorting
213
     *
214
     * @return array Sorted keys array
215
     */
216 1
    public function sortArrayByKeys(array $input): array
217
    {
218
        /** @var Structure[] $structures */
219 1
        $structures = [];
220 1
        foreach (array_keys($input) as $string) {
221 1
            $structures[] = new Structure($string);
222
        }
223
224 1
        usort($structures, function (Structure $initial, Structure $compared) {
225 1
            return $initial->compare($compared);
226 1
        });
227
228 1
        $structures = array_reverse($structures);
229
230
        // Restore initial strings sub-arrays
231 1
        $result = [];
232 1
        foreach ($structures as $structure) {
233 1
            $result[$structure->string] = $input[$structure->string];
234
        }
235
236 1
        return $result;
237
    }
238
239
    /**
240
     * Iterate LMP and remove duplicate strings in other LMPs.
241
     *
242
     * @param array $prefixes LMP internalCollection, returning value
243
     */
244 1
    protected function filterLMPStrings(array &$prefixes)
245
    {
246 1
        $keys = array_keys($prefixes);
247 1
        for ($i = 0, $length = count($keys); $i < $length; $i++) {
248 1
            for ($j = $i + 1; $j < $length; $j++) {
249 1
                $this->removeDuplicatesInSubArray($prefixes[$keys[$i]], $prefixes[$keys[$j]]);
250
            }
251
        }
252 1
    }
253
254
    /**
255
     * Find all duplication of source array values in compared array and remove them.
256
     *
257
     * @param array $source   Source array
258
     * @param array $compared Compared array for filtering duplicates
259
     */
260 1
    protected function removeDuplicatesInSubArray(array $source, array &$compared)
261
    {
262 1
        foreach ($source as $value) {
263 1
            foreach ($compared as $key => &$subValue) {
264 1
                if ($subValue === $value) {
265 1
                    unset($compared[$key]);
266
                }
267
            }
268
        }
269 1
    }
270
271
    /**
272
     * Remove key string from the beginning of all sub-array strings.
273
     *
274
     * @param array $array Input array of key => [keyStrings...]
275
     *
276
     * @return array Processed array with removed keys from beginning of sub arrays
277
     */
278 1
    protected function removeKeyFromArrayStrings(array $array): array
279
    {
280 1
        $processed = [];
281
        /** @var string[] $stringsCollection */
282 1
        foreach ($array as $keyString => $stringsCollection) {
283 1
            $lmpLength = strlen((string)$keyString);
284 1
            foreach ($stringsCollection as $stringValue) {
285
                // Remove LMP from string
286 1
                $newString = substr($stringValue, $lmpLength);
287
288
                // This string has something left besides lmp
289 1
                if ($newString !== false && $newString !== '') {
290 1
                    $processed[$keyString][] = $newString;
291 1
                } elseif (array_key_exists($keyString, $processed) === false) {
292
                    // Add empty array to LMP if missing
293 1
                    $processed[$keyString] = [];
294
                }
295
            }
296
        }
297
298 1
        return $processed;
299
    }
300
301
    /**
302
     * @param StructureCollection[] $collection
303
     */
304 1
    protected function processor(array $collection, TreeNode $parent, string $parentPrefix = ''): void
305
    {
306 1
        foreach ($collection as $prefix => $item) {
307
            // Create tree node. Pass string identifier if present
308 1
            $newChild = $parent->append($prefix, $this->source[$parentPrefix.$prefix] ?? '');
309
310 1
            $lcpCollection = $item->getCommonPrefixesCollection();
311
312 1
            $this->processor($lcpCollection, $newChild, $parentPrefix.$prefix);
313
        }
314 1
    }
315
316
    /**
317
     * Convert strings arrays into array of internalCollection.
318
     *
319
     * @param array $strings Input strings array
320
     *
321
     * @return array
322
     */
323
    protected function convertStringsToStructures(array $strings)
324
    {
325
        // Create internalCollection
326
        $structures = [];
327
        foreach ($strings as $string) {
328
            $structures[$string] = new Structure($string);
329
        }
330
331
        return $structures;
332
    }
333
}
334