Completed
Push — master ( 9b1dd9...d16f9c )
by Vitaly
09:15 queued 06:30
created

StringConditionTree::innerProcessor()   B

Complexity

Conditions 2
Paths 2

Size

Total Lines 33
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 33
ccs 9
cts 9
cp 1
rs 8.8571
c 0
b 0
f 0
cc 2
eloc 8
nc 2
nop 3
crap 2
1
<?php declare(strict_types = 1);
2
/**
3
 * Created by Vitaly Iegorov <[email protected]>.
4
 * on 02.03.17 at 13:25
5
 */
6
namespace samsonframework\stringconditiontree;
7
8
use samsonframework\stringconditiontree\string\CommonPrefix;
9
use samsonframework\stringconditiontree\string\Structure;
10
11
/**
12
 * Class StringConditionTree
13
 *
14
 * TODO: Remove variable character group markers, move LMP search to string.
15
 *
16
 * @author Vitaly Egorov <[email protected]>
17
 */
18
class StringConditionTree
19
{
20
    /** Tree node root element identifier, needed for recursion */
21
    const ROOT_NAME = '';
22
23
    /** Final tree node branch identifier */
24
    const SELF_NAME = '@self';
25
26
    /** String parameter start marker */
27
    const PARAMETER_START = '{';
28
29
    /** String parameter end marker */
30
    const PARAMETER_END = '}';
31
32
    /** Parameter sorting length value for counting */
33
    const PARAMETER_COF = 2000;
34
35
    /** @var TreeNode Resulting collection for debugging */
36
    protected $debug;
37
38
    /** @var array Collection of string string => identifier */
39
    protected $source;
40
41
    /** @var string Parametrized string start marker */
42
    protected $parameterStartMarker = self::PARAMETER_START;
43
44
    /** @var string Parametrized string end marker */
45
    protected $parameterEndMarker = self::PARAMETER_END;
46
47
    /**
48
     * StringConditionTree constructor.
49
     *
50
     * @param string               $parameterStartMarker Parametrized string start marker
51
     * @param string               $parameterEndMarker   Parametrized string end marker
52
     */
53 1
    public function __construct(
54
        string $parameterStartMarker = self::PARAMETER_START,
55
        string $parameterEndMarker = self::PARAMETER_END
56
    ) {
57 1
        $this->parameterStartMarker = $parameterStartMarker;
58 1
        $this->parameterEndMarker = $parameterEndMarker;
59 1
    }
60
61
    /**
62
     * Build similarity strings tree.
63
     *
64
     * @param array $input Collection of strings
65
     *
66
     * @return TreeNode Resulting similarity strings tree
67
     */
68 1
    public function process(array $input): TreeNode
69
    {
70 1
        $this->source = $input;
71
72
        /**
73
         * We need to find first matching character that present at least at one two string
74
         * to start building tree. Otherwise there is no reason to build tree.
75
         */
76 1
        $this->innerProcessor(self::ROOT_NAME, array_keys($input), $this->debug = new TreeNode());
77
78 1
        return $this->debug->children[self::ROOT_NAME];
79
    }
80
81
    /**
82
     * Recursive string similarity tree builder.
83
     *
84
     * @param string   $prefix
85
     * @param array    $input
86
     * @param TreeNode $result
87
     */
88 1
    protected function innerProcessor(string $prefix, array $input, TreeNode $result)
89
    {
90
        // Create tree node. Pass string identifier if present
91 1
        $newChild = $result->append($prefix, $this->source[$result->fullValue . $prefix] ?? '');
92
93
        /**
94
         * Iterate all combinations of strings and group by LMP, also if no LMP is
95
         * found consider strings as LMP itself
96
         */
97 1
        $longestPrefixes = $this->getLMPCollection($input);
98
99
        /**
100
         * Sort LMPs(array keys) ascending by key length
101
         */
102 1
        $longestPrefixes = $this->sortArrayByKeys($longestPrefixes);
103
104
        /**
105
         * Iterate all sorted LMP strings and remove duplicates from LMP string ordered lower
106
         */
107 1
        $this->filterLMPStrings($longestPrefixes);
108
109
        /**
110
         * After filtering LMPs remove LMP from matched string arrays
111
         */
112 1
        $longestPrefixes = $this->removeKeyFromArrayStrings($longestPrefixes);
113
114
        /**
115
         * Recursively iterate current level LMPs
116
         */
117 1
        foreach ($longestPrefixes as $longestPrefix => $strings) {
118 1
            $this->innerProcessor((string)$longestPrefix, $strings, $newChild);
119
        }
120 1
    }
121
122
    /**
123
     * Get collection of grouped longest matching prefixes with strings sub-array.
124
     *
125
     * @param array $input Input strings array
126
     *
127
     * @return array Longest matching prefixes array
128
     */
129 1
    protected function getLMPCollection(array $input): array
130
    {
131
        /** @var Structure[] $input */
132 1
        $input = $this->sortStructures($input);
133
134 1
        $longestPrefixes = [];
135
136
        // Iterate sorted character group structures
137 1
        foreach ($input as $initialStructure) {
138 1
            $foundLMP = false;
139
            // Iterate all character group structures again
140 1
            foreach ($input as $comparedStructure) {
141
                // Ignore same structures
142 1
                if ($initialStructure === $comparedStructure) {
143 1
                    continue;
144
                }
145
146
                // Get common longest prefix between structures
147 1
                if (($longestPrefix = $initialStructure->getCommonPrefix($comparedStructure)) !== '') {
148 1
                    $foundLMP = true;
149 1
                    $foundExisting = false;
150 1
                    foreach ($longestPrefixes as $prefix => $strings) {
151 1
                        if ((new Structure($prefix))->getCommonPrefix(new Structure($longestPrefix)) === $prefix) {
152 1
                            $longestPrefixes[$prefix][] = $comparedStructure->string;
153 1
                            $foundExisting = true;
154 1
                            break;
155
                        }
156
                    }
157
158 1
                    if (!$foundExisting) {
159 1
                        $longestPrefixes[$longestPrefix][] = $comparedStructure->string;
160
                    }
161
                }
162
            }
163
164
            // We have not found longest common prefix with no strings - add as separate
165 1
            if (!$foundLMP) {
166 1
                $longestPrefixes[$initialStructure->string][] = $initialStructure->string;
167
            }
168
        }
169
170
        // Clear duplicates
171 1
        foreach ($longestPrefixes as $prefix => &$strings) {
172 1
            $longestPrefixes[$prefix] = array_unique($strings);
173
        }
174
175 1
        return $longestPrefixes;
176
    }
177
178
    /**
179
     * Sort structures array.
180
     *
181
     * @param array $strings Input strings array
182
     *
183
     * @return Structure[] Sorted structures array
184
     */
185 1
    protected function sortStructures(array $strings): array
186
    {
187
        // Create structures
188 1
        $structures = [];
189 1
        foreach ($strings as $string) {
190 1
            $structures[] = new Structure($string);
191
        }
192
193
        // Sort structures
194
        usort($structures, function (Structure $initial, Structure $compared) {
195 1
            return $initial->compare($compared);
196 1
        });
197
198
        // Sort descending
199 1
        return array_reverse($structures);
200
    }
201
202
    /**
203
     * Sort strings array considering PCG and NPCG string structure.
204
     *
205
     * @param array $input Input array for sorting
206
     *
207
     * @return array Sorted keys array
208
     */
209 1
    public function sortArrayByKeys(array $input): array
210
    {
211
        /** @var Structure[] $structures */
212 1
        $structures = [];
213 1
        foreach (array_keys($input) as $string) {
214 1
            $structures[] = new Structure($string);
215
        }
216
217 1
        usort($structures, function (Structure $initial, Structure $compared) {
218 1
            return $initial->compare($compared);
219 1
        });
220
221 1
        $structures = array_reverse($structures);
222
223
        // Restore initial strings sub-arrays
224 1
        $result = [];
225 1
        foreach ($structures as $structure) {
226 1
            $result[$structure->string] = $input[$structure->string];
227
        }
228
229 1
        return $result;
230
    }
231
232
    /**
233
     * Iterate LMP and remove duplicate strings in other LMPs.
234
     *
235
     * @param array $prefixes LMP collection, returning value
236
     */
237 1
    protected function filterLMPStrings(array &$prefixes)
238
    {
239 1
        $keys = array_keys($prefixes);
240 1
        for ($i = 0, $length = count($keys); $i < $length; $i++) {
241 1
            for ($j = $i + 1; $j < $length; $j++) {
242 1
                $this->removeDuplicatesInSubArray($prefixes[$keys[$i]], $prefixes[$keys[$j]]);
243
            }
244
        }
245 1
    }
246
247
    /**
248
     * Find all duplication of source array values in compared array and remove them.
249
     *
250
     * @param array $source   Source array
251
     * @param array $compared Compared array for filtering duplicates
252
     */
253 1
    protected function removeDuplicatesInSubArray(array $source, array &$compared)
254
    {
255 1
        foreach ($source as $value) {
256 1
            foreach ($compared as $key => &$subValue) {
257 1
                if ($subValue === $value) {
258 1
                    unset($compared[$key]);
259
                }
260
            }
261
        }
262 1
    }
263
264
    /**
265
     * Remove key string from the beginning of all sub-array strings.
266
     *
267
     * @param array $array Input array of key => [keyStrings...]
268
     *
269
     * @return array Processed array with removed keys from beginning of sub arrays
270
     */
271 1
    protected function removeKeyFromArrayStrings(array $array): array
272
    {
273 1
        $processed = [];
274
        /** @var string[] $stringsCollection */
275 1
        foreach ($array as $keyString => $stringsCollection) {
276 1
            $lmpLength = strlen((string)$keyString);
277 1
            foreach ($stringsCollection as $stringValue) {
278
                // Remove LMP from string
279 1
                $newString = substr($stringValue, $lmpLength);
280
281
                // This string has something left besides lmp
282 1
                if ($newString !== false && $newString !== '') {
283 1
                    $processed[$keyString][] = $newString;
284 1
                } elseif (array_key_exists($keyString, $processed) === false) {
285
                    // Add empty array to LMP if missing
286 1
                    $processed[$keyString] = [];
287
                }
288
            }
289
        }
290
291 1
        return $processed;
292
    }
293
}
294