TrieTree::contains()   C
last analyzed

Complexity

Conditions 7
Paths 7

Size

Total Lines 26
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 26
rs 6.7272
cc 7
eloc 18
nc 7
nop 1
1
<?php
2
/**
3
 * DataStructures for PHP
4
 *
5
 * @link      https://github.com/SiroDiaz/DataStructures
6
 * @copyright Copyright (c) 2017 Siro Díaz Palazón
7
 * @license   https://github.com/SiroDiaz/DataStructures/blob/master/README.md (MIT License)
8
 */
9
namespace DataStructures\Trees;
10
11
use DataStructures\Trees\Nodes\TrieNode;
12
use Countable;
13
14
/**
15
 * TrieTree
16
 *
17
 * The TrieTree class is a trie (also called digital tree and sometimes radix tree or prefix tree)
18
 * that is used to get in O(m) being m the word length.
19
 * It is used in software like word corrector and word suggest.
20
 *
21
 * @author Siro Diaz Palazon <[email protected]>
22
 */
23
class TrieTree implements Countable {
24
    private $root;
25
    private $numWords;
26
    private $size;
27
    
28
    public function __construct() {
29
        $this->root = new TrieNode();
30
        $this->numWords = 0;
31
        $this->size = 0;
32
    }
33
34
    /**
35
     * Returns true if the tree is empty. Number of prefixes is 0.
36
     *
37
     * @return bool true if empty.
38
     */
39
    public function empty() : bool {
40
        return $this->size === 0;
41
    }
42
43
    /**
44
     * Returns the number of prefixes that are contained in the trie.
45
     *
46
     * @return int the num of prefixes.
47
     */
48
    public function size() : int {
49
        return $this->size;
50
    }
51
52
    /**
53
     * Inserts a new word in the tree. If the word exists it doesn't do nothing.
54
     *
55
     * @param string $word the word to be added.
56
     */
57
    public function add($word) {
58
        $word = trim($word);
59
        $wordLength = mb_strlen($word);
60
        if($wordLength === 0) {
61
            return;
62
        }
63
64
        $current = &$this->root;
65
        for($i = 0; $i < $wordLength; $i++) {
66
            $char = mb_substr($word, $i, 1, 'UTF-8');
67
            if(!isset($current->children[$char])) {
68
                if($i === $wordLength - 1) {
69
                    $current->children[$char] = new TrieNode($char, $current, true);
70
                    $this->numWords++;
71
                } else {
72
                    $current->children[$char] = new TrieNode($char, $current, false);
73
                }
74
                $this->size++;
75
            } else {
76
                if($i === $wordLength - 1) {
77
                    if($current->children[$char]->isWord === false) {
78
                        $current->children[$char]->isWord = true;
79
                        $this->numWords++;
80
                    }
81
                }
82
            }
83
84
            $current = &$current->children[$char];
85
        }
86
    }
87
88
    /**
89
     * Returns true if the word is stored in the tree.
90
     *
91
     * @param string $word The word to check if exists.
92
     * @param bool true if is contained.
93
     */
94
    public function contains($word) : bool {
95
        $word = trim($word);
96
        $wordLength = mb_strlen($word);
97
        if($wordLength === 0) {
98
            return true;
99
        }
100
101
        $i = 0;
102
        $found = true;
103
        $current = $this->root;
104
        while($found && $i < $wordLength) {
105
            $char = mb_substr($word, $i, 1);
106
            if(isset($current->children[$char])) {
107
                $current = $current->children[$char];
108
            } else {
109
                $found = false;
110
            }
111
            $i++;
112
        }
113
114
        if($found && $current->isWord) {
115
            return true;
116
        }
117
118
        return false;
119
    }
120
121
    /**
122
     * Removes a word from the tree.
123
     *
124
     * @param string $word the word to delete if it is contained in the trie.
125
     */
126
    public function delete($word) {
127
        $wordLength = mb_strlen($word);
128
        $i = 0;
129
        $current = $this->root;
130
        
131
        while($i < $wordLength) {
132
            if($current->hasChildren()) {
133
                $char = mb_substr($word, $i, 1, 'UTF-8');
134
                if(isset($current->children[$char])) {
135
                    $node = &$current->children[$char];
136
                    if(($i === $wordLength - 1) && $node->isWord) {
137
                        if($node->hasChildren()) {
138
                            $node->isWord = false;
139
                        } else {
140
                            $parentNode = &$node->parent;
141
                            unset($parentNode->children[$char]);
142
                            $this->size--;
143
                            $j = $i - 1;
144
145
                            while($j >= 0 && $parentNode->isLeaf() && !$parentNode->isWord) {
146
                                $char = mb_substr($word, $j, 1, 'UTF-8');
147
                                $parentNode = &$parentNode->parent;
148
                                unset($parentNode->children[$char]);
149
                                $this->size--;
150
                                $j--;
151
                            }
152
                        }
153
154
                        $this->numWords--;
155
                    }
156
                }
157
            } else {
158
                return;
159
            }
160
161
            if(isset($current->children[$char])) {
162
                $current = $current->children[$char];
163
            } else {
164
                $current = $current->parent;    //TODO need to delete all leaf nodes
165
            }
166
            $i++;
167
        }
168
    }
169
170
    /**
171
     * Removes all nodes (and words) in the tree and resets the size
172
     * and word count.
173
     */
174
    public function clear() {
175
        foreach($this->root->children as $key => &$node) {
176
            $this->_clear($node);
177
            unset($this->root->children[$key]);
178
        }
179
180
        $this->size = 0;
181
        $this->numWords = 0;
182
    }
183
184
    /**
185
     * Recursive clear that removes all nodes, included leaf, except
186
     * the references to the first children.
187
     *
188
     * @param DataStructures\Trees\Nodes\TrieNode|null the node to traverse.
189
     * @return DataStructures\Trees\Nodes\TrieNode|null the next node to delete.
190
     */
191
    private function _clear(TrieNode &$node = null) {
192
        foreach($node->children as $char => &$n) {
193
            $aux = $node->children[$char];
194
            unset($node->children[$char]);
195
            $node = null;
196
            return $this->_clear($aux);
197
        }
198
    }
199
200
    /**
201
     * Returns an array containing all words stored in the tree.
202
     * It starts to search from the root that contains an empty string
203
     * using the withPrefix method.
204
     *
205
     * @return array an empty array if there are not words in the tree.
206
     */
207
    public function getWords() : array {
208
        if($this->size === 0) {
209
            return [];
210
        }
211
212
        return $this->withPrefix('');
213
    }
214
215
    /**
216
     * Checks if there is any word that starts with a concrete prefix.
217
     *
218
     * @param string $prefix The prefix to look up.
219
     * @return true if there are words that start with the especified prefix.
220
     */
221
    public function startsWith($prefix) : bool {
222
        return $this->getNodeFromPrefix($prefix) !== null;
223
    }
224
225
    /**
226
     * Returns an array with all words that has the especified prefix.
227
     * For example, with prefix 'he' it will retrieve: hell, hello, ....
228
     *
229
     * @param string $prefix The prefix that must have all words.
230
     * @return array All words that contains the prefix.
231
     */
232
    public function withPrefix($prefix) : array {
233
        $node = $this->getNodeFromPrefix($prefix);
234
        $words = [];
235
        if($node !== null) {
236
            if($node->isWord) {
237
                $words[] = $prefix;
238
            }
239 View Code Duplication
            foreach($node->children as $char => $n) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
240
                $words = $this->_traverseWithPrefix($node->children[$char], $words, $prefix . $char);
241
            }
242
        }
243
244
        return $words;
245
    }
246
247
    /**
248
     *
249
     */
250
    private function _traverseWithPrefix(TrieNode $node = null, $words = [], $word) {
251
        if($node->isWord) {
252
            $words[] = $word;
253
        }
254
255
        if(empty($node->children)) {
256
            return $words;
257
        }
258
259 View Code Duplication
        foreach($node->children as $char => &$n) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
260
            $words = $this->_traverseWithPrefix($node->children[$char], $words, $word . $char);
261
        }
262
263
        return $words;
264
    }
265
266
    /**
267
     * Retrieves the node where ends the prefix especified.
268
     *
269
     * @param string $prefix The prefix to look for.
270
     * @return DataStructures\Trees\Nodes\TrieNode|null null if not found.
271
     */
272
    private function getNodeFromPrefix($prefix) {
273
        if($this->size === 0) {
274
            return null;
275
        }
276
        
277
        $i = 0;
278
        $current = $this->root;
279
        $prefixLength = mb_strlen(trim($prefix));
280
        while($i < $prefixLength) {
281
            $char = mb_substr($prefix, $i, 1, 'UTF-8');
282
            if(!isset($current->children[$char])) {
283
                return null;
284
            }
285
            $current = $current->children[$char];
286
            $i++;
287
        }
288
289
        return $current;
290
    }
291
292
    /**
293
     * Gets the number of words stored in the tree.
294
     *
295
     * @return int The word count.
296
     */
297
    public function wordCount() : int {
298
        return $this->numWords;
299
    }
300
301
    /**
302
     * Gets the number of prefixes stored in the tree.
303
     *
304
     * @return int The word count.
305
     */
306
    public function count() : int {
307
        return $this->size();
308
    }
309
}