Completed
Push — master ( 738d97...df22b0 )
by Siro Díaz
02:13
created

TrieTree::empty()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 3
rs 10
cc 1
eloc 2
nc 1
nop 0
1
<?php
2
/**
3
 * DataStructures for PHP
4
 *
5
 * @link      https://github.com/SiroDiaz/DataStructures
6
 * @copyright Copyright (c) 2017 Siro Díaz Palazón
7
 * @license   https://github.com/SiroDiaz/DataStructures/blob/master/README.md (MIT License)
8
 */
9
namespace DataStructures\Trees;
10
11
use DataStructures\Trees\Nodes\TrieNode;
12
use Countable;
13
14
/**
15
 * TrieTree
16
 *
17
 * The TrieTree class is a trie (also called digital tree and sometimes radix tree or prefix tree)
18
 * that is used to get in O(m) being m the word length.
19
 * It is used in software like word corrector and word suggest.
20
 *
21
 * @author Siro Diaz Palazon <[email protected]>
22
 */
23
class TrieTree implements Countable {
24
    private $root;
25
    private $numWords;
26
    private $size;
27
    
28
    public function __construct() {
29
        $this->root = new TrieNode();
30
        $this->numWords = 0;
31
        $this->size = 0;
32
    }
33
34
    /**
35
     * Returns true if the tree is empty. Number of prefixes is 0.
36
     *
37
     * @return bool true if empty.
38
     */
39
    public function empty() : bool {
40
        return $this->size === 0;
41
    }
42
43
    /**
44
     * Returns the number of prefixes that are contained in the trie.
45
     *
46
     * @return int the num of prefixes.
47
     */
48
    public function size() : int {
49
        return $this->size;
50
    }
51
52
    /**
53
     * Inserts a new word in the tree. If the word exists it doesn't do nothing.
54
     *
55
     * @param string $word the word to be added.
56
     */
57
    public function add($word) {
58
        $word = trim($word);
59
        if(mb_strlen($word) === 0) {
60
            return;
61
        }
62
63
        $current = &$this->root;
64
        for($i = 0; $i < mb_strlen($word); $i++) {
65
            $char = mb_substr($word, $i, 1, 'UTF-8');
66
            if(!isset($current->children[$char])) {
67
                if($i === mb_strlen($word) - 1) {
68
                    $current->children[$char] = new TrieNode($char, $current, true);
69
                    $this->numWords++;
70
                } else {
71
                    $current->children[$char] = new TrieNode($char, $current, false);
72
                }
73
                $this->size++;
74
            } else {
75
                if($i === mb_strlen($word) - 1) {
76
                    if($current->children[$char]->isWord === false) {
77
                        $current->children[$char]->isWord = true;
78
                        $this->numWords++;
79
                    }
80
                }
81
            }
82
83
            $current = &$current->children[$char];
84
        }
85
    }
86
87
    /**
88
     * Returns true if the word is stored in the tree.
89
     *
90
     * @param string $word The word to check if exists.
91
     * @param bool true if is contained.
92
     */
93
    public function contains($word) : bool {
94
        $word = trim($word);
95
        $wordLength = mb_strlen($word);
96
        if($wordLength === 0) {
97
            return true;
98
        }
99
100
        $i = 0;
101
        $found = true;
102
        $current = $this->root;
103
        while($found && $i < $wordLength) {
104
            $char = mb_substr($word, $i, 1);
105
            if(isset($current->children[$char])) {
106
                $current = $current->children[$char];
107
            } else {
108
                $found = false;
109
            }
110
            $i++;
111
        }
112
113
        if($found && $current->isWord) {
114
            return true;
115
        }
116
117
        return false;
118
    }
119
120
    /**
121
     *
122
     */
123
    public function delete($word) {
124
        $wordLength = mb_strlen($word);
125
        $i = 0;
126
        $current = $this->root;
127
        
128
        while($i < $wordLength) {
129
            if($current->hasChildren()) {
130
                $char = mb_substr($word, $i, 1, 'UTF-8');
131
                if(isset($current->children[$char])) {
132
                    $node = &$current->children[$char];
133
                    if(($i === $wordLength - 1) && $node->isWord) {
134
                        if($node->hasChildren()) {
135
                            $node->isWord = false;
136
                        } else {
137
                            $parentNode = &$node->parent;
138
                            unset($parentNode->children[$char]);
139
                            $this->size--;
140
                            $j = $i - 1;
141
142
                            while($j >= 0 && $parentNode->isLeaf() && !$parentNode->isWord) {
143
                                $char = mb_substr($word, $j, 1, 'UTF-8');
144
                                $parentNode = &$parentNode->parent;
145
                                unset($parentNode->children[$char]);
146
                                $this->size--;
147
                                $j--;
148
                            }
149
                        }
150
151
                        $this->numWords--;
152
                    }
153
                }
154
            } else {
155
                return;
156
            }
157
158
            if(isset($current->children[$char])) {
159
                $current = $current->children[$char];
160
            } else {
161
                $current = $current->parent;    //TODO falta eliminar todos los nodos hoja
162
            }
163
            $i++;
164
        }
165
    }
166
167
    public function clear() {
168
        foreach($this->root->children as $key => &$node) {
169
            $this->_clear($node);
170
            unset($this->root->children[$key]);
171
        }
172
173
        $this->size = 0;
174
        $this->numWords = 0;
175
    }
176
177
    private function _clear(TrieNode &$node = null) {
178
        foreach($node->children as $char => &$n) {
179
            $aux = $node->children[$char];
180
            unset($node->children[$char]);
181
            $node = null;
182
            return $this->_clear($aux);
183
            
184
        }
185
    }
186
187
    public function getWords() : array {
188
        return [];
189
    }
190
191
    /**
192
     * Checks if there is any word that starts with a concrete prefix.
193
     *
194
     * @param string $prefix The prefix to look up.
195
     * @return true if there are words that start with the especified prefix.
196
     */
197
    public function startsWith($prefix) : bool {
198
        return $this->getNodeFromPrefix($prefix) !== null;
199
    }
200
201
    /**
202
     *
203
     */
204
    public function withPrefix($prefix) : array {
205
        $node = $this->getNodeFromPrefix($prefix);
206
        $words = [];
207
        if($node !== null) {
208 View Code Duplication
            foreach($node->children as $char => $n) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
209
                $words = $this->_traverse($node->children[$char], $words, $prefix . $char);
210
            }
211
            
212
        }
213
        
214
        return $words;
215
    }
216
217
    private function _traverse(TrieNode $node = null, $words = [], $word) {
218
        if(empty($node->children)) {
219
            // var_dump($words);
0 ignored issues
show
Unused Code Comprehensibility introduced by
67% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
220
            return $words;
221
        }
222
        if($node->isWord) {
223
            // var_dump($node);
0 ignored issues
show
Unused Code Comprehensibility introduced by
67% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
224
            $words[] = $word;
225
        }
226
227 View Code Duplication
        foreach($node->children as $char => $n) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
228
            return $this->_traverse($node->children[$char], $words, $word . $n->char);
229
        }   
230
    }
231
232
    /**
233
     * Retrieves the node where ends the prefix especified.
234
     *
235
     * @param string $prefix The prefix to look for.
236
     * @return DataStructures\Trees\Nodes\TrieNode|null null if not found.
237
     */
238
    private function getNodeFromPrefix($prefix) {
239
        if($this->size === 0) {
240
            return null;
241
        }
242
        
243
        $i = 0;
244
        $current = $this->root;
245
        $prefixLength = mb_strlen(trim($prefix));
246
        while($i < $prefixLength) {
247
            $char = mb_substr($prefix, $i, 1, 'UTF-8');
248
            if(!isset($current->children[$char])) {
249
                return null;
250
            }
251
            $current = $current->children[$char];
252
            $i++;
253
        }
254
255
        return $current;
256
    }
257
258
    /**
259
     * Gets the number of words stored in the tree.
260
     *
261
     * @return int The word count.
262
     */
263
    public function wordCount() : int {
264
        return $this->numWords;
265
    }
266
267
    /**
268
     * Gets the number of prefixes stored in the tree.
269
     *
270
     * @return int The word count.
271
     */
272
    public function count() : int {
273
        return $this->size();
274
    }
275
}