1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* DataStructures for PHP |
4
|
|
|
* |
5
|
|
|
* @link https://github.com/SiroDiaz/DataStructures |
6
|
|
|
* @copyright Copyright (c) 2017 Siro Díaz Palazón |
7
|
|
|
* @license https://github.com/SiroDiaz/DataStructures/blob/master/README.md (MIT License) |
8
|
|
|
*/ |
9
|
|
|
namespace DataStructures\Trees; |
10
|
|
|
|
11
|
|
|
use DataStructures\Trees\Nodes\TrieNode; |
12
|
|
|
use Countable; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* TrieTree |
16
|
|
|
* |
17
|
|
|
* The TrieTree class is a trie (also called digital tree and sometimes radix tree or prefix tree) |
18
|
|
|
* that is used to get in O(m) being m the word length. |
19
|
|
|
* It is used in software like word corrector and word suggest. |
20
|
|
|
* |
21
|
|
|
* @author Siro Diaz Palazon <[email protected]> |
22
|
|
|
*/ |
23
|
|
|
class TrieTree implements Countable { |
24
|
|
|
private $root; |
25
|
|
|
private $numWords; |
26
|
|
|
private $size; |
27
|
|
|
|
28
|
|
|
public function __construct() { |
29
|
|
|
$this->root = new TrieNode(); |
30
|
|
|
$this->numWords = 0; |
31
|
|
|
$this->size = 0; |
32
|
|
|
} |
33
|
|
|
|
34
|
|
|
/** |
35
|
|
|
* Returns true if the tree is empty. Number of prefixes is 0. |
36
|
|
|
* |
37
|
|
|
* @return bool true if empty. |
38
|
|
|
*/ |
39
|
|
|
public function empty() : bool { |
40
|
|
|
return $this->size === 0; |
41
|
|
|
} |
42
|
|
|
|
43
|
|
|
/** |
44
|
|
|
* Returns the number of prefixes that are contained in the trie. |
45
|
|
|
* |
46
|
|
|
* @return int the num of prefixes. |
47
|
|
|
*/ |
48
|
|
|
public function size() : int { |
49
|
|
|
return $this->size; |
50
|
|
|
} |
51
|
|
|
|
52
|
|
|
/** |
53
|
|
|
* Inserts a new word in the tree. If the word exists it doesn't do nothing. |
54
|
|
|
* |
55
|
|
|
* @param string $word the word to be added. |
56
|
|
|
*/ |
57
|
|
|
public function add($word) { |
58
|
|
|
$word = trim($word); |
59
|
|
|
if(mb_strlen($word) === 0) { |
60
|
|
|
return; |
61
|
|
|
} |
62
|
|
|
|
63
|
|
|
$current = &$this->root; |
64
|
|
|
for($i = 0; $i < mb_strlen($word); $i++) { |
65
|
|
|
$char = mb_substr($word, $i, 1, 'UTF-8'); |
66
|
|
|
if(!isset($current->children[$char])) { |
67
|
|
|
if($i === mb_strlen($word) - 1) { |
68
|
|
|
$current->children[$char] = new TrieNode($char, true); |
69
|
|
|
$this->numWords++; |
70
|
|
|
} else { |
71
|
|
|
$current->children[$char] = new TrieNode($char, false); |
72
|
|
|
} |
73
|
|
|
$this->size++; |
74
|
|
|
} else { |
75
|
|
|
if($i === mb_strlen($word) - 1) { |
76
|
|
|
if($current->children[$char]->isWord === false) { |
77
|
|
|
$current->children[$char]->isWord = true; |
78
|
|
|
$this->numWords++; |
79
|
|
|
} |
80
|
|
|
} |
81
|
|
|
} |
82
|
|
|
|
83
|
|
|
$current = &$current->children[$char]; |
84
|
|
|
} |
85
|
|
|
} |
86
|
|
|
|
87
|
|
|
/** |
88
|
|
|
* Returns true if the word is stored in the tree. |
89
|
|
|
* |
90
|
|
|
* @param string $word The word to check if exists. |
91
|
|
|
* @param bool true if is contained. |
92
|
|
|
*/ |
93
|
|
|
public function contains($word) : bool { |
94
|
|
|
$word = trim($word); |
95
|
|
|
$wordLength = mb_strlen($word); |
96
|
|
|
if($wordLength === 0) { |
97
|
|
|
return true; |
98
|
|
|
} |
99
|
|
|
|
100
|
|
|
$i = 0; |
101
|
|
|
$found = true; |
102
|
|
|
$current = $this->root; |
103
|
|
|
while($found && $i < $wordLength) { |
104
|
|
|
$char = mb_substr($word, $i, 1); |
105
|
|
|
if(isset($current->children[$char])) { |
106
|
|
|
$current = $current->children[$char]; |
107
|
|
|
} else { |
108
|
|
|
$found = false; |
109
|
|
|
} |
110
|
|
|
$i++; |
111
|
|
|
} |
112
|
|
|
|
113
|
|
|
if($found && $current->isWord) { |
114
|
|
|
return true; |
115
|
|
|
} |
116
|
|
|
|
117
|
|
|
return false; |
118
|
|
|
} |
119
|
|
|
|
120
|
|
|
public function delete($word) { |
121
|
|
|
// while not consumidaPalabra($i < mb_strlen($word)) |
|
|
|
|
122
|
|
|
// if tiene solo una hija y es fin palabra borrar |
123
|
|
|
$wordLength = mb_strlen($word); |
124
|
|
|
$i = 0; |
125
|
|
|
$current = $this->root; |
126
|
|
|
while($i < $wordLength) { |
127
|
|
|
/* |
|
|
|
|
128
|
|
|
if() |
129
|
|
|
if($current->hasChildren()) { |
130
|
|
|
|
131
|
|
|
} |
132
|
|
|
*/ |
133
|
|
|
$current = $current->children[mb_substr($word, $i, 1, 'UTF-8')]; |
134
|
|
|
$i++; |
135
|
|
|
} |
136
|
|
|
} |
137
|
|
|
|
138
|
|
|
public function getWords() : array { |
139
|
|
|
return []; |
140
|
|
|
} |
141
|
|
|
|
142
|
|
|
/** |
143
|
|
|
* Checks if there is any word that starts with a concrete prefix. |
144
|
|
|
* |
145
|
|
|
* @param string $prefix The prefix to look up. |
146
|
|
|
* @return true if there are words that start with the especified prefix. |
147
|
|
|
*/ |
148
|
|
|
public function startsWith($prefix) : bool { |
149
|
|
|
return $this->getNodeFromPrefix($prefix) !== null; |
150
|
|
|
} |
151
|
|
|
|
152
|
|
|
/** |
153
|
|
|
* |
154
|
|
|
*/ |
155
|
|
|
public function withPrefix($prefix) : array { |
156
|
|
|
$node = $this->getNodeFromPrefix($prefix); |
157
|
|
|
$words = []; |
158
|
|
|
if($node !== null) { |
159
|
|
View Code Duplication |
foreach($node->children as $char => $n) { |
|
|
|
|
160
|
|
|
$words = $this->_traverse($node->children[$char], $words, $prefix . $char); |
161
|
|
|
} |
162
|
|
|
|
163
|
|
|
} |
164
|
|
|
|
165
|
|
|
return $words; |
166
|
|
|
} |
167
|
|
|
|
168
|
|
|
private function _traverse(TrieNode $node = null, $words = [], $word) { |
169
|
|
|
if(empty($node->children)) { |
170
|
|
|
// var_dump($words); |
|
|
|
|
171
|
|
|
return $words; |
172
|
|
|
} |
173
|
|
|
if($node->isWord) { |
174
|
|
|
// var_dump($node); |
|
|
|
|
175
|
|
|
$words[] = $word; |
176
|
|
|
} |
177
|
|
|
|
178
|
|
View Code Duplication |
foreach($node->children as $char => $n) { |
|
|
|
|
179
|
|
|
return $this->_traverse($node->children[$char], $words, $word . $n->char); |
180
|
|
|
} |
181
|
|
|
|
182
|
|
|
} |
183
|
|
|
|
184
|
|
|
/** |
185
|
|
|
* Retrieves the node where ends the prefix especified. |
186
|
|
|
* |
187
|
|
|
* @param string $prefix The prefix to look for. |
188
|
|
|
* @return DataStructures\Trees\Nodes\TrieNode|null null if not found. |
189
|
|
|
*/ |
190
|
|
|
private function getNodeFromPrefix($prefix) { |
191
|
|
|
if($this->size === 0) { |
192
|
|
|
return null; |
193
|
|
|
} |
194
|
|
|
|
195
|
|
|
$i = 0; |
196
|
|
|
$current = $this->root; |
197
|
|
|
$prefixLength = mb_strlen(trim($prefix)); |
198
|
|
|
while($i < $prefixLength) { |
199
|
|
|
$char = mb_substr($prefix, $i, 1, 'UTF-8'); |
200
|
|
|
if(!isset($current->children[$char])) { |
201
|
|
|
return null; |
202
|
|
|
} |
203
|
|
|
$current = $current->children[$char]; |
204
|
|
|
$i++; |
205
|
|
|
} |
206
|
|
|
|
207
|
|
|
return $current; |
208
|
|
|
} |
209
|
|
|
|
210
|
|
|
/** |
211
|
|
|
* Gets the number of words stored in the tree. |
212
|
|
|
* |
213
|
|
|
* @return int The word count. |
214
|
|
|
*/ |
215
|
|
|
public function wordCount() : int { |
216
|
|
|
return $this->numWords; |
217
|
|
|
} |
218
|
|
|
|
219
|
|
|
/** |
220
|
|
|
* Gets the number of prefixes stored in the tree. |
221
|
|
|
* |
222
|
|
|
* @return int The word count. |
223
|
|
|
*/ |
224
|
|
|
public function count() : int { |
225
|
|
|
return $this->size(); |
226
|
|
|
} |
227
|
|
|
} |
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.