1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* DataStructures for PHP |
4
|
|
|
* |
5
|
|
|
* @link https://github.com/SiroDiaz/DataStructures |
6
|
|
|
* @copyright Copyright (c) 2017 Siro Díaz Palazón |
7
|
|
|
* @license https://github.com/SiroDiaz/DataStructures/blob/master/README.md (MIT License) |
8
|
|
|
*/ |
9
|
|
|
namespace DataStructures\Trees; |
10
|
|
|
|
11
|
|
|
use DataStructures\Trees\Nodes\TrieNode; |
12
|
|
|
use Countable; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* TrieTree |
16
|
|
|
* |
17
|
|
|
* The TrieTree class is a trie (also called digital tree and sometimes radix tree or prefix tree) |
18
|
|
|
* that is used to get in O(m) being m the word length. |
19
|
|
|
* It is used in software like word corrector and word suggest. |
20
|
|
|
* |
21
|
|
|
* @author Siro Diaz Palazon <[email protected]> |
22
|
|
|
*/ |
23
|
|
|
class TrieTree implements Countable { |
24
|
|
|
private $root; |
25
|
|
|
private $numWords; |
26
|
|
|
private $size; |
27
|
|
|
|
28
|
|
|
public function __construct() { |
29
|
|
|
$this->root = new TrieNode(); |
30
|
|
|
$this->numWords = 0; |
31
|
|
|
$this->size = 0; |
32
|
|
|
} |
33
|
|
|
|
34
|
|
|
/** |
35
|
|
|
* Returns true if the tree is empty. Number of prefixes is 0. |
36
|
|
|
* |
37
|
|
|
* @return bool true if empty. |
38
|
|
|
*/ |
39
|
|
|
public function empty() : bool { |
40
|
|
|
return $this->size === 0; |
41
|
|
|
} |
42
|
|
|
|
43
|
|
|
/** |
44
|
|
|
* Returns the number of prefixes that are contained in the trie. |
45
|
|
|
* |
46
|
|
|
* @return int the num of prefixes. |
47
|
|
|
*/ |
48
|
|
|
public function size() : int { |
49
|
|
|
return $this->size; |
50
|
|
|
} |
51
|
|
|
|
52
|
|
|
/** |
53
|
|
|
* Inserts a new word in the tree. If the word exists it doesn't do nothing. |
54
|
|
|
* |
55
|
|
|
* @param string $word the word to be added. |
56
|
|
|
*/ |
57
|
|
|
public function add($word) { |
58
|
|
|
$word = trim($word); |
59
|
|
|
if(mb_strlen($word) === 0) { |
60
|
|
|
return; |
61
|
|
|
} |
62
|
|
|
|
63
|
|
|
$current = &$this->root; |
64
|
|
|
for($i = 0; $i < mb_strlen($word); $i++) { |
65
|
|
|
$char = mb_substr($word, $i, 1, 'UTF-8'); |
66
|
|
|
if(!isset($current->children[$char])) { |
67
|
|
|
if($i === mb_strlen($word) - 1) { |
68
|
|
|
$current->children[$char] = new TrieNode($char, $current, true); |
69
|
|
|
$this->numWords++; |
70
|
|
|
} else { |
71
|
|
|
$current->children[$char] = new TrieNode($char, $current, false); |
72
|
|
|
} |
73
|
|
|
$this->size++; |
74
|
|
|
} else { |
75
|
|
|
if($i === mb_strlen($word) - 1) { |
76
|
|
|
if($current->children[$char]->isWord === false) { |
77
|
|
|
$current->children[$char]->isWord = true; |
78
|
|
|
$this->numWords++; |
79
|
|
|
} |
80
|
|
|
} |
81
|
|
|
} |
82
|
|
|
|
83
|
|
|
$current = &$current->children[$char]; |
84
|
|
|
} |
85
|
|
|
} |
86
|
|
|
|
87
|
|
|
/** |
88
|
|
|
* Returns true if the word is stored in the tree. |
89
|
|
|
* |
90
|
|
|
* @param string $word The word to check if exists. |
91
|
|
|
* @param bool true if is contained. |
92
|
|
|
*/ |
93
|
|
|
public function contains($word) : bool { |
94
|
|
|
$word = trim($word); |
95
|
|
|
$wordLength = mb_strlen($word); |
96
|
|
|
if($wordLength === 0) { |
97
|
|
|
return true; |
98
|
|
|
} |
99
|
|
|
|
100
|
|
|
$i = 0; |
101
|
|
|
$found = true; |
102
|
|
|
$current = $this->root; |
103
|
|
|
while($found && $i < $wordLength) { |
104
|
|
|
$char = mb_substr($word, $i, 1); |
105
|
|
|
if(isset($current->children[$char])) { |
106
|
|
|
$current = $current->children[$char]; |
107
|
|
|
} else { |
108
|
|
|
$found = false; |
109
|
|
|
} |
110
|
|
|
$i++; |
111
|
|
|
} |
112
|
|
|
|
113
|
|
|
if($found && $current->isWord) { |
114
|
|
|
return true; |
115
|
|
|
} |
116
|
|
|
|
117
|
|
|
return false; |
118
|
|
|
} |
119
|
|
|
|
120
|
|
|
/** |
121
|
|
|
* |
122
|
|
|
*/ |
123
|
|
|
public function delete($word) { |
124
|
|
|
$wordLength = mb_strlen($word); |
125
|
|
|
$i = 0; |
126
|
|
|
$current = $this->root; |
127
|
|
|
|
128
|
|
|
while($i < $wordLength) { |
129
|
|
|
if($current->hasChildren()) { |
130
|
|
|
$char = mb_substr($word, $i, 1, 'UTF-8'); |
131
|
|
|
if(isset($current->children[$char])) { |
132
|
|
|
$node = &$current->children[$char]; |
133
|
|
|
if(($i === $wordLength - 1) && $node->isWord) { |
134
|
|
|
if($node->hasChildren()) { |
135
|
|
|
$node->isWord = false; |
136
|
|
|
} else { |
137
|
|
|
$parentNode = &$node->parent; |
138
|
|
|
unset($parentNode->children[$char]); |
139
|
|
|
$this->size--; |
140
|
|
|
$j = $i - 1; |
141
|
|
|
|
142
|
|
|
while($j >= 0 && $parentNode->isLeaf() && !$parentNode->isWord) { |
143
|
|
|
$char = mb_substr($word, $j, 1, 'UTF-8'); |
144
|
|
|
$parentNode = &$parentNode->parent; |
145
|
|
|
unset($parentNode->children[$char]); |
146
|
|
|
$this->size--; |
147
|
|
|
$j--; |
148
|
|
|
} |
149
|
|
|
} |
150
|
|
|
|
151
|
|
|
$this->numWords--; |
152
|
|
|
} |
153
|
|
|
} |
154
|
|
|
} else { |
155
|
|
|
return; |
156
|
|
|
} |
157
|
|
|
|
158
|
|
|
if(isset($current->children[$char])) { |
159
|
|
|
$current = $current->children[$char]; |
160
|
|
|
} else { |
161
|
|
|
$current = $current->parent; //TODO falta eliminar todos los nodos hoja |
162
|
|
|
} |
163
|
|
|
$i++; |
164
|
|
|
} |
165
|
|
|
} |
166
|
|
|
|
167
|
|
|
public function clear() { |
168
|
|
|
foreach($this->root->children as $key => &$node) { |
169
|
|
|
$this->_clear($node); |
170
|
|
|
unset($this->root->children[$key]); |
171
|
|
|
} |
172
|
|
|
|
173
|
|
|
$this->size = 0; |
174
|
|
|
$this->numWords = 0; |
175
|
|
|
} |
176
|
|
|
|
177
|
|
|
private function _clear(TrieNode &$node = null) { |
178
|
|
|
foreach($node->children as $char => &$n) { |
179
|
|
|
$aux = $node->children[$char]; |
180
|
|
|
unset($node->children[$char]); |
181
|
|
|
$node = null; |
182
|
|
|
return $this->_clear($aux); |
183
|
|
|
|
184
|
|
|
} |
185
|
|
|
} |
186
|
|
|
|
187
|
|
|
public function getWords() : array { |
188
|
|
|
return []; |
189
|
|
|
} |
190
|
|
|
|
191
|
|
|
/** |
192
|
|
|
* Checks if there is any word that starts with a concrete prefix. |
193
|
|
|
* |
194
|
|
|
* @param string $prefix The prefix to look up. |
195
|
|
|
* @return true if there are words that start with the especified prefix. |
196
|
|
|
*/ |
197
|
|
|
public function startsWith($prefix) : bool { |
198
|
|
|
return $this->getNodeFromPrefix($prefix) !== null; |
199
|
|
|
} |
200
|
|
|
|
201
|
|
|
/** |
202
|
|
|
* |
203
|
|
|
*/ |
204
|
|
|
public function withPrefix($prefix) : array { |
205
|
|
|
$node = $this->getNodeFromPrefix($prefix); |
206
|
|
|
$words = []; |
207
|
|
|
if($node !== null) { |
208
|
|
View Code Duplication |
foreach($node->children as $char => $n) { |
|
|
|
|
209
|
|
|
$words = $this->_traverse($node->children[$char], $words, $prefix . $char); |
210
|
|
|
} |
211
|
|
|
|
212
|
|
|
} |
213
|
|
|
|
214
|
|
|
return $words; |
215
|
|
|
} |
216
|
|
|
|
217
|
|
|
private function _traverse(TrieNode $node = null, $words = [], $word) { |
218
|
|
|
if(empty($node->children)) { |
219
|
|
|
// var_dump($words); |
|
|
|
|
220
|
|
|
return $words; |
221
|
|
|
} |
222
|
|
|
if($node->isWord) { |
223
|
|
|
// var_dump($node); |
|
|
|
|
224
|
|
|
$words[] = $word; |
225
|
|
|
} |
226
|
|
|
|
227
|
|
View Code Duplication |
foreach($node->children as $char => $n) { |
|
|
|
|
228
|
|
|
return $this->_traverse($node->children[$char], $words, $word . $n->char); |
229
|
|
|
} |
230
|
|
|
} |
231
|
|
|
|
232
|
|
|
/** |
233
|
|
|
* Retrieves the node where ends the prefix especified. |
234
|
|
|
* |
235
|
|
|
* @param string $prefix The prefix to look for. |
236
|
|
|
* @return DataStructures\Trees\Nodes\TrieNode|null null if not found. |
237
|
|
|
*/ |
238
|
|
|
private function getNodeFromPrefix($prefix) { |
239
|
|
|
if($this->size === 0) { |
240
|
|
|
return null; |
241
|
|
|
} |
242
|
|
|
|
243
|
|
|
$i = 0; |
244
|
|
|
$current = $this->root; |
245
|
|
|
$prefixLength = mb_strlen(trim($prefix)); |
246
|
|
|
while($i < $prefixLength) { |
247
|
|
|
$char = mb_substr($prefix, $i, 1, 'UTF-8'); |
248
|
|
|
if(!isset($current->children[$char])) { |
249
|
|
|
return null; |
250
|
|
|
} |
251
|
|
|
$current = $current->children[$char]; |
252
|
|
|
$i++; |
253
|
|
|
} |
254
|
|
|
|
255
|
|
|
return $current; |
256
|
|
|
} |
257
|
|
|
|
258
|
|
|
/** |
259
|
|
|
* Gets the number of words stored in the tree. |
260
|
|
|
* |
261
|
|
|
* @return int The word count. |
262
|
|
|
*/ |
263
|
|
|
public function wordCount() : int { |
264
|
|
|
return $this->numWords; |
265
|
|
|
} |
266
|
|
|
|
267
|
|
|
/** |
268
|
|
|
* Gets the number of prefixes stored in the tree. |
269
|
|
|
* |
270
|
|
|
* @return int The word count. |
271
|
|
|
*/ |
272
|
|
|
public function count() : int { |
273
|
|
|
return $this->size(); |
274
|
|
|
} |
275
|
|
|
} |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.