1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* DataStructures for PHP |
4
|
|
|
* |
5
|
|
|
* @link https://github.com/SiroDiaz/DataStructures |
6
|
|
|
* @copyright Copyright (c) 2017 Siro Díaz Palazón |
7
|
|
|
* @license https://github.com/SiroDiaz/DataStructures/blob/master/README.md (MIT License) |
8
|
|
|
*/ |
9
|
|
|
namespace DataStructures\Trees; |
10
|
|
|
|
11
|
|
|
use DataStructures\Trees\Nodes\TrieNode; |
12
|
|
|
use Countable; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* TrieTree |
16
|
|
|
* |
17
|
|
|
* The TrieTree class is a trie (also called digital tree and sometimes radix tree or prefix tree) |
18
|
|
|
* that is used to get in O(m) being m the word length. |
19
|
|
|
* It is used in software like word corrector and word suggest. |
20
|
|
|
* |
21
|
|
|
* @author Siro Diaz Palazon <[email protected]> |
22
|
|
|
*/ |
23
|
|
|
class TrieTree implements Countable { |
24
|
|
|
private $root; |
25
|
|
|
private $numWords; |
26
|
|
|
private $size; |
27
|
|
|
|
28
|
|
|
public function __construct() { |
29
|
|
|
$this->root = new TrieNode(); |
30
|
|
|
$this->numWords = 0; |
31
|
|
|
$this->size = 0; |
32
|
|
|
} |
33
|
|
|
|
34
|
|
|
/** |
35
|
|
|
* Returns true if the tree is empty. Number of prefixes is 0. |
36
|
|
|
* |
37
|
|
|
* @return bool true if empty. |
38
|
|
|
*/ |
39
|
|
|
public function empty() : bool { |
40
|
|
|
return $this->size === 0; |
41
|
|
|
} |
42
|
|
|
|
43
|
|
|
/** |
44
|
|
|
* Returns the number of prefixes that are contained in the trie. |
45
|
|
|
* |
46
|
|
|
* @return int the num of prefixes. |
47
|
|
|
*/ |
48
|
|
|
public function size() : int { |
49
|
|
|
return $this->size; |
50
|
|
|
} |
51
|
|
|
|
52
|
|
|
/** |
53
|
|
|
* Inserts a new word in the tree. If the word exists it doesn't do nothing. |
54
|
|
|
* |
55
|
|
|
* @param string $word the word to be added. |
56
|
|
|
*/ |
57
|
|
|
public function add($word) { |
58
|
|
|
$word = trim($word); |
59
|
|
|
$wordLength = mb_strlen($word); |
60
|
|
|
if($wordLength === 0) { |
61
|
|
|
return; |
62
|
|
|
} |
63
|
|
|
|
64
|
|
|
$current = &$this->root; |
65
|
|
|
for($i = 0; $i < $wordLength; $i++) { |
66
|
|
|
$char = mb_substr($word, $i, 1, 'UTF-8'); |
67
|
|
|
if(!isset($current->children[$char])) { |
68
|
|
|
if($i === $wordLength - 1) { |
69
|
|
|
$current->children[$char] = new TrieNode($char, $current, true); |
70
|
|
|
$this->numWords++; |
71
|
|
|
} else { |
72
|
|
|
$current->children[$char] = new TrieNode($char, $current, false); |
73
|
|
|
} |
74
|
|
|
$this->size++; |
75
|
|
|
} else { |
76
|
|
|
if($i === $wordLength - 1) { |
77
|
|
|
if($current->children[$char]->isWord === false) { |
78
|
|
|
$current->children[$char]->isWord = true; |
79
|
|
|
$this->numWords++; |
80
|
|
|
} |
81
|
|
|
} |
82
|
|
|
} |
83
|
|
|
|
84
|
|
|
$current = &$current->children[$char]; |
85
|
|
|
} |
86
|
|
|
} |
87
|
|
|
|
88
|
|
|
/** |
89
|
|
|
* Returns true if the word is stored in the tree. |
90
|
|
|
* |
91
|
|
|
* @param string $word The word to check if exists. |
92
|
|
|
* @param bool true if is contained. |
93
|
|
|
*/ |
94
|
|
|
public function contains($word) : bool { |
95
|
|
|
$word = trim($word); |
96
|
|
|
$wordLength = mb_strlen($word); |
97
|
|
|
if($wordLength === 0) { |
98
|
|
|
return true; |
99
|
|
|
} |
100
|
|
|
|
101
|
|
|
$i = 0; |
102
|
|
|
$found = true; |
103
|
|
|
$current = $this->root; |
104
|
|
|
while($found && $i < $wordLength) { |
105
|
|
|
$char = mb_substr($word, $i, 1); |
106
|
|
|
if(isset($current->children[$char])) { |
107
|
|
|
$current = $current->children[$char]; |
108
|
|
|
} else { |
109
|
|
|
$found = false; |
110
|
|
|
} |
111
|
|
|
$i++; |
112
|
|
|
} |
113
|
|
|
|
114
|
|
|
if($found && $current->isWord) { |
115
|
|
|
return true; |
116
|
|
|
} |
117
|
|
|
|
118
|
|
|
return false; |
119
|
|
|
} |
120
|
|
|
|
121
|
|
|
/** |
122
|
|
|
* Removes a word from the tree. |
123
|
|
|
* |
124
|
|
|
* @param string $word the word to delete if it is contained in the trie. |
125
|
|
|
*/ |
126
|
|
|
public function delete($word) { |
127
|
|
|
$wordLength = mb_strlen($word); |
128
|
|
|
$i = 0; |
129
|
|
|
$current = $this->root; |
130
|
|
|
|
131
|
|
|
while($i < $wordLength) { |
132
|
|
|
if($current->hasChildren()) { |
133
|
|
|
$char = mb_substr($word, $i, 1, 'UTF-8'); |
134
|
|
|
if(isset($current->children[$char])) { |
135
|
|
|
$node = &$current->children[$char]; |
136
|
|
|
if(($i === $wordLength - 1) && $node->isWord) { |
137
|
|
|
if($node->hasChildren()) { |
138
|
|
|
$node->isWord = false; |
139
|
|
|
} else { |
140
|
|
|
$parentNode = &$node->parent; |
141
|
|
|
unset($parentNode->children[$char]); |
142
|
|
|
$this->size--; |
143
|
|
|
$j = $i - 1; |
144
|
|
|
|
145
|
|
|
while($j >= 0 && $parentNode->isLeaf() && !$parentNode->isWord) { |
146
|
|
|
$char = mb_substr($word, $j, 1, 'UTF-8'); |
147
|
|
|
$parentNode = &$parentNode->parent; |
148
|
|
|
unset($parentNode->children[$char]); |
149
|
|
|
$this->size--; |
150
|
|
|
$j--; |
151
|
|
|
} |
152
|
|
|
} |
153
|
|
|
|
154
|
|
|
$this->numWords--; |
155
|
|
|
} |
156
|
|
|
} |
157
|
|
|
} else { |
158
|
|
|
return; |
159
|
|
|
} |
160
|
|
|
|
161
|
|
|
if(isset($current->children[$char])) { |
162
|
|
|
$current = $current->children[$char]; |
163
|
|
|
} else { |
164
|
|
|
$current = $current->parent; //TODO need to delete all leaf nodes |
165
|
|
|
} |
166
|
|
|
$i++; |
167
|
|
|
} |
168
|
|
|
} |
169
|
|
|
|
170
|
|
|
/** |
171
|
|
|
* Removes all nodes (and words) in the tree and resets the size |
172
|
|
|
* and word count. |
173
|
|
|
*/ |
174
|
|
|
public function clear() { |
175
|
|
|
foreach($this->root->children as $key => &$node) { |
176
|
|
|
$this->_clear($node); |
177
|
|
|
unset($this->root->children[$key]); |
178
|
|
|
} |
179
|
|
|
|
180
|
|
|
$this->size = 0; |
181
|
|
|
$this->numWords = 0; |
182
|
|
|
} |
183
|
|
|
|
184
|
|
|
/** |
185
|
|
|
* Recursive clear that removes all nodes, included leaf, except |
186
|
|
|
* the references to the first children. |
187
|
|
|
* |
188
|
|
|
* @param DataStructures\Trees\Nodes\TrieNode|null the node to traverse. |
189
|
|
|
* @return DataStructures\Trees\Nodes\TrieNode|null the next node to delete. |
190
|
|
|
*/ |
191
|
|
|
private function _clear(TrieNode &$node = null) { |
192
|
|
|
foreach($node->children as $char => &$n) { |
193
|
|
|
$aux = $node->children[$char]; |
194
|
|
|
unset($node->children[$char]); |
195
|
|
|
$node = null; |
196
|
|
|
return $this->_clear($aux); |
197
|
|
|
} |
198
|
|
|
} |
199
|
|
|
|
200
|
|
|
/** |
201
|
|
|
* Returns an array containing all words stored in the tree. |
202
|
|
|
* It starts to search from the root that contains an empty string |
203
|
|
|
* using the withPrefix method. |
204
|
|
|
* |
205
|
|
|
* @return array an empty array if there are not words in the tree. |
206
|
|
|
*/ |
207
|
|
|
public function getWords() : array { |
208
|
|
|
if($this->size === 0) { |
209
|
|
|
return []; |
210
|
|
|
} |
211
|
|
|
|
212
|
|
|
return $this->withPrefix(''); |
213
|
|
|
} |
214
|
|
|
|
215
|
|
|
/** |
216
|
|
|
* Checks if there is any word that starts with a concrete prefix. |
217
|
|
|
* |
218
|
|
|
* @param string $prefix The prefix to look up. |
219
|
|
|
* @return true if there are words that start with the especified prefix. |
220
|
|
|
*/ |
221
|
|
|
public function startsWith($prefix) : bool { |
222
|
|
|
return $this->getNodeFromPrefix($prefix) !== null; |
223
|
|
|
} |
224
|
|
|
|
225
|
|
|
/** |
226
|
|
|
* Returns an array with all words that has the especified prefix. |
227
|
|
|
* For example, with prefix 'he' it will retrieve: hell, hello, .... |
228
|
|
|
* |
229
|
|
|
* @param string $prefix The prefix that must have all words. |
230
|
|
|
* @return array All words that contains the prefix. |
231
|
|
|
*/ |
232
|
|
|
public function withPrefix($prefix) : array { |
233
|
|
|
$node = $this->getNodeFromPrefix($prefix); |
234
|
|
|
$words = []; |
235
|
|
|
if($node !== null) { |
236
|
|
|
if($node->isWord) { |
237
|
|
|
$words[] = $prefix; |
238
|
|
|
} |
239
|
|
View Code Duplication |
foreach($node->children as $char => $n) { |
|
|
|
|
240
|
|
|
$words = $this->_traverseWithPrefix($node->children[$char], $words, $prefix . $char); |
241
|
|
|
} |
242
|
|
|
} |
243
|
|
|
|
244
|
|
|
return $words; |
245
|
|
|
} |
246
|
|
|
|
247
|
|
|
/** |
248
|
|
|
* |
249
|
|
|
*/ |
250
|
|
|
private function _traverseWithPrefix(TrieNode $node = null, $words = [], $word) { |
251
|
|
|
if($node->isWord) { |
252
|
|
|
$words[] = $word; |
253
|
|
|
} |
254
|
|
|
|
255
|
|
|
if(empty($node->children)) { |
256
|
|
|
return $words; |
257
|
|
|
} |
258
|
|
|
|
259
|
|
View Code Duplication |
foreach($node->children as $char => &$n) { |
|
|
|
|
260
|
|
|
$words = $this->_traverseWithPrefix($node->children[$char], $words, $word . $char); |
261
|
|
|
} |
262
|
|
|
|
263
|
|
|
return $words; |
264
|
|
|
} |
265
|
|
|
|
266
|
|
|
/** |
267
|
|
|
* Retrieves the node where ends the prefix especified. |
268
|
|
|
* |
269
|
|
|
* @param string $prefix The prefix to look for. |
270
|
|
|
* @return DataStructures\Trees\Nodes\TrieNode|null null if not found. |
271
|
|
|
*/ |
272
|
|
|
private function getNodeFromPrefix($prefix) { |
273
|
|
|
if($this->size === 0) { |
274
|
|
|
return null; |
275
|
|
|
} |
276
|
|
|
|
277
|
|
|
$i = 0; |
278
|
|
|
$current = $this->root; |
279
|
|
|
$prefixLength = mb_strlen(trim($prefix)); |
280
|
|
|
while($i < $prefixLength) { |
281
|
|
|
$char = mb_substr($prefix, $i, 1, 'UTF-8'); |
282
|
|
|
if(!isset($current->children[$char])) { |
283
|
|
|
return null; |
284
|
|
|
} |
285
|
|
|
$current = $current->children[$char]; |
286
|
|
|
$i++; |
287
|
|
|
} |
288
|
|
|
|
289
|
|
|
return $current; |
290
|
|
|
} |
291
|
|
|
|
292
|
|
|
/** |
293
|
|
|
* Gets the number of words stored in the tree. |
294
|
|
|
* |
295
|
|
|
* @return int The word count. |
296
|
|
|
*/ |
297
|
|
|
public function wordCount() : int { |
298
|
|
|
return $this->numWords; |
299
|
|
|
} |
300
|
|
|
|
301
|
|
|
/** |
302
|
|
|
* Gets the number of prefixes stored in the tree. |
303
|
|
|
* |
304
|
|
|
* @return int The word count. |
305
|
|
|
*/ |
306
|
|
|
public function count() : int { |
307
|
|
|
return $this->size(); |
308
|
|
|
} |
309
|
|
|
} |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.