1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* DataStructures for PHP |
4
|
|
|
* |
5
|
|
|
* @link https://github.com/SiroDiaz/DataStructures |
6
|
|
|
* @copyright Copyright (c) 2017 Siro Díaz Palazón |
7
|
|
|
* @license https://github.com/SiroDiaz/DataStructures/blob/master/README.md (MIT License) |
8
|
|
|
*/ |
9
|
|
|
namespace DataStructures\Trees; |
10
|
|
|
|
11
|
|
|
use DataStructures\Trees\Nodes\TrieNode; |
12
|
|
|
use Countable; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* TrieTree |
16
|
|
|
* |
17
|
|
|
* The TrieTree class is a trie (also called digital tree and sometimes radix tree or prefix tree) |
18
|
|
|
* that is used to get in O(m) being m the word length. |
19
|
|
|
* It is used in software like word corrector and word suggest. |
20
|
|
|
* |
21
|
|
|
* @author Siro Diaz Palazon <[email protected]> |
22
|
|
|
*/ |
23
|
|
|
class TrieTree implements Countable { |
24
|
|
|
private $root; |
25
|
|
|
private $numWords; |
26
|
|
|
private $size; |
27
|
|
|
|
28
|
|
|
public function __construct() { |
29
|
|
|
$this->root = new TrieNode(); |
30
|
|
|
$this->numWords = 0; |
31
|
|
|
$this->size = 0; |
32
|
|
|
} |
33
|
|
|
|
34
|
|
|
public function numWords() : int { |
35
|
|
|
return $this->numWords; |
36
|
|
|
} |
37
|
|
|
|
38
|
|
|
public function size() : int { |
39
|
|
|
return $this->size; |
40
|
|
|
} |
41
|
|
|
|
42
|
|
|
public function add($word) { |
43
|
|
|
$word = trim($word); |
44
|
|
|
if(mb_strlen($word) === 0) { |
45
|
|
|
return; |
46
|
|
|
} |
47
|
|
|
|
48
|
|
|
$current = &$this->root; |
49
|
|
|
for($i = 0; $i < mb_strlen($word); $i++) { |
50
|
|
|
$char = mb_substr($word, $i, 1, 'UTF-8'); |
51
|
|
|
if(!isset($current->children[$char])) { |
52
|
|
|
if($i === mb_strlen($word) - 1) { |
53
|
|
|
$current->children[$char] = new TrieNode($char, true); |
54
|
|
|
$current->isWord = true; |
55
|
|
|
$this->numWords++; |
56
|
|
|
} else { |
57
|
|
|
$current->children[$char] = new TrieNode($char, false); |
58
|
|
|
} |
59
|
|
|
$this->size++; |
60
|
|
|
} else { |
61
|
|
|
if($i === mb_strlen($word) - 1) { |
62
|
|
|
if($current->isWord === false) { |
63
|
|
|
$current->isWord = true; |
64
|
|
|
$this->numWords++; |
65
|
|
|
} |
66
|
|
|
} |
67
|
|
|
} |
68
|
|
|
|
69
|
|
|
$current = &$current->children[$char]; |
70
|
|
|
} |
71
|
|
|
} |
72
|
|
|
|
73
|
|
|
public function contains($word) : bool { |
74
|
|
|
$word = trim($word); |
75
|
|
|
$wordLength = mb_strlen($word); |
76
|
|
|
if($wordLength === 0) { |
77
|
|
|
return true; |
78
|
|
|
} |
79
|
|
|
|
80
|
|
|
$i = 0; |
81
|
|
|
$found = true; |
82
|
|
|
$current = $this->root; |
83
|
|
|
while($found && $i < $wordLength) { |
84
|
|
|
$char = mb_substr($word, $i, 1); |
85
|
|
|
if(isset($current->children[$char])) { |
86
|
|
|
$current = $current->children[$char]; |
87
|
|
|
} else { |
88
|
|
|
$found = false; |
89
|
|
|
} |
90
|
|
|
} |
91
|
|
|
|
92
|
|
|
if($found && $current->isWord) { |
93
|
|
|
return true; |
94
|
|
|
} |
95
|
|
|
|
96
|
|
|
return false; |
97
|
|
|
} |
98
|
|
|
|
99
|
|
|
public function wordCount() { |
100
|
|
|
return $this->numWords; |
101
|
|
|
} |
102
|
|
|
|
103
|
|
|
public function count() { |
104
|
|
|
return $this->size(); |
105
|
|
|
} |
106
|
|
|
} |