1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace PHPHtmlParser\Dom\Node; |
||
6 | |||
7 | use PHPHtmlParser\Dom\Tag; |
||
8 | use PHPHtmlParser\Exceptions\ChildNotFoundException; |
||
9 | use PHPHtmlParser\Exceptions\UnknownChildTypeException; |
||
10 | |||
/**
 * Class HtmlNode.
 *
 * A node that owns a Tag and renders itself and its children to HTML or text.
 * Rendered strings are memoised on the instance and dropped again by clear().
 *
 * @property-read string    $outerhtml
 * @property-read string    $innerhtml
 * @property-read string    $innerText
 * @property-read string    $text
 * @property-read Tag       $tag
 * @property-read InnerNode $parent
 */
class HtmlNode extends InnerNode
{
    /**
     * Cached result of innerHtml(), or null when it has not been computed
     * since the last clear().
     *
     * @var ?string
     */
    protected $innerHtml;

    /**
     * Cached result of outerHtml(), or null when it has not been computed
     * since the last clear().
     *
     * @var ?string
     */
    protected $outerHtml;

    /**
     * Cached result of innerText(), or null when it has not been computed
     * since the last clear().
     *
     * @var ?string
     */
    protected $innerText;

    /**
     * Cached result of text(false), or null when it has not been computed
     * since the last clear().
     *
     * @var ?string
     */
    protected $text;

    /**
     * Cached result of text(true), i.e. the text gathered from this node and
     * its nested HtmlNode children, or null when not computed yet.
     *
     * @var ?string
     */
    protected $textWithChildren;

    /**
     * Sets up the tag of this node.
     *
     * Anything that is not already a Tag instance is passed to the Tag
     * constructor.
     *
     * @param string|Tag $tag
     */
    public function __construct($tag)
    {
        if (!$tag instanceof Tag) {
            $tag = new Tag($tag);
        }
        $this->tag = $tag;
        parent::__construct();
    }

    /**
     * Applies the flag to this node (via the parent implementation) and
     * forwards the same flag to the node's tag.
     *
     * @param bool $htmlSpecialCharsDecode
     */
    public function setHtmlSpecialCharsDecode($htmlSpecialCharsDecode = false): void
    {
        parent::setHtmlSpecialCharsDecode($htmlSpecialCharsDecode);
        $this->tag->setHtmlSpecialCharsDecode($htmlSpecialCharsDecode);
    }

    /**
     * Gets the inner html of this node.
     *
     * Concatenates the text of every TextNode child and the outer html of
     * every HtmlNode child, in child order. Returns an empty string when the
     * node has no children.
     *
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException when a child is neither a TextNode nor an HtmlNode
     */
    public function innerHtml(): string
    {
        if (!$this->hasChildren()) {
            // no children
            return '';
        }

        if ($this->innerHtml !== null) {
            // we already know the result.
            return $this->innerHtml;
        }

        $child = $this->firstChild();
        $string = '';

        // continue to loop until we are out of children
        while ($child !== null) {
            if ($child instanceof TextNode) {
                $string .= $child->text();
            } elseif ($child instanceof HtmlNode) {
                $string .= $child->outerHtml();
            } else {
                throw new UnknownChildTypeException('Unknown child type "' . \get_class($child) . '" found in node');
            }

            try {
                $child = $this->nextChild($child->id());
            } catch (ChildNotFoundException $e) {
                // nextChild() signals "no more children" by throwing; that is
                // the normal way out of this loop, not an error.
                unset($e);
                $child = null;
            }
        }

        // remember the results
        $this->innerHtml = $string;

        return $string;
    }

    /**
     * Gets the inner text of this node: the inner html with tags removed
     * by strip_tags().
     *
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException
     */
    public function innerText(): string
    {
        if ($this->innerText === null) {
            $this->innerText = \strip_tags($this->innerHtml());
        }

        return $this->innerText;
    }

    /**
     * Gets the html of this node, including its own tag.
     *
     * A node whose tag is named "root" renders as its inner html only. A
     * self-closing tag renders as its opening tag alone; that result is not
     * cached.
     *
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException
     */
    public function outerHtml(): string
    {
        // special handling for root
        if ($this->tag->name() === 'root') {
            return $this->innerHtml();
        }

        if ($this->outerHtml !== null) {
            // we already know the results.
            return $this->outerHtml;
        }

        $return = $this->tag->makeOpeningTag();
        if ($this->tag->isSelfClosing()) {
            // ignore any children... there should not be any though
            return $return;
        }

        // get the inner html
        $return .= $this->innerHtml();

        // add closing tag
        $return .= $this->tag->makeClosingTag();

        // remember the results
        $this->outerHtml = $return;

        return $return;
    }

    /**
     * Gets the text of this node (if there is any text). Or get all the text
     * in this node, including children.
     *
     * @param bool $lookInChildren when true, also collects text from nested
     *                             HtmlNode children, recursively
     */
    public function text(bool $lookInChildren = false): string
    {
        // The two modes are cached separately.
        if ($lookInChildren) {
            if ($this->textWithChildren !== null) {
                // we already know the results.
                return $this->textWithChildren;
            }
        } elseif ($this->text !== null) {
            // we already know the results.
            return $this->text;
        }

        // find out if this node has any text children
        $text = '';
        foreach ($this->children as $child) {
            /** @var AbstractNode $node */
            $node = $child['node'];
            if ($node instanceof TextNode) {
                // Call the method directly, as innerHtml() does, rather than
                // going through the magic "text" property.
                $text .= $node->text();
            } elseif ($lookInChildren && $node instanceof HtmlNode) {
                $text .= $node->text($lookInChildren);
            }
        }

        // remember our result
        if ($lookInChildren) {
            $this->textWithChildren = $text;
        } else {
            $this->text = $text;
        }

        return $text;
    }

    /**
     * Call this when something in the node tree has changed. Like a child has been added
     * or a parent has been changed.
     *
     * Drops every cached rendering held by this node, then asks the parent
     * (if any) to do the same.
     */
    protected function clear(): void
    {
        $this->innerHtml = null;
        $this->outerHtml = null;
        // innerText is derived from innerHtml, so it must be invalidated with
        // it; otherwise innerText() keeps returning the pre-change text.
        $this->innerText = null;
        $this->text = null;
        $this->textWithChildren = null;

        if ($this->parent !== null) {
            $this->parent->clear();
        }
    }

    /**
     * Returns all children of this html node.
     */
    protected function getIteratorArray(): array
    {
        return $this->getChildren();
    }
}
||
245 |