1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* This file is part of PHP-Typography. |
4
|
|
|
* |
5
|
|
|
* Copyright 2014-2019 Peter Putzer. |
6
|
|
|
* Copyright 2009-2011 KINGdesk, LLC. |
7
|
|
|
* |
8
|
|
|
* This program is free software; you can redistribute it and/or modify |
9
|
|
|
* it under the terms of the GNU General Public License as published by |
10
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
11
|
|
|
* (at your option) any later version. |
12
|
|
|
* |
13
|
|
|
* This program is distributed in the hope that it will be useful, |
14
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
15
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16
|
|
|
* GNU General Public License for more details. |
17
|
|
|
* |
18
|
|
|
* You should have received a copy of the GNU General Public License along |
19
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc., |
20
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21
|
|
|
* |
22
|
|
|
* *** |
23
|
|
|
* |
24
|
|
|
* @package mundschenk-at/php-typography |
25
|
|
|
* @license http://www.gnu.org/licenses/gpl-2.0.html |
26
|
|
|
*/ |
27
|
|
|
|
28
|
|
|
namespace PHP_Typography; |
29
|
|
|
|
30
|
|
|
use Masterminds\HTML5\Elements; |
31
|
|
|
|
32
|
|
|
/**
 * Some static methods for DOM manipulation.
 *
 * All helpers are static. The only state kept by the class are two lazily
 * built lookup tables (block tags and "inappropriate" tags) that are derived
 * from the element registry of the HTML5-PHP library.
 *
 * @since 4.2.0
 */
abstract class DOM {

	/**
	 * An array of block tag names (tag name => integer index).
	 *
	 * @var array
	 */
	private static $block_tags;

	/**
	 * An array of tags that should never be modified (tag name => integer index).
	 *
	 * @var array
	 */
	private static $inappropriate_tags;

	/**
	 * Tags that should never be modified in addition to the void, raw text and
	 * RCDATA elements reported by the HTML5-PHP element registry.
	 *
	 * @var string[]
	 */
	const ADDITIONAL_INAPPROPRIATE_TAGS = [
		'button',
		'select',
		'optgroup',
		'option',
		'map',
		'head',
		'applet',
		'object',
		'svg',
		'math',
	];

	/**
	 * Retrieves an array of block tags.
	 *
	 * The result is cached in a static property and only rebuilt when the cache
	 * is empty or `$reset` is true.
	 *
	 * @param bool $reset Optional. Forces the cache to be rebuilt. Default false.
	 *
	 * @return array {
	 *     A lookup table indexed by tag name. The values are integer indices
	 *     produced by `array_flip` (and may be `0`), so membership must be
	 *     tested with `isset()` and never by truthiness.
	 *
	 *     @type int $tag Present if the tag is a block tag.
	 * }
	 */
	public static function block_tags( $reset = false ) {
		if ( empty( self::$block_tags ) || $reset ) {
			self::$block_tags = \array_merge(
				\array_flip(
					\array_filter(
						\array_keys( Elements::$html5 ),
						static function( $tag ) {
							return Elements::isA( $tag, Elements::BLOCK_TAG );
						}
					)
				),
				\array_flip( [ 'li', 'td', 'dt' ] ) // not included as "block tags" in current HTML5-PHP version.
			);
		}

		return self::$block_tags;
	}

	/**
	 * Retrieves an array of tags that we should never touch.
	 *
	 * The list consists of all void, raw text and RCDATA elements known to
	 * HTML5-PHP plus the tags from `ADDITIONAL_INAPPROPRIATE_TAGS`. The result
	 * is cached in a static property and only rebuilt when the cache is empty
	 * or `$reset` is true.
	 *
	 * @param bool $reset Optional. Forces the cache to be rebuilt. Default false.
	 *
	 * @return array {
	 *     A lookup table indexed by tag name. The values are integer indices
	 *     produced by `array_flip` (and may be `0`), so membership must be
	 *     tested with `isset()` and never by truthiness.
	 *
	 *     @type int $tag Present if the tag should never be modified in any way.
	 * }
	 */
	public static function inappropriate_tags( $reset = false ) {
		if ( empty( self::$inappropriate_tags ) || $reset ) {
			self::$inappropriate_tags = \array_flip(
				\array_merge(
					\array_filter(
						\array_keys( Elements::$html5 ),
						static function( $tag ) {
							return Elements::isA( $tag, Elements::VOID_TAG )
								|| Elements::isA( $tag, Elements::TEXT_RAW )
								|| Elements::isA( $tag, Elements::TEXT_RCDATA );
						}
					),
					self::ADDITIONAL_INAPPROPRIATE_TAGS
				)
			);
		}

		return self::$inappropriate_tags;
	}

	/**
	 * Converts a \DOMNodeList to an array.
	 *
	 * @param \DOMNodeList $list Required.
	 *
	 * @return array An associative array in the form ( $spl_object_hash => $node ).
	 */
	public static function nodelist_to_array( \DOMNodeList $list ) {
		$out = [];

		foreach ( $list as $node ) {
			$out[ \spl_object_hash( $node ) ] = $node;
		}

		return $out;
	}

	/**
	 * Retrieves an array containing all the ancestors of the node. This could be done
	 * via an XPath query for "ancestor::*", but DOM walking is in all likelihood faster.
	 *
	 * The walk stops at the first ancestor that is not a \DOMElement (e.g. the
	 * owning \DOMDocument), so only element ancestors are returned.
	 *
	 * @param \DOMNode $node Required.
	 *
	 * @return array An array of \DOMElement, ordered from the direct parent outwards.
	 */
	public static function get_ancestors( \DOMNode $node ) {
		$result = [];
		$parent = $node->parentNode;

		while ( $parent instanceof \DOMElement ) {
			$result[] = $parent;
			$parent   = $parent->parentNode;
		}

		return $result;
	}

	/**
	 * Checks whether the \DOMNode has one of the given classes.
	 * If $tag is a \DOMText, the parent DOMElement is checked instead.
	 *
	 * @param \DOMNode     $tag        An element or textnode.
	 * @param string|array $classnames A single classname or an array of classnames.
	 *
	 * @return bool True if the element has any of the given class(es).
	 */
	public static function has_class( \DOMNode $tag, $classnames ) {
		if ( $tag instanceof \DOMText ) {
			$tag = $tag->parentNode;
		}

		// Bail if we are not working with a tag or if there is no classname.
		if ( ! ( $tag instanceof \DOMElement ) || empty( $classnames ) ) {
			return false;
		}

		// Ensure we always have an array of classnames.
		if ( ! \is_array( $classnames ) ) {
			$classnames = [ $classnames ];
		}

		if ( $tag->hasAttribute( 'class' ) ) {
			// The class attribute is a set of tokens separated by ASCII whitespace
			// (space, tab, LF, FF, CR), not just single spaces. Empty tokens caused
			// by repeated separators are dropped so that '' never counts as a class.
			$tag_classes = \array_flip(
				\preg_split( '/[ \t\n\f\r]+/', $tag->getAttribute( 'class' ), -1, \PREG_SPLIT_NO_EMPTY )
			);

			foreach ( $classnames as $classname ) {
				if ( isset( $tag_classes[ $classname ] ) ) {
					return true;
				}
			}
		}

		return false;
	}

	/**
	 * Retrieves the last character of the previous \DOMText sibling (if there is one).
	 *
	 * @param \DOMNode $node The content node.
	 *
	 * @return string A single character (or the empty string).
	 */
	public static function get_prev_chr( \DOMNode $node ) {
		return self::get_adjacent_chr( $node, -1, 1, [ __CLASS__, 'get_previous_textnode' ] );
	}

	/**
	 * Retrieves the first character of the next \DOMText sibling (if there is one).
	 *
	 * @param \DOMNode $node The content node.
	 *
	 * @return string A single character (or the empty string).
	 */
	public static function get_next_chr( \DOMNode $node ) {
		return self::get_adjacent_chr( $node, 0, 1, [ __CLASS__, 'get_next_textnode' ] );
	}

	/**
	 * Retrieves a character from the given \DOMNode.
	 *
	 * The text node is looked up via `$get_textnode`, the requested substring is
	 * extracted and all characters of the Unicode category "Other" (`\p{C}`)
	 * are stripped from it.
	 *
	 * @since 5.0.0
	 *
	 * @param \DOMNode $node         Required.
	 * @param int      $position     The position parameter for `substr`.
	 * @param int      $length       The length parameter for `substr`.
	 * @param callable $get_textnode A function to retrieve the \DOMText from the node.
	 *
	 * @return string The character or an empty string.
	 */
	private static function get_adjacent_chr( \DOMNode $node, $position, $length, callable $get_textnode ) {
		$textnode = $get_textnode( $node );

		if ( isset( $textnode ) && isset( $textnode->data ) ) {
			// Determine encoding. NOTE(review): Strings::functions() is defined elsewhere;
			// it is used here as a map containing a 'substr' callable that is empty when
			// no suitable string functions are available - confirm against that class.
			$func = Strings::functions( $textnode->data );

			if ( ! empty( $func ) ) {
				// preg_replace() yields null on failure; the cast keeps the string contract.
				return (string) \preg_replace( '/\p{C}/Su', '', $func['substr']( $textnode->data, $position, $length ) );
			}
		}

		return '';
	}

	/**
	 * Retrieves the previous \DOMText sibling (if there is one).
	 *
	 * @param \DOMNode|null $node Optional. The content node. Default null.
	 *
	 * @return \DOMText|null Null if $node is a block-level element or no text sibling exists.
	 */
	public static function get_previous_textnode( \DOMNode $node = null ) {
		return self::get_adjacent_textnode(
			function( &$another_node = null ) {
				// Step one sibling backwards (by reference, so the caller's cursor moves too).
				$another_node = $another_node->previousSibling;
				return self::get_last_textnode( $another_node );
			},
			[ __CLASS__, __FUNCTION__ ],
			$node
		);
	}

	/**
	 * Retrieves the next \DOMText sibling (if there is one).
	 *
	 * @param \DOMNode|null $node Optional. The content node. Default null.
	 *
	 * @return \DOMText|null Null if $node is a block-level element or no text sibling exists.
	 */
	public static function get_next_textnode( \DOMNode $node = null ) {
		return self::get_adjacent_textnode(
			function( &$another_node = null ) {
				// Step one sibling forwards (by reference, so the caller's cursor moves too).
				$another_node = $another_node->nextSibling;
				return self::get_first_textnode( $another_node );
			},
			[ __CLASS__, __FUNCTION__ ],
			$node
		);
	}

	/**
	 * Retrieves an adjacent \DOMText sibling if there is one.
	 *
	 * First the siblings of `$node` are visited via `$iterate` until a text node
	 * is found or the siblings are exhausted. If nothing was found, the search
	 * continues from the parent node via `$get_adjacent_parent`.
	 *
	 * @since 5.0.0
	 *
	 * @param callable      $iterate             Takes a reference \DOMElement and returns a \DOMText (or null).
	 * @param callable      $get_adjacent_parent Takes a single \DOMElement parameter and returns a \DOMText (or null).
	 * @param \DOMNode|null $node                Optional. The content node. Default null.
	 *
	 * @return \DOMText|null Null if $node is a block-level element or no text sibling exists.
	 */
	private static function get_adjacent_textnode( callable $iterate, callable $get_adjacent_parent, \DOMNode $node = null ) {
		if ( ! isset( $node ) || self::is_block_tag( $node ) ) {
			return null;
		}

		/**
		 * The result node.
		 *
		 * @var \DOMText|null
		 */
		$adjacent = null;

		/**
		 * The initial node.
		 *
		 * @var \DOMNode|null
		 */
		$iterated_node = $node;

		// Iterate to find adjacent node. $iterate advances $iterated_node by reference
		// and sets it to null once there are no more siblings.
		while ( null !== $iterated_node && null === $adjacent ) {
			/**
			 * Let's try the next node.
			 *
			 * @var \DOMNode|null
			 */
			$adjacent = $iterate( $iterated_node );
		}

		// Last resort.
		if ( null === $adjacent ) {
			/**
			 * The parent node.
			 *
			 * @var \DOMNode|null
			 */
			$adjacent = $get_adjacent_parent( $node->parentNode );
		}

		return $adjacent;
	}

	/**
	 * Retrieves the first \DOMText child of the element. Block-level child elements are ignored.
	 *
	 * @param \DOMNode|null $node      Optional. Default null.
	 * @param bool          $recursive Should be set to true on recursive calls. Optional. Default false.
	 *
	 * @return \DOMText|null The first child of type \DOMText, the element itself if it is of type \DOMText or null.
	 */
	public static function get_first_textnode( \DOMNode $node = null, $recursive = false ) {
		return self::get_edge_textnode( [ __CLASS__, __FUNCTION__ ], $node, $recursive, false );
	}

	/**
	 * Retrieves the last \DOMText child of the element. Block-level child elements are ignored.
	 *
	 * @param \DOMNode|null $node      Optional. Default null.
	 * @param bool          $recursive Should be set to true on recursive calls. Optional. Default false.
	 *
	 * @return \DOMText|null The last child of type \DOMText, the element itself if it is of type \DOMText or null.
	 */
	public static function get_last_textnode( \DOMNode $node = null, $recursive = false ) {
		return self::get_edge_textnode( [ __CLASS__, __FUNCTION__ ], $node, $recursive, true );
	}

	/**
	 * Retrieves an edge \DOMText child of the element specified by the callable.
	 * Block-level child elements are ignored.
	 *
	 * @since 5.0.0
	 *
	 * @param callable      $get_textnode Takes two parameters, a \DOMNode and a boolean flag for recursive calls.
	 * @param \DOMNode|null $node         Optional. Default null.
	 * @param bool          $recursive    Should be set to true on recursive calls. Optional. Default false.
	 * @param bool          $reverse      Whether to iterate forward or backward. Optional. Default false.
	 *
	 * @return \DOMText|null The edge child of type \DOMText, the element itself if it is of type \DOMText or null.
	 */
	private static function get_edge_textnode( callable $get_textnode, \DOMNode $node = null, $recursive = false, $reverse = false ) {
		if ( ! isset( $node ) ) {
			return null;
		}

		if ( $node instanceof \DOMText ) {
			return $node;
		} elseif ( ! $node instanceof \DOMElement || ( $recursive && self::is_block_tag( $node ) ) ) {
			// Return null if $node is neither \DOMText nor \DOMElement or
			// when we are recursing and already at the block level.
			return null;
		}

		$edge_textnode = null;

		if ( $node->hasChildNodes() ) {
			$children    = $node->childNodes;
			$max         = $children->length;
			$index       = $reverse ? $max - 1 : 0;
			$incrementor = $reverse ? -1 : +1;

			// Walk the children from the requested edge until a text node turns up.
			while ( $index >= 0 && $index < $max && null === $edge_textnode ) {
				$edge_textnode = $get_textnode( $children->item( $index ), true );
				$index        += $incrementor;
			}
		}

		return $edge_textnode;
	}

	/**
	 * Returns the nearest block-level parent.
	 *
	 * If none of the element ancestors is a block tag, the topmost element
	 * ancestor is returned instead. Null is only returned when the direct
	 * parent of the node is not a \DOMElement.
	 *
	 * @param \DOMNode $node Required.
	 *
	 * @return \DOMElement|null
	 */
	public static function get_block_parent( \DOMNode $node ) {
		$parent = $node->parentNode;
		if ( ! $parent instanceof \DOMElement ) {
			return null;
		}

		while ( ! self::is_block_tag( $parent ) && $parent->parentNode instanceof \DOMElement ) {
			/**
			 * The parent is sure to be a \DOMElement.
			 *
			 * @var \DOMElement
			 */
			$parent = $parent->parentNode;
		}

		return $parent;
	}

	/**
	 * Retrieves the tag name of the nearest block-level parent.
	 *
	 * @param \DOMNode $node A node.
	 *
	 * @return string The tag name (or the empty string).
	 */
	public static function get_block_parent_name( \DOMNode $node ) {
		$parent = self::get_block_parent( $node );

		return null !== $parent ? $parent->tagName : '';
	}

	/**
	 * Determines if a node is a block tag.
	 *
	 * Reads the cached block tag table directly, so `block_tags()` has to have
	 * been called at least once before (otherwise no tag is considered a block tag).
	 *
	 * @since 6.0.0
	 *
	 * @param \DOMNode $node Required.
	 *
	 * @return bool
	 */
	public static function is_block_tag( \DOMNode $node ) {
		return $node instanceof \DOMElement && isset( self::$block_tags[ $node->tagName ] );
	}
}
458
|
|
|
|
459
|
|
|
/**
 * Initialize block tags on load.
 *
 * DOM::is_block_tag() reads the cached block tag table directly instead of
 * going through DOM::block_tags(), so the cache has to be populated once
 * when this file is loaded.
 */
DOM::block_tags(); // @codeCoverageIgnore
463
|
|
|
|