Passed
Push — develop ( 30cf64...589229 )
by Guillaume
06:18 queued 04:10
created

HtmlPageCrawler   F

Complexity

Total Complexity 151

Size/Duplication

Total Lines 996
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 335
dl 0
loc 996
rs 2
c 0
b 0
f 0
wmc 151

How to fix   Complexity   

Complex Class

Complex classes like HtmlPageCrawler often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use HtmlPageCrawler, and based on these observations, apply Extract Interface, too.

1
<?php
2
namespace Wa72\HtmlPageDom;
3
4
use Symfony\Component\DomCrawler\Crawler;
5
6
/**
7
 * Extends \Symfony\Component\DomCrawler\Crawler by adding tree manipulation functions
8
 * for HTML documents inspired by jQuery such as setInnerHtml(), css(), append(), prepend(), before(),
9
 * addClass(), removeClass()
10
 *
11
 * @author Christoph Singer
12
 * @license MIT
13
 *
14
 */
15
class HtmlPageCrawler extends Crawler
16
{
17
    /**
18
     * the (internal) root element name used when importing html fragments
19
     * */
20
    const FRAGMENT_ROOT_TAGNAME = '_root';
21
22
    /**
     * Wrap arbitrary content in an HtmlPageCrawler.
     *
     * This plays the role of jQuery's $() function when it is used to wrap DOM nodes or to
     * build elements from HTML code. A value that already is an HtmlPageCrawler is handed
     * back unchanged (same object, no copy); anything else is passed to the constructor
     * of a new HtmlPageCrawler.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList|array $content
     * @return HtmlPageCrawler
     * @api
     */
    public static function create($content)
    {
        return ($content instanceof HtmlPageCrawler) ? $content : new HtmlPageCrawler($content);
    }
39
40
    /**
     * Adds the specified class(es) to each element in the set of matched elements.
     *
     * The argument is split on whitespace, so several classes can be added in one call.
     * A class that an element already carries is not added a second time. The class
     * attribute is only rewritten for elements that actually gain a class.
     *
     * @param string $name One or more space-separated classes to be added to the class attribute of each matched element.
     * @return HtmlPageCrawler $this for chaining
     * @api
     */
    public function addClass($name)
    {
        $wanted = preg_split('/\s+/', (string) $name, -1, PREG_SPLIT_NO_EMPTY);
        if (empty($wanted)) {
            // nothing to add
            return $this;
        }
        foreach ($this as $node) {
            if (!$node instanceof \DOMElement) {
                continue;
            }
            /** @var \DOMElement $node */
            $classes = preg_split('/\s+/', $node->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
            $changed = false;
            foreach ($wanted as $class) {
                // strict comparison: class names are plain strings and must not be
                // compared numerically (e.g. "1e1" is not the same class as "10")
                if (!in_array($class, $classes, true)) {
                    $classes[] = $class;
                    $changed = true;
                }
            }
            if ($changed) {
                $node->setAttribute('class', join(' ', $classes));
            }
        }
        return $this;
    }
68
69
    /**
     * Insert content, specified by the parameter, after each element in the set of matched elements.
     *
     * Every content node is run through importNewnode() (with the index of the current
     * target as clone flag) and then placed directly behind the target. After the call the
     * content crawler holds the nodes that were actually inserted.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $content
     * @return HtmlPageCrawler $this for chaining
     * @api
     */
    public function after($content)
    {
        $content = self::create($content);
        $inserted = array();
        foreach ($this as $index => $target) {
            /** @var \DOMNode $target */
            // remembered once per target so that all content nodes keep their order
            $next = $target->nextSibling;
            foreach ($content as $candidate) {
                /** @var \DOMNode $candidate */
                $imported = static::importNewnode($candidate, $target, $index);
                if (null !== $next) {
                    $target->parentNode->insertBefore($imported, $next);
                } else {
                    // target is the last child: append to its parent
                    $target->parentNode->appendChild($imported);
                }
                $inserted[] = $imported;
            }
        }
        $content->clear();
        $content->add($inserted);
        return $this;
    }
98
99
    /**
     * Insert HTML content as child nodes of each element after existing children
     *
     * Every content node is run through importNewnode() (with the index of the current
     * target as clone flag) and appended to the target. After the call the content crawler
     * holds the nodes that were actually appended.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $content HTML code fragment or DOMNode to append
     * @return HtmlPageCrawler $this for chaining
     * @api
     */
    public function append($content)
    {
        $content = self::create($content);
        $appended = array();
        foreach ($this as $index => $target) {
            /** @var \DOMNode $target */
            foreach ($content as $candidate) {
                /** @var \DOMNode $candidate */
                $imported = static::importNewnode($candidate, $target, $index);
                $target->appendChild($imported);
                $appended[] = $imported;
            }
        }
        $content->clear();
        $content->add($appended);
        return $this;
    }
123
124
    /**
     * Insert every element in the set of matched elements to the end of the target.
     *
     * A matched element that is identical to the target itself is not appended, but it is
     * still part of the returned set.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $element
     * @return \Wa72\HtmlPageDom\HtmlPageCrawler A new Crawler object containing all elements appended to the target elements
     * @api
     */
    public function appendTo($element)
    {
        $targets = self::create($element);
        $result = array();
        foreach ($targets as $index => $target) {
            /** @var \DOMNode $target */
            foreach ($this as $member) {
                /** @var \DOMNode $member */
                if ($target !== $member) {
                    $member = static::importNewnode($member, $target, $index);
                    $target->appendChild($member);
                }
                $result[] = $member;
            }
        }
        return self::create($result);
    }
148
149
    /**
     * Sets an attribute on each element
     *
     * Nodes in the set that are not DOMElement instances are skipped.
     *
     * @param string $name
     * @param string $value
     * @return HtmlPageCrawler $this for chaining
     * @api
     */
    public function setAttribute($name, $value)
    {
        foreach ($this as $element) {
            if (!$element instanceof \DOMElement) {
                continue;
            }
            /** @var \DOMElement $element */
            $element->setAttribute($name, $value);
        }
        return $this;
    }
167
168
    /**
     * Returns the attribute value of the first node of the list.
     *
     * Alias for the inherited attr() method, provided for naming consistency with setAttribute().
     *
     * @param string $name The attribute name
     * @return string|null The attribute value or null if the attribute does not exist
     * @throws \InvalidArgumentException When current node is empty
     */
    public function getAttribute($name)
    {
        $value = parent::attr($name);

        return $value;
    }
180
181
    /**
     * Insert content, specified by the parameter, before each element in the set of matched elements.
     *
     * A content node that is identical to the target itself is skipped. After the call the
     * content crawler holds the nodes that were actually inserted.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $content
     * @return HtmlPageCrawler $this for chaining
     * @api
     */
    public function before($content)
    {
        $content = self::create($content);
        $inserted = array();
        foreach ($this as $index => $target) {
            /** @var \DOMNode $target */
            foreach ($content as $candidate) {
                /** @var \DOMNode $candidate */
                if ($target === $candidate) {
                    // a node cannot be inserted before itself
                    continue;
                }
                $imported = static::importNewnode($candidate, $target, $index);
                $target->parentNode->insertBefore($imported, $target);
                $inserted[] = $imported;
            }
        }
        $content->clear();
        $content->add($inserted);
        return $this;
    }
207
208
    /**
     * Create a deep copy of the set of matched elements.
     *
     * Counterpart of jQuery's clone(), which cannot be used as a method name in PHP.
     * The actual copying is done by __clone().
     *
     * @return HtmlPageCrawler
     * @api
     */
    public function makeClone()
    {
        $copy = clone $this;

        return $copy;
    }
220
221
    /**
     * Replaces every node held by the freshly cloned crawler with a deep copy of itself,
     * so that the clone no longer shares DOM nodes with the crawler it was cloned from.
     */
    public function __clone()
    {
        $copies = array();
        foreach ($this as $original) {
            /** @var \DOMNode $original */
            $copies[] = $original->cloneNode(true);
        }
        // swap the shared nodes for their copies
        $this->clear();
        $this->add($copies);
    }
231
232
    /**
     * Get one CSS style property of the first element or set it for all elements in the list
     *
     * jQuery-compatible shortcut: with a NULL value it delegates to getStyle(), otherwise to setStyle().
     *
     * @see HtmlPageCrawler::getStyle()
     * @see HtmlPageCrawler::setStyle()
     *
     * @param string $key The name of the style property
     * @param null|string $value The CSS value to set, or NULL to get the current value
     * @return HtmlPageCrawler|string The property value of the first element when reading, $this when writing
     * @api
     */
    public function css($key, $value = null)
    {
        if ($value !== null) {
            return $this->setStyle($key, $value);
        }

        return $this->getStyle($key);
    }
253
254
    /**
     * get one CSS style property of the first element
     *
     * The style attribute of the first element is converted with Helpers::cssStringToArray()
     * and the entry for $key is looked up in the result.
     *
     * @param string $key name of the property
     * @return string|null value of the property, or null if it is not set
     */
    public function getStyle($key)
    {
        $declarations = Helpers::cssStringToArray($this->getAttribute('style'));
        if (isset($declarations[$key])) {
            return $declarations[$key];
        }

        return null;
    }
265
266
    /**
     * set one CSS style property for all elements in the list
     *
     * A value whose string form is empty (empty string, null, false) removes the property.
     * Any other value, including the number 0, is stored. Nodes that are not DOMElement
     * instances are skipped.
     *
     * @param string $key name of the property
     * @param string $value value of the property; an empty value removes the property
     * @return HtmlPageCrawler $this for chaining
     */
    public function setStyle($key, $value)
    {
        // Compare the string form strictly: a loose comparison with '' treats the
        // number 0 as empty on PHP versions before 8 and would remove the property.
        $remove = ((string) $value === '');
        foreach ($this as $node) {
            if (!$node instanceof \DOMElement) {
                continue;
            }
            /** @var \DOMElement $node */
            $styles = Helpers::cssStringToArray($node->getAttribute('style'));
            if ($remove) {
                unset($styles[$key]);
            } else {
                $styles[$key] = $value;
            }
            $node->setAttribute('style', Helpers::cssArrayToString($styles));
        }
        return $this;
    }
289
290
    /**
     * Removes all child nodes and text from all nodes in set
     *
     * Counterpart of jQuery's empty(), which cannot be used as a method name in PHP.
     * Implemented by assigning an empty string to the nodeValue of every node.
     *
     * @return HtmlPageCrawler $this
     * @api
     */
    public function makeEmpty()
    {
        foreach ($this as $member) {
            $member->nodeValue = '';
        }

        return $this;
    }
304
305
    /**
     * Determine whether any of the matched elements are assigned the given class.
     *
     * Class names are compared as exact strings. Nodes that are not DOMElement
     * instances are ignored.
     *
     * @param string $name
     * @return bool true as soon as one element carries the class, false otherwise
     * @api
     */
    public function hasClass($name)
    {
        $name = (string) $name;
        foreach ($this as $node) {
            if (!$node instanceof \DOMElement) {
                continue;
            }
            /** @var \DOMElement $node */
            $class = $node->getAttribute('class');
            // explicit comparison with '': a plain truthiness test would skip class="0"
            if ($class === '') {
                continue;
            }
            $classes = preg_split('/\s+/s', $class);
            // strict: loose comparison would treat numeric-looking names such as
            // "1e1" and "10" as equal
            if (in_array($name, $classes, true)) {
                return true;
            }
        }
        return false;
    }
324
325
    /**
     * Set the HTML contents of each element
     *
     * Each element is emptied first and then receives the content nodes as children.
     * The index of the current element is passed to importNewnode() as clone flag, in the
     * same way append() does it: content that lives in the same document is moved into the
     * first element and cloned for every further one. Without that flag the same nodes
     * would be moved from element to element and only the last one would keep them.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $content HTML code fragment
     * @return HtmlPageCrawler $this for chaining
     * @api
     */
    public function setInnerHtml($content)
    {
        $content = self::create($content);
        foreach ($this as $i => $node) {
            /** @var \DOMNode $node */
            // drop the existing children and text
            $node->nodeValue = '';
            foreach ($content as $newnode) {
                /** @var \DOMNode $newnode */
                $newnode = static::importNewnode($newnode, $node, $i);
                $node->appendChild($newnode);
            }
        }
        return $this;
    }
346
347
    /**
     * Alias for the inherited html() method, provided for naming consistency with setInnerHtml()
     *
     * @return string
     * @api
     */
    public function getInnerHtml()
    {
        $markup = parent::html();

        return $markup;
    }
357
358
    /**
     * Insert every element in the set of matched elements after the target.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $element
     * @return \Wa72\HtmlPageDom\HtmlPageCrawler A new Crawler object containing all elements inserted after the target elements
     * @api
     */
    public function insertAfter($element)
    {
        $targets = self::create($element);
        $result = array();
        foreach ($targets as $index => $target) {
            /** @var \DOMNode $target */
            // remembered once per target so that the inserted elements keep their order
            $next = $target->nextSibling;
            foreach ($this as $member) {
                /** @var \DOMNode $member */
                $imported = static::importNewnode($member, $target, $index);
                if (null !== $next) {
                    $target->parentNode->insertBefore($imported, $next);
                } else {
                    // target is the last child: append to its parent
                    $target->parentNode->appendChild($imported);
                }
                $result[] = $imported;
            }
        }
        return self::create($result);
    }
385
386
    /**
     * Insert every element in the set of matched elements before the target.
     *
     * An element that (after import) is identical to the target itself is not inserted,
     * but it is still part of the returned set.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $element
     * @return \Wa72\HtmlPageDom\HtmlPageCrawler A new Crawler object containing all elements inserted before the target elements
     * @api
     */
    public function insertBefore($element)
    {
        $targets = self::create($element);
        $result = array();
        foreach ($targets as $index => $target) {
            /** @var \DOMNode $target */
            foreach ($this as $member) {
                /** @var \DOMNode $member */
                $imported = static::importNewnode($member, $target, $index);
                if ($imported !== $target) {
                    $target->parentNode->insertBefore($imported, $target);
                }
                $result[] = $imported;
            }
        }
        return self::create($result);
    }
410
411
    /**
     * Insert content, specified by the parameter, to the beginning of each element in the set of matched elements.
     *
     * After the call the content crawler holds the imported nodes.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $content HTML code fragment
     * @return HtmlPageCrawler $this for chaining
     * @api
     */
    public function prepend($content)
    {
        $content = self::create($content);
        $imports = array();
        foreach ($this as $index => $target) {
            /** @var \DOMNode $target */
            // the original first child stays the anchor for all content nodes,
            // which keeps them in their given order
            $anchor = $target->firstChild;
            foreach ($content as $candidate) {
                /** @var \DOMNode $candidate */
                $imported = static::importNewnode($candidate, $target, $index);
                if ($anchor === null) {
                    // target has no children yet
                    $target->appendChild($imported);
                } elseif ($anchor !== $imported) {
                    $target->insertBefore($imported, $anchor);
                }
                $imports[] = $imported;
            }
        }
        $content->clear();
        $content->add($imports);
        return $this;
    }
440
441
    /**
     * Insert every element in the set of matched elements to the beginning of the target.
     *
     * An element that (after import) is identical to the target itself is not inserted,
     * but it is still part of the returned set.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $element
     * @return \Wa72\HtmlPageDom\HtmlPageCrawler A new Crawler object containing all elements prepended to the target elements
     * @api
     */
    public function prependTo($element)
    {
        $targets = self::create($element);
        $result = array();
        foreach ($targets as $index => $target) {
            /** @var \DOMNode $target */
            // the original first child stays the anchor for all inserted elements
            $anchor = $target->firstChild;
            foreach ($this as $member) {
                /** @var \DOMNode $member */
                $imported = static::importNewnode($member, $target, $index);
                if ($imported !== $target) {
                    if (null !== $anchor) {
                        $target->insertBefore($imported, $anchor);
                    } else {
                        // target has no children yet
                        $target->appendChild($imported);
                    }
                }
                $result[] = $imported;
            }
        }
        return self::create($result);
    }
470
471
    /**
     * Remove the set of matched elements from the DOM.
     *
     * (as opposed to Crawler::clear() which detaches the nodes only from Crawler
     * but leaves them in the DOM)
     *
     * Only nodes whose parent is a DOMElement are detached from the DOM. The crawler
     * itself is emptied in any case.
     *
     * @api
     */
    public function remove()
    {
        foreach ($this as $member) {
            /** @var \DOMNode $member */
            $parent = $member->parentNode;
            if ($parent instanceof \DOMElement) {
                $parent->removeChild($member);
            }
        }
        $this->clear();
    }
491
492
    /**
     * Remove an attribute from each element in the set of matched elements.
     *
     * jQuery-compatible alias that simply forwards to removeAttribute().
     *
     * @param string $name
     * @return HtmlPageCrawler
     * @api
     */
    public function removeAttr($name)
    {
        $result = $this->removeAttribute($name);

        return $result;
    }
505
506
    /**
     * Remove an attribute from each element in the set of matched elements.
     *
     * Nodes that are not DOMElement instances, and elements that do not have the
     * attribute, are left untouched.
     *
     * @param string $name
     * @return HtmlPageCrawler $this for chaining
     */
    public function removeAttribute($name)
    {
        foreach ($this as $element) {
            /** @var \DOMElement $element */
            if ($element instanceof \DOMElement && $element->hasAttribute($name)) {
                $element->removeAttribute($name);
            }
        }

        return $this;
    }
524
525
    /**
     * Remove one or more classes from each element in the list
     *
     * The argument is split on whitespace, so several classes can be removed in one call.
     * Class names are compared as exact strings. As before, the class attribute of every
     * element in the set is rewritten, even if nothing was removed.
     *
     * @param string $name One or more space-separated class names
     * @return HtmlPageCrawler $this for chaining
     * @api
     */
    public function removeClass($name)
    {
        $unwanted = preg_split('/\s+/', (string) $name, -1, PREG_SPLIT_NO_EMPTY);
        foreach ($this as $node) {
            if (!$node instanceof \DOMElement) {
                continue;
            }
            /** @var \DOMElement $node */
            $classes = preg_split('/\s+/s', $node->getAttribute('class'));
            $remaining = array();
            foreach ($classes as $class) {
                // strict: loose comparison would treat numeric-looking names such as
                // "1e1" and "10" as equal
                if (!in_array($class, $unwanted, true)) {
                    $remaining[] = $class;
                }
            }
            $node->setAttribute('class', trim(join(' ', $remaining)));
        }
        return $this;
    }
549
550
    /**
     * Replace each target element with the set of matched elements.
     *
     * The element at iteration key 0 takes the place of the target; all further elements
     * are inserted in front of the node that originally followed the target.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $element
     * @return \Wa72\HtmlPageDom\HtmlPageCrawler A new Crawler object containing all elements that were put in place of the target elements
     * @api
     */
    public function replaceAll($element)
    {
        $targets = self::create($element);
        $result = array();
        foreach ($targets as $index => $target) {
            /** @var \DOMNode $target */
            $container = $target->parentNode;
            $next = $target->nextSibling;
            foreach ($this as $position => $member) {
                /** @var \DOMNode $member */
                $imported = static::importNewnode($member, $target, $index);
                if ($position != 0) {
                    $container->insertBefore($imported, $next);
                } else {
                    $container->replaceChild($imported, $target);
                }
                $result[] = $imported;
            }
        }
        return self::create($result);
    }
578
579
    /**
     * Replace each element in the set of matched elements with the provided new content.
     *
     * The content node at iteration key 0 takes the place of the element; all further
     * content nodes are inserted in front of the node that originally followed the element.
     * After the call the content crawler holds the nodes that were actually inserted.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $content
     * @return \Wa72\HtmlPageDom\HtmlPageCrawler $this for chaining
     * @api
     */
    public function replaceWith($content)
    {
        $content = self::create($content);
        $inserted = array();
        foreach ($this as $index => $target) {
            /** @var \DOMNode $target */
            $container = $target->parentNode;
            $next = $target->nextSibling;
            foreach ($content as $position => $candidate) {
                /** @var \DOMNode $candidate */
                $imported = static::importNewnode($candidate, $target, $index);
                if ($position != 0) {
                    $container->insertBefore($imported, $next);
                } else {
                    $container->replaceChild($imported, $target);
                }
                $inserted[] = $imported;
            }
        }
        $content->clear();
        $content->add($inserted);
        return $this;
    }
609
610
    /**
     * Get the combined text contents of each element in the set of matched elements, including their descendants.
     *
     * This mirrors jQuery's text(), whereas the inherited Crawler::text() only returns the
     * text of the first node. The nodeValue of every node is concatenated in iteration order.
     *
     * @return string
     * @api
     */
    public function getCombinedText()
    {
        $combined = '';
        foreach ($this as $member) {
            /** @var \DOMNode $member */
            $combined = $combined . $member->nodeValue;
        }

        return $combined;
    }
627
628
    /**
     * Set the text contents of the matched elements.
     *
     * The text is passed through htmlspecialchars() once and the result is assigned to the
     * nodeValue of every node.
     * NOTE(review): the escaping presumably compensates for entity handling when nodeValue
     * is assigned - confirm against the DOM extension behaviour of the targeted PHP versions.
     *
     * @param string $text
     * @return HtmlPageCrawler $this for chaining
     * @api
     */
    public function setText($text)
    {
        $escaped = htmlspecialchars($text);
        foreach ($this as $member) {
            /** @var \DOMNode $member */
            $member->nodeValue = $escaped;
        }

        return $this;
    }
644
645
    /**
     * Add or remove one or more classes from each element in the set of matched elements, depending on the class's presence.
     *
     * The argument is split on any whitespace (spaces, tabs, newlines); empty parts that
     * result from repeated whitespace are ignored. Each class is toggled independently
     * for each element.
     *
     * @param string $classname One or more classnames separated by spaces
     * @return \Wa72\HtmlPageDom\HtmlPageCrawler $this for chaining
     * @api
     */
    public function toggleClass($classname)
    {
        $classes = preg_split('/\s+/', (string) $classname, -1, PREG_SPLIT_NO_EMPTY);
        foreach ($this as $node) {
            /** @var \DOMNode $node */
            // wrap the single node so that the class helpers act on this element only
            $c = self::create($node);
            foreach ($classes as $class) {
                if ($c->hasClass($class)) {
                    $c->removeClass($class);
                } else {
                    $c->addClass($class);
                }
            }
        }
        return $this;
    }
668
669
    /**
     * Remove the parents of the set of matched elements from the DOM, leaving the matched elements in their place.
     *
     * Collects the parent node of every matched element and calls unwrapInner() on a
     * crawler built from those parents.
     *
     * @return \Wa72\HtmlPageDom\HtmlPageCrawler $this for chaining
     * @api
     */
    public function unwrap()
    {
        $wrappers = array();
        foreach ($this as $member) {
            $wrappers[] = $member->parentNode;
        }
        $wrapperSet = self::create($wrappers);
        $wrapperSet->unwrapInner();

        return $this;
    }
685
686
    /**
     * Remove the matched elements, but promote the children to take their place.
     *
     * The children of each matched element are moved in front of it, then the element
     * itself is removed from its parent.
     *
     * @throws \InvalidArgumentException if a matched element has no parent DOMElement
     * @return \Wa72\HtmlPageDom\HtmlPageCrawler $this for chaining
     * @api
     */
    public function unwrapInner()
    {
        foreach ($this as $node) {
            if (!$node->parentNode instanceof \DOMElement) {
                throw new \InvalidArgumentException('DOMElement does not have a parent DOMElement node.');
            }

            // childNodes is a live list: take a snapshot before moving the children
            /** @var \DOMNode[] $children */
            $children = iterator_to_array($node->childNodes);
            foreach ($children as $child) {
                $node->parentNode->insertBefore($child, $node);
            }

            $node->parentNode->removeChild($node);
        }

        // the documented return value was missing; callers chaining on the result got null
        return $this;
    }
708
709
710
    /**
     * Wrap an HTML structure around each element in the set of matched elements
     *
     * The HTML structure must contain only one root node, e.g.:
     * Works: <div><div></div></div>
     * Does not work: <div></div><div></div>
     *
     * Only the first node of the wrapping content is used. Each matched element ends up
     * inside the innermost element of the wrapper, found by following the first element
     * child at every level. After the call the content crawler holds those innermost
     * elements (not the outer wrappers).
     *
     * @param string|HtmlPageCrawler|\DOMNode $wrappingElement
     * @return HtmlPageCrawler $this for chaining
     * @api
     */
    public function wrap($wrappingElement)
    {
        $content = self::create($wrappingElement);
        $innermostNodes = array();
        foreach ($this as $index => $target) {
            /** @var \DOMNode $target */
            $wrapper = $content->getNode(0);
            /** @var \DOMNode $wrapper */
            // Same import/clone rule as importNewnode(), done inline here; unlike
            // importNewnode() this does not touch preserveWhiteSpace of the document.
            if ($wrapper->ownerDocument !== $target->ownerDocument) {
                $wrapper = $target->ownerDocument->importNode($wrapper, true);
            } elseif ($index > 0) {
                $wrapper = $wrapper->cloneNode(true);
            }
            // put the wrapper where the target was; replaceChild() hands back the target
            $detached = $target->parentNode->replaceChild($wrapper, $target);

            // walk down to the innermost element, always taking the first element child
            $innermost = $wrapper;
            do {
                $firstElement = null;
                foreach ($innermost->childNodes as $child) {
                    if ($child instanceof \DOMElement) {
                        $firstElement = $child;
                        break;
                    }
                }
                if ($firstElement !== null) {
                    $innermost = $firstElement;
                }
            } while ($firstElement !== null);

            $innermost->appendChild($detached);
            $innermostNodes[] = $innermost;
        }
        $content->clear();
        $content->add($innermostNodes);
        return $this;
    }
758
759
    /**
     * Wrap an HTML structure around all elements in the set of matched elements.
     *
     * Only the first node of the wrapping content is used. It is inserted in front of the
     * first matched element, and all matched elements are then moved into its innermost
     * element (found by following the first element child at every level). After the call
     * the content crawler holds the inserted wrapper node.
     *
     * An empty set is left alone: the method returns immediately instead of failing on
     * the missing first node. This also makes wrapInner() safe for elements without children.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $content
     * @throws \LogicException if the matched elements do not all share the same parent
     * @return \Wa72\HtmlPageDom\HtmlPageCrawler $this for chaining
     * @api
     */
    public function wrapAll($content)
    {
        $first = $this->getNode(0);
        if ($first === null) {
            // nothing to wrap
            return $this;
        }

        $content = self::create($content);
        $parent = $first->parentNode;
        foreach ($this as $node) {
            /** @var \DOMNode $node */
            if ($node->parentNode !== $parent) {
                throw new \LogicException('Nodes to be wrapped with wrapAll() must all have the same parent');
            }
        }

        $newnode = $content->getNode(0);
        /** @var \DOMNode $newnode */
        $newnode = static::importNewnode($newnode, $parent);

        $newnode = $parent->insertBefore($newnode, $first);
        $content->clear();
        $content->add($newnode);

        // walk down to the innermost element, always taking the first element child
        while ($newnode->hasChildNodes()) {
            $elementFound = false;
            foreach ($newnode->childNodes as $child) {
                if ($child instanceof \DOMElement) {
                    $newnode = $child;
                    $elementFound = true;
                    break;
                }
            }
            if (!$elementFound) {
                break;
            }
        }
        foreach ($this as $node) {
            /** @var \DOMNode $node */
            $newnode->appendChild($node);
        }
        return $this;
    }
805
806
    /**
     * Wrap an HTML structure around the content of each element in the set of matched elements.
     *
     * For every matched element a crawler over its child nodes is created and wrapAll()
     * is called on it with the given content.
     *
     * @param string|HtmlPageCrawler|\DOMNode|\DOMNodeList $content
     * @return \Wa72\HtmlPageDom\HtmlPageCrawler $this for chaining
     * @api
     */
    public function wrapInner($content)
    {
        foreach ($this as $element) {
            /** @var \DOMNode $element */
            $children = self::create($element->childNodes);
            $children->wrapAll($content);
        }

        return $this;
    }
821
822
    /**
     * Get the HTML code fragment of all elements and their contents.
     *
     * If the first node contains a complete HTML document return only
     * the full code of this document.
     *
     * Otherwise all nodes are imported into a temporary document below a helper root
     * element named FRAGMENT_ROOT_TAGNAME; the helper's opening and closing tags are
     * stripped from the serialized result.
     *
     * @return string HTML code (fragment)
     * @api
     */
    public function saveHTML()
    {
        if ($this->isHtmlDocument()) {
            return $this->getDOMDocument()->saveHTML();
        }

        $doc = new \DOMDocument('1.0', 'UTF-8');
        // use the constant (not a literal) so that the element name always matches
        // the name the regular expression below strips again
        $root = $doc->appendChild($doc->createElement(self::FRAGMENT_ROOT_TAGNAME));
        foreach ($this as $node) {
            $root->appendChild($doc->importNode($node, true));
        }
        $html = trim($doc->saveHTML());
        return preg_replace('@^<'.self::FRAGMENT_ROOT_TAGNAME.'[^>]*>|</'.self::FRAGMENT_ROOT_TAGNAME.'>$@', '', $html);
    }
845
846
    /**
     * String conversion yields the same HTML code as saveHTML().
     *
     * @return string
     */
    public function __toString()
    {
        $html = $this->saveHTML();

        return $html;
    }
850
851
    /**
     * checks whether the first node contains a complete html document
     * (as opposed to a document fragment)
     *
     * This is the case when the first node is a DOMElement named "html" that is the
     * document element of its owner document.
     *
     * @return boolean
     */
    public function isHtmlDocument()
    {
        $first = $this->getNode(0);

        return $first instanceof \DOMElement
            && $first->ownerDocument instanceof \DOMDocument
            && $first->ownerDocument->documentElement === $first
            && $first->nodeName == 'html';
    }
870
871
    /**
     * get ownerDocument of the first element
     *
     * @return \DOMDocument|null null if the first node is not a DOMElement or has no owner DOMDocument
     */
    public function getDOMDocument()
    {
        $first = $this->getNode(0);
        if ($first instanceof \DOMElement && $first->ownerDocument instanceof \DOMDocument) {
            return $first->ownerDocument;
        }

        return null;
    }
887
888
    /**
     * Filters the list of nodes with a CSS selector.
     *
     * Forwards to the parent implementation unchanged.
     *
     * @param string $selector
     * @return HtmlPageCrawler
     */
    public function filter($selector)
    {
        $matches = parent::filter($selector);

        return $matches;
    }
898
899
    /**
     * Filters the list of nodes with an XPath expression.
     *
     * Forwards to the parent implementation unchanged.
     *
     * @param string $xpath An XPath expression
     *
     * @return HtmlPageCrawler A new instance of Crawler with the filtered list of nodes
     *
     * @api
     */
    public function filterXPath($xpath)
    {
        $matches = parent::filterXPath($xpath);

        return $matches;
    }
912
913
    /**
     * Adds HTML/XML content to the HtmlPageCrawler object (but not to the DOM of an already attached node).
     *
     * Overridden from Crawler because the parent always adds HTML fragments as complete documents.
     * An empty type defaults to 'text/html;charset=UTF-8'. HTML content without an <html>
     * tag is treated as a fragment and handed to addHtmlFragment(); everything else goes
     * to the parent implementation.
     *
     * @param string      $content A string to parse as HTML/XML
     * @param null|string $type    The content type of the string
     *
     * @return null|void
     */
    public function addContent($content, $type = null)
    {
        $type = empty($type) ? 'text/html;charset=UTF-8' : $type;

        if (substr($type, 0, 9) != 'text/html' || preg_match('/<html\b[^>]*>/i', $content)) {
            // not HTML at all, or a complete document containing an <html> tag
            parent::addContent($content, $type);
        } else {
            // HTML without <html> tag => a fragment, not a complete document
            $this->addHtmlFragment($content);
        }
    }
936
937
    /**
     * Adds the nodes of an HTML fragment to this crawler.
     *
     * The fragment is parsed by Helpers::getBodyNodeFromHtmlFragment(); the children of
     * the node returned from there are imported into a new document below a helper root
     * element named FRAGMENT_ROOT_TAGNAME and then added to the crawler.
     *
     * @param string $content HTML code fragment
     * @param string $charset charset used for the new document and passed on to the helper
     */
    public function addHtmlFragment($content, $charset = 'UTF-8')
    {
        $document = new \DOMDocument('1.0', $charset);
        $document->preserveWhiteSpace = false;
        $fragmentRoot = $document->appendChild($document->createElement(self::FRAGMENT_ROOT_TAGNAME));
        $body = Helpers::getBodyNodeFromHtmlFragment($content, $charset);
        foreach ($body->childNodes as $child) {
            $imported = $document->importNode($child, true);
            $attached = $fragmentRoot->appendChild($imported);
            if ($attached) {
                $this->addNode($attached);
            }
        }
    }
950
951
    /**
     * Adds a node to the current list of nodes.
     *
     * Overridden from the parent so that a Crawler can be passed as well: its nodes are
     * added one by one with addNode(). Any other argument is handled by the parent
     * implementation.
     *
     * @param null|\DOMNodeList|array|\DOMNode|Crawler $node A node
     *
     * @api
     */
    public function add($node)
    {
        if (!$node instanceof Crawler) {
            parent::add($node);

            return;
        }

        foreach ($node as $member) {
            $this->addNode($member);
        }
    }
973
974
    /**
     * Prepares a node for insertion next to (or into) a reference node.
     *
     * A node from another document is imported (deep) into the document of the reference
     * node; that document's preserveWhiteSpace flag is switched off on the way. A node of
     * the same document is returned as is, or as a deep clone when $clone is greater than 0.
     *
     * @param \DOMNode $newnode       the node to be inserted
     * @param \DOMNode $referencenode a node of the target document
     * @param int $clone              a value greater than 0 requests a clone for same-document nodes
     * @return \DOMNode the node that can be inserted into the target document
     */
    protected static function importNewnode(\DOMNode $newnode, \DOMNode $referencenode, $clone = 0) {
        $targetDocument = $referencenode->ownerDocument;
        if ($newnode->ownerDocument !== $targetDocument) {
            $targetDocument->preserveWhiteSpace = false;

            return $targetDocument->importNode($newnode, true);
        }

        return ($clone > 0) ? $newnode->cloneNode(true) : $newnode;
    }
991
992
//    /**
993
//     * Checks whether the first node in the set is disconnected (has no parent node)
994
//     *
995
//     * @return bool
996
//     */
997
//    public function isDisconnected()
998
//    {
999
//        $parent = $this->getNode(0)->parentNode;
1000
//        return ($parent == null || $parent->tagName == self::FRAGMENT_ROOT_TAGNAME);
1001
//    }
1002
1003
    /**
     * Provides the read-only pseudo properties "count" and "length", both of which
     * yield the number of nodes in the crawler.
     *
     * @param string $name property name
     * @return int
     * @throws \Exception for any other property name
     */
    public function __get($name)
    {
        if ($name === 'count' || $name === 'length') {
            return count($this);
        }

        throw new \Exception('No such property ' . $name);
    }
1012
}
1013