Completed
Push — master ( 5ed2e8...45d18c )
by Gilles
02:48
created
src/PHPHtmlParser/Dom.php 1 patch
Indentation   +629 added lines, -629 removed lines patch added patch discarded remove patch
@@ -16,633 +16,633 @@
 block discarded – undo
16 16
 class Dom
17 17
 {
18 18
 
19
-    /**
20
-     * The charset we would like the output to be in.
21
-     *
22
-     * @var string
23
-     */
24
-    protected $defaultCharset = 'UTF-8';
25
-
26
-    /**
27
-     * Contains the root node of this dom tree.
28
-     *
29
-     * @var HtmlNode
30
-     */
31
-    public $root;
32
-
33
-    /**
34
-     * The raw version of the document string.
35
-     *
36
-     * @var string
37
-     */
38
-    protected $raw;
39
-
40
-    /**
41
-     * The document string.
42
-     *
43
-     * @var Content
44
-     */
45
-    protected $content = null;
46
-
47
-    /**
48
-     * The original file size of the document.
49
-     *
50
-     * @var int
51
-     */
52
-    protected $rawSize;
53
-
54
-    /**
55
-     * The size of the document after it is cleaned.
56
-     *
57
-     * @var int
58
-     */
59
-    protected $size;
60
-
61
-    /**
62
-     * A global options array to be used by all load calls.
63
-     *
64
-     * @var array
65
-     */
66
-    protected $globalOptions = [];
67
-
68
-    /**
69
-     * A persistent option object to be used for all options in the
70
-     * parsing of the file.
71
-     *
72
-     * @var Options
73
-     */
74
-    protected $options;
75
-
76
-    /**
77
-     * A list of tags which will always be self closing
78
-     *
79
-     * @var array
80
-     */
81
-    protected $selfClosing = [
82
-        'img',
83
-        'br',
84
-        'input',
85
-        'meta',
86
-        'link',
87
-        'hr',
88
-        'base',
89
-        'embed',
90
-        'spacer',
91
-    ];
92
-
93
-    /**
94
-     * Returns the inner html of the root node.
95
-     *
96
-     * @return string
97
-     */
98
-    public function __toString()
99
-    {
100
-        return $this->root->innerHtml();
101
-    }
102
-
103
-    /**
104
-     * A simple wrapper around the root node.
105
-     *
106
-     * @param string $name
107
-     * @return mixed
108
-     */
109
-    public function __get($name)
110
-    {
111
-        return $this->root->$name;
112
-    }
113
-
114
-    /**
115
-     * Attempts to load the dom from any resource, string, file, or URL.
116
-     *
117
-     * @param string $str
118
-     * @param array $options
119
-     * @return $this
120
-     */
121
-    public function load($str, $options = [])
122
-    {
123
-        // check if it's a file
124
-        if (strpos($str, "\n") === false && is_file($str)) {
125
-            return $this->loadFromFile($str, $options);
126
-        }
127
-        // check if it's a url
128
-        if (preg_match("/^https?:\/\//i", $str)) {
129
-            return $this->loadFromUrl($str, $options);
130
-        }
131
-
132
-        return $this->loadStr($str, $options);
133
-    }
134
-
135
-    /**
136
-     * Loads the dom from a document file/url
137
-     *
138
-     * @param string $file
139
-     * @param array $options
140
-     * @return $this
141
-     */
142
-    public function loadFromFile($file, $options = [])
143
-    {
144
-        return $this->loadStr(file_get_contents($file), $options);
145
-    }
146
-
147
-    /**
148
-     * Use a curl interface implementation to attempt to load
149
-     * the content from a url.
150
-     *
151
-     * @param string $url
152
-     * @param array $options
153
-     * @param CurlInterface $curl
154
-     * @return $this
155
-     */
156
-    public function loadFromUrl($url, $options = [], CurlInterface $curl = null)
157
-    {
158
-        if (is_null($curl)) {
159
-            // use the default curl interface
160
-            $curl = new Curl;
161
-        }
162
-        $content = $curl->get($url);
163
-
164
-        return $this->loadStr($content, $options);
165
-    }
166
-
167
-    /**
168
-     * Parsers the html of the given string. Used for load(), loadFromFile(),
169
-     * and loadFromUrl().
170
-     *
171
-     * @param string $str
172
-     * @param array $option
173
-     * @return $this
174
-     */
175
-    public function loadStr($str, $option)
176
-    {
177
-        $this->options = new Options;
178
-        $this->options->setOptions($this->globalOptions)
179
-                      ->setOptions($option);
180
-
181
-        $this->rawSize = strlen($str);
182
-        $this->raw     = $str;
183
-
184
-        $html = $this->clean($str);
185
-
186
-        $this->size    = strlen($str);
187
-        $this->content = new Content($html);
188
-
189
-        $this->parse();
190
-        $this->detectCharset();
191
-
192
-        return $this;
193
-    }
194
-
195
-    /**
196
-     * Sets a global options array to be used by all load calls.
197
-     *
198
-     * @param array $options
199
-     * @return $this
200
-     */
201
-    public function setOptions(array $options)
202
-    {
203
-        $this->globalOptions = $options;
204
-
205
-        return $this;
206
-    }
207
-
208
-    /**
209
-     * Find elements by css selector on the root node.
210
-     *
211
-     * @param string $selector
212
-     * @param int $nth
213
-     * @return array
214
-     */
215
-    public function find($selector, $nth = null)
216
-    {
217
-        $this->isLoaded();
218
-
219
-        return $this->root->find($selector, $nth);
220
-    }
221
-
222
-    /**
223
-     * Adds the tag (or tags in an array) to the list of tags that will always
224
-     * be self closing.
225
-     *
226
-     * @param string|array $tag
227
-     * @return $this
228
-     */
229
-    public function addSelfClosingTag($tag)
230
-    {
231
-        if ( ! is_array($tag)) {
232
-            $tag = [$tag];
233
-        }
234
-        foreach ($tag as $value) {
235
-            $this->selfClosing[] = $value;
236
-        }
237
-
238
-        return $this;
239
-    }
240
-
241
-    /**
242
-     * Removes the tag (or tags in an array) from the list of tags that will
243
-     * always be self closing.
244
-     *
245
-     * @param string|array $tag
246
-     * @return $this
247
-     */
248
-    public function removeSelfClosingTag($tag)
249
-    {
250
-        if ( ! is_array($tag)) {
251
-            $tag = [$tag];
252
-        }
253
-        $this->selfClosing = array_diff($this->selfClosing, $tag);
254
-
255
-        return $this;
256
-    }
257
-
258
-    /**
259
-     * Sets the list of self closing tags to empty.
260
-     *
261
-     * @return $this
262
-     */
263
-    public function clearSelfClosingTags()
264
-    {
265
-        $this->selfClosing = [];
266
-
267
-        return $this;
268
-    }
269
-
270
-    /**
271
-     * Simple wrapper function that returns the first child.
272
-     *
273
-     * @return \PHPHtmlParser\Dom\AbstractNode
274
-     */
275
-    public function firstChild()
276
-    {
277
-        $this->isLoaded();
278
-
279
-        return $this->root->firstChild();
280
-    }
281
-
282
-    /**
283
-     * Simple wrapper function that returns the last child.
284
-     *
285
-     * @return \PHPHtmlParser\Dom\AbstractNode
286
-     */
287
-    public function lastChild()
288
-    {
289
-        $this->isLoaded();
290
-
291
-        return $this->root->lastChild();
292
-    }
293
-
294
-    /**
295
-     * Simple wrapper function that returns an element by the
296
-     * id.
297
-     *
298
-     * @param string $id
299
-     * @return \PHPHtmlParser\Dom\AbstractNode
300
-     */
301
-    public function getElementById($id)
302
-    {
303
-        $this->isLoaded();
304
-
305
-        return $this->find('#'.$id, 0);
306
-    }
307
-
308
-    /**
309
-     * Simple wrapper function that returns all elements by
310
-     * tag name.
311
-     *
312
-     * @param string $name
313
-     * @return array
314
-     */
315
-    public function getElementsByTag($name)
316
-    {
317
-        $this->isLoaded();
318
-
319
-        return $this->find($name);
320
-    }
321
-
322
-    /**
323
-     * Simple wrapper function that returns all elements by
324
-     * class name.
325
-     *
326
-     * @param string $class
327
-     * @return array
328
-     */
329
-    public function getElementsByClass($class)
330
-    {
331
-        $this->isLoaded();
332
-
333
-        return $this->find('.'.$class);
334
-    }
335
-
336
-    /**
337
-     * Checks if the load methods have been called.
338
-     *
339
-     * @throws NotLoadedException
340
-     */
341
-    protected function isLoaded()
342
-    {
343
-        if (is_null($this->content)) {
344
-            throw new NotLoadedException('Content is not loaded!');
345
-        }
346
-    }
347
-
348
-    /**
349
-     * Cleans the html of any none-html information.
350
-     *
351
-     * @param string $str
352
-     * @return string
353
-     */
354
-    protected function clean($str)
355
-    {
356
-        if ($this->options->get('cleanupInput') != true) {
357
-            // skip entire cleanup step
358
-            return $str;
359
-        }
360
-
361
-        // remove white space before closing tags
362
-        $str = mb_eregi_replace("'\s+>", "'>", $str);
363
-        $str = mb_eregi_replace('"\s+>', '">', $str);
364
-
365
-        // clean out the \n\r
366
-        $replace = ' ';
367
-        if ($this->options->get('preserveLineBreaks')) {
368
-            $replace = '&#10;';
369
-        }
370
-        $str = str_replace(["\r\n", "\r", "\n"], $replace, $str);
371
-
372
-        // strip the doctype
373
-        $str = mb_eregi_replace("<!doctype(.*?)>", '', $str);
374
-
375
-        // strip out comments
376
-        $str = mb_eregi_replace("<!--(.*?)-->", '', $str);
377
-
378
-        // strip out cdata
379
-        $str = mb_eregi_replace("<!\[CDATA\[(.*?)\]\]>", '', $str);
380
-
381
-        // strip out <script> tags
382
-        if ($this->options->get('removeScripts') == true) {
383
-            $str = mb_eregi_replace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str);
384
-            $str = mb_eregi_replace("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str);
385
-        }
386
-
387
-        // strip out <style> tags
388
-        if ($this->options->get('removeStyles') == true) {
389
-            $str = mb_eregi_replace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str);
390
-            $str = mb_eregi_replace("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str);
391
-        }
392
-
393
-        // strip out server side scripts
394
-        $str = mb_eregi_replace("(<\?)(.*?)(\?>)", '', $str);
395
-
396
-        // strip smarty scripts
397
-        $str = mb_eregi_replace("(\{\w)(.*?)(\})", '', $str);
398
-
399
-        return $str;
400
-    }
401
-
402
-    /**
403
-     * Attempts to parse the html in content.
404
-     */
405
-    protected function parse()
406
-    {
407
-        // add the root node
408
-        $this->root = new HtmlNode('root');
409
-        $activeNode = $this->root;
410
-        while ( ! is_null($activeNode)) {
411
-            $str = $this->content->copyUntil('<');
412
-            if ($str == '') {
413
-                $info = $this->parseTag();
414
-                if ( ! $info['status']) {
415
-                    // we are done here
416
-                    $activeNode = null;
417
-                    continue;
418
-                }
419
-
420
-                // check if it was a closing tag
421
-                if ($info['closing']) {
422
-                    $originalNode = $activeNode;
423
-                    while ($activeNode->getTag()->name() != $info['tag']) {
424
-                        $activeNode = $activeNode->getParent();
425
-                        if (is_null($activeNode)) {
426
-                            // we could not find opening tag
427
-                            $activeNode = $originalNode;
428
-                            break;
429
-                        }
430
-                    }
431
-                    if ( ! is_null($activeNode)) {
432
-                        $activeNode = $activeNode->getParent();
433
-                    }
434
-                    continue;
435
-                }
436
-
437
-                if ( ! isset($info['node'])) {
438
-                    continue;
439
-                }
440
-
441
-                /** @var AbstractNode $node */
442
-                $node = $info['node'];
443
-                $activeNode->addChild($node);
444
-
445
-                // check if node is self closing
446
-                if ( ! $node->getTag()->isSelfClosing()) {
447
-                    $activeNode = $node;
448
-                }
449
-            } else if ($this->options->whitespaceTextNode ||
450
-                trim($str) != ''
451
-            ) {
452
-                // we found text we care about
453
-                $textNode = new TextNode($str);
454
-                $activeNode->addChild($textNode);
455
-            }
456
-        }
457
-    }
458
-
459
-    /**
460
-     * Attempt to parse a tag out of the content.
461
-     *
462
-     * @return array
463
-     * @throws StrictException
464
-     */
465
-    protected function parseTag()
466
-    {
467
-        $return = [
468
-            'status'  => false,
469
-            'closing' => false,
470
-            'node'    => null,
471
-        ];
472
-        if ($this->content->char() != '<') {
473
-            // we are not at the beginning of a tag
474
-            return $return;
475
-        }
476
-
477
-        // check if this is a closing tag
478
-        if ($this->content->fastForward(1)->char() == '/') {
479
-            // end tag
480
-            $tag = $this->content->fastForward(1)
481
-                                 ->copyByToken('slash', true);
482
-            // move to end of tag
483
-            $this->content->copyUntil('>');
484
-            $this->content->fastForward(1);
485
-
486
-            // check if this closing tag counts
487
-            $tag = strtolower($tag);
488
-            if (in_array($tag, $this->selfClosing)) {
489
-                $return['status'] = true;
490
-
491
-                return $return;
492
-            } else {
493
-                $return['status']  = true;
494
-                $return['closing'] = true;
495
-                $return['tag']     = strtolower($tag);
496
-            }
497
-
498
-            return $return;
499
-        }
500
-
501
-        $tag  = strtolower($this->content->copyByToken('slash', true));
502
-        $node = new HtmlNode($tag);
503
-
504
-        // attributes
505
-        while ($this->content->char() != '>' &&
506
-            $this->content->char() != '/') {
507
-            $space = $this->content->skipByToken('blank', true);
508
-            if (empty($space)) {
509
-                $this->content->fastForward(1);
510
-                continue;
511
-            }
512
-
513
-            $name = $this->content->copyByToken('equal', true);
514
-            if ($name == '/') {
515
-                break;
516
-            }
517
-
518
-            if (empty($name)) {
519
-                $this->content->fastForward(1);
520
-                continue;
521
-            }
522
-
523
-            $this->content->skipByToken('blank');
524
-            if ($this->content->char() == '=') {
525
-                $attr = [];
526
-                $this->content->fastForward(1)
527
-                              ->skipByToken('blank');
528
-                switch ($this->content->char()) {
529
-                    case '"':
530
-                        $attr['doubleQuote'] = true;
531
-                        $this->content->fastForward(1);
532
-                        $string = $this->content->copyUntil('"', true, true);
533
-                        do {
534
-                            $moreString = $this->content->copyUntilUnless('"', '=>');
535
-                            $string .= $moreString;
536
-                        } while ( ! empty($moreString));
537
-                        $attr['value'] = $string;
538
-                        $this->content->fastForward(1);
539
-                        $node->getTag()->$name = $attr;
540
-                        break;
541
-                    case "'":
542
-                        $attr['doubleQuote'] = false;
543
-                        $this->content->fastForward(1);
544
-                        $string = $this->content->copyUntil("'", true, true);
545
-                        do {
546
-                            $moreString = $this->content->copyUntilUnless("'", '=>');
547
-                            $string .= $moreString;
548
-                        } while ( ! empty($moreString));
549
-                        $attr['value'] = $string;
550
-                        $this->content->fastForward(1);
551
-                        $node->getTag()->$name = $attr;
552
-                        break;
553
-                    default:
554
-                        $attr['doubleQuote']   = true;
555
-                        $attr['value']         = $this->content->copyByToken('attr', true);
556
-                        $node->getTag()->$name = $attr;
557
-                        break;
558
-                }
559
-            } else {
560
-                // no value attribute
561
-                if ($this->options->strict) {
562
-                    // can't have this in strict html
563
-                    $character = $this->content->getPosition();
564
-                    throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
565
-                }
566
-                $node->getTag()->$name = [
567
-                    'value'       => null,
568
-                    'doubleQuote' => true,
569
-                ];
570
-                if ($this->content->char() != '>') {
571
-                    $this->content->rewind(1);
572
-                }
573
-            }
574
-        }
575
-
576
-        $this->content->skipByToken('blank');
577
-        if ($this->content->char() == '/') {
578
-            // self closing tag
579
-            $node->getTag()->selfClosing();
580
-            $this->content->fastForward(1);
581
-        } elseif (in_array($tag, $this->selfClosing)) {
582
-
583
-            // Should be a self closing tag, check if we are strict
584
-            if ($this->options->strict) {
585
-                $character = $this->content->getPosition();
586
-                throw new StrictException("Tag '$tag' is not self closing! (character #$character)");
587
-            }
588
-
589
-            // We force self closing on this tag.
590
-            $node->getTag()->selfClosing();
591
-        }
592
-
593
-        $this->content->fastForward(1);
594
-
595
-        $return['status'] = true;
596
-        $return['node']   = $node;
597
-
598
-        return $return;
599
-    }
600
-
601
-    /**
602
-     * Attempts to detect the charset that the html was sent in.
603
-     *
604
-     * @return bool
605
-     */
606
-    protected function detectCharset()
607
-    {
608
-        // set the default
609
-        $encode = new Encode;
610
-        $encode->from($this->defaultCharset);
611
-        $encode->to($this->defaultCharset);
612
-
613
-        if ( ! is_null($this->options->enforceEncoding)) {
614
-            //  they want to enforce the given encoding
615
-            $encode->from($this->options->enforceEncoding);
616
-            $encode->to($this->options->enforceEncoding);
617
-
618
-            return false;
619
-        }
620
-
621
-        $meta = $this->root->find('meta[http-equiv=Content-Type]', 0);
622
-        if (is_null($meta)) {
623
-            // could not find meta tag
624
-            $this->root->propagateEncoding($encode);
625
-
626
-            return false;
627
-        }
628
-        $content = $meta->content;
629
-        if (empty($content)) {
630
-            // could not find content
631
-            $this->root->propagateEncoding($encode);
632
-
633
-            return false;
634
-        }
635
-        $matches = [];
636
-        if (preg_match('/charset=(.+)/', $content, $matches)) {
637
-            $encode->from(trim($matches[1]));
638
-            $this->root->propagateEncoding($encode);
639
-
640
-            return true;
641
-        }
642
-
643
-        // no charset found
644
-        $this->root->propagateEncoding($encode);
645
-
646
-        return false;
647
-    }
19
+	/**
20
+	 * The charset we would like the output to be in.
21
+	 *
22
+	 * @var string
23
+	 */
24
+	protected $defaultCharset = 'UTF-8';
25
+
26
+	/**
27
+	 * Contains the root node of this dom tree.
28
+	 *
29
+	 * @var HtmlNode
30
+	 */
31
+	public $root;
32
+
33
+	/**
34
+	 * The raw version of the document string.
35
+	 *
36
+	 * @var string
37
+	 */
38
+	protected $raw;
39
+
40
+	/**
41
+	 * The document string.
42
+	 *
43
+	 * @var Content
44
+	 */
45
+	protected $content = null;
46
+
47
+	/**
48
+	 * The original file size of the document.
49
+	 *
50
+	 * @var int
51
+	 */
52
+	protected $rawSize;
53
+
54
+	/**
55
+	 * The size of the document after it is cleaned.
56
+	 *
57
+	 * @var int
58
+	 */
59
+	protected $size;
60
+
61
+	/**
62
+	 * A global options array to be used by all load calls.
63
+	 *
64
+	 * @var array
65
+	 */
66
+	protected $globalOptions = [];
67
+
68
+	/**
69
+	 * A persistent option object to be used for all options in the
70
+	 * parsing of the file.
71
+	 *
72
+	 * @var Options
73
+	 */
74
+	protected $options;
75
+
76
+	/**
77
+	 * A list of tags which will always be self closing
78
+	 *
79
+	 * @var array
80
+	 */
81
+	protected $selfClosing = [
82
+		'img',
83
+		'br',
84
+		'input',
85
+		'meta',
86
+		'link',
87
+		'hr',
88
+		'base',
89
+		'embed',
90
+		'spacer',
91
+	];
92
+
93
+	/**
94
+	 * Returns the inner html of the root node.
95
+	 *
96
+	 * @return string
97
+	 */
98
+	public function __toString()
99
+	{
100
+		return $this->root->innerHtml();
101
+	}
102
+
103
+	/**
104
+	 * A simple wrapper around the root node.
105
+	 *
106
+	 * @param string $name
107
+	 * @return mixed
108
+	 */
109
+	public function __get($name)
110
+	{
111
+		return $this->root->$name;
112
+	}
113
+
114
+	/**
115
+	 * Attempts to load the dom from any resource, string, file, or URL.
116
+	 *
117
+	 * @param string $str
118
+	 * @param array $options
119
+	 * @return $this
120
+	 */
121
+	public function load($str, $options = [])
122
+	{
123
+		// check if it's a file
124
+		if (strpos($str, "\n") === false && is_file($str)) {
125
+			return $this->loadFromFile($str, $options);
126
+		}
127
+		// check if it's a url
128
+		if (preg_match("/^https?:\/\//i", $str)) {
129
+			return $this->loadFromUrl($str, $options);
130
+		}
131
+
132
+		return $this->loadStr($str, $options);
133
+	}
134
+
135
+	/**
136
+	 * Loads the dom from a document file/url
137
+	 *
138
+	 * @param string $file
139
+	 * @param array $options
140
+	 * @return $this
141
+	 */
142
+	public function loadFromFile($file, $options = [])
143
+	{
144
+		return $this->loadStr(file_get_contents($file), $options);
145
+	}
146
+
147
+	/**
148
+	 * Use a curl interface implementation to attempt to load
149
+	 * the content from a url.
150
+	 *
151
+	 * @param string $url
152
+	 * @param array $options
153
+	 * @param CurlInterface $curl
154
+	 * @return $this
155
+	 */
156
+	public function loadFromUrl($url, $options = [], CurlInterface $curl = null)
157
+	{
158
+		if (is_null($curl)) {
159
+			// use the default curl interface
160
+			$curl = new Curl;
161
+		}
162
+		$content = $curl->get($url);
163
+
164
+		return $this->loadStr($content, $options);
165
+	}
166
+
167
+	/**
168
+	 * Parsers the html of the given string. Used for load(), loadFromFile(),
169
+	 * and loadFromUrl().
170
+	 *
171
+	 * @param string $str
172
+	 * @param array $option
173
+	 * @return $this
174
+	 */
175
+	public function loadStr($str, $option)
176
+	{
177
+		$this->options = new Options;
178
+		$this->options->setOptions($this->globalOptions)
179
+					  ->setOptions($option);
180
+
181
+		$this->rawSize = strlen($str);
182
+		$this->raw     = $str;
183
+
184
+		$html = $this->clean($str);
185
+
186
+		$this->size    = strlen($str);
187
+		$this->content = new Content($html);
188
+
189
+		$this->parse();
190
+		$this->detectCharset();
191
+
192
+		return $this;
193
+	}
194
+
195
+	/**
196
+	 * Sets a global options array to be used by all load calls.
197
+	 *
198
+	 * @param array $options
199
+	 * @return $this
200
+	 */
201
+	public function setOptions(array $options)
202
+	{
203
+		$this->globalOptions = $options;
204
+
205
+		return $this;
206
+	}
207
+
208
+	/**
209
+	 * Find elements by css selector on the root node.
210
+	 *
211
+	 * @param string $selector
212
+	 * @param int $nth
213
+	 * @return array
214
+	 */
215
+	public function find($selector, $nth = null)
216
+	{
217
+		$this->isLoaded();
218
+
219
+		return $this->root->find($selector, $nth);
220
+	}
221
+
222
+	/**
223
+	 * Adds the tag (or tags in an array) to the list of tags that will always
224
+	 * be self closing.
225
+	 *
226
+	 * @param string|array $tag
227
+	 * @return $this
228
+	 */
229
+	public function addSelfClosingTag($tag)
230
+	{
231
+		if ( ! is_array($tag)) {
232
+			$tag = [$tag];
233
+		}
234
+		foreach ($tag as $value) {
235
+			$this->selfClosing[] = $value;
236
+		}
237
+
238
+		return $this;
239
+	}
240
+
241
+	/**
242
+	 * Removes the tag (or tags in an array) from the list of tags that will
243
+	 * always be self closing.
244
+	 *
245
+	 * @param string|array $tag
246
+	 * @return $this
247
+	 */
248
+	public function removeSelfClosingTag($tag)
249
+	{
250
+		if ( ! is_array($tag)) {
251
+			$tag = [$tag];
252
+		}
253
+		$this->selfClosing = array_diff($this->selfClosing, $tag);
254
+
255
+		return $this;
256
+	}
257
+
258
+	/**
259
+	 * Sets the list of self closing tags to empty.
260
+	 *
261
+	 * @return $this
262
+	 */
263
+	public function clearSelfClosingTags()
264
+	{
265
+		$this->selfClosing = [];
266
+
267
+		return $this;
268
+	}
269
+
270
+	/**
271
+	 * Simple wrapper function that returns the first child.
272
+	 *
273
+	 * @return \PHPHtmlParser\Dom\AbstractNode
274
+	 */
275
+	public function firstChild()
276
+	{
277
+		$this->isLoaded();
278
+
279
+		return $this->root->firstChild();
280
+	}
281
+
282
+	/**
283
+	 * Simple wrapper function that returns the last child.
284
+	 *
285
+	 * @return \PHPHtmlParser\Dom\AbstractNode
286
+	 */
287
+	public function lastChild()
288
+	{
289
+		$this->isLoaded();
290
+
291
+		return $this->root->lastChild();
292
+	}
293
+
294
+	/**
295
+	 * Simple wrapper function that returns an element by the
296
+	 * id.
297
+	 *
298
+	 * @param string $id
299
+	 * @return \PHPHtmlParser\Dom\AbstractNode
300
+	 */
301
+	public function getElementById($id)
302
+	{
303
+		$this->isLoaded();
304
+
305
+		return $this->find('#'.$id, 0);
306
+	}
307
+
308
+	/**
309
+	 * Simple wrapper function that returns all elements by
310
+	 * tag name.
311
+	 *
312
+	 * @param string $name
313
+	 * @return array
314
+	 */
315
+	public function getElementsByTag($name)
316
+	{
317
+		$this->isLoaded();
318
+
319
+		return $this->find($name);
320
+	}
321
+
322
+	/**
323
+	 * Simple wrapper function that returns all elements by
324
+	 * class name.
325
+	 *
326
+	 * @param string $class
327
+	 * @return array
328
+	 */
329
+	public function getElementsByClass($class)
330
+	{
331
+		$this->isLoaded();
332
+
333
+		return $this->find('.'.$class);
334
+	}
335
+
336
+	/**
337
+	 * Checks if the load methods have been called.
338
+	 *
339
+	 * @throws NotLoadedException
340
+	 */
341
+	protected function isLoaded()
342
+	{
343
+		if (is_null($this->content)) {
344
+			throw new NotLoadedException('Content is not loaded!');
345
+		}
346
+	}
347
+
348
+	/**
349
+	 * Cleans the html of any none-html information.
350
+	 *
351
+	 * @param string $str
352
+	 * @return string
353
+	 */
354
+	protected function clean($str)
355
+	{
356
+		if ($this->options->get('cleanupInput') != true) {
357
+			// skip entire cleanup step
358
+			return $str;
359
+		}
360
+
361
+		// remove white space before closing tags
362
+		$str = mb_eregi_replace("'\s+>", "'>", $str);
363
+		$str = mb_eregi_replace('"\s+>', '">', $str);
364
+
365
+		// clean out the \n\r
366
+		$replace = ' ';
367
+		if ($this->options->get('preserveLineBreaks')) {
368
+			$replace = '&#10;';
369
+		}
370
+		$str = str_replace(["\r\n", "\r", "\n"], $replace, $str);
371
+
372
+		// strip the doctype
373
+		$str = mb_eregi_replace("<!doctype(.*?)>", '', $str);
374
+
375
+		// strip out comments
376
+		$str = mb_eregi_replace("<!--(.*?)-->", '', $str);
377
+
378
+		// strip out cdata
379
+		$str = mb_eregi_replace("<!\[CDATA\[(.*?)\]\]>", '', $str);
380
+
381
+		// strip out <script> tags
382
+		if ($this->options->get('removeScripts') == true) {
383
+			$str = mb_eregi_replace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str);
384
+			$str = mb_eregi_replace("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str);
385
+		}
386
+
387
+		// strip out <style> tags
388
+		if ($this->options->get('removeStyles') == true) {
389
+			$str = mb_eregi_replace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str);
390
+			$str = mb_eregi_replace("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str);
391
+		}
392
+
393
+		// strip out server side scripts
394
+		$str = mb_eregi_replace("(<\?)(.*?)(\?>)", '', $str);
395
+
396
+		// strip smarty scripts
397
+		$str = mb_eregi_replace("(\{\w)(.*?)(\})", '', $str);
398
+
399
+		return $str;
400
+	}
401
+
402
+	/**
403
+	 * Attempts to parse the html in content.
404
+	 */
405
+	protected function parse()
406
+	{
407
+		// add the root node
408
+		$this->root = new HtmlNode('root');
409
+		$activeNode = $this->root;
410
+		while ( ! is_null($activeNode)) {
411
+			$str = $this->content->copyUntil('<');
412
+			if ($str == '') {
413
+				$info = $this->parseTag();
414
+				if ( ! $info['status']) {
415
+					// we are done here
416
+					$activeNode = null;
417
+					continue;
418
+				}
419
+
420
+				// check if it was a closing tag
421
+				if ($info['closing']) {
422
+					$originalNode = $activeNode;
423
+					while ($activeNode->getTag()->name() != $info['tag']) {
424
+						$activeNode = $activeNode->getParent();
425
+						if (is_null($activeNode)) {
426
+							// we could not find opening tag
427
+							$activeNode = $originalNode;
428
+							break;
429
+						}
430
+					}
431
+					if ( ! is_null($activeNode)) {
432
+						$activeNode = $activeNode->getParent();
433
+					}
434
+					continue;
435
+				}
436
+
437
+				if ( ! isset($info['node'])) {
438
+					continue;
439
+				}
440
+
441
+				/** @var AbstractNode $node */
442
+				$node = $info['node'];
443
+				$activeNode->addChild($node);
444
+
445
+				// check if node is self closing
446
+				if ( ! $node->getTag()->isSelfClosing()) {
447
+					$activeNode = $node;
448
+				}
449
+			} else if ($this->options->whitespaceTextNode ||
450
+				trim($str) != ''
451
+			) {
452
+				// we found text we care about
453
+				$textNode = new TextNode($str);
454
+				$activeNode->addChild($textNode);
455
+			}
456
+		}
457
+	}
458
+
/**
 * Tries to read one tag starting at the current position of the content.
 *
 * The returned array always carries the keys 'status', 'closing' and
 * 'node'. For a closing tag whose name is not in the self closing list a
 * 'tag' key with the lowercased tag name is added as well.
 *
 * @return array
 * @throws StrictException when strict mode is on and the tag has a value-less
 *                         attribute or a known self closing tag is not closed
 */
protected function parseTag()
{
	$result = [
		'status'  => false,
		'closing' => false,
		'node'    => null,
	];

	// a tag can only start on '<'
	if ($this->content->char() != '<') {
		return $result;
	}

	// "</name ...>" -- a closing tag
	if ($this->content->fastForward(1)->char() == '/') {
		$tag = $this->content->fastForward(1)
							 ->copyByToken('slash', true);
		// skip the remainder of the tag, including the '>'
		$this->content->copyUntil('>');
		$this->content->fastForward(1);

		$tag              = strtolower($tag);
		$result['status'] = true;
		if ( ! in_array($tag, $this->selfClosing)) {
			// closing tags of self closing elements are reported as
			// handled but not as "closing", so they never pop a node
			$result['closing'] = true;
			$result['tag']     = $tag;
		}

		return $result;
	}

	$tag  = strtolower($this->content->copyByToken('slash', true));
	$node = new HtmlNode($tag);

	// collect the attributes until the tag ends
	while ($this->content->char() != '>' &&
		$this->content->char() != '/') {
		$space = $this->content->skipByToken('blank', true);
		if (empty($space)) {
			$this->content->fastForward(1);
			continue;
		}

		$name = $this->content->copyByToken('equal', true);
		if ($name == '/') {
			break;
		}

		if (empty($name)) {
			$this->content->fastForward(1);
			continue;
		}

		$this->content->skipByToken('blank');
		if ($this->content->char() != '=') {
			// attribute without a value
			if ($this->options->strict) {
				// can't have this in strict html
				$character = $this->content->getPosition();
				throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
			}
			$node->getTag()->$name = [
				'value'       => null,
				'doubleQuote' => true,
			];
			if ($this->content->char() != '>') {
				$this->content->rewind(1);
			}
			continue;
		}

		// attribute with a value: step over '=' and any blanks
		$this->content->fastForward(1)
					  ->skipByToken('blank');
		$quote = $this->content->char();
		if ($quote == '"' || $quote == "'") {
			// quoted value: read up to the matching quote
			$this->content->fastForward(1);
			$value = $this->content->copyUntil($quote, true, true);
			do {
				$extra = $this->content->copyUntilUnless($quote, '=>');
				$value .= $extra;
			} while ( ! empty($extra));
			// step over the closing quote
			$this->content->fastForward(1);
			$attr = [
				'doubleQuote' => $quote == '"',
				'value'       => $value,
			];
		} else {
			// unquoted value, recorded as if it had been double quoted
			$attr = [
				'doubleQuote' => true,
				'value'       => $this->content->copyByToken('attr', true),
			];
		}
		$node->getTag()->$name = $attr;
	}

	$this->content->skipByToken('blank');
	if ($this->content->char() == '/') {
		// explicit "/>"
		$node->getTag()->selfClosing();
		$this->content->fastForward(1);
	} elseif (in_array($tag, $this->selfClosing)) {

		// a tag from the self closing list written without "/>"
		if ($this->options->strict) {
			$character = $this->content->getPosition();
			throw new StrictException("Tag '$tag' is not self closing! (character #$character)");
		}

		// outside of strict mode the tag is forced to be self closing
		$node->getTag()->selfClosing();
	}

	// step over the '>'
	$this->content->fastForward(1);

	$result['status'] = true;
	$result['node']   = $node;

	return $result;
}
600
+
/**
 * Tries to work out which charset the html was written in and passes the
 * resulting encoder to the root node.
 *
 * When the enforceEncoding option is set the method returns before the
 * encoder is propagated to the root node.
 *
 * @return bool true only when a charset was read from the Content-Type
 *              meta tag, false in every other case
 */
protected function detectCharset()
{
	// default: convert from the default charset to itself
	$encode = new Encode;
	$encode->from($this->defaultCharset);
	$encode->to($this->defaultCharset);

	$enforced = $this->options->enforceEncoding;
	if ( ! is_null($enforced)) {
		// the caller wants this exact encoding on both sides
		$encode->from($enforced);
		$encode->to($enforced);

		return false;
	}

	$detected = false;
	$meta     = $this->root->find('meta[http-equiv=Content-Type]', 0);
	if ( ! is_null($meta)) {
		$content = $meta->content;
		$matches = [];
		if ( ! empty($content) && preg_match('/charset=(.+)/', $content, $matches)) {
			$encode->from(trim($matches[1]));
			$detected = true;
		}
	}

	// found or not, the node tree gets the encoder
	$this->root->propagateEncoding($encode);

	return $detected;
}
648 648
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/Dom/TextNode.php 1 patch
Indentation   +82 added lines, -82 removed lines patch added patch discarded remove patch
@@ -9,97 +9,97 @@
 block discarded – undo
class TextNode extends LeafNode
{

	/**
	 * The tag object of this node; the constructor creates it with the
	 * name 'text'.
	 *
	 * @var Tag
	 */
	protected $tag;

	/**
	 * The text of this node as stored by the constructor.
	 *
	 * @var string
	 */
	protected $text;

	/**
	 * Cached result of converting $text with the encoder, null until the
	 * first conversion and after clear().
	 *
	 * @var string
	 */
	protected $convertedText = null;

	/**
	 * Stores the given text on the node.
	 *
	 * @param string $text
	 */
	public function __construct($text)
	{
		// every run of whitespace becomes a single space ...
		$collapsed = mb_ereg_replace('\s+', ' ', $text);

		// ... and encoded line breaks become real ones
		$this->text = str_replace('&#10;', "\n", $collapsed);
		$this->tag  = new Tag('text');
		parent::__construct();
	}

	/**
	 * Returns the text of this node, converted by the encoder when one
	 * is set.
	 *
	 * @return string
	 */
	public function text()
	{
		if (is_null($this->encode)) {
			// no encoder, hand back the stored text as is
			return $this->text;
		}

		if (is_null($this->convertedText)) {
			// convert once and remember the result
			$this->convertedText = $this->encode->convert($this->text);
		}

		return $this->convertedText;
	}

	/**
	 * A text node has no html of its own, so this is just the text.
	 *
	 * @return string
	 * @uses $this->text()
	 */
	public function innerHtml()
	{
		return $this->text();
	}

	/**
	 * A text node has no html of its own, so this is just the text.
	 *
	 * @return string
	 * @uses $this->text()
	 */
	public function outerHtml()
	{
		return $this->text();
	}

	/**
	 * Drops the cached converted text. To be called when something in the
	 * node tree has changed, like a child being added or a parent being
	 * changed.
	 */
	protected function clear()
	{
		$this->convertedText = null;
	}
}
Please login to merge, or discard this patch.
src/PHPHtmlParser/Dom/Tag.php 1 patch
Indentation   +255 added lines, -255 removed lines patch added patch discarded remove patch
@@ -12,259 +12,259 @@
 block discarded – undo
class Tag
{

	/**
	 * The name of the tag.
	 *
	 * @var string
	 */
	protected $name;

	/**
	 * The attributes of the tag, keyed by lowercased attribute name. Each
	 * entry is an array; entries built by setAttribute() from a plain
	 * value have the keys 'value' and 'doubleQuote'.
	 *
	 * @var array
	 */
	protected $attr = [];

	/**
	 * Is this tag self closing.
	 *
	 * @var bool
	 */
	protected $selfClosing = false;

	/**
	 * Tag noise, as stored by noise(). Nothing in this class reads it.
	 */
	protected $noise = '';

	/**
	 * The encoder used to convert string attribute values when they are
	 * read, or null when no conversion is wanted.
	 *
	 * @var mixed
	 */
	protected $encode = null;

	/**
	 * Sets up the tag with a name.
	 *
	 * @param $name
	 */
	public function __construct($name)
	{
		$this->name = $name;
	}

	/**
	 * Magic method to get any of the attributes.
	 *
	 * @param string $key
	 * @return mixed
	 */
	public function __get($key)
	{
		return $this->getAttribute($key);
	}

	/**
	 * Magic method to set any attribute.
	 *
	 * @param string $key
	 * @param mixed $value
	 */
	public function __set($key, $value)
	{
		$this->setAttribute($key, $value);
	}

	/**
	 * Returns the name of this tag.
	 *
	 * @return string
	 */
	public function name()
	{
		return $this->name;
	}

	/**
	 * Sets the tag to be self closing.
	 *
	 * @return $this
	 */
	public function selfClosing()
	{
		$this->selfClosing = true;

		return $this;
	}

	/**
	 * Checks if the tag is self closing.
	 *
	 * @return bool
	 */
	public function isSelfClosing()
	{
		return $this->selfClosing;
	}

	/**
	 * Sets the encoding type to be used.
	 *
	 * @param Encode $encode
	 */
	public function setEncoding(Encode $encode)
	{
		$this->encode = $encode;
	}

	/**
	 * Sets the noise for this tag (if any)
	 *
	 * @param $noise
	 * @return $this
	 */
	public function noise($noise)
	{
		$this->noise = $noise;

		return $this;
	}

	/**
	 * Set an attribute for this tag. The key is stored lowercased; a
	 * value that is not an array is wrapped as a double quoted value.
	 *
	 * @param string $key
	 * @param string|array $value
	 * @return $this
	 */
	public function setAttribute($key, $value)
	{
		$key = strtolower($key);
		if ( ! is_array($value)) {
			$value = [
				'value'       => $value,
				'doubleQuote' => true,
			];
		}
		$this->attr[$key] = $value;

		return $this;
	}

	/**
	 * Removes an attribute from this tag.
	 *
	 * @param $key
	 * @return void
	 */
	public function removeAttribute($key)
	{
		$key = strtolower($key);
		unset($this->attr[$key]);
	}

	/**
	 * Removes all attributes on this tag.
	 *
	 * @return void
	 */
	public function removeAllAttributes()
	{
		$this->attr = [];
	}

	/**
	 * Sets the attributes for this tag
	 *
	 * @param array $attr
	 * @return $this
	 */
	public function setAttributes(array $attr)
	{
		foreach ($attr as $key => $value) {
			$this->setAttribute($key, $value);
		}

		return $this;
	}

	/**
	 * Returns all attributes of this tag, each in the form getAttribute()
	 * returns it.
	 *
	 * @return array
	 */
	public function getAttributes()
	{
		$return = [];
		foreach (array_keys($this->attr) as $attr) {
			$return[$attr] = $this->getAttribute($attr);
		}

		return $return;
	}

	/**
	 * Returns an attribute by the key.
	 *
	 * The key is lowercased first, the same way setAttribute() and
	 * removeAttribute() treat it, so lookups are case insensitive. A
	 * string value is converted with the encoder on a copy: the stored
	 * value stays untouched, so reading an attribute more than once never
	 * converts an already converted value again.
	 *
	 * @param string $key
	 * @return mixed the attribute array, or null when it is not set
	 */
	public function getAttribute($key)
	{
		$key = strtolower($key);
		if ( ! isset($this->attr[$key])) {
			return null;
		}

		$info = $this->attr[$key];
		if (isset($info['value']) && is_string($info['value']) && ! is_null($this->encode)) {
			// convert charset, on the copy only
			$info['value'] = $this->encode->convert($info['value']);
		}

		return $info;
	}

	/**
	 * Generates the opening tag for this object.
	 *
	 * @return string
	 */
	public function makeOpeningTag()
	{
		$return = '<'.$this->name;

		// add the attributes
		foreach (array_keys($this->attr) as $key) {
			$info = $this->getAttribute($key);
			$val  = $info['value'];
			if (is_null($val)) {
				// attribute without a value
				$return .= ' '.$key;
			} elseif ($info['doubleQuote']) {
				$return .= ' '.$key.'="'.$val.'"';
			} else {
				$return .= ' '.$key.'=\''.$val.'\'';
			}
		}

		if ($this->selfClosing) {
			return $return.' />';
		}

		return $return.'>';
	}

	/**
	 * Generates the closing tag for this object; a self closing tag has
	 * none, so an empty string is returned for it.
	 *
	 * @return string
	 */
	public function makeClosingTag()
	{
		if ($this->selfClosing) {
			return '';
		}

		return '</'.$this->name.'>';
	}
}
Please login to merge, or discard this patch.
src/PHPHtmlParser/Dom/AbstractNode.php 1 patch
Indentation   +469 added lines, -469 removed lines patch added patch discarded remove patch
@@ -18,473 +18,473 @@
 block discarded – undo
18 18
 abstract class AbstractNode
19 19
 {
20 20
 
/**
 * The tag object of this node (name/type and attributes).
 *
 * @var \PHPHtmlParser\Dom\Tag
 */
protected $tag;

/**
 * A list of attributes on this tag.
 *
 * @var array
 */
protected $attr = [];

/**
 * The parent node, null while the node has no parent.
 *
 * @var InnerNode
 */
protected $parent = null;

/**
 * The unique id of this object, as given by PHP (spl_object_hash).
 *
 * @var string
 */
protected $id;

/**
 * The encoding object used to encode strings.
 *
 * @var mixed
 */
protected $encode;
55
-
/**
 * Gives the node its id: the spl object hash of the node itself.
 */
public function __construct()
{
    $this->id = spl_object_hash($this);
}
63
-
/**
 * Magic get method for attributes and certain methods.
 *
 * An attribute of that name wins. Otherwise the (case insensitive) keys
 * 'outerhtml', 'innerhtml', 'text', 'tag' and 'parent' map to the method
 * of the same purpose; any other key yields null.
 *
 * @param string $key
 * @return mixed
 */
public function __get($key)
{
    // check attribute first; looked up once so the value is not fetched
    // (and possibly converted) a second time
    $attribute = $this->getAttribute($key);
    if ( ! is_null($attribute)) {
        return $attribute;
    }

    switch (strtolower($key)) {
        case 'outerhtml':
            return $this->outerHtml();
        case 'innerhtml':
            return $this->innerHtml();
        case 'text':
            return $this->text();
        case 'tag':
            return $this->getTag();
        case 'parent':
            // the result was previously dropped, so 'parent' always gave null
            return $this->getParent();
    }

    return null;
}
90
-
/**
 * Lets go of every object reference this node holds.
 */
public function __destruct()
{
    $this->children = [];
    $this->attr     = [];
    $this->parent   = null;
    $this->tag      = null;
}
101
-
/**
 * The string form of a node is its outer html.
 *
 * @return string
 */
public function __toString()
{
    return $this->outerHtml();
}
111
-
/**
 * Returns the id given to this object in the constructor.
 *
 * @return string
 */
public function id()
{
    return $this->id;
}
119
-
/**
 * Returns the parent of this node, or null when it has none.
 *
 * @return AbstractNode
 */
public function getParent()
{
    return $this->parent;
}
129
-
/**
 * Makes the given node the parent of this node.
 *
 * Nothing happens when it already is the parent. Otherwise the node is
 * removed from its old parent (if any), added to the new one and its
 * cache is cleared.
 *
 * @param InnerNode $parent
 * @return $this
 * @throws CircularException
 */
public function setParent(InnerNode $parent)
{
    $previous = $this->parent;
    if ( ! is_null($previous)) {
        if ($previous->id() == $parent->id()) {
            // already the parent
            return $this;
        }

        // leave the old parent
        $previous->removeChild($this->id);
    }

    $this->parent = $parent;

    // register with the new parent
    $parent->addChild($this);

    // anything cached may be stale now
    $this->clear();

    return $this;
}
159
-
/**
 * Takes this node out of its parent's children (when it has a parent)
 * and forgets the parent.
 *
 * @return void
 */
public function delete()
{
    $owner = $this->parent;
    if ( ! is_null($owner)) {
        $owner->removeChild($this->id);
    }

    $this->parent = null;
}
174
-
/**
 * Stores the encoding object on this node and passes it on to the tag
 * of this node.
 *
 * @param Encode $encode
 * @return void
 */
public function propagateEncoding(Encode $encode)
{
    $this->tag->setEncoding($encode);
    $this->encode = $encode;
}
186
-
/**
 * Tells whether the node with the given id is an ancestor of this node.
 *
 * @param int $id
 * @return bool
 */
public function isAncestor($id)
{
    return ! is_null($this->getAncestor($id));
}
202
-
/**
 * Walks up the parents looking for the node with the given id.
 *
 * @param int $id
 * @return null|AbstractNode the ancestor, or null when none has that id
 */
public function getAncestor($id)
{
    if (is_null($this->parent)) {
        // reached the top without a match
        return null;
    }

    if ($this->parent->id() == $id) {
        return $this->parent;
    }

    return $this->parent->getAncestor($id);
}
221
-
/**
 * Asks the parent for the child that follows this node.
 *
 * @return AbstractNode
 * @throws ParentNotFoundException when this node has no parent
 */
public function nextSibling()
{
    $owner = $this->parent;
    if (is_null($owner)) {
        throw new ParentNotFoundException('Parent is not set for this node.');
    }

    return $owner->nextChild($this->id);
}
236
-
/**
 * Asks the parent for the child that precedes this node.
 *
 * @return AbstractNode
 * @throws ParentNotFoundException when this node has no parent
 */
public function previousSibling()
{
    $owner = $this->parent;
    if (is_null($owner)) {
        throw new ParentNotFoundException('Parent is not set for this node.');
    }

    return $owner->previousChild($this->id);
}
251
-
/**
 * Returns the tag object of this node.
 *
 * @return Tag
 */
public function getTag()
{
    return $this->tag;
}
261
-
/**
 * A wrapper around the getAttributes method of the tag of this node
 * that reduces every attribute to its 'value' entry.
 *
 * @return array attribute name => attribute value
 */
public function getAttributes()
{
    // array_map keeps the keys when it is given a single array
    return array_map(function ($info) {
        return $info['value'];
    }, $this->tag->getAttributes());
}
277
-
/**
 * A wrapper around the getAttribute method of the tag of this node
 * that returns only the 'value' entry of the attribute.
 *
 * @param string $key
 * @return mixed the value, or null when the tag has no such attribute
 */
public function getAttribute($key)
{
    $info = $this->tag->getAttribute($key);
    if (is_null($info)) {
        return null;
    }

    return $info['value'];
}
294
-
/**
 * Sets an attribute by handing key and value to the setAttribute
 * method of the tag of this node.
 *
 * @param string $key
 * @param string $value
 * @return $this
 */
public function setAttribute($key, $value)
{
    $this->tag->setAttribute($key, $value);

    return $this;
}
309
-
/**
 * Removes an attribute by handing the key to the removeAttribute
 * method of the tag of this node.
 *
 * @param string $key
 * @return void
 */
public function removeAttribute($key)
{
    $this->tag->removeAttribute($key);
}
321
-
/**
 * Removes every attribute through the removeAllAttributes method of
 * the tag of this node.
 *
 * @return void
 */
public function removeAllAttributes()
{
    $this->tag->removeAllAttributes();
}
332
-
/**
 * Finds the closest node with the given tag name on the path from this
 * node to the root. The node itself is part of the search.
 *
 * @param  string $tag
 * @return AbstractNode
 * @throws ParentNotFoundException when no node on the path has that tag
 */
public function ancestorByTag($tag)
{
    // start at ourselves and climb one parent per step
    for ($node = $this; ! is_null($node); $node = $node->getParent()) {
        if ($node->tag->name() == $tag) {
            return $node;
        }
    }

    throw new ParentNotFoundException('Could not find an ancestor with "'.$tag.'" tag');
}
355
-
/**
 * Find elements by css selector
 *
 * Without $nth the whole result of the selector is returned. With $nth
 * only the entry at that index is returned, or null when there is none.
 *
 * @param string $selector
 * @param int $nth
 * @return array|AbstractNode
 */
public function find($selector, $nth = null)
{
    $query = new Selector($selector);
    $found = $query->find($this);

    if (is_null($nth)) {
        return $found;
    }

    return isset($found[$nth]) ? $found[$nth] : null;
}
379
-
/**
 * Function to try a few tricks to determine the displayed size of an img on the page.
 * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types.
 *
 * The width and height attributes of the tag are used first; a dimension
 * that is still unknown afterwards is looked up in the inline style
 * attribute.
 *
 * Future enhancement:
 * Look in the tag to see if there is a class or id specified that has a height or width attribute to it.
 *
 * Far future enhancement
 * Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width
 * Note that in this case, the class or id will have the img sub-selector for it to apply to the image.
 *
 * ridiculously far future development
 * If the class or id is specified in a SEPARATE css file that's not on the page, go get it and do what we were just doing for the ones on the page.
 *
 * @author John Schlick
 * @return array|bool false when this is not an img tag, otherwise an array with the keys
 *                    'height' and 'width'; a dimension that could not be worked out is -1.
 */
public function get_display_size()
{
    if ($this->tag->name() != 'img') {
        return false;
    }

    $width  = -1;
    $height = -1;

    // See if there is a height or width attribute in the tag itself.
    // The attributes are read through $this->getAttribute(), which yields
    // the plain value; the tag's own getAttribute() returns the whole
    // attribute array (value plus quote flag).
    $attrWidth = $this->getAttribute('width');
    if ( ! is_null($attrWidth)) {
        $width = $attrWidth;
    }

    $attrHeight = $this->getAttribute('height');
    if ( ! is_null($attrHeight)) {
        $height = $attrHeight;
    }

    // Now look for an inline style.
    $style = $this->getAttribute('style');
    if ( ! is_null($style)) {
        // Thanks to user 'gnarf' from stackoverflow for this regular expression.
        $attributes = [];
        $matches    = [];
        preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $style, $matches,
            PREG_SET_ORDER);
        foreach ($matches as $match) {
            // the value group also swallows whitespace in front of the ';'
            $attributes[$match[1]] = trim($match[2]);
        }

        $width  = $this->getLength($attributes, $width, 'width');
        // the fallback for the height has to be the height found so far,
        // not the width
        $height = $this->getLength($attributes, $height, 'height');
    }

    return [
        'height' => $height,
        'width'  => $width,
    ];
}
436
-
437
-    /**
438
-     * If there is a length in the style attributes use it.
439
-     *
440
-     * @param array $attributes
441
-     * @param int $length
442
-     * @param string $key
443
-     * @return int
444
-     */
445
-    protected function getLength(array $attributes, $length, $key)
446
-    {
447
-        if (isset($attributes[$key]) && $length == -1) {
448
-            // check that the last two characters are px (pixels)
449
-            if (strtolower(substr($attributes[$key], -2)) == 'px') {
450
-                $proposed_length = substr($attributes[$key], 0, -2);
451
-                // Now make sure that it's an integer and not something stupid.
452
-                if (filter_var($proposed_length, FILTER_VALIDATE_INT)) {
453
-                    $length = $proposed_length;
454
-                }
455
-            }
456
-        }
457
-
458
-        return $length;
459
-    }
460
-
461
-    /**
462
-     * Gets the inner html of this node.
463
-     *
464
-     * @return string
465
-     */
466
-    abstract public function innerHtml();
467
-
468
-    /**
469
-     * Gets the html of this node, including it's own
470
-     * tag.
471
-     *
472
-     * @return string
473
-     */
474
-    abstract public function outerHtml();
475
-
476
-    /**
477
-     * Gets the text of this node (if there is any text).
478
-     *
479
-     * @return string
480
-     */
481
-    abstract public function text();
482
-
483
-    /**
484
-     * Call this when something in the node tree has changed. Like a child has been added
485
-     * or a parent has been changed.
486
-     *
487
-     * @return void
488
-     */
489
-    abstract protected function clear();
21
+	/**
22
+	 * Contains the tag name/type
23
+	 *
24
+	 * @var \PHPHtmlParser\Dom\Tag
25
+	 */
26
+	protected $tag;
27
+
28
+	/**
29
+	 * Contains a list of attributes on this tag.
30
+	 *
31
+	 * @var array
32
+	 */
33
+	protected $attr = [];
34
+
35
+	/**
36
+	 * Contains the parent Node.
37
+	 *
38
+	 * @var InnerNode
39
+	 */
40
+	protected $parent = null;
41
+
42
+	/**
43
+	 * The unique id of the class. Given by PHP.
44
+	 *
45
+	 * @var string
46
+	 */
47
+	protected $id;
48
+
49
+	/**
50
+	 * The encoding class used to encode strings.
51
+	 *
52
+	 * @var mixed
53
+	 */
54
+	protected $encode;
55
+
56
+	/**
57
+	 * Creates a unique spl hash for this node.
58
+	 */
59
+	public function __construct()
60
+	{
61
+		$this->id = spl_object_hash($this);
62
+	}
63
+
64
+	/**
65
+	 * Magic get method for attributes and certain methods.
66
+	 *
67
+	 * @param string $key
68
+	 * @return mixed
69
+	 */
70
+	public function __get($key)
71
+	{
72
+		// check attribute first
73
+		if ( ! is_null($this->getAttribute($key))) {
74
+			return $this->getAttribute($key);
75
+		}
76
+		switch (strtolower($key)) {
77
+			case 'outerhtml':
78
+				return $this->outerHtml();
79
+			case 'innerhtml':
80
+				return $this->innerHtml();
81
+			case 'text':
82
+				return $this->text();
83
+			case 'tag':
84
+				return $this->getTag();
85
+			case 'parent': $this->getParent();
86
+		}
87
+
88
+		return null;
89
+	}
90
+
91
+	/**
92
+	 * Attempts to clear out any object references.
93
+	 */
94
+	public function __destruct()
95
+	{
96
+		$this->tag      = null;
97
+		$this->attr     = [];
98
+		$this->parent   = null;
99
+		$this->children = [];
100
+	}
101
+
102
+	/**
103
+	 * Simply calls the outer text method.
104
+	 *
105
+	 * @return string
106
+	 */
107
+	public function __toString()
108
+	{
109
+		return $this->outerHtml();
110
+	}
111
+
112
+	/**
113
+	 * Returns the id of this object.
114
+	 */
115
+	public function id()
116
+	{
117
+		return $this->id;
118
+	}
119
+
120
+	/**
121
+	 * Returns the parent of node.
122
+	 *
123
+	 * @return AbstractNode
124
+	 */
125
+	public function getParent()
126
+	{
127
+		return $this->parent;
128
+	}
129
+
130
+	/**
131
+	 * Sets the parent node.
132
+	 *
133
+	 * @param InnerNode $parent
134
+	 * @return $this
135
+	 * @throws CircularException
136
+	 */
137
+	public function setParent(InnerNode $parent)
138
+	{
139
+		// remove from old parent
140
+		if ( ! is_null($this->parent)) {
141
+			if ($this->parent->id() == $parent->id()) {
142
+				// already the parent
143
+				return $this;
144
+			}
145
+
146
+			$this->parent->removeChild($this->id);
147
+		}
148
+
149
+		$this->parent = $parent;
150
+
151
+		// assign child to parent
152
+		$this->parent->addChild($this);
153
+
154
+		//clear any cache
155
+		$this->clear();
156
+
157
+		return $this;
158
+	}
159
+
160
+	/**
161
+	 * Removes this node and all its children from the
162
+	 * DOM tree.
163
+	 *
164
+	 * @return void
165
+	 */
166
+	public function delete()
167
+	{
168
+		if ( ! is_null($this->parent)) {
169
+			$this->parent->removeChild($this->id);
170
+		}
171
+
172
+		$this->parent = null;
173
+	}
174
+
175
+	/**
176
+	 * Sets the encoding class to this node.
177
+	 *
178
+	 * @param Encode $encode
179
+	 * @return void
180
+	 */
181
+	public function propagateEncoding(Encode $encode)
182
+	{
183
+		$this->encode = $encode;
184
+		$this->tag->setEncoding($encode);
185
+	}
186
+
187
+	/**
188
+	 * Checks if the given node id is an ancestor of
189
+	 * the current node.
190
+	 *
191
+	 * @param int $id
192
+	 * @return bool
193
+	 */
194
+	public function isAncestor($id)
195
+	{
196
+		if ( ! is_null($this->getAncestor($id))) {
197
+			return true;
198
+		}
199
+
200
+		return false;
201
+	}
202
+
203
+	/**
204
+	 * Attempts to get an ancestor node by the given id.
205
+	 *
206
+	 * @param int $id
207
+	 * @return null|AbstractNode
208
+	 */
209
+	public function getAncestor($id)
210
+	{
211
+		if ( ! is_null($this->parent)) {
212
+			if ($this->parent->id() == $id) {
213
+				return $this->parent;
214
+			}
215
+
216
+			return $this->parent->getAncestor($id);
217
+		}
218
+
219
+		return null;
220
+	}
221
+
222
+	/**
223
+	 * Attempts to get the next sibling.
224
+	 *
225
+	 * @return AbstractNode
226
+	 * @throws ParentNotFoundException
227
+	 */
228
+	public function nextSibling()
229
+	{
230
+		if (is_null($this->parent)) {
231
+			throw new ParentNotFoundException('Parent is not set for this node.');
232
+		}
233
+
234
+		return $this->parent->nextChild($this->id);
235
+	}
236
+
237
+	/**
238
+	 * Attempts to get the previous sibling
239
+	 *
240
+	 * @return AbstractNode
241
+	 * @throws ParentNotFoundException
242
+	 */
243
+	public function previousSibling()
244
+	{
245
+		if (is_null($this->parent)) {
246
+			throw new ParentNotFoundException('Parent is not set for this node.');
247
+		}
248
+
249
+		return $this->parent->previousChild($this->id);
250
+	}
251
+
252
+	/**
253
+	 * Gets the tag object of this node.
254
+	 *
255
+	 * @return Tag
256
+	 */
257
+	public function getTag()
258
+	{
259
+		return $this->tag;
260
+	}
261
+
262
+	/**
263
+	 * A wrapper method that simply calls the getAttribute method
264
+	 * on the tag of this node.
265
+	 *
266
+	 * @return array
267
+	 */
268
+	public function getAttributes()
269
+	{
270
+		$attributes = $this->tag->getAttributes();
271
+		foreach ($attributes as $name => $info) {
272
+			$attributes[$name] = $info['value'];
273
+		}
274
+
275
+		return $attributes;
276
+	}
277
+
278
+	/**
279
+	 * A wrapper method that simply calls the getAttribute method
280
+	 * on the tag of this node.
281
+	 *
282
+	 * @param string $key
283
+	 * @return mixed
284
+	 */
285
+	public function getAttribute($key)
286
+	{
287
+		$attribute = $this->tag->getAttribute($key);
288
+		if ( ! is_null($attribute)) {
289
+			$attribute = $attribute['value'];
290
+		}
291
+
292
+		return $attribute;
293
+	}
294
+
295
+	/**
296
+	 * A wrapper method that simply calls the setAttribute method
297
+	 * on the tag of this node.
298
+	 *
299
+	 * @param string $key
300
+	 * @param string $value
301
+	 * @return $this
302
+	 */
303
+	public function setAttribute($key, $value)
304
+	{
305
+		$this->tag->setAttribute($key, $value);
306
+
307
+		return $this;
308
+	}
309
+
310
+	/**
311
+	 * A wrapper method that simply calls the removeAttribute method
312
+	 * on the tag of this node.
313
+	 *
314
+	 * @param string $key
315
+	 * @return void
316
+	 */
317
+	public function removeAttribute($key)
318
+	{
319
+		$this->tag->removeAttribute($key);
320
+	}
321
+
322
+	/**
323
+	 * A wrapper method that simply calls the removeAllAttributes
324
+	 * method on the tag of this node.
325
+	 *
326
+	 * @return void
327
+	 */
328
+	public function removeAllAttributes()
329
+	{
330
+		$this->tag->removeAllAttributes();
331
+	}
332
+
333
+	/**
334
+	 * Function to locate a specific ancestor tag in the path to the root.
335
+	 *
336
+	 * @param  string $tag
337
+	 * @return AbstractNode
338
+	 * @throws ParentNotFoundException
339
+	 */
340
+	public function ancestorByTag($tag)
341
+	{
342
+		// Start by including ourselves in the comparison.
343
+		$node = $this;
344
+
345
+		while ( ! is_null($node)) {
346
+			if ($node->tag->name() == $tag) {
347
+				return $node;
348
+			}
349
+
350
+			$node = $node->getParent();
351
+		}
352
+
353
+		throw new ParentNotFoundException('Could not find an ancestor with "'.$tag.'" tag');
354
+	}
355
+
356
+	/**
357
+	 * Find elements by css selector
358
+	 *
359
+	 * @param string $selector
360
+	 * @param int $nth
361
+	 * @return array|AbstractNode
362
+	 */
363
+	public function find($selector, $nth = null)
364
+	{
365
+		$selector = new Selector($selector);
366
+		$nodes    = $selector->find($this);
367
+
368
+		if ( ! is_null($nth)) {
369
+			// return nth-element or array
370
+			if (isset($nodes[$nth])) {
371
+				return $nodes[$nth];
372
+			}
373
+
374
+			return null;
375
+		}
376
+
377
+		return $nodes;
378
+	}
379
+
380
+	/**
381
+	 * Function to try a few tricks to determine the displayed size of an img on the page.
382
+	 * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types.
383
+	 *
384
+	 * Future enhancement:
385
+	 * Look in the tag to see if there is a class or id specified that has a height or width attribute to it.
386
+	 *
387
+	 * Far future enhancement
388
+	 * Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width
389
+	 * Note that in this case, the class or id will have the img sub-selector for it to apply to the image.
390
+	 *
391
+	 * ridiculously far future development
392
+	 * If the class or id is specified in a SEPARATE css file that's not on the page, go get it and do what we were just doing for the ones on the page.
393
+	 *
394
+	 * @author John Schlick
395
+	 * @return array an array containing the 'height' and 'width' of the image on the page or -1 if we can't figure it out.
396
+	 */
397
+	public function get_display_size()
398
+	{
399
+		$width  = -1;
400
+		$height = -1;
401
+
402
+		if ($this->tag->name() != 'img') {
403
+			return false;
404
+		}
405
+
406
+		// See if there is a height or width attribute in the tag itself.
407
+		if ( ! is_null($this->tag->getAttribute('width'))) {
408
+			$width = $this->tag->getAttribute('width');
409
+		}
410
+
411
+		if ( ! is_null($this->tag->getAttribute('height'))) {
412
+			$height = $this->tag->getAttribute('height');
413
+		}
414
+
415
+		// Now look for an inline style.
416
+		if ( ! is_null($this->tag->getAttribute('style'))) {
417
+			// Thanks to user 'gnarf' from stackoverflow for this regular expression.
418
+			$attributes = [];
419
+			preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->tag->getAttribute('style'), $matches,
420
+				PREG_SET_ORDER);
421
+			foreach ($matches as $match) {
422
+				$attributes[$match[1]] = $match[2];
423
+			}
424
+
425
+			$width = $this->getLength($attributes, $width, 'width');
426
+			$height = $this->getLength($attributes, $width, 'height');
427
+		}
428
+
429
+		$result = [
430
+			'height' => $height,
431
+			'width'  => $width,
432
+		];
433
+
434
+		return $result;
435
+	}
436
+
437
+	/**
438
+	 * If there is a length in the style attributes use it.
439
+	 *
440
+	 * @param array $attributes
441
+	 * @param int $length
442
+	 * @param string $key
443
+	 * @return int
444
+	 */
445
+	protected function getLength(array $attributes, $length, $key)
446
+	{
447
+		if (isset($attributes[$key]) && $length == -1) {
448
+			// check that the last two characters are px (pixels)
449
+			if (strtolower(substr($attributes[$key], -2)) == 'px') {
450
+				$proposed_length = substr($attributes[$key], 0, -2);
451
+				// Now make sure that it's an integer and not something stupid.
452
+				if (filter_var($proposed_length, FILTER_VALIDATE_INT)) {
453
+					$length = $proposed_length;
454
+				}
455
+			}
456
+		}
457
+
458
+		return $length;
459
+	}
460
+
461
+	/**
462
+	 * Gets the inner html of this node.
463
+	 *
464
+	 * @return string
465
+	 */
466
+	abstract public function innerHtml();
467
+
468
+	/**
469
+	 * Gets the html of this node, including it's own
470
+	 * tag.
471
+	 *
472
+	 * @return string
473
+	 */
474
+	abstract public function outerHtml();
475
+
476
+	/**
477
+	 * Gets the text of this node (if there is any text).
478
+	 *
479
+	 * @return string
480
+	 */
481
+	abstract public function text();
482
+
483
+	/**
484
+	 * Call this when something in the node tree has changed. Like a child has been added
485
+	 * or a parent has been changed.
486
+	 *
487
+	 * @return void
488
+	 */
489
+	abstract protected function clear();
490 490
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/Dom/InnerNode.php 1 patch
Indentation   +301 added lines, -301 removed lines patch added patch discarded remove patch
@@ -13,305 +13,305 @@
 block discarded – undo
13 13
 abstract class InnerNode extends ArrayNode
14 14
 {
15 15
 
16
-    /**
17
-     * An array of all the children.
18
-     *
19
-     * @var array
20
-     */
21
-    protected $children = [];
22
-
23
-    /**
24
-     * Sets the encoding class to this node and propagates it
25
-     * to all its children.
26
-     *
27
-     * @param Encode $encode
28
-     * @return void
29
-     */
30
-    public function propagateEncoding(Encode $encode)
31
-    {
32
-        $this->encode = $encode;
33
-        $this->tag->setEncoding($encode);
34
-        // check children
35
-        foreach ($this->children as $id => $child) {
36
-            /** @var AbstractNode $node */
37
-            $node = $child['node'];
38
-            $node->propagateEncoding($encode);
39
-        }
40
-    }
41
-
42
-    /**
43
-     * Checks if this node has children.
44
-     *
45
-     * @return bool
46
-     */
47
-    public function hasChildren()
48
-    {
49
-        return ! empty($this->children);
50
-    }
51
-
52
-    /**
53
-     * Returns the child by id.
54
-     *
55
-     * @param int $id
56
-     * @return AbstractNode
57
-     * @throws ChildNotFoundException
58
-     */
59
-    public function getChild($id)
60
-    {
61
-        if ( ! isset($this->children[$id])) {
62
-            throw new ChildNotFoundException("Child '$id' not found in this node.");
63
-        }
64
-
65
-        return $this->children[$id]['node'];
66
-    }
67
-
68
-    /**
69
-     * Returns a new array of child nodes
70
-     *
71
-     * @return array
72
-     */
73
-    public function getChildren()
74
-    {
75
-        $nodes = [];
76
-        try {
77
-            $child = $this->firstChild();
78
-            do {
79
-                $nodes[] = $child;
80
-                $child   = $this->nextChild($child->id());
81
-            } while ( ! is_null($child));
82
-        } catch (ChildNotFoundException $e) {
83
-            // we are done looking for children
84
-        }
85
-
86
-        return $nodes;
87
-    }
88
-
89
-    /**
90
-     * Counts children
91
-     *
92
-     * @return int
93
-     */
94
-    public function countChildren()
95
-    {
96
-        return count($this->children);
97
-    }
98
-
99
-    /**
100
-     * Adds a child node to this node and returns the id of the child for this
101
-     * parent.
102
-     *
103
-     * @param AbstractNode $child
104
-     * @return bool
105
-     * @throws CircularException
106
-     */
107
-    public function addChild(AbstractNode $child)
108
-    {
109
-        $key = null;
110
-
111
-        // check integrity
112
-        if ($this->isAncestor($child->id())) {
113
-            throw new CircularException('Can not add child. It is my ancestor.');
114
-        }
115
-
116
-        // check if child is itself
117
-        if ($child->id() == $this->id) {
118
-            throw new CircularException('Can not set itself as a child.');
119
-        }
120
-
121
-        if ($this->hasChildren()) {
122
-            if (isset($this->children[$child->id()])) {
123
-                // we already have this child
124
-                return false;
125
-            }
126
-            $sibling                      = $this->lastChild();
127
-            $key                          = $sibling->id();
128
-            $this->children[$key]['next'] = $child->id();
129
-        }
130
-
131
-        // add the child
132
-        $this->children[$child->id()] = [
133
-            'node' => $child,
134
-            'next' => null,
135
-            'prev' => $key,
136
-        ];
137
-
138
-        // tell child I am the new parent
139
-        $child->setParent($this);
140
-
141
-        //clear any cache
142
-        $this->clear();
143
-
144
-        return true;
145
-    }
146
-
147
-    /**
148
-     * Removes the child by id.
149
-     *
150
-     * @param int $id
151
-     * @return $this
152
-     */
153
-    public function removeChild($id)
154
-    {
155
-        if ( ! isset($this->children[$id])) {
156
-            return $this;
157
-        }
158
-
159
-        // handle moving next and previous assignments.
160
-        $next = $this->children[$id]['next'];
161
-        $prev = $this->children[$id]['prev'];
162
-        if ( ! is_null($next)) {
163
-            $this->children[$next]['prev'] = $prev;
164
-        }
165
-        if ( ! is_null($prev)) {
166
-            $this->children[$prev]['next'] = $next;
167
-        }
168
-
169
-        // remove the child
170
-        unset($this->children[$id]);
171
-
172
-        //clear any cache
173
-        $this->clear();
174
-
175
-        return $this;
176
-    }
177
-
178
-    /**
179
-     * Attempts to get the next child.
180
-     *
181
-     * @param int $id
182
-     * @return AbstractNode
183
-     * @uses $this->getChild()
184
-     * @throws ChildNotFoundException
185
-     */
186
-    public function nextChild($id)
187
-    {
188
-        $child = $this->getChild($id);
189
-        $next  = $this->children[$child->id()]['next'];
190
-
191
-        return $this->getChild($next);
192
-    }
193
-
194
-    /**
195
-     * Attempts to get the previous child.
196
-     *
197
-     * @param int $id
198
-     * @return AbstractNode
199
-     * @uses $this->getChild()
200
-     * @throws ChildNotFoundException
201
-     */
202
-    public function previousChild($id)
203
-    {
204
-        $child = $this->getchild($id);
205
-        $next  = $this->children[$child->id()]['prev'];
206
-
207
-        return $this->getChild($next);
208
-    }
209
-
210
-    /**
211
-     * Checks if the given node id is a child of the
212
-     * current node.
213
-     *
214
-     * @param int $id
215
-     * @return bool
216
-     */
217
-    public function isChild($id)
218
-    {
219
-        foreach ($this->children as $childId => $child) {
220
-            if ($id == $childId) {
221
-                return true;
222
-            }
223
-        }
224
-
225
-        return false;
226
-    }
227
-
228
-    /**
229
-     * Removes the child with id $childId and replace it with the new child
230
-     * $newChild.
231
-     *
232
-     * @param int $childId
233
-     * @param AbstractNode $newChild
234
-     * @throws ChildNotFoundException
235
-     */
236
-    public function replaceChild($childId, AbstractNode $newChild)
237
-    {
238
-        $oldChild = $this->getChild($childId);
239
-        $keys = array_keys($this->children);
240
-        $index = array_search($childId, $keys, true);
241
-        $keys[$index] = $newChild->id();
242
-        $this->children = array_combine($keys, $this->children);
243
-        $this->children[$newChild->id()] = $newChild;
244
-        unset($oldChild);
245
-    }
246
-
247
-    /**
248
-     * Shortcut to return the first child.
249
-     *
250
-     * @return AbstractNode
251
-     * @uses $this->getChild()
252
-     */
253
-    public function firstChild()
254
-    {
255
-        reset($this->children);
256
-        $key = key($this->children);
257
-
258
-        return $this->getChild($key);
259
-    }
260
-
261
-    /**
262
-     * Attempts to get the last child.
263
-     *
264
-     * @return AbstractNode
265
-     */
266
-    public function lastChild()
267
-    {
268
-        end($this->children);
269
-        $key = key($this->children);
270
-
271
-        return $this->getChild($key);
272
-    }
273
-
274
-    /**
275
-     * Checks if the given node id is a descendant of the
276
-     * current node.
277
-     *
278
-     * @param int $id
279
-     * @return bool
280
-     */
281
-    public function isDescendant($id)
282
-    {
283
-        if ($this->isChild($id)) {
284
-            return true;
285
-        }
286
-
287
-        foreach ($this->children as $childId => $child) {
288
-            /** @var InnerNode $node */
289
-            $node = $child['node'];
290
-            if ($node instanceof InnerNode &&
291
-                $node->hasChildren() &&
292
-                $node->isDescendant($id)
293
-            ) {
294
-                return true;
295
-            }
296
-        }
297
-
298
-        return false;
299
-    }
300
-
301
-    /**
302
-     * Sets the parent node.
303
-     *
304
-     * @param InnerNode $parent
305
-     * @return $this
306
-     * @throws CircularException
307
-     */
308
-    public function setParent(InnerNode $parent)
309
-    {
310
-        // check integrity
311
-        if ($this->isDescendant($parent->id())) {
312
-            throw new CircularException('Can not add descendant "'.$parent->id().'" as my parent.');
313
-        }
314
-
315
-        return parent::setParent($parent);
316
-    }
16
+	/**
17
+	 * An array of all the children.
18
+	 *
19
+	 * @var array
20
+	 */
21
+	protected $children = [];
22
+
23
+	/**
24
+	 * Sets the encoding class to this node and propagates it
25
+	 * to all its children.
26
+	 *
27
+	 * @param Encode $encode
28
+	 * @return void
29
+	 */
30
+	public function propagateEncoding(Encode $encode)
31
+	{
32
+		$this->encode = $encode;
33
+		$this->tag->setEncoding($encode);
34
+		// check children
35
+		foreach ($this->children as $id => $child) {
36
+			/** @var AbstractNode $node */
37
+			$node = $child['node'];
38
+			$node->propagateEncoding($encode);
39
+		}
40
+	}
41
+
42
+	/**
43
+	 * Checks if this node has children.
44
+	 *
45
+	 * @return bool
46
+	 */
47
+	public function hasChildren()
48
+	{
49
+		return ! empty($this->children);
50
+	}
51
+
52
+	/**
53
+	 * Returns the child by id.
54
+	 *
55
+	 * @param int $id
56
+	 * @return AbstractNode
57
+	 * @throws ChildNotFoundException
58
+	 */
59
+	public function getChild($id)
60
+	{
61
+		if ( ! isset($this->children[$id])) {
62
+			throw new ChildNotFoundException("Child '$id' not found in this node.");
63
+		}
64
+
65
+		return $this->children[$id]['node'];
66
+	}
67
+
68
+	/**
69
+	 * Returns a new array of child nodes
70
+	 *
71
+	 * @return array
72
+	 */
73
+	public function getChildren()
74
+	{
75
+		$nodes = [];
76
+		try {
77
+			$child = $this->firstChild();
78
+			do {
79
+				$nodes[] = $child;
80
+				$child   = $this->nextChild($child->id());
81
+			} while ( ! is_null($child));
82
+		} catch (ChildNotFoundException $e) {
83
+			// we are done looking for children
84
+		}
85
+
86
+		return $nodes;
87
+	}
88
+
89
+	/**
90
+	 * Counts children
91
+	 *
92
+	 * @return int
93
+	 */
94
+	public function countChildren()
95
+	{
96
+		return count($this->children);
97
+	}
98
+
99
+	/**
100
+	 * Adds a child node to this node and returns the id of the child for this
101
+	 * parent.
102
+	 *
103
+	 * @param AbstractNode $child
104
+	 * @return bool
105
+	 * @throws CircularException
106
+	 */
107
+	public function addChild(AbstractNode $child)
108
+	{
109
+		$key = null;
110
+
111
+		// check integrity
112
+		if ($this->isAncestor($child->id())) {
113
+			throw new CircularException('Can not add child. It is my ancestor.');
114
+		}
115
+
116
+		// check if child is itself
117
+		if ($child->id() == $this->id) {
118
+			throw new CircularException('Can not set itself as a child.');
119
+		}
120
+
121
+		if ($this->hasChildren()) {
122
+			if (isset($this->children[$child->id()])) {
123
+				// we already have this child
124
+				return false;
125
+			}
126
+			$sibling                      = $this->lastChild();
127
+			$key                          = $sibling->id();
128
+			$this->children[$key]['next'] = $child->id();
129
+		}
130
+
131
+		// add the child
132
+		$this->children[$child->id()] = [
133
+			'node' => $child,
134
+			'next' => null,
135
+			'prev' => $key,
136
+		];
137
+
138
+		// tell child I am the new parent
139
+		$child->setParent($this);
140
+
141
+		//clear any cache
142
+		$this->clear();
143
+
144
+		return true;
145
+	}
146
+
147
+	/**
148
+	 * Removes the child by id.
149
+	 *
150
+	 * @param int $id
151
+	 * @return $this
152
+	 */
153
+	public function removeChild($id)
154
+	{
155
+		if ( ! isset($this->children[$id])) {
156
+			return $this;
157
+		}
158
+
159
+		// handle moving next and previous assignments.
160
+		$next = $this->children[$id]['next'];
161
+		$prev = $this->children[$id]['prev'];
162
+		if ( ! is_null($next)) {
163
+			$this->children[$next]['prev'] = $prev;
164
+		}
165
+		if ( ! is_null($prev)) {
166
+			$this->children[$prev]['next'] = $next;
167
+		}
168
+
169
+		// remove the child
170
+		unset($this->children[$id]);
171
+
172
+		//clear any cache
173
+		$this->clear();
174
+
175
+		return $this;
176
+	}
177
+
178
+	/**
179
+	 * Attempts to get the next child.
180
+	 *
181
+	 * @param int $id
182
+	 * @return AbstractNode
183
+	 * @uses $this->getChild()
184
+	 * @throws ChildNotFoundException
185
+	 */
186
+	public function nextChild($id)
187
+	{
188
+		$child = $this->getChild($id);
189
+		$next  = $this->children[$child->id()]['next'];
190
+
191
+		return $this->getChild($next);
192
+	}
193
+
194
+	/**
195
+	 * Attempts to get the previous child.
196
+	 *
197
+	 * @param int $id
198
+	 * @return AbstractNode
199
+	 * @uses $this->getChild()
200
+	 * @throws ChildNotFoundException
201
+	 */
202
+	public function previousChild($id)
203
+	{
204
+		$child = $this->getchild($id);
205
+		$next  = $this->children[$child->id()]['prev'];
206
+
207
+		return $this->getChild($next);
208
+	}
209
+
210
+	/**
211
+	 * Checks if the given node id is a child of the
212
+	 * current node.
213
+	 *
214
+	 * @param int $id
215
+	 * @return bool
216
+	 */
217
+	public function isChild($id)
218
+	{
219
+		foreach ($this->children as $childId => $child) {
220
+			if ($id == $childId) {
221
+				return true;
222
+			}
223
+		}
224
+
225
+		return false;
226
+	}
227
+
228
+	/**
229
+	 * Removes the child with id $childId and replace it with the new child
230
+	 * $newChild.
231
+	 *
232
+	 * @param int $childId
233
+	 * @param AbstractNode $newChild
234
+	 * @throws ChildNotFoundException
235
+	 */
236
+	public function replaceChild($childId, AbstractNode $newChild)
237
+	{
238
+		$oldChild = $this->getChild($childId);
239
+		$keys = array_keys($this->children);
240
+		$index = array_search($childId, $keys, true);
241
+		$keys[$index] = $newChild->id();
242
+		$this->children = array_combine($keys, $this->children);
243
+		$this->children[$newChild->id()] = $newChild;
244
+		unset($oldChild);
245
+	}
246
+
247
+	/**
248
+	 * Shortcut to return the first child.
249
+	 *
250
+	 * @return AbstractNode
251
+	 * @uses $this->getChild()
252
+	 */
253
+	public function firstChild()
254
+	{
255
+		reset($this->children);
256
+		$key = key($this->children);
257
+
258
+		return $this->getChild($key);
259
+	}
260
+
261
+	/**
262
+	 * Attempts to get the last child.
263
+	 *
264
+	 * @return AbstractNode
265
+	 */
266
+	public function lastChild()
267
+	{
268
+		end($this->children);
269
+		$key = key($this->children);
270
+
271
+		return $this->getChild($key);
272
+	}
273
+
274
+	/**
275
+	 * Checks if the given node id is a descendant of the
276
+	 * current node.
277
+	 *
278
+	 * @param int $id
279
+	 * @return bool
280
+	 */
281
+	public function isDescendant($id)
282
+	{
283
+		if ($this->isChild($id)) {
284
+			return true;
285
+		}
286
+
287
+		foreach ($this->children as $childId => $child) {
288
+			/** @var InnerNode $node */
289
+			$node = $child['node'];
290
+			if ($node instanceof InnerNode &&
291
+				$node->hasChildren() &&
292
+				$node->isDescendant($id)
293
+			) {
294
+				return true;
295
+			}
296
+		}
297
+
298
+		return false;
299
+	}
300
+
301
+	/**
302
+	 * Sets the parent node.
303
+	 *
304
+	 * @param InnerNode $parent
305
+	 * @return $this
306
+	 * @throws CircularException
307
+	 */
308
+	public function setParent(InnerNode $parent)
309
+	{
310
+		// check integrity
311
+		if ($this->isDescendant($parent->id())) {
312
+			throw new CircularException('Can not add descendant "'.$parent->id().'" as my parent.');
313
+		}
314
+
315
+		return parent::setParent($parent);
316
+	}
317 317
 }
318 318
\ No newline at end of file
Please login to merge, or discard this patch.