Completed
Push — master ( a4eae3...80f547 )
by Gilles
02:42
created
src/PHPHtmlParser/Dom/HtmlNode.php 1 patch
Indentation   +185 added lines, -185 removed lines patch added patch discarded remove patch
@@ -12,189 +12,189 @@
 block discarded – undo
12 12
 class HtmlNode extends ArrayNode
13 13
 {
14 14
 
15
-    /**
16
-     * Remembers what the innerHtml was if it was scanned previously.
17
-     */
18
-    protected $innerHtml = null;
19
-
20
-    /**
21
-     * Remembers what the outerHtml was if it was scanned previously.
22
-     *
23
-     * @var string
24
-     */
25
-    protected $outerHtml = null;
26
-
27
-    /**
28
-     * Remembers what the text was if it was scanned previously.
29
-     *
30
-     * @var string
31
-     */
32
-    protected $text = null;
33
-
34
-    /**
35
-     * Remembers what the text was when we looked into all our
36
-     * children nodes.
37
-     *
38
-     * @var string
39
-     */
40
-    protected $textWithChildren = null;
41
-
42
-    /**
43
-     * Sets up the tag of this node.
44
-     *
45
-     * @param $tag
46
-     */
47
-    public function __construct($tag)
48
-    {
49
-        if ( ! $tag instanceof Tag) {
50
-            $tag = new Tag($tag);
51
-        }
52
-        $this->tag = $tag;
53
-        parent::__construct();
54
-    }
55
-
56
-    /**
57
-     * Gets the inner html of this node.
58
-     *
59
-     * @return string
60
-     * @throws UnknownChildTypeException
61
-     */
62
-    public function innerHtml()
63
-    {
64
-        if ( ! $this->hasChildren()) {
65
-            // no children
66
-            return '';
67
-        }
68
-
69
-        if ( ! is_null($this->innerHtml)) {
70
-            // we already know the result.
71
-            return $this->innerHtml;
72
-        }
73
-
74
-        $child  = $this->firstChild();
75
-        $string = '';
76
-
77
-        // continue to loop until we are out of children
78
-        while ( ! is_null($child)) {
79
-            if ($child instanceof TextNode) {
80
-                $string .= $child->text();
81
-            } elseif ($child instanceof HtmlNode) {
82
-                $string .= $child->outerHtml();
83
-            } else {
84
-                throw new UnknownChildTypeException('Unknown child type "'.get_class($child).'" found in node');
85
-            }
86
-
87
-            try {
88
-                $child = $this->nextChild($child->id());
89
-            } catch (ChildNotFoundException $e) {
90
-                // no more children
91
-                $child = null;
92
-            }
93
-        }
94
-
95
-        // remember the results
96
-        $this->innerHtml = $string;
97
-
98
-        return $string;
99
-    }
100
-
101
-    /**
102
-     * Gets the html of this node, including it's own
103
-     * tag.
104
-     *
105
-     * @return string
106
-     */
107
-    public function outerHtml()
108
-    {
109
-        // special handling for root
110
-        if ($this->tag->name() == 'root') {
111
-            return $this->innerHtml();
112
-        }
113
-
114
-        if ( ! is_null($this->outerHtml)) {
115
-            // we already know the results.
116
-            return $this->outerHtml;
117
-        }
118
-
119
-        $return = $this->tag->makeOpeningTag();
120
-        if ($this->tag->isSelfClosing()) {
121
-            // ignore any children... there should not be any though
122
-            return $return;
123
-        }
124
-
125
-        // get the inner html
126
-        $return .= $this->innerHtml();
127
-
128
-        // add closing tag
129
-        $return .= $this->tag->makeClosingTag();
130
-
131
-        // remember the results
132
-        $this->outerHtml = $return;
133
-
134
-        return $return;
135
-    }
136
-
137
-    /**
138
-     * Gets the text of this node (if there is any text). Or get all the text
139
-     * in this node, including children.
140
-     *
141
-     * @param bool $lookInChildren
142
-     * @return string
143
-     */
144
-    public function text($lookInChildren = false)
145
-    {
146
-        if ($lookInChildren) {
147
-            if ( ! is_null($this->textWithChildren)) {
148
-                // we already know the results.
149
-                return $this->textWithChildren;
150
-            }
151
-        } elseif ( ! is_null($this->text)) {
152
-            // we already know the results.
153
-            return $this->text;
154
-        }
155
-
156
-        // find out if this node has any text children
157
-        $text = '';
158
-        foreach ($this->children as $child) {
159
-            /** @var AbstractNode $node */
160
-            $node = $child['node'];
161
-            if ($node instanceof TextNode) {
162
-                $text .= $child['node']->text;
163
-            } elseif ($lookInChildren &&
164
-                $node instanceof HtmlNode
165
-            ) {
166
-                $text .= $node->text($lookInChildren);
167
-            }
168
-        }
169
-
170
-        // remember our result
171
-        if ($lookInChildren) {
172
-            $this->textWithChildren = $text;
173
-        } else {
174
-            $this->text = $text;
175
-        }
176
-
177
-        return $text;
178
-    }
179
-
180
-    /**
181
-     * Call this when something in the node tree has changed. Like a child has been added
182
-     * or a parent has been changed.
183
-     */
184
-    protected function clear()
185
-    {
186
-        $this->innerHtml = null;
187
-        $this->outerHtml = null;
188
-        $this->text      = null;
189
-    }
190
-
191
-    /**
192
-     * Returns all children of this html node.
193
-     *
194
-     * @return array
195
-     */
196
-    protected function getIteratorArray()
197
-    {
198
-        return $this->getChildren();
199
-    }
15
+	/**
16
+	 * Remembers what the innerHtml was if it was scanned previously.
17
+	 */
18
+	protected $innerHtml = null;
19
+
20
+	/**
21
+	 * Remembers what the outerHtml was if it was scanned previously.
22
+	 *
23
+	 * @var string
24
+	 */
25
+	protected $outerHtml = null;
26
+
27
+	/**
28
+	 * Remembers what the text was if it was scanned previously.
29
+	 *
30
+	 * @var string
31
+	 */
32
+	protected $text = null;
33
+
34
+	/**
35
+	 * Remembers what the text was when we looked into all our
36
+	 * children nodes.
37
+	 *
38
+	 * @var string
39
+	 */
40
+	protected $textWithChildren = null;
41
+
42
+	/**
43
+	 * Sets up the tag of this node.
44
+	 *
45
+	 * @param $tag
46
+	 */
47
+	public function __construct($tag)
48
+	{
49
+		if ( ! $tag instanceof Tag) {
50
+			$tag = new Tag($tag);
51
+		}
52
+		$this->tag = $tag;
53
+		parent::__construct();
54
+	}
55
+
56
+	/**
57
+	 * Gets the inner html of this node.
58
+	 *
59
+	 * @return string
60
+	 * @throws UnknownChildTypeException
61
+	 */
62
+	public function innerHtml()
63
+	{
64
+		if ( ! $this->hasChildren()) {
65
+			// no children
66
+			return '';
67
+		}
68
+
69
+		if ( ! is_null($this->innerHtml)) {
70
+			// we already know the result.
71
+			return $this->innerHtml;
72
+		}
73
+
74
+		$child  = $this->firstChild();
75
+		$string = '';
76
+
77
+		// continue to loop until we are out of children
78
+		while ( ! is_null($child)) {
79
+			if ($child instanceof TextNode) {
80
+				$string .= $child->text();
81
+			} elseif ($child instanceof HtmlNode) {
82
+				$string .= $child->outerHtml();
83
+			} else {
84
+				throw new UnknownChildTypeException('Unknown child type "'.get_class($child).'" found in node');
85
+			}
86
+
87
+			try {
88
+				$child = $this->nextChild($child->id());
89
+			} catch (ChildNotFoundException $e) {
90
+				// no more children
91
+				$child = null;
92
+			}
93
+		}
94
+
95
+		// remember the results
96
+		$this->innerHtml = $string;
97
+
98
+		return $string;
99
+	}
100
+
101
+	/**
102
+	 * Gets the html of this node, including it's own
103
+	 * tag.
104
+	 *
105
+	 * @return string
106
+	 */
107
+	public function outerHtml()
108
+	{
109
+		// special handling for root
110
+		if ($this->tag->name() == 'root') {
111
+			return $this->innerHtml();
112
+		}
113
+
114
+		if ( ! is_null($this->outerHtml)) {
115
+			// we already know the results.
116
+			return $this->outerHtml;
117
+		}
118
+
119
+		$return = $this->tag->makeOpeningTag();
120
+		if ($this->tag->isSelfClosing()) {
121
+			// ignore any children... there should not be any though
122
+			return $return;
123
+		}
124
+
125
+		// get the inner html
126
+		$return .= $this->innerHtml();
127
+
128
+		// add closing tag
129
+		$return .= $this->tag->makeClosingTag();
130
+
131
+		// remember the results
132
+		$this->outerHtml = $return;
133
+
134
+		return $return;
135
+	}
136
+
137
+	/**
138
+	 * Gets the text of this node (if there is any text). Or get all the text
139
+	 * in this node, including children.
140
+	 *
141
+	 * @param bool $lookInChildren
142
+	 * @return string
143
+	 */
144
+	public function text($lookInChildren = false)
145
+	{
146
+		if ($lookInChildren) {
147
+			if ( ! is_null($this->textWithChildren)) {
148
+				// we already know the results.
149
+				return $this->textWithChildren;
150
+			}
151
+		} elseif ( ! is_null($this->text)) {
152
+			// we already know the results.
153
+			return $this->text;
154
+		}
155
+
156
+		// find out if this node has any text children
157
+		$text = '';
158
+		foreach ($this->children as $child) {
159
+			/** @var AbstractNode $node */
160
+			$node = $child['node'];
161
+			if ($node instanceof TextNode) {
162
+				$text .= $child['node']->text;
163
+			} elseif ($lookInChildren &&
164
+				$node instanceof HtmlNode
165
+			) {
166
+				$text .= $node->text($lookInChildren);
167
+			}
168
+		}
169
+
170
+		// remember our result
171
+		if ($lookInChildren) {
172
+			$this->textWithChildren = $text;
173
+		} else {
174
+			$this->text = $text;
175
+		}
176
+
177
+		return $text;
178
+	}
179
+
180
+	/**
181
+	 * Call this when something in the node tree has changed. Like a child has been added
182
+	 * or a parent has been changed.
183
+	 */
184
+	protected function clear()
185
+	{
186
+		$this->innerHtml = null;
187
+		$this->outerHtml = null;
188
+		$this->text      = null;
189
+	}
190
+
191
+	/**
192
+	 * Returns all children of this html node.
193
+	 *
194
+	 * @return array
195
+	 */
196
+	protected function getIteratorArray()
197
+	{
198
+		return $this->getChildren();
199
+	}
200 200
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/Dom/AbstractNode.php 1 patch
Indentation   +683 added lines, -683 removed lines patch added patch discarded remove patch
@@ -17,687 +17,687 @@
 block discarded – undo
17 17
 abstract class AbstractNode
18 18
 {
19 19
 
20
-    /**
21
-     * Contains the tag name/type
22
-     *
23
-     * @var \PHPHtmlParser\Dom\Tag
24
-     */
25
-    protected $tag;
26
-
27
-    /**
28
-     * Contains a list of attributes on this tag.
29
-     *
30
-     * @var array
31
-     */
32
-    protected $attr = [];
33
-
34
-    /**
35
-     * An array of all the children.
36
-     *
37
-     * @var array
38
-     */
39
-    protected $children = [];
40
-
41
-    /**
42
-     * Contains the parent Node.
43
-     *
44
-     * @var AbstractNode
45
-     */
46
-    protected $parent = null;
47
-
48
-    /**
49
-     * The unique id of the class. Given by PHP.
50
-     *
51
-     * @var string
52
-     */
53
-    protected $id;
54
-
55
-    /**
56
-     * The encoding class used to encode strings.
57
-     *
58
-     * @var mixed
59
-     */
60
-    protected $encode;
61
-
62
-    /**
63
-     * Creates a unique spl hash for this node.
64
-     */
65
-    public function __construct()
66
-    {
67
-        $this->id = spl_object_hash($this);
68
-    }
69
-
70
-    /**
71
-     * Magic get method for attributes and certain methods.
72
-     *
73
-     * @param string $key
74
-     * @return mixed
75
-     */
76
-    public function __get($key)
77
-    {
78
-        // check attribute first
79
-        if ( ! is_null($this->getAttribute($key))) {
80
-            return $this->getAttribute($key);
81
-        }
82
-        switch (strtolower($key)) {
83
-            case 'outerhtml':
84
-                return $this->outerHtml();
85
-            case 'innerhtml':
86
-                return $this->innerHtml();
87
-            case 'text':
88
-                return $this->text();
89
-        }
90
-
91
-        return null;
92
-    }
93
-
94
-    /**
95
-     * Attempts to clear out any object references.
96
-     */
97
-    public function __destruct()
98
-    {
99
-        $this->tag      = null;
100
-        $this->attr     = [];
101
-        $this->parent   = null;
102
-        $this->children = [];
103
-    }
104
-
105
-    /**
106
-     * Simply calls the outer text method.
107
-     *
108
-     * @return string
109
-     */
110
-    public function __toString()
111
-    {
112
-        return $this->outerHtml();
113
-    }
114
-
115
-    /**
116
-     * Returns the id of this object.
117
-     */
118
-    public function id()
119
-    {
120
-        return $this->id;
121
-    }
122
-
123
-    /**
124
-     * Returns the parent of node.
125
-     *
126
-     * @return AbstractNode
127
-     */
128
-    public function getParent()
129
-    {
130
-        return $this->parent;
131
-    }
132
-
133
-    /**
134
-     * Sets the parent node.
135
-     *
136
-     * @param AbstractNode $parent
137
-     * @return $this
138
-     * @throws CircularException
139
-     */
140
-    public function setParent(AbstractNode $parent)
141
-    {
142
-        // check integrity
143
-        if ($this->isDescendant($parent->id())) {
144
-            throw new CircularException('Can not add descendant "'.$parent->id().'" as my parent.');
145
-        }
146
-
147
-        // remove from old parent
148
-        if ( ! is_null($this->parent)) {
149
-            if ($this->parent->id() == $parent->id()) {
150
-                // already the parent
151
-                return $this;
152
-            }
153
-
154
-            $this->parent->removeChild($this->id);
155
-        }
156
-
157
-        $this->parent = $parent;
158
-
159
-        // assign child to parent
160
-        $this->parent->addChild($this);
161
-
162
-        //clear any cache
163
-        $this->clear();
164
-
165
-        return $this;
166
-    }
167
-
168
-    /**
169
-     * Sets the encoding class to this node and propagates it
170
-     * to all its children.
171
-     *
172
-     * @param Encode $encode
173
-     */
174
-    public function propagateEncoding(Encode $encode)
175
-    {
176
-        $this->encode = $encode;
177
-        $this->tag->setEncoding($encode);
178
-        // check children
179
-        foreach ($this->children as $id => $child) {
180
-            /** @var AbstractNode $node */
181
-            $node = $child['node'];
182
-            $node->propagateEncoding($encode);
183
-        }
184
-    }
185
-
186
-    /**
187
-     * Checks if this node has children.
188
-     *
189
-     * @return bool
190
-     */
191
-    public function hasChildren()
192
-    {
193
-        return ! empty($this->children);
194
-    }
195
-
196
-    /**
197
-     * Returns the child by id.
198
-     *
199
-     * @param int $id
200
-     * @return AbstractNode
201
-     * @throws ChildNotFoundException
202
-     */
203
-    public function getChild($id)
204
-    {
205
-        if ( ! isset($this->children[$id])) {
206
-            throw new ChildNotFoundException("Child '$id' not found in this node.");
207
-        }
208
-
209
-        return $this->children[$id]['node'];
210
-    }
211
-
212
-    /**
213
-     * Returns a new array of child nodes
214
-     *
215
-     * @return array
216
-     */
217
-    public function getChildren()
218
-    {
219
-        $nodes = [];
220
-        try {
221
-            $child = $this->firstChild();
222
-            do {
223
-                $nodes[] = $child;
224
-                $child   = $this->nextChild($child->id());
225
-            } while ( ! is_null($child));
226
-        } catch (ChildNotFoundException $e) {
227
-            // we are done looking for children
228
-        }
229
-
230
-        return $nodes;
231
-    }
232
-
233
-    /**
234
-     * Counts children
235
-     *
236
-     * @return int
237
-     */
238
-    public function countChildren()
239
-    {
240
-        return count($this->children);
241
-    }
242
-
243
-    /**
244
-     * Adds a child node to this node and returns the id of the child for this
245
-     * parent.
246
-     *
247
-     * @param AbstractNode $child
248
-     * @return bool
249
-     * @throws CircularException
250
-     */
251
-    public function addChild(AbstractNode $child)
252
-    {
253
-        $key = null;
254
-
255
-        // check integrity
256
-        if ($this->isAncestor($child->id())) {
257
-            throw new CircularException('Can not add child. It is my ancestor.');
258
-        }
259
-
260
-        // check if child is itself
261
-        if ($child->id() == $this->id) {
262
-            throw new CircularException('Can not set itself as a child.');
263
-        }
264
-
265
-        if ($this->hasChildren()) {
266
-            if (isset($this->children[$child->id()])) {
267
-                // we already have this child
268
-                return false;
269
-            }
270
-            $sibling                      = $this->lastChild();
271
-            $key                          = $sibling->id();
272
-            $this->children[$key]['next'] = $child->id();
273
-        }
274
-
275
-        // add the child
276
-        $this->children[$child->id()] = [
277
-            'node' => $child,
278
-            'next' => null,
279
-            'prev' => $key,
280
-        ];
281
-
282
-        // tell child I am the new parent
283
-        $child->setParent($this);
284
-
285
-        //clear any cache
286
-        $this->clear();
287
-
288
-        return true;
289
-    }
290
-
291
-    /**
292
-     * Removes the child by id.
293
-     *
294
-     * @param int $id
295
-     * @return $this
296
-     */
297
-    public function removeChild($id)
298
-    {
299
-        if ( ! isset($this->children[$id])) {
300
-            return $this;
301
-        }
302
-
303
-        // handle moving next and previous assignments.
304
-        $next = $this->children[$id]['next'];
305
-        $prev = $this->children[$id]['prev'];
306
-        if ( ! is_null($next)) {
307
-            $this->children[$next]['prev'] = $prev;
308
-        }
309
-        if ( ! is_null($prev)) {
310
-            $this->children[$prev]['next'] = $next;
311
-        }
312
-
313
-        // remove the child
314
-        unset($this->children[$id]);
315
-
316
-        //clear any cache
317
-        $this->clear();
318
-
319
-        return $this;
320
-    }
321
-
322
-    /**
323
-     * Attempts to get the next child.
324
-     *
325
-     * @param int $id
326
-     * @return AbstractNode
327
-     * @uses $this->getChild()
328
-     */
329
-    public function nextChild($id)
330
-    {
331
-        $child = $this->getChild($id);
332
-        $next  = $this->children[$child->id()]['next'];
333
-
334
-        return $this->getChild($next);
335
-    }
336
-
337
-    /**
338
-     * Attempts to get the previous child.
339
-     *
340
-     * @param int $id
341
-     * @return AbstractNode
342
-     * @uses $this->getChild()
343
-     */
344
-    public function previousChild($id)
345
-    {
346
-        $child = $this->getchild($id);
347
-        $next  = $this->children[$child->id()]['prev'];
348
-
349
-        return $this->getChild($next);
350
-    }
351
-
352
-    /**
353
-     * Checks if the given node id is a child of the
354
-     * current node.
355
-     *
356
-     * @param int $id
357
-     * @return bool
358
-     */
359
-    public function isChild($id)
360
-    {
361
-        foreach ($this->children as $childId => $child) {
362
-            if ($id == $childId) {
363
-                return true;
364
-            }
365
-        }
366
-
367
-        return false;
368
-    }
369
-
370
-    /**
371
-     * Checks if the given node id is a descendant of the
372
-     * current node.
373
-     *
374
-     * @param int $id
375
-     * @return bool
376
-     */
377
-    public function isDescendant($id)
378
-    {
379
-        if ($this->isChild($id)) {
380
-            return true;
381
-        }
382
-
383
-        foreach ($this->children as $childId => $child) {
384
-            /** @var AbstractNode $node */
385
-            $node = $child['node'];
386
-            if ($node->hasChildren() &&
387
-                $node->isDescendant($id)
388
-            ) {
389
-                return true;
390
-            }
391
-        }
392
-
393
-        return false;
394
-    }
395
-
396
-    /**
397
-     * Checks if the given node id is an ancestor of
398
-     * the current node.
399
-     *
400
-     * @param int $id
401
-     * @return bool
402
-     */
403
-    public function isAncestor($id)
404
-    {
405
-        if ( ! is_null($this->getAncestor($id))) {
406
-            return true;
407
-        }
408
-
409
-        return false;
410
-    }
411
-
412
-    /**
413
-     * Attempts to get an ancestor node by the given id.
414
-     *
415
-     * @param int $id
416
-     * @return null|AbstractNode
417
-     */
418
-    public function getAncestor($id)
419
-    {
420
-        if ( ! is_null($this->parent)) {
421
-            if ($this->parent->id() == $id) {
422
-                return $this->parent;
423
-            }
424
-
425
-            return $this->parent->getAncestor($id);
426
-        }
427
-
428
-        return null;
429
-    }
430
-
431
-    /**
432
-     * Shortcut to return the first child.
433
-     *
434
-     * @return AbstractNode
435
-     * @uses $this->getChild()
436
-     */
437
-    public function firstChild()
438
-    {
439
-        reset($this->children);
440
-        $key = key($this->children);
441
-
442
-        return $this->getChild($key);
443
-    }
444
-
445
-    /**
446
-     * Attempts to get the last child.
447
-     *
448
-     * @return AbstractNode
449
-     */
450
-    public function lastChild()
451
-    {
452
-        end($this->children);
453
-        $key = key($this->children);
454
-
455
-        return $this->getChild($key);
456
-    }
457
-
458
-    /**
459
-     * Attempts to get the next sibling.
460
-     *
461
-     * @return AbstractNode
462
-     * @throws ParentNotFoundException
463
-     */
464
-    public function nextSibling()
465
-    {
466
-        if (is_null($this->parent)) {
467
-            throw new ParentNotFoundException('Parent is not set for this node.');
468
-        }
469
-
470
-        return $this->parent->nextChild($this->id);
471
-    }
472
-
473
-    /**
474
-     * Attempts to get the previous sibling
475
-     *
476
-     * @return AbstractNode
477
-     * @throws ParentNotFoundException
478
-     */
479
-    public function previousSibling()
480
-    {
481
-        if (is_null($this->parent)) {
482
-            throw new ParentNotFoundException('Parent is not set for this node.');
483
-        }
484
-
485
-        return $this->parent->previousChild($this->id);
486
-    }
487
-
488
-    /**
489
-     * Gets the tag object of this node.
490
-     *
491
-     * @return Tag
492
-     */
493
-    public function getTag()
494
-    {
495
-        return $this->tag;
496
-    }
497
-
498
-    /**
499
-     * A wrapper method that simply calls the getAttribute method
500
-     * on the tag of this node.
501
-     *
502
-     * @return array
503
-     */
504
-    public function getAttributes()
505
-    {
506
-        $attributes = $this->tag->getAttributes();
507
-        foreach ($attributes as $name => $info) {
508
-            $attributes[$name] = $info['value'];
509
-        }
510
-
511
-        return $attributes;
512
-    }
513
-
514
-    /**
515
-     * A wrapper method that simply calls the getAttribute method
516
-     * on the tag of this node.
517
-     *
518
-     * @param string $key
519
-     * @return mixed
520
-     */
521
-    public function getAttribute($key)
522
-    {
523
-        $attribute = $this->tag->getAttribute($key);
524
-        if ( ! is_null($attribute)) {
525
-            $attribute = $attribute['value'];
526
-        }
527
-
528
-        return $attribute;
529
-    }
530
-
531
-    /**
532
-     * A wrapper method that simply calls the setAttribute method
533
-     * on the tag of this node.
534
-     *
535
-     * @param string $key
536
-     * @param string $value
537
-     * @return $this
538
-     */
539
-    public function setAttribute($key, $value)
540
-    {
541
-        $this->tag->setAttribute($key, $value);
542
-
543
-        return $this;
544
-    }
545
-
546
-    /**
547
-     * Function to locate a specific ancestor tag in the path to the root.
548
-     *
549
-     * @param  string $tag
550
-     * @return AbstractNode
551
-     * @throws ParentNotFoundException
552
-     */
553
-    public function ancestorByTag($tag)
554
-    {
555
-        // Start by including ourselves in the comparison.
556
-        $node = $this;
557
-
558
-        while ( ! is_null($node)) {
559
-            if ($node->tag->name() == $tag) {
560
-                return $node;
561
-            }
562
-
563
-            $node = $node->getParent();
564
-        }
565
-
566
-        throw new ParentNotFoundException('Could not find an ancestor with "'.$tag.'" tag');
567
-    }
568
-
569
-    /**
570
-     * Find elements by css selector
571
-     *
572
-     * @param string $selector
573
-     * @param int $nth
574
-     * @return array|AbstractNode
575
-     */
576
-    public function find($selector, $nth = null)
577
-    {
578
-        $selector = new Selector($selector);
579
-        $nodes    = $selector->find($this);
580
-
581
-        if ( ! is_null($nth)) {
582
-            // return nth-element or array
583
-            if (isset($nodes[$nth])) {
584
-                return $nodes[$nth];
585
-            }
586
-
587
-            return null;
588
-        }
589
-
590
-        return $nodes;
591
-    }
592
-
593
-    /**
594
-     * Function to try a few tricks to determine the displayed size of an img on the page.
595
-     * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types.
596
-     *
597
-     * Future enhancement:
598
-     * Look in the tag to see if there is a class or id specified that has a height or width attribute to it.
599
-     *
600
-     * Far future enhancement
601
-     * Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width
602
-     * Note that in this case, the class or id will have the img sub-selector for it to apply to the image.
603
-     *
604
-     * ridiculously far future development
605
-     * If the class or id is specified in a SEPARATE css file that's not on the page, go get it and do what we were just doing for the ones on the page.
606
-     *
607
-     * @author John Schlick
608
-     * @return array an array containing the 'height' and 'width' of the image on the page or -1 if we can't figure it out.
609
-     */
610
-    public function get_display_size()
611
-    {
612
-        $width  = -1;
613
-        $height = -1;
614
-
615
-        if ($this->tag->name() != 'img') {
616
-            return false;
617
-        }
618
-
619
-        // See if there is a height or width attribute in the tag itself.
620
-        if ( ! is_null($this->tag->getAttribute('width'))) {
621
-            $width = $this->tag->getAttribute('width');
622
-        }
623
-
624
-        if ( ! is_null($this->tag->getAttribute('height'))) {
625
-            $height = $this->tag->getAttribute('height');
626
-        }
627
-
628
-        // Now look for an inline style.
629
-        if ( ! is_null($this->tag->getAttribute('style'))) {
630
-            // Thanks to user 'gnarf' from stackoverflow for this regular expression.
631
-            $attributes = [];
632
-            preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->tag->getAttribute('style'), $matches,
633
-                PREG_SET_ORDER);
634
-            foreach ($matches as $match) {
635
-                $attributes[$match[1]] = $match[2];
636
-            }
637
-
638
-            $width = $this->getLength($attributes, $width, 'width');
639
-            $height = $this->getLength($attributes, $width, 'height');
640
-        }
641
-
642
-        $result = [
643
-            'height' => $height,
644
-            'width'  => $width,
645
-        ];
646
-
647
-        return $result;
648
-    }
649
-
650
-    /**
651
-     * If there is a length in the style attributes use it.
652
-     *
653
-     * @param array $attributes
654
-     * @param int $length
655
-     * @param string $key
656
-     * @return int
657
-     */
658
-    protected function getLength(array $attributes, $length, $key)
659
-    {
660
-        if (isset($attributes[$key]) && $length == -1) {
661
-            // check that the last two characters are px (pixels)
662
-            if (strtolower(substr($attributes[$key], -2)) == 'px') {
663
-                $proposed_length = substr($attributes[$key], 0, -2);
664
-                // Now make sure that it's an integer and not something stupid.
665
-                if (filter_var($proposed_length, FILTER_VALIDATE_INT)) {
666
-                    $length = $proposed_length;
667
-                }
668
-            }
669
-        }
670
-
671
-        return $length;
672
-    }
673
-
674
-    /**
675
-     * Gets the inner html of this node.
676
-     *
677
-     * @return string
678
-     */
679
-    abstract public function innerHtml();
680
-
681
-    /**
682
-     * Gets the html of this node, including it's own
683
-     * tag.
684
-     *
685
-     * @return string
686
-     */
687
-    abstract public function outerHtml();
688
-
689
-    /**
690
-     * Gets the text of this node (if there is any text).
691
-     *
692
-     * @return string
693
-     */
694
-    abstract public function text();
695
-
696
-    /**
697
-     * Call this when something in the node tree has changed. Like a child has been added
698
-     * or a parent has been changed.
699
-     *
700
-     * @return void
701
-     */
702
-    abstract protected function clear();
20
+	/**
21
+	 * Contains the tag name/type
22
+	 *
23
+	 * @var \PHPHtmlParser\Dom\Tag
24
+	 */
25
+	protected $tag;
26
+
27
+	/**
28
+	 * Contains a list of attributes on this tag.
29
+	 *
30
+	 * @var array
31
+	 */
32
+	protected $attr = [];
33
+
34
+	/**
35
+	 * An array of all the children.
36
+	 *
37
+	 * @var array
38
+	 */
39
+	protected $children = [];
40
+
41
+	/**
42
+	 * Contains the parent Node.
43
+	 *
44
+	 * @var AbstractNode
45
+	 */
46
+	protected $parent = null;
47
+
48
+	/**
49
+	 * The unique id of the class. Given by PHP.
50
+	 *
51
+	 * @var string
52
+	 */
53
+	protected $id;
54
+
55
+	/**
56
+	 * The encoding class used to encode strings.
57
+	 *
58
+	 * @var mixed
59
+	 */
60
+	protected $encode;
61
+
62
+	/**
63
+	 * Creates a unique spl hash for this node.
64
+	 */
65
+	public function __construct()
66
+	{
67
+		$this->id = spl_object_hash($this);
68
+	}
69
+
70
+	/**
71
+	 * Magic get method for attributes and certain methods.
72
+	 *
73
+	 * @param string $key
74
+	 * @return mixed
75
+	 */
76
+	public function __get($key)
77
+	{
78
+		// check attribute first
79
+		if ( ! is_null($this->getAttribute($key))) {
80
+			return $this->getAttribute($key);
81
+		}
82
+		switch (strtolower($key)) {
83
+			case 'outerhtml':
84
+				return $this->outerHtml();
85
+			case 'innerhtml':
86
+				return $this->innerHtml();
87
+			case 'text':
88
+				return $this->text();
89
+		}
90
+
91
+		return null;
92
+	}
93
+
94
+	/**
95
+	 * Attempts to clear out any object references.
96
+	 */
97
+	public function __destruct()
98
+	{
99
+		$this->tag      = null;
100
+		$this->attr     = [];
101
+		$this->parent   = null;
102
+		$this->children = [];
103
+	}
104
+
105
+	/**
106
+	 * Simply calls the outer text method.
107
+	 *
108
+	 * @return string
109
+	 */
110
+	public function __toString()
111
+	{
112
+		return $this->outerHtml();
113
+	}
114
+
115
+	/**
116
+	 * Returns the id of this object.
117
+	 */
118
+	public function id()
119
+	{
120
+		return $this->id;
121
+	}
122
+
123
+	/**
124
+	 * Returns the parent of node.
125
+	 *
126
+	 * @return AbstractNode
127
+	 */
128
+	public function getParent()
129
+	{
130
+		return $this->parent;
131
+	}
132
+
133
+	/**
134
+	 * Sets the parent node.
135
+	 *
136
+	 * @param AbstractNode $parent
137
+	 * @return $this
138
+	 * @throws CircularException
139
+	 */
140
+	public function setParent(AbstractNode $parent)
141
+	{
142
+		// check integrity
143
+		if ($this->isDescendant($parent->id())) {
144
+			throw new CircularException('Can not add descendant "'.$parent->id().'" as my parent.');
145
+		}
146
+
147
+		// remove from old parent
148
+		if ( ! is_null($this->parent)) {
149
+			if ($this->parent->id() == $parent->id()) {
150
+				// already the parent
151
+				return $this;
152
+			}
153
+
154
+			$this->parent->removeChild($this->id);
155
+		}
156
+
157
+		$this->parent = $parent;
158
+
159
+		// assign child to parent
160
+		$this->parent->addChild($this);
161
+
162
+		//clear any cache
163
+		$this->clear();
164
+
165
+		return $this;
166
+	}
167
+
168
+	/**
169
+	 * Sets the encoding class to this node and propagates it
170
+	 * to all its children.
171
+	 *
172
+	 * @param Encode $encode
173
+	 */
174
+	public function propagateEncoding(Encode $encode)
175
+	{
176
+		$this->encode = $encode;
177
+		$this->tag->setEncoding($encode);
178
+		// check children
179
+		foreach ($this->children as $id => $child) {
180
+			/** @var AbstractNode $node */
181
+			$node = $child['node'];
182
+			$node->propagateEncoding($encode);
183
+		}
184
+	}
185
+
186
+	/**
187
+	 * Checks if this node has children.
188
+	 *
189
+	 * @return bool
190
+	 */
191
+	public function hasChildren()
192
+	{
193
+		return ! empty($this->children);
194
+	}
195
+
196
+	/**
197
+	 * Returns the child by id.
198
+	 *
199
+	 * @param int $id
200
+	 * @return AbstractNode
201
+	 * @throws ChildNotFoundException
202
+	 */
203
+	public function getChild($id)
204
+	{
205
+		if ( ! isset($this->children[$id])) {
206
+			throw new ChildNotFoundException("Child '$id' not found in this node.");
207
+		}
208
+
209
+		return $this->children[$id]['node'];
210
+	}
211
+
212
+	/**
213
+	 * Returns a new array of child nodes
214
+	 *
215
+	 * @return array
216
+	 */
217
+	public function getChildren()
218
+	{
219
+		$nodes = [];
220
+		try {
221
+			$child = $this->firstChild();
222
+			do {
223
+				$nodes[] = $child;
224
+				$child   = $this->nextChild($child->id());
225
+			} while ( ! is_null($child));
226
+		} catch (ChildNotFoundException $e) {
227
+			// we are done looking for children
228
+		}
229
+
230
+		return $nodes;
231
+	}
232
+
233
+	/**
234
+	 * Counts children
235
+	 *
236
+	 * @return int
237
+	 */
238
+	public function countChildren()
239
+	{
240
+		return count($this->children);
241
+	}
242
+
243
+	/**
244
+	 * Adds a child node to this node. Returns true when the child was added
245
+	 * and false when it already was a child of this node.
246
+	 *
247
+	 * @param AbstractNode $child
248
+	 * @return bool
249
+	 * @throws CircularException
250
+	 */
251
+	public function addChild(AbstractNode $child)
252
+	{
253
+		$key = null;
254
+
255
+		// check integrity
256
+		if ($this->isAncestor($child->id())) {
257
+			throw new CircularException('Can not add child. It is my ancestor.');
258
+		}
259
+
260
+		// check if child is itself
261
+		if ($child->id() == $this->id) {
262
+			throw new CircularException('Can not set itself as a child.');
263
+		}
264
+
265
+		if ($this->hasChildren()) {
266
+			if (isset($this->children[$child->id()])) {
267
+				// we already have this child
268
+				return false;
269
+			}
270
+			$sibling                      = $this->lastChild();
271
+			$key                          = $sibling->id();
272
+			$this->children[$key]['next'] = $child->id();
273
+		}
274
+
275
+		// add the child
276
+		$this->children[$child->id()] = [
277
+			'node' => $child,
278
+			'next' => null,
279
+			'prev' => $key,
280
+		];
281
+
282
+		// tell child I am the new parent
283
+		$child->setParent($this);
284
+
285
+		//clear any cache
286
+		$this->clear();
287
+
288
+		return true;
289
+	}
290
+
291
+	/**
292
+	 * Removes the child by id.
293
+	 *
294
+	 * @param int $id
295
+	 * @return $this
296
+	 */
297
+	public function removeChild($id)
298
+	{
299
+		if ( ! isset($this->children[$id])) {
300
+			return $this;
301
+		}
302
+
303
+		// handle moving next and previous assignments.
304
+		$next = $this->children[$id]['next'];
305
+		$prev = $this->children[$id]['prev'];
306
+		if ( ! is_null($next)) {
307
+			$this->children[$next]['prev'] = $prev;
308
+		}
309
+		if ( ! is_null($prev)) {
310
+			$this->children[$prev]['next'] = $next;
311
+		}
312
+
313
+		// remove the child
314
+		unset($this->children[$id]);
315
+
316
+		//clear any cache
317
+		$this->clear();
318
+
319
+		return $this;
320
+	}
321
+
322
+	/**
323
+	 * Attempts to get the next child.
324
+	 *
325
+	 * @param int $id
326
+	 * @return AbstractNode
327
+	 * @uses $this->getChild()
328
+	 */
329
+	public function nextChild($id)
330
+	{
331
+		$child = $this->getChild($id);
332
+		$next  = $this->children[$child->id()]['next'];
333
+
334
+		return $this->getChild($next);
335
+	}
336
+
337
+	/**
338
+	 * Attempts to get the previous child.
339
+	 *
340
+	 * @param int $id
341
+	 * @return AbstractNode
342
+	 * @uses $this->getChild()
343
+	 */
344
+	public function previousChild($id)
345
+	{
346
+		$child = $this->getchild($id);
347
+		$next  = $this->children[$child->id()]['prev'];
348
+
349
+		return $this->getChild($next);
350
+	}
351
+
352
+	/**
353
+	 * Checks if the given node id is a child of the
354
+	 * current node.
355
+	 *
356
+	 * @param int $id
357
+	 * @return bool
358
+	 */
359
+	public function isChild($id)
360
+	{
361
+		foreach ($this->children as $childId => $child) {
362
+			if ($id == $childId) {
363
+				return true;
364
+			}
365
+		}
366
+
367
+		return false;
368
+	}
369
+
370
+	/**
371
+	 * Checks if the given node id is a descendant of the
372
+	 * current node.
373
+	 *
374
+	 * @param int $id
375
+	 * @return bool
376
+	 */
377
+	public function isDescendant($id)
378
+	{
379
+		if ($this->isChild($id)) {
380
+			return true;
381
+		}
382
+
383
+		foreach ($this->children as $childId => $child) {
384
+			/** @var AbstractNode $node */
385
+			$node = $child['node'];
386
+			if ($node->hasChildren() &&
387
+				$node->isDescendant($id)
388
+			) {
389
+				return true;
390
+			}
391
+		}
392
+
393
+		return false;
394
+	}
395
+
396
+	/**
397
+	 * Checks if the given node id is an ancestor of
398
+	 * the current node.
399
+	 *
400
+	 * @param int $id
401
+	 * @return bool
402
+	 */
403
+	public function isAncestor($id)
404
+	{
405
+		if ( ! is_null($this->getAncestor($id))) {
406
+			return true;
407
+		}
408
+
409
+		return false;
410
+	}
411
+
412
+	/**
413
+	 * Attempts to get an ancestor node by the given id.
414
+	 *
415
+	 * @param int $id
416
+	 * @return null|AbstractNode
417
+	 */
418
+	public function getAncestor($id)
419
+	{
420
+		if ( ! is_null($this->parent)) {
421
+			if ($this->parent->id() == $id) {
422
+				return $this->parent;
423
+			}
424
+
425
+			return $this->parent->getAncestor($id);
426
+		}
427
+
428
+		return null;
429
+	}
430
+
431
+	/**
432
+	 * Shortcut to return the first child.
433
+	 *
434
+	 * @return AbstractNode
435
+	 * @uses $this->getChild()
436
+	 */
437
+	public function firstChild()
438
+	{
439
+		reset($this->children);
440
+		$key = key($this->children);
441
+
442
+		return $this->getChild($key);
443
+	}
444
+
445
+	/**
446
+	 * Attempts to get the last child.
447
+	 *
448
+	 * @return AbstractNode
449
+	 */
450
+	public function lastChild()
451
+	{
452
+		end($this->children);
453
+		$key = key($this->children);
454
+
455
+		return $this->getChild($key);
456
+	}
457
+
458
+	/**
459
+	 * Attempts to get the next sibling.
460
+	 *
461
+	 * @return AbstractNode
462
+	 * @throws ParentNotFoundException
463
+	 */
464
+	public function nextSibling()
465
+	{
466
+		if (is_null($this->parent)) {
467
+			throw new ParentNotFoundException('Parent is not set for this node.');
468
+		}
469
+
470
+		return $this->parent->nextChild($this->id);
471
+	}
472
+
473
+	/**
474
+	 * Attempts to get the previous sibling
475
+	 *
476
+	 * @return AbstractNode
477
+	 * @throws ParentNotFoundException
478
+	 */
479
+	public function previousSibling()
480
+	{
481
+		if (is_null($this->parent)) {
482
+			throw new ParentNotFoundException('Parent is not set for this node.');
483
+		}
484
+
485
+		return $this->parent->previousChild($this->id);
486
+	}
487
+
488
+	/**
489
+	 * Gets the tag object of this node.
490
+	 *
491
+	 * @return Tag
492
+	 */
493
+	public function getTag()
494
+	{
495
+		return $this->tag;
496
+	}
497
+
498
+	/**
499
+	 * A wrapper method that calls the getAttributes method on the tag
500
+	 * of this node and returns only the value of each attribute.
501
+	 *
502
+	 * @return array
503
+	 */
504
+	public function getAttributes()
505
+	{
506
+		$attributes = $this->tag->getAttributes();
507
+		foreach ($attributes as $name => $info) {
508
+			$attributes[$name] = $info['value'];
509
+		}
510
+
511
+		return $attributes;
512
+	}
513
+
514
+	/**
515
+	 * A wrapper method that simply calls the getAttribute method
516
+	 * on the tag of this node.
517
+	 *
518
+	 * @param string $key
519
+	 * @return mixed
520
+	 */
521
+	public function getAttribute($key)
522
+	{
523
+		$attribute = $this->tag->getAttribute($key);
524
+		if ( ! is_null($attribute)) {
525
+			$attribute = $attribute['value'];
526
+		}
527
+
528
+		return $attribute;
529
+	}
530
+
531
+	/**
532
+	 * A wrapper method that simply calls the setAttribute method
533
+	 * on the tag of this node.
534
+	 *
535
+	 * @param string $key
536
+	 * @param string $value
537
+	 * @return $this
538
+	 */
539
+	public function setAttribute($key, $value)
540
+	{
541
+		$this->tag->setAttribute($key, $value);
542
+
543
+		return $this;
544
+	}
545
+
546
+	/**
547
+	 * Function to locate a specific ancestor tag in the path to the root.
548
+	 *
549
+	 * @param  string $tag
550
+	 * @return AbstractNode
551
+	 * @throws ParentNotFoundException
552
+	 */
553
+	public function ancestorByTag($tag)
554
+	{
555
+		// Start by including ourselves in the comparison.
556
+		$node = $this;
557
+
558
+		while ( ! is_null($node)) {
559
+			if ($node->tag->name() == $tag) {
560
+				return $node;
561
+			}
562
+
563
+			$node = $node->getParent();
564
+		}
565
+
566
+		throw new ParentNotFoundException('Could not find an ancestor with "'.$tag.'" tag');
567
+	}
568
+
569
+	/**
570
+	 * Find elements by css selector
571
+	 *
572
+	 * @param string $selector
573
+	 * @param int $nth
574
+	 * @return array|AbstractNode
575
+	 */
576
+	public function find($selector, $nth = null)
577
+	{
578
+		$selector = new Selector($selector);
579
+		$nodes    = $selector->find($this);
580
+
581
+		if ( ! is_null($nth)) {
582
+			// return nth-element or array
583
+			if (isset($nodes[$nth])) {
584
+				return $nodes[$nth];
585
+			}
586
+
587
+			return null;
588
+		}
589
+
590
+		return $nodes;
591
+	}
592
+
593
+	/**
594
+	 * Function to try a few tricks to determine the displayed size of an img on the page.
595
+	 * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types.
596
+	 *
597
+	 * Future enhancement:
598
+	 * Look in the tag to see if there is a class or id specified that has a height or width attribute to it.
599
+	 *
600
+	 * Far future enhancement
601
+	 * Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width
602
+	 * Note that in this case, the class or id will have the img sub-selector for it to apply to the image.
603
+	 *
604
+	 * ridiculously far future development
605
+	 * If the class or id is specified in a SEPARATE css file that's not on the page, go get it and do what we were just doing for the ones on the page.
606
+	 *
607
+	 * @author John Schlick
608
+	 * @return array|bool an array containing the 'height' and 'width' of the image on the page (-1 for a dimension we can't figure out), or false if this node is not an img tag.
609
+	 */
610
+	public function get_display_size()
611
+	{
612
+		$width  = -1;
613
+		$height = -1;
614
+
615
+		if ($this->tag->name() != 'img') {
616
+			return false;
617
+		}
618
+
619
+		// See if there is a height or width attribute in the tag itself (read the plain value through the node wrapper, not the tag's attribute info array).
620
+		if ( ! is_null($this->getAttribute('width'))) {
621
+			$width = $this->getAttribute('width');
622
+		}
623
+
624
+		if ( ! is_null($this->getAttribute('height'))) {
625
+			$height = $this->getAttribute('height');
626
+		}
627
+
628
+		// Now look for an inline style; it is only used for a dimension that is still unknown (-1).
629
+		if ( ! is_null($this->getAttribute('style'))) {
630
+			// Thanks to user 'gnarf' from stackoverflow for this regular expression.
631
+			$attributes = [];
632
+			preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->getAttribute('style'), $matches,
633
+				PREG_SET_ORDER);
634
+			foreach ($matches as $match) {
635
+				$attributes[$match[1]] = $match[2];
636
+			}
637
+
638
+			$width = $this->getLength($attributes, $width, 'width');
639
+			$height = $this->getLength($attributes, $height, 'height'); // seed with the height found so far, not the width
640
+		}
641
+
642
+		$result = [
643
+			'height' => $height,
644
+			'width'  => $width,
645
+		];
646
+
647
+		return $result;
648
+	}
649
+
650
+	/**
651
+	 * If there is a length in the style attributes use it.
652
+	 *
653
+	 * @param array $attributes
654
+	 * @param int $length
655
+	 * @param string $key
656
+	 * @return int
657
+	 */
658
+	protected function getLength(array $attributes, $length, $key)
659
+	{
660
+		if (isset($attributes[$key]) && $length == -1) { // only fill in a length that is still unknown
661
+			// check that the last two characters are px (pixels)
662
+			if (strtolower(substr($attributes[$key], -2)) == 'px') {
663
+				$proposed_length = substr($attributes[$key], 0, -2);
664
+				// Now make sure that it's an integer; compare against false so that a valid 0 is not rejected.
665
+				if (filter_var($proposed_length, FILTER_VALIDATE_INT) !== false) {
666
+					$length = $proposed_length;
667
+				}
668
+			}
669
+		}
670
+
671
+		return $length;
672
+	}
673
+
674
+	/**
675
+	 * Gets the inner html of this node.
676
+	 *
677
+	 * @return string
678
+	 */
679
+	abstract public function innerHtml();
680
+
681
+	/**
682
+	 * Gets the html of this node, including its own
683
+	 * tag.
684
+	 *
685
+	 * @return string
686
+	 */
687
+	abstract public function outerHtml();
688
+
689
+	/**
690
+	 * Gets the text of this node (if there is any text).
691
+	 *
692
+	 * @return string
693
+	 */
694
+	abstract public function text();
695
+
696
+	/**
697
+	 * Call this when something in the node tree has changed. Like a child has been added
698
+	 * or a parent has been changed.
699
+	 *
700
+	 * @return void
701
+	 */
702
+	abstract protected function clear();
703 703
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/Dom/Tag.php 1 patch
Indentation   +233 added lines, -233 removed lines patch added patch discarded remove patch
@@ -12,237 +12,237 @@
 block discarded – undo
12 12
 class Tag
13 13
 {
14 14
 
15
-    /**
16
-     * The name of the tag.
17
-     *
18
-     * @var string
19
-     */
20
-    protected $name;
21
-
22
-    /**
23
-     * The attributes of the tag.
24
-     *
25
-     * @var array
26
-     */
27
-    protected $attr = [];
28
-
29
-    /**
30
-     * Is this tag self closing.
31
-     *
32
-     * @var bool
33
-     */
34
-    protected $selfClosing = false;
35
-
36
-    /**
37
-     * Tag noise
38
-     */
39
-    protected $noise = '';
40
-
41
-    /**
42
-     * The encoding class to... encode the tags
43
-     *
44
-     * @var mixed
45
-     */
46
-    protected $encode = null;
47
-
48
-    /**
49
-     * Sets up the tag with a name.
50
-     *
51
-     * @param $name
52
-     */
53
-    public function __construct($name)
54
-    {
55
-        $this->name = $name;
56
-    }
57
-
58
-    /**
59
-     * Magic method to get any of the attributes.
60
-     *
61
-     * @param string $key
62
-     * @return mixed
63
-     */
64
-    public function __get($key)
65
-    {
66
-        return $this->getAttribute($key);
67
-    }
68
-
69
-    /**
70
-     * Magic method to set any attribute.
71
-     *
72
-     * @param string $key
73
-     * @param mixed $value
74
-     */
75
-    public function __set($key, $value)
76
-    {
77
-        $this->setAttribute($key, $value);
78
-    }
79
-
80
-    /**
81
-     * Returns the name of this tag.
82
-     *
83
-     * @return string
84
-     */
85
-    public function name()
86
-    {
87
-        return $this->name;
88
-    }
89
-
90
-    /**
91
-     * Sets the tag to be self closing.
92
-     *
93
-     * @return $this
94
-     */
95
-    public function selfClosing()
96
-    {
97
-        $this->selfClosing = true;
98
-
99
-        return $this;
100
-    }
101
-
102
-    /**
103
-     * Checks if the tag is self closing.
104
-     *
105
-     * @return bool
106
-     */
107
-    public function isSelfClosing()
108
-    {
109
-        return $this->selfClosing;
110
-    }
111
-
112
-    /**
113
-     * Sets the encoding type to be used.
114
-     *
115
-     * @param Encode $encode
116
-     */
117
-    public function setEncoding(Encode $encode)
118
-    {
119
-        $this->encode = $encode;
120
-    }
121
-
122
-    /**
123
-     * Sets the noise for this tag (if any)
124
-     *
125
-     * @param $noise
126
-     * @return $this
127
-     */
128
-    public function noise($noise)
129
-    {
130
-        $this->noise = $noise;
131
-
132
-        return $this;
133
-    }
134
-
135
-    /**
136
-     * Set an attribute for this tag.
137
-     *
138
-     * @param string $key
139
-     * @param mixed $value
140
-     * @return $this
141
-     */
142
-    public function setAttribute($key, $value)
143
-    {
144
-        $key = strtolower($key);
145
-        if ( ! is_array($value)) {
146
-            $value = [
147
-                'value'       => $value,
148
-                'doubleQuote' => true,
149
-            ];
150
-        }
151
-        $this->attr[$key] = $value;
152
-
153
-        return $this;
154
-    }
155
-
156
-    /**
157
-     * Sets the attributes for this tag
158
-     *
159
-     * @param array $attr
160
-     * @return $this
161
-     */
162
-    public function setAttributes(array $attr)
163
-    {
164
-        foreach ($attr as $key => $value) {
165
-            $this->setAttribute($key, $value);
166
-        }
167
-
168
-        return $this;
169
-    }
170
-
171
-    /**
172
-     * Returns all attributes of this tag.
173
-     *
174
-     * @return array
175
-     */
176
-    public function getAttributes()
177
-    {
178
-        $return = [];
179
-        foreach ($this->attr as $attr => $info) {
180
-            $return[$attr] = $this->getAttribute($attr);
181
-        }
182
-
183
-        return $return;
184
-    }
185
-
186
-    /**
187
-     * Returns an attribute by the key
188
-     *
189
-     * @param string $key
190
-     * @return mixed
191
-     */
192
-    public function getAttribute($key)
193
-    {
194
-        if ( ! isset($this->attr[$key])) {
195
-            return null;
196
-        }
197
-        $value = $this->attr[$key]['value'];
198
-        if (is_string($value) && ! is_null($this->encode)) {
199
-            // convert charset
200
-            $this->attr[$key]['value'] = $this->encode->convert($value);
201
-        }
202
-
203
-        return $this->attr[$key];
204
-    }
205
-
206
-    /**
207
-     * Generates the opening tag for this object.
208
-     *
209
-     * @return string
210
-     */
211
-    public function makeOpeningTag()
212
-    {
213
-        $return = '<'.$this->name;
214
-
215
-        // add the attributes
216
-        foreach ($this->attr as $key => $info) {
217
-            $info = $this->getAttribute($key);
218
-            $val  = $info['value'];
219
-            if (is_null($val)) {
220
-                $return .= ' '.$key;
221
-            } elseif ($info['doubleQuote']) {
222
-                $return .= ' '.$key.'="'.$val.'"';
223
-            } else {
224
-                $return .= ' '.$key.'=\''.$val.'\'';
225
-            }
226
-        }
227
-
228
-        if ($this->selfClosing) {
229
-            return $return.' />';
230
-        } else {
231
-            return $return.'>';
232
-        }
233
-    }
234
-
235
-    /**
236
-     * Generates the closing tag for this object.
237
-     *
238
-     * @return string
239
-     */
240
-    public function makeClosingTag()
241
-    {
242
-        if ($this->selfClosing) {
243
-            return '';
244
-        }
245
-
246
-        return '</'.$this->name.'>';
247
-    }
15
+	/**
16
+	 * The name of the tag.
17
+	 *
18
+	 * @var string
19
+	 */
20
+	protected $name;
21
+
22
+	/**
23
+	 * The attributes of the tag.
24
+	 *
25
+	 * @var array
26
+	 */
27
+	protected $attr = [];
28
+
29
+	/**
30
+	 * Is this tag self closing.
31
+	 *
32
+	 * @var bool
33
+	 */
34
+	protected $selfClosing = false;
35
+
36
+	/**
37
+	 * Tag noise
38
+	 */
39
+	protected $noise = '';
40
+
41
+	/**
42
+	 * The encoding class to... encode the tags
43
+	 *
44
+	 * @var mixed
45
+	 */
46
+	protected $encode = null;
47
+
48
+	/**
49
+	 * Sets up the tag with a name.
50
+	 *
51
+	 * @param $name
52
+	 */
53
+	public function __construct($name)
54
+	{
55
+		$this->name = $name;
56
+	}
57
+
58
+	/**
59
+	 * Magic method to get any of the attributes.
60
+	 *
61
+	 * @param string $key
62
+	 * @return mixed
63
+	 */
64
+	public function __get($key)
65
+	{
66
+		return $this->getAttribute($key);
67
+	}
68
+
69
+	/**
70
+	 * Magic method to set any attribute.
71
+	 *
72
+	 * @param string $key
73
+	 * @param mixed $value
74
+	 */
75
+	public function __set($key, $value)
76
+	{
77
+		$this->setAttribute($key, $value);
78
+	}
79
+
80
+	/**
81
+	 * Returns the name of this tag.
82
+	 *
83
+	 * @return string
84
+	 */
85
+	public function name()
86
+	{
87
+		return $this->name;
88
+	}
89
+
90
+	/**
91
+	 * Sets the tag to be self closing.
92
+	 *
93
+	 * @return $this
94
+	 */
95
+	public function selfClosing()
96
+	{
97
+		$this->selfClosing = true;
98
+
99
+		return $this;
100
+	}
101
+
102
+	/**
103
+	 * Checks if the tag is self closing.
104
+	 *
105
+	 * @return bool
106
+	 */
107
+	public function isSelfClosing()
108
+	{
109
+		return $this->selfClosing;
110
+	}
111
+
112
+	/**
113
+	 * Sets the encoding type to be used.
114
+	 *
115
+	 * @param Encode $encode
116
+	 */
117
+	public function setEncoding(Encode $encode)
118
+	{
119
+		$this->encode = $encode;
120
+	}
121
+
122
+	/**
123
+	 * Sets the noise for this tag (if any)
124
+	 *
125
+	 * @param $noise
126
+	 * @return $this
127
+	 */
128
+	public function noise($noise)
129
+	{
130
+		$this->noise = $noise;
131
+
132
+		return $this;
133
+	}
134
+
135
+	/**
136
+	 * Set an attribute for this tag.
137
+	 *
138
+	 * @param string $key
139
+	 * @param mixed $value
140
+	 * @return $this
141
+	 */
142
+	public function setAttribute($key, $value)
143
+	{
144
+		$key = strtolower($key);
145
+		if ( ! is_array($value)) {
146
+			$value = [
147
+				'value'       => $value,
148
+				'doubleQuote' => true,
149
+			];
150
+		}
151
+		$this->attr[$key] = $value;
152
+
153
+		return $this;
154
+	}
155
+
156
+	/**
157
+	 * Sets the attributes for this tag
158
+	 *
159
+	 * @param array $attr
160
+	 * @return $this
161
+	 */
162
+	public function setAttributes(array $attr)
163
+	{
164
+		foreach ($attr as $key => $value) {
165
+			$this->setAttribute($key, $value);
166
+		}
167
+
168
+		return $this;
169
+	}
170
+
171
+	/**
172
+	 * Returns all attributes of this tag.
173
+	 *
174
+	 * @return array
175
+	 */
176
+	public function getAttributes()
177
+	{
178
+		$return = [];
179
+		foreach ($this->attr as $attr => $info) {
180
+			$return[$attr] = $this->getAttribute($attr);
181
+		}
182
+
183
+		return $return;
184
+	}
185
+
186
+	/**
187
+	 * Returns an attribute by the key
188
+	 *
189
+	 * @param string $key
190
+	 * @return mixed
191
+	 */
192
+	public function getAttribute($key)
193
+	{
194
+		if ( ! isset($this->attr[$key])) {
195
+			return null;
196
+		}
197
+		$value = $this->attr[$key]['value'];
198
+		if (is_string($value) && ! is_null($this->encode)) {
199
+			// convert charset
200
+			$this->attr[$key]['value'] = $this->encode->convert($value);
201
+		}
202
+
203
+		return $this->attr[$key];
204
+	}
205
+
206
+	/**
207
+	 * Generates the opening tag for this object.
208
+	 *
209
+	 * @return string
210
+	 */
211
+	public function makeOpeningTag()
212
+	{
213
+		$return = '<'.$this->name;
214
+
215
+		// add the attributes
216
+		foreach ($this->attr as $key => $info) {
217
+			$info = $this->getAttribute($key);
218
+			$val  = $info['value'];
219
+			if (is_null($val)) {
220
+				$return .= ' '.$key;
221
+			} elseif ($info['doubleQuote']) {
222
+				$return .= ' '.$key.'="'.$val.'"';
223
+			} else {
224
+				$return .= ' '.$key.'=\''.$val.'\'';
225
+			}
226
+		}
227
+
228
+		if ($this->selfClosing) {
229
+			return $return.' />';
230
+		} else {
231
+			return $return.'>';
232
+		}
233
+	}
234
+
235
+	/**
236
+	 * Generates the closing tag for this object.
237
+	 *
238
+	 * @return string
239
+	 */
240
+	public function makeClosingTag()
241
+	{
242
+		if ($this->selfClosing) {
243
+			return '';
244
+		}
245
+
246
+		return '</'.$this->name.'>';
247
+	}
248 248
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/Selector.php 1 patch
Indentation   +355 added lines, -355 removed lines patch added patch discarded remove patch
@@ -13,359 +13,359 @@
 block discarded – undo
13 13
 class Selector
14 14
 {
15 15
 
16
-    /**
17
-     * Pattern of CSS selectors, modified from 'mootools'
18
-     *
19
-     * @var string
20
-     */
21
-    protected $pattern = "/([\w-:\*>]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
22
-
23
-    protected $selectors = [];
24
-
25
-    /**
26
-     * Constructs with the selector string
27
-     *
28
-     * @param string $selector
29
-     */
30
-    public function __construct($selector)
31
-    {
32
-        $this->parseSelectorString($selector);
33
-    }
34
-
35
-    /**
36
-     * Returns the selectors that where found in __construct
37
-     *
38
-     * @return array
39
-     */
40
-    public function getSelectors()
41
-    {
42
-        return $this->selectors;
43
-    }
44
-
45
-    /**
46
-     * Attempts to find the selectors starting from the given
47
-     * node object.
48
-     *
49
-     * @param AbstractNode $node
50
-     * @return array|Collection
51
-     */
52
-    public function find(AbstractNode $node)
53
-    {
54
-        $results = new Collection;
55
-        foreach ($this->selectors as $selector) {
56
-            $nodes = [$node];
57
-            if (count($selector) == 0) {
58
-                continue;
59
-            }
60
-
61
-            $options = [];
62
-            foreach ($selector as $rule) {
63
-                if ($rule['alterNext']) {
64
-                    $options[] = $this->alterNext($rule);
65
-                    continue;
66
-                }
67
-                $nodes = $this->seek($nodes, $rule, $options);
68
-                // clear the options
69
-                $options = [];
70
-            }
71
-
72
-            // this is the final set of nodes
73
-            foreach ($nodes as $result) {
74
-                $results[] = $result;
75
-            }
76
-        }
77
-
78
-        return $results;
79
-    }
80
-
81
-    /**
82
-     * Parses the selector string
83
-     *
84
-     * @param string $selector
85
-     */
86
-    protected function parseSelectorString($selector)
87
-    {
88
-        $matches = [];
89
-        preg_match_all($this->pattern, trim($selector).' ', $matches, PREG_SET_ORDER);
90
-
91
-        // skip tbody
92
-        $result = [];
93
-        foreach ($matches as $match) {
94
-            // default values
95
-            $tag       = strtolower(trim($match[1]));
96
-            $operator  = '=';
97
-            $key       = null;
98
-            $value     = null;
99
-            $noKey     = false;
100
-            $alterNext = false;
101
-
102
-            // check for elements that alter the behavior of the next element
103
-            if ($tag == '>') {
104
-                $alterNext = true;
105
-            }
106
-
107
-            // check for id selector
108
-            if ( ! empty($match[2])) {
109
-                $key   = 'id';
110
-                $value = $match[2];
111
-            }
112
-
113
-            // check for class selector
114
-            if ( ! empty($match[3])) {
115
-                $key   = 'class';
116
-                $value = $match[3];
117
-            }
118
-
119
-            // and final attribute selector
120
-            if ( ! empty($match[4])) {
121
-                $key = strtolower($match[4]);
122
-            }
123
-            if ( ! empty($match[5])) {
124
-                $operator = $match[5];
125
-            }
126
-            if ( ! empty($match[6])) {
127
-                $value = $match[6];
128
-            }
129
-
130
-            // check for elements that do not have a specified attribute
131
-            if (isset($key[0]) && $key[0] == '!') {
132
-                $key   = substr($key, 1);
133
-                $noKey = true;
134
-            }
135
-
136
-            $result[] = [
137
-                'tag'       => $tag,
138
-                'key'       => $key,
139
-                'value'     => $value,
140
-                'operator'  => $operator,
141
-                'noKey'     => $noKey,
142
-                'alterNext' => $alterNext,
143
-            ];
144
-            if (trim($match[7]) == ',') {
145
-                $this->selectors[] = $result;
146
-                $result            = [];
147
-            }
148
-        }
149
-
150
-        // save last results
151
-        if (count($result) > 0) {
152
-            $this->selectors[] = $result;
153
-        }
154
-    }
155
-
156
-    /**
157
-     * Attempts to find all children that match the rule
158
-     * given.
159
-     *
160
-     * @param array $nodes
161
-     * @param array $rule
162
-     * @param array $options
163
-     * @return array
164
-     * @recursive
165
-     */
166
-    protected function seek(array $nodes, array $rule, array $options)
167
-    {
168
-        // XPath index
169
-        if (count($rule['tag']) > 0 &&
170
-            count($rule['key']) > 0 &&
171
-            is_numeric($rule['key'])
172
-        ) {
173
-            $count = 0;
174
-            /** @var AbstractNode $node */
175
-            foreach ($nodes as $node) {
176
-                if ($rule['tag'] == '*' ||
177
-                    $rule['tag'] == $node->getTag()->name()) {
178
-                    ++$count;
179
-                    if ($count == $rule['key']) {
180
-                        // found the node we wanted
181
-                        return [$node];
182
-                    }
183
-                }
184
-            }
185
-
186
-            return [];
187
-        }
188
-
189
-        $options = $this->flattenOptions($options);
190
-
191
-        $return = [];
192
-        /** @var AbstractNode $node */
193
-        foreach ($nodes as $node) {
194
-            // check if we are a leaf
195
-            if ( ! $node->hasChildren()) {
196
-                continue;
197
-            }
198
-
199
-            $children = [];
200
-            $child    = $node->firstChild();
201
-            while ( ! is_null($child)) {
202
-                // wild card, grab all
203
-                if ($rule['tag'] == '*' && is_null($rule['key'])) {
204
-                    $return[] = $child;
205
-                    try {
206
-                        $child = $node->nextChild($child->id());
207
-                    } catch (ChildNotFoundException $e) {
208
-                        // no more children
209
-                        $child = null;
210
-                    }
211
-                    continue;
212
-                }
213
-
214
-                $pass = true;
215
-                // check tag
216
-                if ( ! empty($rule['tag']) && $rule['tag'] != $child->getTag()->name() &&
217
-                    $rule['tag'] != '*'
218
-                ) {
219
-                    // child failed tag check
220
-                    $pass = false;
221
-                }
222
-
223
-                // check key
224
-                if ($pass && ! is_null($rule['key'])) {
225
-                    if ($rule['noKey']) {
226
-                        if ( ! is_null($child->getAttribute($rule['key']))) {
227
-                            $pass = false;
228
-                        }
229
-                    } else {
230
-                        if ($rule['key'] != 'plaintext' &&
231
-                            is_null($child->getAttribute($rule['key']))
232
-                        ) {
233
-                            $pass = false;
234
-                        }
235
-                    }
236
-                }
237
-
238
-                // compare values
239
-                if ($pass && ! is_null($rule['key']) &&
240
-                    ! is_null($rule['value']) && $rule['value'] != '*'
241
-                ) {
242
-                    if ($rule['key'] == 'plaintext') {
243
-                        // plaintext search
244
-                        $nodeValue = $child->text();
245
-                    } else {
246
-                        // normal search
247
-                        $nodeValue = $child->getAttribute($rule['key']);
248
-                    }
249
-
250
-                    $check = $this->match($rule['operator'], $rule['value'], $nodeValue);
251
-
252
-                    // handle multiple classes
253
-                    if ( ! $check && $rule['key'] == 'class') {
254
-                        $childClasses = explode(' ', $child->getAttribute('class'));
255
-                        foreach ($childClasses as $class) {
256
-                            if ( ! empty($class)) {
257
-                                $check = $this->match($rule['operator'], $rule['value'], $class);
258
-                            }
259
-                            if ($check) {
260
-                                break;
261
-                            }
262
-                        }
263
-                    }
264
-
265
-                    if ( ! $check) {
266
-                        $pass = false;
267
-                    }
268
-                }
269
-
270
-                if ($pass) {
271
-                    // it passed all checks
272
-                    $return[] = $child;
273
-                } else {
274
-                    // this child failed to be matched
275
-                    if ($child->hasChildren()) {
276
-                        // we still want to check its children
277
-                        $children[] = $child;
278
-                    }
279
-                }
280
-
281
-                try {
282
-                    // get next child
283
-                    $child = $node->nextChild($child->id());
284
-                } catch (ChildNotFoundException $e) {
285
-                    // no more children
286
-                    $child = null;
287
-                }
288
-            }
289
-
290
-            if (( ! isset($options['checkGrandChildren']) ||
291
-                    $options['checkGrandChildren'])
292
-                && count($children) > 0
293
-            ) {
294
-                // we have children that failed but are not leaves.
295
-                $matches = $this->seek($children, $rule, $options);
296
-                foreach ($matches as $match) {
297
-                    $return[] = $match;
298
-                }
299
-            }
300
-        }
301
-
302
-        return $return;
303
-    }
304
-
305
-    /**
306
-     * Attempts to match the given arguments with the given operator.
307
-     *
308
-     * @param string $operator
309
-     * @param string $pattern
310
-     * @param string $value
311
-     * @return bool
312
-     */
313
-    protected function match($operator, $pattern, $value)
314
-    {
315
-        $value   = strtolower($value);
316
-        $pattern = strtolower($pattern);
317
-        switch ($operator) {
318
-            case '=':
319
-                return $value === $pattern;
320
-            case '!=':
321
-                return $value !== $pattern;
322
-            case '^=':
323
-                return preg_match('/^'.preg_quote($pattern, '/').'/', $value);
324
-            case '$=':
325
-                return preg_match('/'.preg_quote($pattern, '/').'$/', $value);
326
-            case '*=':
327
-                if ($pattern[0] == '/') {
328
-                    return preg_match($pattern, $value);
329
-                }
330
-
331
-                return preg_match("/".$pattern."/i", $value);
332
-        }
333
-
334
-        return false;
335
-    }
336
-
337
-    /**
338
-     * Attempts to figure out what the alteration will be for
339
-     * the next element.
340
-     *
341
-     * @param array $rule
342
-     * @return array
343
-     */
344
-    protected function alterNext($rule)
345
-    {
346
-        $options = [];
347
-        if ($rule['tag'] == '>') {
348
-            $options['checkGrandChildren'] = false;
349
-        }
350
-
351
-        return $options;
352
-    }
353
-
354
-    /**
355
-     * Flattens the option array.
356
-     *
357
-     * @param array $optionsArray
358
-     * @return array
359
-     */
360
-    protected function flattenOptions(array $optionsArray)
361
-    {
362
-        $options = [];
363
-        foreach ($optionsArray as $optionArray) {
364
-            foreach ($optionArray as $key => $option) {
365
-                $options[$key] = $option;
366
-            }
367
-        }
368
-
369
-        return $options;
370
-    }
16
+	/**
17
+	 * Pattern of CSS selectors, modified from 'mootools'
18
+	 *
19
+	 * @var string
20
+	 */
21
+	protected $pattern = "/([\w-:\*>]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
22
+
23
+	protected $selectors = [];
24
+
25
+	/**
26
+	 * Constructs with the selector string
27
+	 *
28
+	 * @param string $selector
29
+	 */
30
+	public function __construct($selector)
31
+	{
32
+		$this->parseSelectorString($selector);
33
+	}
34
+
35
+	/**
36
+	 * Returns the selectors that where found in __construct
37
+	 *
38
+	 * @return array
39
+	 */
40
+	public function getSelectors()
41
+	{
42
+		return $this->selectors;
43
+	}
44
+
45
+	/**
46
+	 * Attempts to find the selectors starting from the given
47
+	 * node object.
48
+	 *
49
+	 * @param AbstractNode $node
50
+	 * @return array|Collection
51
+	 */
52
+	public function find(AbstractNode $node)
53
+	{
54
+		$results = new Collection;
55
+		foreach ($this->selectors as $selector) {
56
+			$nodes = [$node];
57
+			if (count($selector) == 0) {
58
+				continue;
59
+			}
60
+
61
+			$options = [];
62
+			foreach ($selector as $rule) {
63
+				if ($rule['alterNext']) {
64
+					$options[] = $this->alterNext($rule);
65
+					continue;
66
+				}
67
+				$nodes = $this->seek($nodes, $rule, $options);
68
+				// clear the options
69
+				$options = [];
70
+			}
71
+
72
+			// this is the final set of nodes
73
+			foreach ($nodes as $result) {
74
+				$results[] = $result;
75
+			}
76
+		}
77
+
78
+		return $results;
79
+	}
80
+
81
+	/**
82
+	 * Parses the selector string
83
+	 *
84
+	 * @param string $selector
85
+	 */
86
+	protected function parseSelectorString($selector)
87
+	{
88
+		$matches = [];
89
+		preg_match_all($this->pattern, trim($selector).' ', $matches, PREG_SET_ORDER);
90
+
91
+		// skip tbody
92
+		$result = [];
93
+		foreach ($matches as $match) {
94
+			// default values
95
+			$tag       = strtolower(trim($match[1]));
96
+			$operator  = '=';
97
+			$key       = null;
98
+			$value     = null;
99
+			$noKey     = false;
100
+			$alterNext = false;
101
+
102
+			// check for elements that alter the behavior of the next element
103
+			if ($tag == '>') {
104
+				$alterNext = true;
105
+			}
106
+
107
+			// check for id selector
108
+			if ( ! empty($match[2])) {
109
+				$key   = 'id';
110
+				$value = $match[2];
111
+			}
112
+
113
+			// check for class selector
114
+			if ( ! empty($match[3])) {
115
+				$key   = 'class';
116
+				$value = $match[3];
117
+			}
118
+
119
+			// and final attribute selector
120
+			if ( ! empty($match[4])) {
121
+				$key = strtolower($match[4]);
122
+			}
123
+			if ( ! empty($match[5])) {
124
+				$operator = $match[5];
125
+			}
126
+			if ( ! empty($match[6])) {
127
+				$value = $match[6];
128
+			}
129
+
130
+			// check for elements that do not have a specified attribute
131
+			if (isset($key[0]) && $key[0] == '!') {
132
+				$key   = substr($key, 1);
133
+				$noKey = true;
134
+			}
135
+
136
+			$result[] = [
137
+				'tag'       => $tag,
138
+				'key'       => $key,
139
+				'value'     => $value,
140
+				'operator'  => $operator,
141
+				'noKey'     => $noKey,
142
+				'alterNext' => $alterNext,
143
+			];
144
+			if (trim($match[7]) == ',') {
145
+				$this->selectors[] = $result;
146
+				$result            = [];
147
+			}
148
+		}
149
+
150
+		// save last results
151
+		if (count($result) > 0) {
152
+			$this->selectors[] = $result;
153
+		}
154
+	}
155
+
156
+	/**
157
+	 * Attempts to find all children that match the rule
158
+	 * given.
159
+	 *
160
+	 * @param array $nodes
161
+	 * @param array $rule
162
+	 * @param array $options
163
+	 * @return array
164
+	 * @recursive
165
+	 */
166
+	protected function seek(array $nodes, array $rule, array $options)
167
+	{
168
+		// XPath index
169
+		if (count($rule['tag']) > 0 &&
170
+			count($rule['key']) > 0 &&
171
+			is_numeric($rule['key'])
172
+		) {
173
+			$count = 0;
174
+			/** @var AbstractNode $node */
175
+			foreach ($nodes as $node) {
176
+				if ($rule['tag'] == '*' ||
177
+					$rule['tag'] == $node->getTag()->name()) {
178
+					++$count;
179
+					if ($count == $rule['key']) {
180
+						// found the node we wanted
181
+						return [$node];
182
+					}
183
+				}
184
+			}
185
+
186
+			return [];
187
+		}
188
+
189
+		$options = $this->flattenOptions($options);
190
+
191
+		$return = [];
192
+		/** @var AbstractNode $node */
193
+		foreach ($nodes as $node) {
194
+			// check if we are a leaf
195
+			if ( ! $node->hasChildren()) {
196
+				continue;
197
+			}
198
+
199
+			$children = [];
200
+			$child    = $node->firstChild();
201
+			while ( ! is_null($child)) {
202
+				// wild card, grab all
203
+				if ($rule['tag'] == '*' && is_null($rule['key'])) {
204
+					$return[] = $child;
205
+					try {
206
+						$child = $node->nextChild($child->id());
207
+					} catch (ChildNotFoundException $e) {
208
+						// no more children
209
+						$child = null;
210
+					}
211
+					continue;
212
+				}
213
+
214
+				$pass = true;
215
+				// check tag
216
+				if ( ! empty($rule['tag']) && $rule['tag'] != $child->getTag()->name() &&
217
+					$rule['tag'] != '*'
218
+				) {
219
+					// child failed tag check
220
+					$pass = false;
221
+				}
222
+
223
+				// check key
224
+				if ($pass && ! is_null($rule['key'])) {
225
+					if ($rule['noKey']) {
226
+						if ( ! is_null($child->getAttribute($rule['key']))) {
227
+							$pass = false;
228
+						}
229
+					} else {
230
+						if ($rule['key'] != 'plaintext' &&
231
+							is_null($child->getAttribute($rule['key']))
232
+						) {
233
+							$pass = false;
234
+						}
235
+					}
236
+				}
237
+
238
+				// compare values
239
+				if ($pass && ! is_null($rule['key']) &&
240
+					! is_null($rule['value']) && $rule['value'] != '*'
241
+				) {
242
+					if ($rule['key'] == 'plaintext') {
243
+						// plaintext search
244
+						$nodeValue = $child->text();
245
+					} else {
246
+						// normal search
247
+						$nodeValue = $child->getAttribute($rule['key']);
248
+					}
249
+
250
+					$check = $this->match($rule['operator'], $rule['value'], $nodeValue);
251
+
252
+					// handle multiple classes
253
+					if ( ! $check && $rule['key'] == 'class') {
254
+						$childClasses = explode(' ', $child->getAttribute('class'));
255
+						foreach ($childClasses as $class) {
256
+							if ( ! empty($class)) {
257
+								$check = $this->match($rule['operator'], $rule['value'], $class);
258
+							}
259
+							if ($check) {
260
+								break;
261
+							}
262
+						}
263
+					}
264
+
265
+					if ( ! $check) {
266
+						$pass = false;
267
+					}
268
+				}
269
+
270
+				if ($pass) {
271
+					// it passed all checks
272
+					$return[] = $child;
273
+				} else {
274
+					// this child failed to be matched
275
+					if ($child->hasChildren()) {
276
+						// we still want to check its children
277
+						$children[] = $child;
278
+					}
279
+				}
280
+
281
+				try {
282
+					// get next child
283
+					$child = $node->nextChild($child->id());
284
+				} catch (ChildNotFoundException $e) {
285
+					// no more children
286
+					$child = null;
287
+				}
288
+			}
289
+
290
+			if (( ! isset($options['checkGrandChildren']) ||
291
+					$options['checkGrandChildren'])
292
+				&& count($children) > 0
293
+			) {
294
+				// we have children that failed but are not leaves.
295
+				$matches = $this->seek($children, $rule, $options);
296
+				foreach ($matches as $match) {
297
+					$return[] = $match;
298
+				}
299
+			}
300
+		}
301
+
302
+		return $return;
303
+	}
304
+
305
+	/**
306
+	 * Attempts to match the given arguments with the given operator.
307
+	 *
308
+	 * @param string $operator
309
+	 * @param string $pattern
310
+	 * @param string $value
311
+	 * @return bool
312
+	 */
313
+	protected function match($operator, $pattern, $value)
314
+	{
315
+		$value   = strtolower($value);
316
+		$pattern = strtolower($pattern);
317
+		switch ($operator) {
318
+			case '=':
319
+				return $value === $pattern;
320
+			case '!=':
321
+				return $value !== $pattern;
322
+			case '^=':
323
+				return preg_match('/^'.preg_quote($pattern, '/').'/', $value);
324
+			case '$=':
325
+				return preg_match('/'.preg_quote($pattern, '/').'$/', $value);
326
+			case '*=':
327
+				if ($pattern[0] == '/') {
328
+					return preg_match($pattern, $value);
329
+				}
330
+
331
+				return preg_match("/".$pattern."/i", $value);
332
+		}
333
+
334
+		return false;
335
+	}
336
+
337
+	/**
338
+	 * Attempts to figure out what the alteration will be for
339
+	 * the next element.
340
+	 *
341
+	 * @param array $rule
342
+	 * @return array
343
+	 */
344
+	protected function alterNext($rule)
345
+	{
346
+		$options = [];
347
+		if ($rule['tag'] == '>') {
348
+			$options['checkGrandChildren'] = false;
349
+		}
350
+
351
+		return $options;
352
+	}
353
+
354
+	/**
355
+	 * Flattens the option array.
356
+	 *
357
+	 * @param array $optionsArray
358
+	 * @return array
359
+	 */
360
+	protected function flattenOptions(array $optionsArray)
361
+	{
362
+		$options = [];
363
+		foreach ($optionsArray as $optionArray) {
364
+			foreach ($optionArray as $key => $option) {
365
+				$options[$key] = $option;
366
+			}
367
+		}
368
+
369
+		return $options;
370
+	}
371 371
 }
Please login to merge, or discard this patch.