Passed
Push — master ( e57079...36e9b4 )
by Gilles
02:49
created
src/PHPHtmlParser/Selector.php 2 patches
Doc Comments   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -47,7 +47,7 @@
 block discarded – undo
47 47
      * node object.
48 48
      *
49 49
      * @param AbstractNode $node
50
-     * @return array|Collection
50
+     * @return Collection
51 51
      */
52 52
     public function find(AbstractNode $node)
53 53
     {
Please login to merge, or discard this patch.
Indentation   +353 added lines, -353 removed lines patch added patch discarded remove patch
@@ -13,357 +13,357 @@
 block discarded – undo
13 13
 class Selector
14 14
 {
15 15
 
16
-    /**
17
-     * Pattern of CSS selectors, modified from 'mootools'
18
-     *
19
-     * @var string
20
-     */
21
-    protected $pattern = "/([\w-:\*>]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
22
-
23
-    protected $selectors = [];
24
-
25
-    /**
26
-     * Constructs with the selector string
27
-     *
28
-     * @param string $selector
29
-     */
30
-    public function __construct($selector)
31
-    {
32
-        $this->parseSelectorString($selector);
33
-    }
34
-
35
-    /**
36
-     * Returns the selectors that were found in __construct
37
-     *
38
-     * @return array
39
-     */
40
-    public function getSelectors()
41
-    {
42
-        return $this->selectors;
43
-    }
44
-
45
-    /**
46
-     * Attempts to find the selectors starting from the given
47
-     * node object.
48
-     *
49
-     * @param AbstractNode $node
50
-     * @return array|Collection
51
-     */
52
-    public function find(AbstractNode $node)
53
-    {
54
-        $results = new Collection;
55
-        foreach ($this->selectors as $selector) {
56
-            $nodes = [$node];
57
-            if (count($selector) == 0) {
58
-                continue;
59
-            }
60
-
61
-            $options = [];
62
-            foreach ($selector as $rule) {
63
-                if ($rule['alterNext']) {
64
-                    $options[] = $this->alterNext($rule);
65
-                    continue;
66
-                }
67
-                $nodes = $this->seek($nodes, $rule, $options);
68
-                // clear the options
69
-                $options = [];
70
-            }
71
-
72
-            // this is the final set of nodes
73
-            foreach ($nodes as $result) {
74
-                $results[] = $result;
75
-            }
76
-        }
77
-
78
-        return $results;
79
-    }
80
-
81
-    /**
82
-     * Parses the selector string
83
-     *
84
-     * @param string $selector
85
-     */
86
-    protected function parseSelectorString($selector)
87
-    {
88
-        $matches = [];
89
-        preg_match_all($this->pattern, trim($selector).' ', $matches, PREG_SET_ORDER);
90
-
91
-        // skip tbody
92
-        $result = [];
93
-        foreach ($matches as $match) {
94
-            // default values
95
-            $tag       = strtolower(trim($match[1]));
96
-            $operator  = '=';
97
-            $key       = null;
98
-            $value     = null;
99
-            $noKey     = false;
100
-            $alterNext = false;
101
-
102
-            // check for elements that alter the behavior of the next element
103
-            if ($tag == '>') {
104
-                $alterNext = true;
105
-            }
106
-
107
-            // check for id selector
108
-            if ( ! empty($match[2])) {
109
-                $key   = 'id';
110
-                $value = $match[2];
111
-            }
112
-
113
-            // check for class selector
114
-            if ( ! empty($match[3])) {
115
-                $key   = 'class';
116
-                $value = $match[3];
117
-            }
118
-
119
-            // and final attribute selector
120
-            if ( ! empty($match[4])) {
121
-                $key = strtolower($match[4]);
122
-            }
123
-            if ( ! empty($match[5])) {
124
-                $operator = $match[5];
125
-            }
126
-            if ( ! empty($match[6])) {
127
-                $value = $match[6];
128
-            }
129
-
130
-            // check for elements that do not have a specified attribute
131
-            if (isset($key[0]) AND $key[0] == '!') {
132
-                $key   = substr($key, 1);
133
-                $noKey = true;
134
-            }
135
-
136
-            $result[] = [
137
-                'tag'       => $tag,
138
-                'key'       => $key,
139
-                'value'     => $value,
140
-                'operator'  => $operator,
141
-                'noKey'     => $noKey,
142
-                'alterNext' => $alterNext,
143
-            ];
144
-            if (trim($match[7]) == ',') {
145
-                $this->selectors[] = $result;
146
-                $result            = [];
147
-            }
148
-        }
149
-
150
-        // save last results
151
-        if (count($result) > 0) {
152
-            $this->selectors[] = $result;
153
-        }
154
-    }
155
-
156
-    /**
157
-     * Attempts to find all children that match the rule
158
-     * given.
159
-     *
160
-     * @param array $nodes
161
-     * @param array $rule
162
-     * @param array $options
163
-     * @return array
164
-     * @recursive
165
-     */
166
-    protected function seek(array $nodes, array $rule, array $options)
167
-    {
168
-        // XPath index
169
-        if ( ! empty($rule['tag']) AND ! empty($rule['key']) AND
170
-            is_numeric($rule['key'])
171
-        ) {
172
-            $count = 0;
173
-            /** @var AbstractNode $node */
174
-            foreach ($nodes as $node) {
175
-                if ($rule['tag'] == '*' OR $rule['tag'] == $node->getTag()->name()) {
176
-                    ++$count;
177
-                    if ($count == $rule['key']) {
178
-                        // found the node we wanted
179
-                        return [$node];
180
-                    }
181
-                }
182
-            }
183
-
184
-            return [];
185
-        }
186
-
187
-        $options = $this->flattenOptions($options);
188
-
189
-        $return = [];
190
-        /** @var AbstractNode $node */
191
-        foreach ($nodes as $node) {
192
-            // check if we are a leaf
193
-            if ( ! $node->hasChildren()) {
194
-                continue;
195
-            }
196
-
197
-            $children = [];
198
-            $child    = $node->firstChild();
199
-            while ( ! is_null($child)) {
200
-                // wild card, grab all
201
-                if ($rule['tag'] == '*' AND is_null($rule['key'])) {
202
-                    $return[] = $child;
203
-                    try {
204
-                        $child = $node->nextChild($child->id());
205
-                    } catch (ChildNotFoundException $e) {
206
-                        // no more children
207
-                        $child = null;
208
-                    }
209
-                    continue;
210
-                }
211
-
212
-                $pass = true;
213
-                // check tag
214
-                if ( ! empty($rule['tag']) AND $rule['tag'] != $child->getTag()->name() AND
215
-                    $rule['tag'] != '*'
216
-                ) {
217
-                    // child failed tag check
218
-                    $pass = false;
219
-                }
220
-
221
-                // check key
222
-                if ($pass AND ! is_null($rule['key'])) {
223
-                    if ($rule['noKey']) {
224
-                        if ( ! is_null($child->getAttribute($rule['key']))) {
225
-                            $pass = false;
226
-                        }
227
-                    } else {
228
-                        if ($rule['key'] != 'plaintext' and
229
-                            is_null($child->getAttribute($rule['key']))
230
-                        ) {
231
-                            $pass = false;
232
-                        }
233
-                    }
234
-                }
235
-
236
-                // compare values
237
-                if ($pass and ! is_null($rule['key']) and
238
-                    ! is_null($rule['value']) and $rule['value'] != '*'
239
-                ) {
240
-                    if ($rule['key'] == 'plaintext') {
241
-                        // plaintext search
242
-                        $nodeValue = $child->text();
243
-                    } else {
244
-                        // normal search
245
-                        $nodeValue = $child->getAttribute($rule['key']);
246
-                    }
247
-
248
-                    $check = $this->match($rule['operator'], $rule['value'], $nodeValue);
249
-
250
-                    // handle multiple classes
251
-                    if ( ! $check and $rule['key'] == 'class') {
252
-                        $childClasses = explode(' ', $child->getAttribute('class'));
253
-                        foreach ($childClasses as $class) {
254
-                            if ( ! empty($class)) {
255
-                                $check = $this->match($rule['operator'], $rule['value'], $class);
256
-                            }
257
-                            if ($check) {
258
-                                break;
259
-                            }
260
-                        }
261
-                    }
262
-
263
-                    if ( ! $check) {
264
-                        $pass = false;
265
-                    }
266
-                }
267
-
268
-                if ($pass) {
269
-                    // it passed all checks
270
-                    $return[] = $child;
271
-                } else {
272
-                    // this child failed to be matched
273
-                    if ($child->hasChildren()) {
274
-                        // we still want to check its children
275
-                        $children[] = $child;
276
-                    }
277
-                }
278
-
279
-                try {
280
-                    // get next child
281
-                    $child = $node->nextChild($child->id());
282
-                } catch (ChildNotFoundException $e) {
283
-                    // no more children
284
-                    $child = null;
285
-                }
286
-            }
287
-
288
-            if (( ! isset($options['checkGrandChildren']) ||
289
-                    $options['checkGrandChildren'])
290
-                && count($children) > 0
291
-            ) {
292
-                // we have children that failed but are not leaves.
293
-                $matches = $this->seek($children, $rule, $options);
294
-                foreach ($matches as $match) {
295
-                    $return[] = $match;
296
-                }
297
-            }
298
-        }
299
-
300
-        return $return;
301
-    }
302
-
303
-    /**
304
-     * Attempts to match the given arguments with the given operator.
305
-     *
306
-     * @param string $operator
307
-     * @param string $pattern
308
-     * @param string $value
309
-     * @return bool
310
-     */
311
-    protected function match($operator, $pattern, $value)
312
-    {
313
-        $value   = strtolower($value);
314
-        $pattern = strtolower($pattern);
315
-        switch ($operator) {
316
-            case '=':
317
-                return $value === $pattern;
318
-            case '!=':
319
-                return $value !== $pattern;
320
-            case '^=':
321
-                return preg_match('/^'.preg_quote($pattern, '/').'/', $value);
322
-            case '$=':
323
-                return preg_match('/'.preg_quote($pattern, '/').'$/', $value);
324
-            case '*=':
325
-                if ($pattern[0] == '/') {
326
-                    return preg_match($pattern, $value);
327
-                }
328
-
329
-                return preg_match("/".$pattern."/i", $value);
330
-        }
331
-
332
-        return false;
333
-    }
334
-
335
-    /**
336
-     * Attempts to figure out what the alteration will be for
337
-     * the next element.
338
-     *
339
-     * @param array $rule
340
-     * @return array
341
-     */
342
-    protected function alterNext($rule)
343
-    {
344
-        $options = [];
345
-        if ($rule['tag'] == '>') {
346
-            $options['checkGrandChildren'] = false;
347
-        }
348
-
349
-        return $options;
350
-    }
351
-
352
-    /**
353
-     * Flattens the option array.
354
-     *
355
-     * @param array $optionsArray
356
-     * @return array
357
-     */
358
-    protected function flattenOptions(array $optionsArray)
359
-    {
360
-        $options = [];
361
-        foreach ($optionsArray as $optionArray) {
362
-            foreach ($optionArray as $key => $option) {
363
-                $options[$key] = $option;
364
-            }
365
-        }
366
-
367
-        return $options;
368
-    }
16
+	/**
17
+	 * Pattern of CSS selectors, modified from 'mootools'
18
+	 *
19
+	 * @var string
20
+	 */
21
+	protected $pattern = "/([\w-:\*>]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
22
+
23
+	protected $selectors = [];
24
+
25
+	/**
26
+	 * Constructs with the selector string
27
+	 *
28
+	 * @param string $selector
29
+	 */
30
+	public function __construct($selector)
31
+	{
32
+		$this->parseSelectorString($selector);
33
+	}
34
+
35
+	/**
36
+	 * Returns the selectors that were found in __construct
37
+	 *
38
+	 * @return array
39
+	 */
40
+	public function getSelectors()
41
+	{
42
+		return $this->selectors;
43
+	}
44
+
45
+	/**
46
+	 * Attempts to find the selectors starting from the given
47
+	 * node object.
48
+	 *
49
+	 * @param AbstractNode $node
50
+	 * @return array|Collection
51
+	 */
52
+	public function find(AbstractNode $node)
53
+	{
54
+		$results = new Collection;
55
+		foreach ($this->selectors as $selector) {
56
+			$nodes = [$node];
57
+			if (count($selector) == 0) {
58
+				continue;
59
+			}
60
+
61
+			$options = [];
62
+			foreach ($selector as $rule) {
63
+				if ($rule['alterNext']) {
64
+					$options[] = $this->alterNext($rule);
65
+					continue;
66
+				}
67
+				$nodes = $this->seek($nodes, $rule, $options);
68
+				// clear the options
69
+				$options = [];
70
+			}
71
+
72
+			// this is the final set of nodes
73
+			foreach ($nodes as $result) {
74
+				$results[] = $result;
75
+			}
76
+		}
77
+
78
+		return $results;
79
+	}
80
+
81
+	/**
82
+	 * Parses the selector string
83
+	 *
84
+	 * @param string $selector
85
+	 */
86
+	protected function parseSelectorString($selector)
87
+	{
88
+		$matches = [];
89
+		preg_match_all($this->pattern, trim($selector).' ', $matches, PREG_SET_ORDER);
90
+
91
+		// skip tbody
92
+		$result = [];
93
+		foreach ($matches as $match) {
94
+			// default values
95
+			$tag       = strtolower(trim($match[1]));
96
+			$operator  = '=';
97
+			$key       = null;
98
+			$value     = null;
99
+			$noKey     = false;
100
+			$alterNext = false;
101
+
102
+			// check for elements that alter the behavior of the next element
103
+			if ($tag == '>') {
104
+				$alterNext = true;
105
+			}
106
+
107
+			// check for id selector
108
+			if ( ! empty($match[2])) {
109
+				$key   = 'id';
110
+				$value = $match[2];
111
+			}
112
+
113
+			// check for class selector
114
+			if ( ! empty($match[3])) {
115
+				$key   = 'class';
116
+				$value = $match[3];
117
+			}
118
+
119
+			// and final attribute selector
120
+			if ( ! empty($match[4])) {
121
+				$key = strtolower($match[4]);
122
+			}
123
+			if ( ! empty($match[5])) {
124
+				$operator = $match[5];
125
+			}
126
+			if ( ! empty($match[6])) {
127
+				$value = $match[6];
128
+			}
129
+
130
+			// check for elements that do not have a specified attribute
131
+			if (isset($key[0]) AND $key[0] == '!') {
132
+				$key   = substr($key, 1);
133
+				$noKey = true;
134
+			}
135
+
136
+			$result[] = [
137
+				'tag'       => $tag,
138
+				'key'       => $key,
139
+				'value'     => $value,
140
+				'operator'  => $operator,
141
+				'noKey'     => $noKey,
142
+				'alterNext' => $alterNext,
143
+			];
144
+			if (trim($match[7]) == ',') {
145
+				$this->selectors[] = $result;
146
+				$result            = [];
147
+			}
148
+		}
149
+
150
+		// save last results
151
+		if (count($result) > 0) {
152
+			$this->selectors[] = $result;
153
+		}
154
+	}
155
+
156
+	/**
157
+	 * Attempts to find all children that match the rule
158
+	 * given.
159
+	 *
160
+	 * @param array $nodes
161
+	 * @param array $rule
162
+	 * @param array $options
163
+	 * @return array
164
+	 * @recursive
165
+	 */
166
+	protected function seek(array $nodes, array $rule, array $options)
167
+	{
168
+		// XPath index
169
+		if ( ! empty($rule['tag']) AND ! empty($rule['key']) AND
170
+			is_numeric($rule['key'])
171
+		) {
172
+			$count = 0;
173
+			/** @var AbstractNode $node */
174
+			foreach ($nodes as $node) {
175
+				if ($rule['tag'] == '*' OR $rule['tag'] == $node->getTag()->name()) {
176
+					++$count;
177
+					if ($count == $rule['key']) {
178
+						// found the node we wanted
179
+						return [$node];
180
+					}
181
+				}
182
+			}
183
+
184
+			return [];
185
+		}
186
+
187
+		$options = $this->flattenOptions($options);
188
+
189
+		$return = [];
190
+		/** @var AbstractNode $node */
191
+		foreach ($nodes as $node) {
192
+			// check if we are a leaf
193
+			if ( ! $node->hasChildren()) {
194
+				continue;
195
+			}
196
+
197
+			$children = [];
198
+			$child    = $node->firstChild();
199
+			while ( ! is_null($child)) {
200
+				// wild card, grab all
201
+				if ($rule['tag'] == '*' AND is_null($rule['key'])) {
202
+					$return[] = $child;
203
+					try {
204
+						$child = $node->nextChild($child->id());
205
+					} catch (ChildNotFoundException $e) {
206
+						// no more children
207
+						$child = null;
208
+					}
209
+					continue;
210
+				}
211
+
212
+				$pass = true;
213
+				// check tag
214
+				if ( ! empty($rule['tag']) AND $rule['tag'] != $child->getTag()->name() AND
215
+					$rule['tag'] != '*'
216
+				) {
217
+					// child failed tag check
218
+					$pass = false;
219
+				}
220
+
221
+				// check key
222
+				if ($pass AND ! is_null($rule['key'])) {
223
+					if ($rule['noKey']) {
224
+						if ( ! is_null($child->getAttribute($rule['key']))) {
225
+							$pass = false;
226
+						}
227
+					} else {
228
+						if ($rule['key'] != 'plaintext' and
229
+							is_null($child->getAttribute($rule['key']))
230
+						) {
231
+							$pass = false;
232
+						}
233
+					}
234
+				}
235
+
236
+				// compare values
237
+				if ($pass and ! is_null($rule['key']) and
238
+					! is_null($rule['value']) and $rule['value'] != '*'
239
+				) {
240
+					if ($rule['key'] == 'plaintext') {
241
+						// plaintext search
242
+						$nodeValue = $child->text();
243
+					} else {
244
+						// normal search
245
+						$nodeValue = $child->getAttribute($rule['key']);
246
+					}
247
+
248
+					$check = $this->match($rule['operator'], $rule['value'], $nodeValue);
249
+
250
+					// handle multiple classes
251
+					if ( ! $check and $rule['key'] == 'class') {
252
+						$childClasses = explode(' ', $child->getAttribute('class'));
253
+						foreach ($childClasses as $class) {
254
+							if ( ! empty($class)) {
255
+								$check = $this->match($rule['operator'], $rule['value'], $class);
256
+							}
257
+							if ($check) {
258
+								break;
259
+							}
260
+						}
261
+					}
262
+
263
+					if ( ! $check) {
264
+						$pass = false;
265
+					}
266
+				}
267
+
268
+				if ($pass) {
269
+					// it passed all checks
270
+					$return[] = $child;
271
+				} else {
272
+					// this child failed to be matched
273
+					if ($child->hasChildren()) {
274
+						// we still want to check its children
275
+						$children[] = $child;
276
+					}
277
+				}
278
+
279
+				try {
280
+					// get next child
281
+					$child = $node->nextChild($child->id());
282
+				} catch (ChildNotFoundException $e) {
283
+					// no more children
284
+					$child = null;
285
+				}
286
+			}
287
+
288
+			if (( ! isset($options['checkGrandChildren']) ||
289
+					$options['checkGrandChildren'])
290
+				&& count($children) > 0
291
+			) {
292
+				// we have children that failed but are not leaves.
293
+				$matches = $this->seek($children, $rule, $options);
294
+				foreach ($matches as $match) {
295
+					$return[] = $match;
296
+				}
297
+			}
298
+		}
299
+
300
+		return $return;
301
+	}
302
+
303
+	/**
304
+	 * Attempts to match the given arguments with the given operator.
305
+	 *
306
+	 * @param string $operator
307
+	 * @param string $pattern
308
+	 * @param string $value
309
+	 * @return bool
310
+	 */
311
+	protected function match($operator, $pattern, $value)
312
+	{
313
+		$value   = strtolower($value);
314
+		$pattern = strtolower($pattern);
315
+		switch ($operator) {
316
+			case '=':
317
+				return $value === $pattern;
318
+			case '!=':
319
+				return $value !== $pattern;
320
+			case '^=':
321
+				return preg_match('/^'.preg_quote($pattern, '/').'/', $value);
322
+			case '$=':
323
+				return preg_match('/'.preg_quote($pattern, '/').'$/', $value);
324
+			case '*=':
325
+				if ($pattern[0] == '/') {
326
+					return preg_match($pattern, $value);
327
+				}
328
+
329
+				return preg_match("/".$pattern."/i", $value);
330
+		}
331
+
332
+		return false;
333
+	}
334
+
335
+	/**
336
+	 * Attempts to figure out what the alteration will be for
337
+	 * the next element.
338
+	 *
339
+	 * @param array $rule
340
+	 * @return array
341
+	 */
342
+	protected function alterNext($rule)
343
+	{
344
+		$options = [];
345
+		if ($rule['tag'] == '>') {
346
+			$options['checkGrandChildren'] = false;
347
+		}
348
+
349
+		return $options;
350
+	}
351
+
352
+	/**
353
+	 * Flattens the option array.
354
+	 *
355
+	 * @param array $optionsArray
356
+	 * @return array
357
+	 */
358
+	protected function flattenOptions(array $optionsArray)
359
+	{
360
+		$options = [];
361
+		foreach ($optionsArray as $optionArray) {
362
+			foreach ($optionArray as $key => $option) {
363
+				$options[$key] = $option;
364
+			}
365
+		}
366
+
367
+		return $options;
368
+	}
369 369
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/StaticDom.php 2 patches
Doc Comments   +3 added lines, -3 removed lines patch added patch discarded remove patch
@@ -57,7 +57,7 @@  discard block
 block discarded – undo
57 57
      * new object.
58 58
      *
59 59
      * @param string $str
60
-     * @return $this
60
+     * @return Dom
61 61
      */
62 62
     public static function load($str)
63 63
     {
@@ -72,7 +72,7 @@  discard block
 block discarded – undo
72 72
      * new object.
73 73
      *
74 74
      * @param string $file
75
-     * @return $this
75
+     * @return Dom
76 76
      */
77 77
     public static function loadFromFile($file)
78 78
     {
@@ -88,7 +88,7 @@  discard block
 block discarded – undo
88 88
      *
89 89
      * @param string $url
90 90
      * @param CurlInterface $curl
91
-     * @return $this
91
+     * @return Dom
92 92
      */
93 93
     public static function loadFromUrl($url, CurlInterface $curl = null)
94 94
     {
Please login to merge, or discard this patch.
Indentation   +88 added lines, -88 removed lines patch added patch discarded remove patch
@@ -11,102 +11,102 @@
 block discarded – undo
11 11
 final class StaticDom
12 12
 {
13 13
 
14
-    private static $dom = null;
14
+	private static $dom = null;
15 15
 
16
-    /**
17
-     * Attempts to call the given method on the most recent created dom
18
-     * from below.
19
-     *
20
-     * @param string $method
21
-     * @param array $arguments
22
-     * @throws NotLoadedException
23
-     * @return mixed
24
-     */
25
-    public static function __callStatic($method, $arguments)
26
-    {
27
-        if (self::$dom instanceof Dom) {
28
-            return call_user_func_array([self::$dom, $method], $arguments);
29
-        } else {
30
-            throw new NotLoadedException('The dom is not loaded. Can not call a dom method.');
31
-        }
32
-    }
16
+	/**
17
+	 * Attempts to call the given method on the most recent created dom
18
+	 * from below.
19
+	 *
20
+	 * @param string $method
21
+	 * @param array $arguments
22
+	 * @throws NotLoadedException
23
+	 * @return mixed
24
+	 */
25
+	public static function __callStatic($method, $arguments)
26
+	{
27
+		if (self::$dom instanceof Dom) {
28
+			return call_user_func_array([self::$dom, $method], $arguments);
29
+		} else {
30
+			throw new NotLoadedException('The dom is not loaded. Can not call a dom method.');
31
+		}
32
+	}
33 33
 
34
-    /**
35
-     * Call this to mount the static facade. The facade allows you to use
36
-     * this object as a $className.
37
-     *
38
-     * @param string $className
39
-     * @param Dom $dom
40
-     * @return bool
41
-     */
42
-    public static function mount($className = 'Dom', Dom $dom = null)
43
-    {
44
-        if (class_exists($className)) {
45
-            return false;
46
-        }
47
-        class_alias(__CLASS__, $className);
48
-        if ($dom instanceof Dom) {
49
-            self::$dom = $dom;
50
-        }
34
+	/**
35
+	 * Call this to mount the static facade. The facade allows you to use
36
+	 * this object as a $className.
37
+	 *
38
+	 * @param string $className
39
+	 * @param Dom $dom
40
+	 * @return bool
41
+	 */
42
+	public static function mount($className = 'Dom', Dom $dom = null)
43
+	{
44
+		if (class_exists($className)) {
45
+			return false;
46
+		}
47
+		class_alias(__CLASS__, $className);
48
+		if ($dom instanceof Dom) {
49
+			self::$dom = $dom;
50
+		}
51 51
 
52
-        return true;
53
-    }
52
+		return true;
53
+	}
54 54
 
55
-    /**
56
-     * Creates a new dom object and calls load() on the
57
-     * new object.
58
-     *
59
-     * @param string $str
60
-     * @return $this
61
-     */
62
-    public static function load($str)
63
-    {
64
-        $dom       = new Dom;
65
-        self::$dom = $dom;
55
+	/**
56
+	 * Creates a new dom object and calls load() on the
57
+	 * new object.
58
+	 *
59
+	 * @param string $str
60
+	 * @return $this
61
+	 */
62
+	public static function load($str)
63
+	{
64
+		$dom       = new Dom;
65
+		self::$dom = $dom;
66 66
 
67
-        return $dom->load($str);
68
-    }
67
+		return $dom->load($str);
68
+	}
69 69
 
70
-    /**
71
-     * Creates a new dom object and calls loadFromFile() on the
72
-     * new object.
73
-     *
74
-     * @param string $file
75
-     * @return $this
76
-     */
77
-    public static function loadFromFile($file)
78
-    {
79
-        $dom       = new Dom;
80
-        self::$dom = $dom;
70
+	/**
71
+	 * Creates a new dom object and calls loadFromFile() on the
72
+	 * new object.
73
+	 *
74
+	 * @param string $file
75
+	 * @return $this
76
+	 */
77
+	public static function loadFromFile($file)
78
+	{
79
+		$dom       = new Dom;
80
+		self::$dom = $dom;
81 81
 
82
-        return $dom->loadFromFile($file);
83
-    }
82
+		return $dom->loadFromFile($file);
83
+	}
84 84
 
85
-    /**
86
-     * Creates a new dom object and calls loadFromUrl() on the
87
-     * new object.
88
-     *
89
-     * @param string $url
90
-     * @param CurlInterface $curl
91
-     * @return $this
92
-     */
93
-    public static function loadFromUrl($url, CurlInterface $curl = null)
94
-    {
95
-        $dom       = new Dom;
96
-        self::$dom = $dom;
97
-        if (is_null($curl)) {
98
-            // use the default curl interface
99
-            $curl = new Curl;
100
-        }
85
+	/**
86
+	 * Creates a new dom object and calls loadFromUrl() on the
87
+	 * new object.
88
+	 *
89
+	 * @param string $url
90
+	 * @param CurlInterface $curl
91
+	 * @return $this
92
+	 */
93
+	public static function loadFromUrl($url, CurlInterface $curl = null)
94
+	{
95
+		$dom       = new Dom;
96
+		self::$dom = $dom;
97
+		if (is_null($curl)) {
98
+			// use the default curl interface
99
+			$curl = new Curl;
100
+		}
101 101
 
102
-        return $dom->loadFromUrl($url, $curl);
103
-    }
102
+		return $dom->loadFromUrl($url, $curl);
103
+	}
104 104
 
105
-    /**
106
-     * Sets the $dom variable to null.
107
-     */
108
-    public static function unload()
109
-    {
110
-        self::$dom = null;
111
-    }
105
+	/**
106
+	 * Sets the $dom variable to null.
107
+	 */
108
+	public static function unload()
109
+	{
110
+		self::$dom = null;
111
+	}
112 112
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/Options.php 1 patch
Indentation   +65 added lines, -65 removed lines patch added patch discarded remove patch
@@ -12,76 +12,76 @@
 block discarded – undo
12 12
 class Options
13 13
 {
14 14
 
15
-    /**
16
-     * The default options array
17
-     *
18
-     * @param array
19
-     */
20
-    protected $defaults = [
21
-        'whitespaceTextNode' => true,
22
-        'strict'             => false,
23
-        'enforceEncoding'    => null,
24
-        'cleanupInput'       => true,
25
-        'removeScripts'      => true,
26
-        'removeStyles'       => true,
27
-        'preserveLineBreaks' => false,
28
-    ];
15
+	/**
16
+	 * The default options array
17
+	 *
18
+	 * @var array
19
+	 */
20
+	protected $defaults = [
21
+		'whitespaceTextNode' => true,
22
+		'strict'             => false,
23
+		'enforceEncoding'    => null,
24
+		'cleanupInput'       => true,
25
+		'removeScripts'      => true,
26
+		'removeStyles'       => true,
27
+		'preserveLineBreaks' => false,
28
+	];
29 29
 
30
-    /**
31
-     * The list of all current options set.
32
-     *
33
-     * @param array
34
-     */
35
-    protected $options = [];
30
+	/**
31
+	 * The list of all current options set.
32
+	 *
33
+	 * @var array
34
+	 */
35
+	protected $options = [];
36 36
 
37
-    /**
38
-     * Sets the default options in the options array
39
-     */
40
-    public function __construct()
41
-    {
42
-        $this->options = $this->defaults;
43
-    }
37
+	/**
38
+	 * Sets the default options in the options array
39
+	 */
40
+	public function __construct()
41
+	{
42
+		$this->options = $this->defaults;
43
+	}
44 44
 
45
-    /**
46
-     * A magic get to call the get() method.
47
-     *
48
-     * @param string $key
49
-     * @return mixed
50
-     * @uses $this->get()
51
-     */
52
-    public function __get($key)
53
-    {
54
-        return $this->get($key);
55
-    }
45
+	/**
46
+	 * A magic get to call the get() method.
47
+	 *
48
+	 * @param string $key
49
+	 * @return mixed
50
+	 * @uses $this->get()
51
+	 */
52
+	public function __get($key)
53
+	{
54
+		return $this->get($key);
55
+	}
56 56
 
57
-    /**
58
-     * Sets a new options param to override the current option array.
59
-     *
60
-     * @param array $options
61
-     * @return $this
62
-     */
63
-    public function setOptions(array $options)
64
-    {
65
-        foreach ($options as $key => $option) {
66
-            $this->options[$key] = $option;
67
-        }
57
+	/**
58
+	 * Sets a new options param to override the current option array.
59
+	 *
60
+	 * @param array $options
61
+	 * @return $this
62
+	 */
63
+	public function setOptions(array $options)
64
+	{
65
+		foreach ($options as $key => $option) {
66
+			$this->options[$key] = $option;
67
+		}
68 68
 
69
-        return $this;
70
-    }
69
+		return $this;
70
+	}
71 71
 
72
-    /**
73
-     * Gets the value associated to the key, or null if the key is not
74
-     * found.
75
-     *
76
-     * @param string
77
-     * @return mixed
78
-     */
79
-    public function get($key)
80
-    {
81
-        if (isset($this->options[$key])) {
82
-            return $this->options[$key];
83
-        }
72
+	/**
73
+	 * Gets the value associated to the key, or null if the key is not
74
+	 * found.
75
+	 *
76
+	 * @param string
77
+	 * @return mixed
78
+	 */
79
+	public function get($key)
80
+	{
81
+		if (isset($this->options[$key])) {
82
+			return $this->options[$key];
83
+		}
84 84
 
85
-        return null;
86
-    }
85
+		return null;
86
+	}
87 87
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/Content.php 1 patch
Indentation   +240 added lines, -240 removed lines patch added patch discarded remove patch
@@ -9,244 +9,244 @@
 block discarded – undo
9 9
 class Content
10 10
 {
11 11
 
12
-    /**
13
-     * The content string.
14
-     *
15
-     * @var string
16
-     */
17
-    protected $content;
18
-
19
-    /**
20
-     * The size of the content.
21
-     *
22
-     * @var integer
23
-     */
24
-    protected $size;
25
-
26
-    /**
27
-     * The current position we are in the content.
28
-     *
29
-     * @var integer
30
-     */
31
-    protected $pos;
32
-
33
-    /**
34
-     * The following 4 strings are tags that are important to us.
35
-     *
36
-     * @var string
37
-     */
38
-    protected $blank = " \t\r\n";
39
-    protected $equal = ' =/>';
40
-    protected $slash = " />\r\n\t";
41
-    protected $attr = ' >';
42
-
43
-    /**
44
-     * Content constructor.
45
-     *
46
-     * @param $content
47
-     */
48
-    public function __construct($content)
49
-    {
50
-        $this->content = $content;
51
-        $this->size    = strlen($content);
52
-        $this->pos     = 0;
53
-    }
54
-
55
-    /**
56
-     * Returns the current position of the content.
57
-     *
58
-     * @return int
59
-     */
60
-    public function getPosition()
61
-    {
62
-        return $this->pos;
63
-    }
64
-
65
-    /**
66
-     * Gets the current character we are at.
67
-     *
68
-     * @param int $char
69
-     * @return string
70
-     */
71
-    public function char($char = null)
72
-    {
73
-        $pos = $this->pos;
74
-        if ( ! is_null($char)) {
75
-            $pos = $char;
76
-        }
77
-
78
-        if ( ! isset($this->content[$pos])) {
79
-            return '';
80
-        }
81
-
82
-        return $this->content[$pos];
83
-    }
84
-
85
-    /**
86
-     * Moves the current position forward.
87
-     *
88
-     * @param int $count
89
-     * @return $this
90
-     */
91
-    public function fastForward($count)
92
-    {
93
-        $this->pos += $count;
94
-
95
-        return $this;
96
-    }
97
-
98
-    /**
99
-     * Moves the current position backward.
100
-     *
101
-     * @param int $count
102
-     * @return $this
103
-     */
104
-    public function rewind($count)
105
-    {
106
-        $this->pos -= $count;
107
-        if ($this->pos < 0) {
108
-            $this->pos = 0;
109
-        }
110
-
111
-        return $this;
112
-    }
113
-
114
-    /**
115
-     * Copy the content until we find the given string.
116
-     *
117
-     * @param string $string
118
-     * @param bool $char
119
-     * @param bool $escape
120
-     * @return string
121
-     */
122
-    public function copyUntil($string, $char = false, $escape = false)
123
-    {
124
-        if ($this->pos >= $this->size) {
125
-            // nothing left
126
-            return '';
127
-        }
128
-
129
-        if ($escape) {
130
-            $position = $this->pos;
131
-            $found    = false;
132
-            while ( ! $found) {
133
-                $position = strpos($this->content, $string, $position);
134
-                if ($position === false) {
135
-                    // reached the end
136
-                    $found = true;
137
-                    continue;
138
-                }
139
-
140
-                if ($this->char($position - 1) == '\\') {
141
-                    // this character is escaped
142
-                    ++$position;
143
-                    continue;
144
-                }
145
-
146
-                $found = true;
147
-            }
148
-        } elseif ($char) {
149
-            $position = strcspn($this->content, $string, $this->pos);
150
-            $position += $this->pos;
151
-        } else {
152
-            $position = strpos($this->content, $string, $this->pos);
153
-        }
154
-
155
-        if ($position === false) {
156
-            // could not find character, just return the remaining of the content
157
-            $return    = substr($this->content, $this->pos, $this->size - $this->pos);
158
-            $this->pos = $this->size;
159
-
160
-            return $return;
161
-        }
162
-
163
-        if ($position == $this->pos) {
164
-            // we are at the right place
165
-            return '';
166
-        }
167
-
168
-        $return = substr($this->content, $this->pos, $position - $this->pos);
169
-        // set the new position
170
-        $this->pos = $position;
171
-
172
-        return $return;
173
-    }
174
-
175
-    /**
176
-     * Copies the content until the string is found and return it
177
-     * unless the 'unless' is found in the substring.
178
-     *
179
-     * @param string $string
180
-     * @param string $unless
181
-     * @return string
182
-     */
183
-    public function copyUntilUnless($string, $unless)
184
-    {
185
-        $lastPos = $this->pos;
186
-        $this->fastForward(1);
187
-        $foundString = $this->copyUntil($string, true, true);
188
-
189
-        $position = strcspn($foundString, $unless);
190
-        if ($position == strlen($foundString)) {
191
-            return $string.$foundString;
192
-        }
193
-        // rewind changes and return nothing
194
-        $this->pos = $lastPos;
195
-
196
-        return '';
197
-    }
198
-
199
-    /**
200
-     * Copies the content until it reaches the token string.,
201
-     *
202
-     * @param string $token
203
-     * @param bool $char
204
-     * @param bool $escape
205
-     * @return string
206
-     * @uses $this->copyUntil()
207
-     */
208
-    public function copyByToken($token, $char = false, $escape = false)
209
-    {
210
-        $string = $this->$token;
211
-
212
-        return $this->copyUntil($string, $char, $escape);
213
-    }
214
-
215
-    /**
216
-     * Skip a given set of characters.
217
-     *
218
-     * @param string $string
219
-     * @param bool $copy
220
-     * @return $this|string
221
-     */
222
-    public function skip($string, $copy = false)
223
-    {
224
-        $len = strspn($this->content, $string, $this->pos);
225
-
226
-        // make it chainable if they don't want a copy
227
-        $return = $this;
228
-        if ($copy) {
229
-            $return = substr($this->content, $this->pos, $len);
230
-        }
231
-
232
-        // update the position
233
-        $this->pos += $len;
234
-
235
-        return $return;
236
-    }
237
-
238
-    /**
239
-     * Skip a given token of pre-defined characters.
240
-     *
241
-     * @param string $token
242
-     * @param bool $copy
243
-     * @return null|string
244
-     * @uses $this->skip()
245
-     */
246
-    public function skipByToken($token, $copy = false)
247
-    {
248
-        $string = $this->$token;
249
-
250
-        return $this->skip($string, $copy);
251
-    }
12
+	/**
13
+	 * The content string.
14
+	 *
15
+	 * @var string
16
+	 */
17
+	protected $content;
18
+
19
+	/**
20
+	 * The size of the content.
21
+	 *
22
+	 * @var integer
23
+	 */
24
+	protected $size;
25
+
26
+	/**
27
+	 * The current position we are in the content.
28
+	 *
29
+	 * @var integer
30
+	 */
31
+	protected $pos;
32
+
33
+	/**
34
+	 * The following 4 strings are tags that are important to us.
35
+	 *
36
+	 * @var string
37
+	 */
38
+	protected $blank = " \t\r\n";
39
+	protected $equal = ' =/>';
40
+	protected $slash = " />\r\n\t";
41
+	protected $attr = ' >';
42
+
43
+	/**
44
+	 * Content constructor.
45
+	 *
46
+	 * @param $content
47
+	 */
48
+	public function __construct($content)
49
+	{
50
+		$this->content = $content;
51
+		$this->size    = strlen($content);
52
+		$this->pos     = 0;
53
+	}
54
+
55
+	/**
56
+	 * Returns the current position of the content.
57
+	 *
58
+	 * @return int
59
+	 */
60
+	public function getPosition()
61
+	{
62
+		return $this->pos;
63
+	}
64
+
65
+	/**
66
+	 * Gets the current character we are at.
67
+	 *
68
+	 * @param int $char
69
+	 * @return string
70
+	 */
71
+	public function char($char = null)
72
+	{
73
+		$pos = $this->pos;
74
+		if ( ! is_null($char)) {
75
+			$pos = $char;
76
+		}
77
+
78
+		if ( ! isset($this->content[$pos])) {
79
+			return '';
80
+		}
81
+
82
+		return $this->content[$pos];
83
+	}
84
+
85
+	/**
86
+	 * Moves the current position forward.
87
+	 *
88
+	 * @param int $count
89
+	 * @return $this
90
+	 */
91
+	public function fastForward($count)
92
+	{
93
+		$this->pos += $count;
94
+
95
+		return $this;
96
+	}
97
+
98
+	/**
99
+	 * Moves the current position backward.
100
+	 *
101
+	 * @param int $count
102
+	 * @return $this
103
+	 */
104
+	public function rewind($count)
105
+	{
106
+		$this->pos -= $count;
107
+		if ($this->pos < 0) {
108
+			$this->pos = 0;
109
+		}
110
+
111
+		return $this;
112
+	}
113
+
114
+	/**
115
+	 * Copy the content until we find the given string.
116
+	 *
117
+	 * @param string $string
118
+	 * @param bool $char
119
+	 * @param bool $escape
120
+	 * @return string
121
+	 */
122
+	public function copyUntil($string, $char = false, $escape = false)
123
+	{
124
+		if ($this->pos >= $this->size) {
125
+			// nothing left
126
+			return '';
127
+		}
128
+
129
+		if ($escape) {
130
+			$position = $this->pos;
131
+			$found    = false;
132
+			while ( ! $found) {
133
+				$position = strpos($this->content, $string, $position);
134
+				if ($position === false) {
135
+					// reached the end
136
+					$found = true;
137
+					continue;
138
+				}
139
+
140
+				if ($this->char($position - 1) == '\\') {
141
+					// this character is escaped
142
+					++$position;
143
+					continue;
144
+				}
145
+
146
+				$found = true;
147
+			}
148
+		} elseif ($char) {
149
+			$position = strcspn($this->content, $string, $this->pos);
150
+			$position += $this->pos;
151
+		} else {
152
+			$position = strpos($this->content, $string, $this->pos);
153
+		}
154
+
155
+		if ($position === false) {
156
+			// could not find character, just return the remaining of the content
157
+			$return    = substr($this->content, $this->pos, $this->size - $this->pos);
158
+			$this->pos = $this->size;
159
+
160
+			return $return;
161
+		}
162
+
163
+		if ($position == $this->pos) {
164
+			// we are at the right place
165
+			return '';
166
+		}
167
+
168
+		$return = substr($this->content, $this->pos, $position - $this->pos);
169
+		// set the new position
170
+		$this->pos = $position;
171
+
172
+		return $return;
173
+	}
174
+
175
+	/**
176
+	 * Copies the content until the string is found and return it
177
+	 * unless the 'unless' is found in the substring.
178
+	 *
179
+	 * @param string $string
180
+	 * @param string $unless
181
+	 * @return string
182
+	 */
183
+	public function copyUntilUnless($string, $unless)
184
+	{
185
+		$lastPos = $this->pos;
186
+		$this->fastForward(1);
187
+		$foundString = $this->copyUntil($string, true, true);
188
+
189
+		$position = strcspn($foundString, $unless);
190
+		if ($position == strlen($foundString)) {
191
+			return $string.$foundString;
192
+		}
193
+		// rewind changes and return nothing
194
+		$this->pos = $lastPos;
195
+
196
+		return '';
197
+	}
198
+
199
+	/**
200
+	 * Copies the content until it reaches the token string.,
201
+	 *
202
+	 * @param string $token
203
+	 * @param bool $char
204
+	 * @param bool $escape
205
+	 * @return string
206
+	 * @uses $this->copyUntil()
207
+	 */
208
+	public function copyByToken($token, $char = false, $escape = false)
209
+	{
210
+		$string = $this->$token;
211
+
212
+		return $this->copyUntil($string, $char, $escape);
213
+	}
214
+
215
+	/**
216
+	 * Skip a given set of characters.
217
+	 *
218
+	 * @param string $string
219
+	 * @param bool $copy
220
+	 * @return $this|string
221
+	 */
222
+	public function skip($string, $copy = false)
223
+	{
224
+		$len = strspn($this->content, $string, $this->pos);
225
+
226
+		// make it chainable if they don't want a copy
227
+		$return = $this;
228
+		if ($copy) {
229
+			$return = substr($this->content, $this->pos, $len);
230
+		}
231
+
232
+		// update the position
233
+		$this->pos += $len;
234
+
235
+		return $return;
236
+	}
237
+
238
+	/**
239
+	 * Skip a given token of pre-defined characters.
240
+	 *
241
+	 * @param string $token
242
+	 * @param bool $copy
243
+	 * @return null|string
244
+	 * @uses $this->skip()
245
+	 */
246
+	public function skipByToken($token, $copy = false)
247
+	{
248
+		$string = $this->$token;
249
+
250
+		return $this->skip($string, $copy);
251
+	}
252 252
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/Dom.php 1 patch
Indentation   +628 added lines, -628 removed lines patch added patch discarded remove patch
@@ -16,632 +16,632 @@
 block discarded – undo
16 16
 class Dom
17 17
 {
18 18
 
19
-    /**
20
-     * The charset we would like the output to be in.
21
-     *
22
-     * @var string
23
-     */
24
-    protected $defaultCharset = 'UTF-8';
25
-
26
-    /**
27
-     * Contains the root node of this dom tree.
28
-     *
29
-     * @var HtmlNode
30
-     */
31
-    public $root;
32
-
33
-    /**
34
-     * The raw version of the document string.
35
-     *
36
-     * @var string
37
-     */
38
-    protected $raw;
39
-
40
-    /**
41
-     * The document string.
42
-     *
43
-     * @var Content
44
-     */
45
-    protected $content = null;
46
-
47
-    /**
48
-     * The original file size of the document.
49
-     *
50
-     * @var int
51
-     */
52
-    protected $rawSize;
53
-
54
-    /**
55
-     * The size of the document after it is cleaned.
56
-     *
57
-     * @var int
58
-     */
59
-    protected $size;
60
-
61
-    /**
62
-     * A global options array to be used by all load calls.
63
-     *
64
-     * @var array
65
-     */
66
-    protected $globalOptions = [];
67
-
68
-    /**
69
-     * A persistent option object to be used for all options in the
70
-     * parsing of the file.
71
-     *
72
-     * @var Options
73
-     */
74
-    protected $options;
75
-
76
-    /**
77
-     * A list of tags which will always be self closing
78
-     *
79
-     * @var array
80
-     */
81
-    protected $selfClosing = [
82
-        'img',
83
-        'br',
84
-        'input',
85
-        'meta',
86
-        'link',
87
-        'hr',
88
-        'base',
89
-        'embed',
90
-        'spacer',
91
-    ];
92
-
93
-    /**
94
-     * Returns the inner html of the root node.
95
-     *
96
-     * @return string
97
-     */
98
-    public function __toString()
99
-    {
100
-        return $this->root->innerHtml();
101
-    }
102
-
103
-    /**
104
-     * A simple wrapper around the root node.
105
-     *
106
-     * @param string $name
107
-     * @return mixed
108
-     */
109
-    public function __get($name)
110
-    {
111
-        return $this->root->$name;
112
-    }
113
-
114
-    /**
115
-     * Attempts to load the dom from any resource, string, file, or URL.
116
-     *
117
-     * @param string $str
118
-     * @param array $options
119
-     * @return $this
120
-     */
121
-    public function load($str, $options = [])
122
-    {
123
-        // check if it's a file
124
-        if (strpos($str, "\n") === false && is_file($str)) {
125
-            return $this->loadFromFile($str, $options);
126
-        }
127
-        // check if it's a url
128
-        if (preg_match("/^https?:\/\//i", $str)) {
129
-            return $this->loadFromUrl($str, $options);
130
-        }
131
-
132
-        return $this->loadStr($str, $options);
133
-    }
134
-
135
-    /**
136
-     * Loads the dom from a document file/url
137
-     *
138
-     * @param string $file
139
-     * @param array $options
140
-     * @return $this
141
-     */
142
-    public function loadFromFile($file, $options = [])
143
-    {
144
-        return $this->loadStr(file_get_contents($file), $options);
145
-    }
146
-
147
-    /**
148
-     * Use a curl interface implementation to attempt to load
149
-     * the content from a url.
150
-     *
151
-     * @param string $url
152
-     * @param array $options
153
-     * @param CurlInterface $curl
154
-     * @return $this
155
-     */
156
-    public function loadFromUrl($url, $options = [], CurlInterface $curl = null)
157
-    {
158
-        if (is_null($curl)) {
159
-            // use the default curl interface
160
-            $curl = new Curl;
161
-        }
162
-        $content = $curl->get($url);
163
-
164
-        return $this->loadStr($content, $options);
165
-    }
166
-
167
-    /**
168
-     * Parsers the html of the given string. Used for load(), loadFromFile(),
169
-     * and loadFromUrl().
170
-     *
171
-     * @param string $str
172
-     * @param array $option
173
-     * @return $this
174
-     */
175
-    public function loadStr($str, $option)
176
-    {
177
-        $this->options = new Options;
178
-        $this->options->setOptions($this->globalOptions)
179
-                      ->setOptions($option);
180
-
181
-        $this->rawSize = strlen($str);
182
-        $this->raw     = $str;
183
-
184
-        $html = $this->clean($str);
185
-
186
-        $this->size    = strlen($str);
187
-        $this->content = new Content($html);
188
-
189
-        $this->parse();
190
-        $this->detectCharset();
191
-
192
-        return $this;
193
-    }
194
-
195
-    /**
196
-     * Sets a global options array to be used by all load calls.
197
-     *
198
-     * @param array $options
199
-     * @return $this
200
-     */
201
-    public function setOptions(array $options)
202
-    {
203
-        $this->globalOptions = $options;
204
-
205
-        return $this;
206
-    }
207
-
208
-    /**
209
-     * Find elements by css selector on the root node.
210
-     *
211
-     * @param string $selector
212
-     * @param int $nth
213
-     * @return array
214
-     */
215
-    public function find($selector, $nth = null)
216
-    {
217
-        $this->isLoaded();
218
-
219
-        return $this->root->find($selector, $nth);
220
-    }
221
-
222
-    /**
223
-     * Adds the tag (or tags in an array) to the list of tags that will always
224
-     * be self closing.
225
-     *
226
-     * @param string|array $tag
227
-     * @return $this
228
-     */
229
-    public function addSelfClosingTag($tag)
230
-    {
231
-        if ( ! is_array($tag)) {
232
-            $tag = [$tag];
233
-        }
234
-        foreach ($tag as $value) {
235
-            $this->selfClosing[] = $value;
236
-        }
237
-
238
-        return $this;
239
-    }
240
-
241
-    /**
242
-     * Removes the tag (or tags in an array) from the list of tags that will
243
-     * always be self closing.
244
-     *
245
-     * @param string|array $tag
246
-     * @return $this
247
-     */
248
-    public function removeSelfClosingTag($tag)
249
-    {
250
-        if ( ! is_array($tag)) {
251
-            $tag = [$tag];
252
-        }
253
-        $this->selfClosing = array_diff($this->selfClosing, $tag);
254
-
255
-        return $this;
256
-    }
257
-
258
-    /**
259
-     * Sets the list of self closing tags to empty.
260
-     *
261
-     * @return $this
262
-     */
263
-    public function clearSelfClosingTags()
264
-    {
265
-        $this->selfClosing = [];
266
-
267
-        return $this;
268
-    }
269
-
270
-    /**
271
-     * Simple wrapper function that returns the first child.
272
-     *
273
-     * @return \PHPHtmlParser\Dom\AbstractNode
274
-     */
275
-    public function firstChild()
276
-    {
277
-        $this->isLoaded();
278
-
279
-        return $this->root->firstChild();
280
-    }
281
-
282
-    /**
283
-     * Simple wrapper function that returns the last child.
284
-     *
285
-     * @return \PHPHtmlParser\Dom\AbstractNode
286
-     */
287
-    public function lastChild()
288
-    {
289
-        $this->isLoaded();
290
-
291
-        return $this->root->lastChild();
292
-    }
293
-
294
-    /**
295
-     * Simple wrapper function that returns an element by the
296
-     * id.
297
-     *
298
-     * @param string $id
299
-     * @return \PHPHtmlParser\Dom\AbstractNode
300
-     */
301
-    public function getElementById($id)
302
-    {
303
-        $this->isLoaded();
304
-
305
-        return $this->find('#'.$id, 0);
306
-    }
307
-
308
-    /**
309
-     * Simple wrapper function that returns all elements by
310
-     * tag name.
311
-     *
312
-     * @param string $name
313
-     * @return array
314
-     */
315
-    public function getElementsByTag($name)
316
-    {
317
-        $this->isLoaded();
318
-
319
-        return $this->find($name);
320
-    }
321
-
322
-    /**
323
-     * Simple wrapper function that returns all elements by
324
-     * class name.
325
-     *
326
-     * @param string $class
327
-     * @return array
328
-     */
329
-    public function getElementsByClass($class)
330
-    {
331
-        $this->isLoaded();
332
-
333
-        return $this->find('.'.$class);
334
-    }
335
-
336
-    /**
337
-     * Checks if the load methods have been called.
338
-     *
339
-     * @throws NotLoadedException
340
-     */
341
-    protected function isLoaded()
342
-    {
343
-        if (is_null($this->content)) {
344
-            throw new NotLoadedException('Content is not loaded!');
345
-        }
346
-    }
347
-
348
-    /**
349
-     * Cleans the html of any none-html information.
350
-     *
351
-     * @param string $str
352
-     * @return string
353
-     */
354
-    protected function clean($str)
355
-    {
356
-        if ($this->options->get('cleanupInput') != true) {
357
-            // skip entire cleanup step
358
-            return $str;
359
-        }
360
-
361
-        // clean out the \n\r
362
-        $replace = ' ';
363
-        if ($this->options->get('preserveLineBreaks')) {
364
-            $replace = '&#10';
365
-        }
366
-        $str = str_replace(["\r\n", "\r", "\n"], $replace, $str);
367
-
368
-        // strip the doctype
369
-        $str = mb_eregi_replace("<!doctype(.*?)>", '', $str);
370
-
371
-        // strip out comments
372
-        $str = mb_eregi_replace("<!--(.*?)-->", '', $str);
373
-
374
-        // strip out cdata
375
-        $str = mb_eregi_replace("<!\[CDATA\[(.*?)\]\]>", '', $str);
376
-
377
-        // strip out <script> tags
378
-        if ($this->options->get('removeScripts') == true) {
379
-            $str = mb_eregi_replace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str);
380
-            $str = mb_eregi_replace("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str);
381
-        }
382
-
383
-        // strip out <style> tags
384
-        if ($this->options->get('removeStyles') == true) {
385
-            $str = mb_eregi_replace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str);
386
-            $str = mb_eregi_replace("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str);
387
-        }
388
-
389
-        // strip out preformatted tags
390
-        $str = mb_eregi_replace("<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>", '', $str);
391
-
392
-        // strip out server side scripts
393
-        $str = mb_eregi_replace("(<\?)(.*?)(\?>)", '', $str);
394
-
395
-        // strip smarty scripts
396
-        $str = mb_eregi_replace("(\{\w)(.*?)(\})", '', $str);
397
-
398
-        return $str;
399
-    }
400
-
401
-    /**
402
-     * Attempts to parse the html in content.
403
-     */
404
-    protected function parse()
405
-    {
406
-        // add the root node
407
-        $this->root = new HtmlNode('root');
408
-        $activeNode = $this->root;
409
-        while ( ! is_null($activeNode)) {
410
-            $str = $this->content->copyUntil('<');
411
-            if ($str == '') {
412
-                $info = $this->parseTag();
413
-                if ( ! $info['status']) {
414
-                    // we are done here
415
-                    $activeNode = null;
416
-                    continue;
417
-                }
418
-
419
-                // check if it was a closing tag
420
-                if ($info['closing']) {
421
-                    $originalNode = $activeNode;
422
-                    while ($activeNode->getTag()->name() != $info['tag']) {
423
-                        $activeNode = $activeNode->getParent();
424
-                        if (is_null($activeNode)) {
425
-                            // we could not find opening tag
426
-                            $activeNode = $originalNode;
427
-                            break;
428
-                        }
429
-                    }
430
-                    if ( ! is_null($activeNode)) {
431
-                        $activeNode = $activeNode->getParent();
432
-                    }
433
-                    continue;
434
-                }
435
-
436
-                if ( ! isset($info['node'])) {
437
-                    continue;
438
-                }
439
-
440
-                /** @var AbstractNode $node */
441
-                $node = $info['node'];
442
-                $activeNode->addChild($node);
443
-
444
-                // check if node is self closing
445
-                if ( ! $node->getTag()->isSelfClosing()) {
446
-                    $activeNode = $node;
447
-                }
448
-            } else if ($this->options->whitespaceTextNode or
449
-                trim($str) != ''
450
-            ) {
451
-                // we found text we care about
452
-                $textNode = new TextNode($str);
453
-                $activeNode->addChild($textNode);
454
-            }
455
-        }
456
-    }
457
-
458
-    /**
459
-     * Attempt to parse a tag out of the content.
460
-     *
461
-     * @return array
462
-     * @throws StrictException
463
-     */
464
-    protected function parseTag()
465
-    {
466
-        $return = [
467
-            'status'  => false,
468
-            'closing' => false,
469
-            'node'    => null,
470
-        ];
471
-        if ($this->content->char() != '<') {
472
-            // we are not at the beginning of a tag
473
-            return $return;
474
-        }
475
-
476
-        // check if this is a closing tag
477
-        if ($this->content->fastForward(1)->char() == '/') {
478
-            // end tag
479
-            $tag = $this->content->fastForward(1)
480
-                                 ->copyByToken('slash', true);
481
-            // move to end of tag
482
-            $this->content->copyUntil('>');
483
-            $this->content->fastForward(1);
484
-
485
-            // check if this closing tag counts
486
-            $tag = strtolower($tag);
487
-            if (in_array($tag, $this->selfClosing)) {
488
-                $return['status'] = true;
489
-
490
-                return $return;
491
-            } else {
492
-                $return['status']  = true;
493
-                $return['closing'] = true;
494
-                $return['tag']     = strtolower($tag);
495
-            }
496
-
497
-            return $return;
498
-        }
499
-
500
-        $tag  = strtolower($this->content->copyByToken('slash', true));
501
-        $node = new HtmlNode($tag);
502
-
503
-        // attributes
504
-        while ($this->content->char() != '>' and
505
-            $this->content->char() != '/') {
506
-            $space = $this->content->skipByToken('blank', true);
507
-            if (empty($space)) {
508
-                $this->content->fastForward(1);
509
-                continue;
510
-            }
511
-
512
-            $name = $this->content->copyByToken('equal', true);
513
-            if ($name == '/') {
514
-                break;
515
-            }
516
-
517
-            if (empty($name)) {
518
-                $this->content->fastForward(1);
519
-                continue;
520
-            }
521
-
522
-            $this->content->skipByToken('blank');
523
-            if ($this->content->char() == '=') {
524
-                $attr = [];
525
-                $this->content->fastForward(1)
526
-                              ->skipByToken('blank');
527
-                switch ($this->content->char()) {
528
-                    case '"':
529
-                        $attr['doubleQuote'] = true;
530
-                        $this->content->fastForward(1);
531
-                        $string = $this->content->copyUntil('"', true, true);
532
-                        do {
533
-                            $moreString = $this->content->copyUntilUnless('"', '=>');
534
-                            $string .= $moreString;
535
-                        } while ( ! empty($moreString));
536
-                        $attr['value'] = $string;
537
-                        $this->content->fastForward(1);
538
-                        $node->getTag()->$name = $attr;
539
-                        break;
540
-                    case "'":
541
-                        $attr['doubleQuote'] = false;
542
-                        $this->content->fastForward(1);
543
-                        $string = $this->content->copyUntil("'", true, true);
544
-                        do {
545
-                            $moreString = $this->content->copyUntilUnless("'", '=>');
546
-                            $string .= $moreString;
547
-                        } while ( ! empty($moreString));
548
-                        $attr['value'] = $string;
549
-                        $this->content->fastForward(1);
550
-                        $node->getTag()->$name = $attr;
551
-                        break;
552
-                    default:
553
-                        $attr['doubleQuote']   = true;
554
-                        $attr['value']         = $this->content->copyByToken('attr', true);
555
-                        $node->getTag()->$name = $attr;
556
-                        break;
557
-                }
558
-            } else {
559
-                // no value attribute
560
-                if ($this->options->strict) {
561
-                    // can't have this in strict html
562
-                    $character = $this->content->getPosition();
563
-                    throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
564
-                }
565
-                $node->getTag()->$name = [
566
-                    'value'       => null,
567
-                    'doubleQuote' => true,
568
-                ];
569
-                if ($this->content->char() != '>') {
570
-                    $this->content->rewind(1);
571
-                }
572
-            }
573
-        }
574
-
575
-        $this->content->skipByToken('blank');
576
-        if ($this->content->char() == '/') {
577
-            // self closing tag
578
-            $node->getTag()->selfClosing();
579
-            $this->content->fastForward(1);
580
-        } elseif (in_array($tag, $this->selfClosing)) {
581
-
582
-            // Should be a self closing tag, check if we are strict
583
-            if ($this->options->strict) {
584
-                $character = $this->content->getPosition();
585
-                throw new StrictException("Tag '$tag' is not self closing! (character #$character)");
586
-            }
587
-
588
-            // We force self closing on this tag.
589
-            $node->getTag()->selfClosing();
590
-        }
591
-
592
-        $this->content->fastForward(1);
593
-
594
-        $return['status'] = true;
595
-        $return['node']   = $node;
596
-
597
-        return $return;
598
-    }
599
-
600
-    /**
601
-     * Attempts to detect the charset that the html was sent in.
602
-     *
603
-     * @return bool
604
-     */
605
-    protected function detectCharset()
606
-    {
607
-        // set the default
608
-        $encode = new Encode;
609
-        $encode->from($this->defaultCharset);
610
-        $encode->to($this->defaultCharset);
611
-
612
-        if ( ! is_null($this->options->enforceEncoding)) {
613
-            //  they want to enforce the given encoding
614
-            $encode->from($this->options->enforceEncoding);
615
-            $encode->to($this->options->enforceEncoding);
616
-
617
-            return false;
618
-        }
619
-
620
-        $meta = $this->root->find('meta[http-equiv=Content-Type]', 0);
621
-        if (is_null($meta)) {
622
-            // could not find meta tag
623
-            $this->root->propagateEncoding($encode);
624
-
625
-            return false;
626
-        }
627
-        $content = $meta->content;
628
-        if (empty($content)) {
629
-            // could not find content
630
-            $this->root->propagateEncoding($encode);
631
-
632
-            return false;
633
-        }
634
-        $matches = [];
635
-        if (preg_match('/charset=(.+)/', $content, $matches)) {
636
-            $encode->from(trim($matches[1]));
637
-            $this->root->propagateEncoding($encode);
638
-
639
-            return true;
640
-        }
641
-
642
-        // no charset found
643
-        $this->root->propagateEncoding($encode);
644
-
645
-        return false;
646
-    }
19
+	/**
20
+	 * The charset we would like the output to be in.
21
+	 *
22
+	 * @var string
23
+	 */
24
+	protected $defaultCharset = 'UTF-8';
25
+
26
+	/**
27
+	 * Contains the root node of this dom tree.
28
+	 *
29
+	 * @var HtmlNode
30
+	 */
31
+	public $root;
32
+
33
+	/**
34
+	 * The raw version of the document string.
35
+	 *
36
+	 * @var string
37
+	 */
38
+	protected $raw;
39
+
40
+	/**
41
+	 * The document string.
42
+	 *
43
+	 * @var Content
44
+	 */
45
+	protected $content = null;
46
+
47
+	/**
48
+	 * The original file size of the document.
49
+	 *
50
+	 * @var int
51
+	 */
52
+	protected $rawSize;
53
+
54
+	/**
55
+	 * The size of the document after it is cleaned.
56
+	 *
57
+	 * @var int
58
+	 */
59
+	protected $size;
60
+
61
+	/**
62
+	 * A global options array to be used by all load calls.
63
+	 *
64
+	 * @var array
65
+	 */
66
+	protected $globalOptions = [];
67
+
68
+	/**
69
+	 * A persistent option object to be used for all options in the
70
+	 * parsing of the file.
71
+	 *
72
+	 * @var Options
73
+	 */
74
+	protected $options;
75
+
76
+	/**
77
+	 * A list of tags which will always be self closing
78
+	 *
79
+	 * @var array
80
+	 */
81
+	protected $selfClosing = [
82
+		'img',
83
+		'br',
84
+		'input',
85
+		'meta',
86
+		'link',
87
+		'hr',
88
+		'base',
89
+		'embed',
90
+		'spacer',
91
+	];
92
+
93
+	/**
94
+	 * Returns the inner html of the root node.
95
+	 *
96
+	 * @return string
97
+	 */
98
+	public function __toString()
99
+	{
100
+		return $this->root->innerHtml();
101
+	}
102
+
103
+	/**
104
+	 * A simple wrapper around the root node.
105
+	 *
106
+	 * @param string $name
107
+	 * @return mixed
108
+	 */
109
+	public function __get($name)
110
+	{
111
+		return $this->root->$name;
112
+	}
113
+
114
+	/**
115
+	 * Attempts to load the dom from any resource, string, file, or URL.
116
+	 *
117
+	 * @param string $str
118
+	 * @param array $options
119
+	 * @return $this
120
+	 */
121
+	public function load($str, $options = [])
122
+	{
123
+		// check if it's a file
124
+		if (strpos($str, "\n") === false && is_file($str)) {
125
+			return $this->loadFromFile($str, $options);
126
+		}
127
+		// check if it's a url
128
+		if (preg_match("/^https?:\/\//i", $str)) {
129
+			return $this->loadFromUrl($str, $options);
130
+		}
131
+
132
+		return $this->loadStr($str, $options);
133
+	}
134
+
135
+	/**
136
+	 * Loads the dom from a document file/url
137
+	 *
138
+	 * @param string $file
139
+	 * @param array $options
140
+	 * @return $this
141
+	 */
142
+	public function loadFromFile($file, $options = [])
143
+	{
144
+		return $this->loadStr(file_get_contents($file), $options);
145
+	}
146
+
147
+	/**
148
+	 * Use a curl interface implementation to attempt to load
149
+	 * the content from a url.
150
+	 *
151
+	 * @param string $url
152
+	 * @param array $options
153
+	 * @param CurlInterface $curl
154
+	 * @return $this
155
+	 */
156
+	public function loadFromUrl($url, $options = [], CurlInterface $curl = null)
157
+	{
158
+		if (is_null($curl)) {
159
+			// use the default curl interface
160
+			$curl = new Curl;
161
+		}
162
+		$content = $curl->get($url);
163
+
164
+		return $this->loadStr($content, $options);
165
+	}
166
+
167
+	/**
168
+	 * Parsers the html of the given string. Used for load(), loadFromFile(),
169
+	 * and loadFromUrl().
170
+	 *
171
+	 * @param string $str
172
+	 * @param array $option
173
+	 * @return $this
174
+	 */
175
+	public function loadStr($str, $option)
176
+	{
177
+		$this->options = new Options;
178
+		$this->options->setOptions($this->globalOptions)
179
+					  ->setOptions($option);
180
+
181
+		$this->rawSize = strlen($str);
182
+		$this->raw     = $str;
183
+
184
+		$html = $this->clean($str);
185
+
186
+		$this->size    = strlen($str);
187
+		$this->content = new Content($html);
188
+
189
+		$this->parse();
190
+		$this->detectCharset();
191
+
192
+		return $this;
193
+	}
194
+
195
+	/**
196
+	 * Sets a global options array to be used by all load calls.
197
+	 *
198
+	 * @param array $options
199
+	 * @return $this
200
+	 */
201
+	public function setOptions(array $options)
202
+	{
203
+		$this->globalOptions = $options;
204
+
205
+		return $this;
206
+	}
207
+
208
+	/**
209
+	 * Find elements by css selector on the root node.
210
+	 *
211
+	 * @param string $selector
212
+	 * @param int $nth
213
+	 * @return array
214
+	 */
215
+	public function find($selector, $nth = null)
216
+	{
217
+		$this->isLoaded();
218
+
219
+		return $this->root->find($selector, $nth);
220
+	}
221
+
222
+	/**
223
+	 * Adds the tag (or tags in an array) to the list of tags that will always
224
+	 * be self closing.
225
+	 *
226
+	 * @param string|array $tag
227
+	 * @return $this
228
+	 */
229
+	public function addSelfClosingTag($tag)
230
+	{
231
+		if ( ! is_array($tag)) {
232
+			$tag = [$tag];
233
+		}
234
+		foreach ($tag as $value) {
235
+			$this->selfClosing[] = $value;
236
+		}
237
+
238
+		return $this;
239
+	}
240
+
241
+	/**
242
+	 * Removes the tag (or tags in an array) from the list of tags that will
243
+	 * always be self closing.
244
+	 *
245
+	 * @param string|array $tag
246
+	 * @return $this
247
+	 */
248
+	public function removeSelfClosingTag($tag)
249
+	{
250
+		if ( ! is_array($tag)) {
251
+			$tag = [$tag];
252
+		}
253
+		$this->selfClosing = array_diff($this->selfClosing, $tag);
254
+
255
+		return $this;
256
+	}
257
+
258
+	/**
259
+	 * Sets the list of self closing tags to empty.
260
+	 *
261
+	 * @return $this
262
+	 */
263
+	public function clearSelfClosingTags()
264
+	{
265
+		$this->selfClosing = [];
266
+
267
+		return $this;
268
+	}
269
+
270
+	/**
271
+	 * Simple wrapper function that returns the first child.
272
+	 *
273
+	 * @return \PHPHtmlParser\Dom\AbstractNode
274
+	 */
275
+	public function firstChild()
276
+	{
277
+		$this->isLoaded();
278
+
279
+		return $this->root->firstChild();
280
+	}
281
+
282
+	/**
283
+	 * Simple wrapper function that returns the last child.
284
+	 *
285
+	 * @return \PHPHtmlParser\Dom\AbstractNode
286
+	 */
287
+	public function lastChild()
288
+	{
289
+		$this->isLoaded();
290
+
291
+		return $this->root->lastChild();
292
+	}
293
+
294
+	/**
295
+	 * Simple wrapper function that returns an element by the
296
+	 * id.
297
+	 *
298
+	 * @param string $id
299
+	 * @return \PHPHtmlParser\Dom\AbstractNode
300
+	 */
301
+	public function getElementById($id)
302
+	{
303
+		$this->isLoaded();
304
+
305
+		return $this->find('#'.$id, 0);
306
+	}
307
+
308
+	/**
309
+	 * Simple wrapper function that returns all elements by
310
+	 * tag name.
311
+	 *
312
+	 * @param string $name
313
+	 * @return array
314
+	 */
315
+	public function getElementsByTag($name)
316
+	{
317
+		$this->isLoaded();
318
+
319
+		return $this->find($name);
320
+	}
321
+
322
+	/**
323
+	 * Simple wrapper function that returns all elements by
324
+	 * class name.
325
+	 *
326
+	 * @param string $class
327
+	 * @return array
328
+	 */
329
+	public function getElementsByClass($class)
330
+	{
331
+		$this->isLoaded();
332
+
333
+		return $this->find('.'.$class);
334
+	}
335
+
336
+	/**
337
+	 * Checks if the load methods have been called.
338
+	 *
339
+	 * @throws NotLoadedException
340
+	 */
341
+	protected function isLoaded()
342
+	{
343
+		if (is_null($this->content)) {
344
+			throw new NotLoadedException('Content is not loaded!');
345
+		}
346
+	}
347
+
348
+	/**
349
+	 * Cleans the html of any none-html information.
350
+	 *
351
+	 * @param string $str
352
+	 * @return string
353
+	 */
354
+	protected function clean($str)
355
+	{
356
+		if ($this->options->get('cleanupInput') != true) {
357
+			// skip entire cleanup step
358
+			return $str;
359
+		}
360
+
361
+		// clean out the \n\r
362
+		$replace = ' ';
363
+		if ($this->options->get('preserveLineBreaks')) {
364
+			$replace = '&#10';
365
+		}
366
+		$str = str_replace(["\r\n", "\r", "\n"], $replace, $str);
367
+
368
+		// strip the doctype
369
+		$str = mb_eregi_replace("<!doctype(.*?)>", '', $str);
370
+
371
+		// strip out comments
372
+		$str = mb_eregi_replace("<!--(.*?)-->", '', $str);
373
+
374
+		// strip out cdata
375
+		$str = mb_eregi_replace("<!\[CDATA\[(.*?)\]\]>", '', $str);
376
+
377
+		// strip out <script> tags
378
+		if ($this->options->get('removeScripts') == true) {
379
+			$str = mb_eregi_replace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str);
380
+			$str = mb_eregi_replace("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str);
381
+		}
382
+
383
+		// strip out <style> tags
384
+		if ($this->options->get('removeStyles') == true) {
385
+			$str = mb_eregi_replace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str);
386
+			$str = mb_eregi_replace("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str);
387
+		}
388
+
389
+		// strip out preformatted tags
390
+		$str = mb_eregi_replace("<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>", '', $str);
391
+
392
+		// strip out server side scripts
393
+		$str = mb_eregi_replace("(<\?)(.*?)(\?>)", '', $str);
394
+
395
+		// strip smarty scripts
396
+		$str = mb_eregi_replace("(\{\w)(.*?)(\})", '', $str);
397
+
398
+		return $str;
399
+	}
400
+
401
+	/**
402
+	 * Attempts to parse the html in content.
403
+	 */
404
+	protected function parse()
405
+	{
406
+		// add the root node
407
+		$this->root = new HtmlNode('root');
408
+		$activeNode = $this->root;
409
+		while ( ! is_null($activeNode)) {
410
+			$str = $this->content->copyUntil('<');
411
+			if ($str == '') {
412
+				$info = $this->parseTag();
413
+				if ( ! $info['status']) {
414
+					// we are done here
415
+					$activeNode = null;
416
+					continue;
417
+				}
418
+
419
+				// check if it was a closing tag
420
+				if ($info['closing']) {
421
+					$originalNode = $activeNode;
422
+					while ($activeNode->getTag()->name() != $info['tag']) {
423
+						$activeNode = $activeNode->getParent();
424
+						if (is_null($activeNode)) {
425
+							// we could not find opening tag
426
+							$activeNode = $originalNode;
427
+							break;
428
+						}
429
+					}
430
+					if ( ! is_null($activeNode)) {
431
+						$activeNode = $activeNode->getParent();
432
+					}
433
+					continue;
434
+				}
435
+
436
+				if ( ! isset($info['node'])) {
437
+					continue;
438
+				}
439
+
440
+				/** @var AbstractNode $node */
441
+				$node = $info['node'];
442
+				$activeNode->addChild($node);
443
+
444
+				// check if node is self closing
445
+				if ( ! $node->getTag()->isSelfClosing()) {
446
+					$activeNode = $node;
447
+				}
448
+			} else if ($this->options->whitespaceTextNode or
449
+				trim($str) != ''
450
+			) {
451
+				// we found text we care about
452
+				$textNode = new TextNode($str);
453
+				$activeNode->addChild($textNode);
454
+			}
455
+		}
456
+	}
457
+
458
+	/**
459
+	 * Attempt to parse a tag out of the content.
460
+	 *
461
+	 * @return array
462
+	 * @throws StrictException
463
+	 */
464
+	protected function parseTag()
465
+	{
466
+		$return = [
467
+			'status'  => false,
468
+			'closing' => false,
469
+			'node'    => null,
470
+		];
471
+		if ($this->content->char() != '<') {
472
+			// we are not at the beginning of a tag
473
+			return $return;
474
+		}
475
+
476
+		// check if this is a closing tag
477
+		if ($this->content->fastForward(1)->char() == '/') {
478
+			// end tag
479
+			$tag = $this->content->fastForward(1)
480
+								 ->copyByToken('slash', true);
481
+			// move to end of tag
482
+			$this->content->copyUntil('>');
483
+			$this->content->fastForward(1);
484
+
485
+			// check if this closing tag counts
486
+			$tag = strtolower($tag);
487
+			if (in_array($tag, $this->selfClosing)) {
488
+				$return['status'] = true;
489
+
490
+				return $return;
491
+			} else {
492
+				$return['status']  = true;
493
+				$return['closing'] = true;
494
+				$return['tag']     = strtolower($tag);
495
+			}
496
+
497
+			return $return;
498
+		}
499
+
500
+		$tag  = strtolower($this->content->copyByToken('slash', true));
501
+		$node = new HtmlNode($tag);
502
+
503
+		// attributes
504
+		while ($this->content->char() != '>' and
505
+			$this->content->char() != '/') {
506
+			$space = $this->content->skipByToken('blank', true);
507
+			if (empty($space)) {
508
+				$this->content->fastForward(1);
509
+				continue;
510
+			}
511
+
512
+			$name = $this->content->copyByToken('equal', true);
513
+			if ($name == '/') {
514
+				break;
515
+			}
516
+
517
+			if (empty($name)) {
518
+				$this->content->fastForward(1);
519
+				continue;
520
+			}
521
+
522
+			$this->content->skipByToken('blank');
523
+			if ($this->content->char() == '=') {
524
+				$attr = [];
525
+				$this->content->fastForward(1)
526
+							  ->skipByToken('blank');
527
+				switch ($this->content->char()) {
528
+					case '"':
529
+						$attr['doubleQuote'] = true;
530
+						$this->content->fastForward(1);
531
+						$string = $this->content->copyUntil('"', true, true);
532
+						do {
533
+							$moreString = $this->content->copyUntilUnless('"', '=>');
534
+							$string .= $moreString;
535
+						} while ( ! empty($moreString));
536
+						$attr['value'] = $string;
537
+						$this->content->fastForward(1);
538
+						$node->getTag()->$name = $attr;
539
+						break;
540
+					case "'":
541
+						$attr['doubleQuote'] = false;
542
+						$this->content->fastForward(1);
543
+						$string = $this->content->copyUntil("'", true, true);
544
+						do {
545
+							$moreString = $this->content->copyUntilUnless("'", '=>');
546
+							$string .= $moreString;
547
+						} while ( ! empty($moreString));
548
+						$attr['value'] = $string;
549
+						$this->content->fastForward(1);
550
+						$node->getTag()->$name = $attr;
551
+						break;
552
+					default:
553
+						$attr['doubleQuote']   = true;
554
+						$attr['value']         = $this->content->copyByToken('attr', true);
555
+						$node->getTag()->$name = $attr;
556
+						break;
557
+				}
558
+			} else {
559
+				// no value attribute
560
+				if ($this->options->strict) {
561
+					// can't have this in strict html
562
+					$character = $this->content->getPosition();
563
+					throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
564
+				}
565
+				$node->getTag()->$name = [
566
+					'value'       => null,
567
+					'doubleQuote' => true,
568
+				];
569
+				if ($this->content->char() != '>') {
570
+					$this->content->rewind(1);
571
+				}
572
+			}
573
+		}
574
+
575
+		$this->content->skipByToken('blank');
576
+		if ($this->content->char() == '/') {
577
+			// self closing tag
578
+			$node->getTag()->selfClosing();
579
+			$this->content->fastForward(1);
580
+		} elseif (in_array($tag, $this->selfClosing)) {
581
+
582
+			// Should be a self closing tag, check if we are strict
583
+			if ($this->options->strict) {
584
+				$character = $this->content->getPosition();
585
+				throw new StrictException("Tag '$tag' is not self closing! (character #$character)");
586
+			}
587
+
588
+			// We force self closing on this tag.
589
+			$node->getTag()->selfClosing();
590
+		}
591
+
592
+		$this->content->fastForward(1);
593
+
594
+		$return['status'] = true;
595
+		$return['node']   = $node;
596
+
597
+		return $return;
598
+	}
599
+
600
+	/**
601
+	 * Attempts to detect the charset that the html was sent in.
602
+	 *
603
+	 * @return bool
604
+	 */
605
+	protected function detectCharset()
606
+	{
607
+		// set the default
608
+		$encode = new Encode;
609
+		$encode->from($this->defaultCharset);
610
+		$encode->to($this->defaultCharset);
611
+
612
+		if ( ! is_null($this->options->enforceEncoding)) {
613
+			//  they want to enforce the given encoding
614
+			$encode->from($this->options->enforceEncoding);
615
+			$encode->to($this->options->enforceEncoding);
616
+
617
+			return false;
618
+		}
619
+
620
+		$meta = $this->root->find('meta[http-equiv=Content-Type]', 0);
621
+		if (is_null($meta)) {
622
+			// could not find meta tag
623
+			$this->root->propagateEncoding($encode);
624
+
625
+			return false;
626
+		}
627
+		$content = $meta->content;
628
+		if (empty($content)) {
629
+			// could not find content
630
+			$this->root->propagateEncoding($encode);
631
+
632
+			return false;
633
+		}
634
+		$matches = [];
635
+		if (preg_match('/charset=(.+)/', $content, $matches)) {
636
+			$encode->from(trim($matches[1]));
637
+			$this->root->propagateEncoding($encode);
638
+
639
+			return true;
640
+		}
641
+
642
+		// no charset found
643
+		$this->root->propagateEncoding($encode);
644
+
645
+		return false;
646
+	}
647 647
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/CurlInterface.php 1 patch
Indentation   +7 added lines, -7 removed lines patch added patch discarded remove patch
@@ -9,11 +9,11 @@
 block discarded – undo
9 9
 interface CurlInterface
10 10
 {
11 11
 
12
-    /**
13
-     * This method should return the content of the url in a string
14
-     *
15
-     * @param string $url
16
-     * @return string
17
-     */
18
-    public function get($url);
12
+	/**
13
+	 * This method should return the content of the url in a string
14
+	 *
15
+	 * @param string $url
16
+	 * @return string
17
+	 */
18
+	public function get($url);
19 19
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/Curl.php 1 patch
Indentation   +23 added lines, -23 removed lines patch added patch discarded remove patch
@@ -11,31 +11,31 @@
 block discarded – undo
11 11
 class Curl implements CurlInterface
12 12
 {
13 13
 
14
-    /**
15
-     * A simple curl implementation to get the content of the url.
16
-     *
17
-     * @param string $url
18
-     * @return string
19
-     * @throws CurlException
20
-     */
21
-    public function get($url)
22
-    {
23
-        $ch = curl_init($url);
14
+	/**
15
+	 * A simple curl implementation to get the content of the url.
16
+	 *
17
+	 * @param string $url
18
+	 * @return string
19
+	 * @throws CurlException
20
+	 */
21
+	public function get($url)
22
+	{
23
+		$ch = curl_init($url);
24 24
 
25
-        if ( ! ini_get('open_basedir')) {
26
-            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
27
-        }
25
+		if ( ! ini_get('open_basedir')) {
26
+			curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
27
+		}
28 28
 
29
-        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
30
-        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
29
+		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
30
+		curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
31 31
 
32
-        $content = curl_exec($ch);
33
-        if ($content === false) {
34
-            // there was a problem
35
-            $error = curl_error($ch);
36
-            throw new CurlException('Error retrieving "'.$url.'" ('.$error.')');
37
-        }
32
+		$content = curl_exec($ch);
33
+		if ($content === false) {
34
+			// there was a problem
35
+			$error = curl_error($ch);
36
+			throw new CurlException('Error retrieving "'.$url.'" ('.$error.')');
37
+		}
38 38
 
39
-        return $content;
40
-    }
39
+		return $content;
40
+	}
41 41
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/Dom/MockNode.php 1 patch
Indentation   +12 added lines, -12 removed lines patch added patch discarded remove patch
@@ -11,19 +11,19 @@
 block discarded – undo
11 11
 class MockNode extends AbstractNode
12 12
 {
13 13
 
14
-    public function innerHtml()
15
-    {
16
-    }
14
+	public function innerHtml()
15
+	{
16
+	}
17 17
 
18
-    public function outerHtml()
19
-    {
20
-    }
18
+	public function outerHtml()
19
+	{
20
+	}
21 21
 
22
-    public function text()
23
-    {
24
-    }
22
+	public function text()
23
+	{
24
+	}
25 25
 
26
-    protected function clear()
27
-    {
28
-    }
26
+	protected function clear()
27
+	{
28
+	}
29 29
 }
Please login to merge, or discard this patch.
src/PHPHtmlParser/Dom/TextNode.php 1 patch
Indentation   +82 added lines, -82 removed lines patch added patch discarded remove patch
@@ -9,97 +9,97 @@
 block discarded – undo
9 9
 class TextNode extends AbstractNode
10 10
 {
11 11
 
12
-    /**
13
-     * This is a text node.
14
-     *
15
-     * @var Tag
16
-     */
17
-    protected $tag;
12
+	/**
13
+	 * This is a text node.
14
+	 *
15
+	 * @var Tag
16
+	 */
17
+	protected $tag;
18 18
 
19
-    /**
20
-     * This is the text in this node.
21
-     *
22
-     * @var string
23
-     */
24
-    protected $text;
19
+	/**
20
+	 * This is the text in this node.
21
+	 *
22
+	 * @var string
23
+	 */
24
+	protected $text;
25 25
 
26
-    /**
27
-     * This is the converted version of the text.
28
-     *
29
-     * @var string
30
-     */
31
-    protected $convertedText = null;
26
+	/**
27
+	 * This is the converted version of the text.
28
+	 *
29
+	 * @var string
30
+	 */
31
+	protected $convertedText = null;
32 32
 
33
-    /**
34
-     * Sets the text for this node.
35
-     *
36
-     * @param string $text
37
-     */
38
-    public function __construct($text)
39
-    {
40
-        // remove double spaces
41
-        $text = mb_ereg_replace('\s+', ' ', $text);
33
+	/**
34
+	 * Sets the text for this node.
35
+	 *
36
+	 * @param string $text
37
+	 */
38
+	public function __construct($text)
39
+	{
40
+		// remove double spaces
41
+		$text = mb_ereg_replace('\s+', ' ', $text);
42 42
 
43
-        // restore line breaks
44
-        $text = str_replace('&#10', "\n", $text);
43
+		// restore line breaks
44
+		$text = str_replace('&#10', "\n", $text);
45 45
 
46
-        $this->text = $text;
47
-        $this->tag  = new Tag('text');
48
-        parent::__construct();
49
-    }
46
+		$this->text = $text;
47
+		$this->tag  = new Tag('text');
48
+		parent::__construct();
49
+	}
50 50
 
51
-    /**
52
-     * Returns the text of this node.
53
-     *
54
-     * @return string
55
-     */
56
-    public function text()
57
-    {
58
-        // convert charset
59
-        if ( ! is_null($this->encode)) {
60
-            if ( ! is_null($this->convertedText)) {
61
-                // we already know the converted value
62
-                return $this->convertedText;
63
-            }
64
-            $text = $this->encode->convert($this->text);
51
+	/**
52
+	 * Returns the text of this node.
53
+	 *
54
+	 * @return string
55
+	 */
56
+	public function text()
57
+	{
58
+		// convert charset
59
+		if ( ! is_null($this->encode)) {
60
+			if ( ! is_null($this->convertedText)) {
61
+				// we already know the converted value
62
+				return $this->convertedText;
63
+			}
64
+			$text = $this->encode->convert($this->text);
65 65
 
66
-            // remember the conversion
67
-            $this->convertedText = $text;
66
+			// remember the conversion
67
+			$this->convertedText = $text;
68 68
 
69
-            return $text;
70
-        } else {
71
-            return $this->text;
72
-        }
73
-    }
69
+			return $text;
70
+		} else {
71
+			return $this->text;
72
+		}
73
+	}
74 74
 
75
-    /**
76
-     * This node has no html, just return the text.
77
-     *
78
-     * @return string
79
-     * @uses $this->text()
80
-     */
81
-    public function innerHtml()
82
-    {
83
-        return $this->text();
84
-    }
75
+	/**
76
+	 * This node has no html, just return the text.
77
+	 *
78
+	 * @return string
79
+	 * @uses $this->text()
80
+	 */
81
+	public function innerHtml()
82
+	{
83
+		return $this->text();
84
+	}
85 85
 
86
-    /**
87
-     * This node has no html, just return the text.
88
-     *
89
-     * @return string
90
-     * @uses $this->text()
91
-     */
92
-    public function outerHtml()
93
-    {
94
-        return $this->text();
95
-    }
86
+	/**
87
+	 * This node has no html, just return the text.
88
+	 *
89
+	 * @return string
90
+	 * @uses $this->text()
91
+	 */
92
+	public function outerHtml()
93
+	{
94
+		return $this->text();
95
+	}
96 96
 
97
-    /**
98
-     * Call this when something in the node tree has changed. Like a child has been added
99
-     * or a parent has been changed.
100
-     */
101
-    protected function clear()
102
-    {
103
-        $this->convertedText = null;
104
-    }
97
+	/**
98
+	 * Call this when something in the node tree has changed. Like a child has been added
99
+	 * or a parent has been changed.
100
+	 */
101
+	protected function clear()
102
+	{
103
+		$this->convertedText = null;
104
+	}
105 105
 }
Please login to merge, or discard this patch.