1
|
|
|
<?php declare(strict_types=1); |
2
|
|
|
namespace PHPHtmlParser; |
3
|
|
|
|
4
|
|
|
/**
 * Class Options
 *
 * Holds the configuration flags used by the parser. Every instance starts
 * with a copy of the defaults; values are changed through the fluent setters
 * or setOptions() and read through get() or magic property access.
 *
 * @package PHPHtmlParser
 * @property-read bool        $whitespaceTextNode
 * @property-read bool        $strict
 * @property-read string|null $enforceEncoding
 * @property-read bool        $cleanupInput
 * @property-read bool        $removeScripts
 * @property-read bool        $removeStyles
 * @property-read bool        $preserveLineBreaks
 * @property-read bool        $removeDoubleSpace
 * @property-read bool        $removeSmartyScripts
 * @property-read bool        $depthFirstSearch
 * @property-read bool        $htmlSpecialCharsDecode
 */
class Options
{
    /**
     * The default options array.
     *
     * @var array
     */
    protected $defaults = [
        'whitespaceTextNode'     => true,
        'strict'                 => false,
        'enforceEncoding'        => null,
        'cleanupInput'           => true,
        'removeScripts'          => true,
        'removeStyles'           => true,
        'preserveLineBreaks'     => false,
        'removeDoubleSpace'      => true,
        'removeSmartyScripts'    => true,
        'depthFirstSearch'       => false,
        'htmlSpecialCharsDecode' => false,
    ];

    /**
     * The list of all current options set.
     *
     * @var array
     */
    protected $options = [];

    /**
     * Sets the default options in the options array.
     */
    public function __construct()
    {
        $this->options = $this->defaults;
    }

    /**
     * The whitespaceTextNode option, by default true, tells the parser to save text nodes even if the
     * content of the node is empty (only whitespace). Setting it to false will ignore all whitespace-only
     * text nodes found in the document.
     *
     * @param bool $value
     * @return Options
     */
    public function setWhitespaceTextNode(bool $value): self
    {
        $this->options['whitespaceTextNode'] = $value;

        return $this;
    }

    /**
     * Strict, by default false, will throw a StrictException if it finds that the html is not strictly
     * compliant (all tags must have a closing tag, no attribute without a value, etc.).
     *
     * @param bool $value
     * @return Options
     */
    public function setStrict(bool $value): self
    {
        $this->options['strict'] = $value;

        return $this;
    }

    /**
     * The enforceEncoding option, by default null, will enforce a character set to be used for reading
     * the content and returning the content in that encoding. Setting it to null will trigger an attempt
     * to figure out the encoding from within the content of the string given instead.
     *
     * @param string|null $value
     * @return Options
     */
    public function setEnforceEncoding(?string $value): self
    {
        $this->options['enforceEncoding'] = $value;

        return $this;
    }

    /**
     * Set this to false to skip the entire clean up phase of the parser. Defaults to true.
     *
     * @param bool $value
     * @return Options
     */
    public function setCleanupInput(bool $value): self
    {
        $this->options['cleanupInput'] = $value;

        return $this;
    }

    /**
     * Set this to false to skip removing the script tags from the document body. This might have
     * adverse effects. Defaults to true.
     *
     * NOTE: Has no effect when cleanupInput is false, because the entire clean up phase is skipped then.
     *
     * @param bool $value
     * @return Options
     */
    public function setRemoveScripts(bool $value): self
    {
        $this->options['removeScripts'] = $value;

        return $this;
    }

    /**
     * Set this to false to skip removing of style tags from the document body. This might have
     * adverse effects. Defaults to true.
     *
     * NOTE: Has no effect when cleanupInput is false, because the entire clean up phase is skipped then.
     *
     * @param bool $value
     * @return Options
     */
    public function setRemoveStyles(bool $value): self
    {
        $this->options['removeStyles'] = $value;

        return $this;
    }

    /**
     * Preserves line breaks if set to true. If set to false line breaks are cleaned up
     * as part of the input clean up process. Defaults to false.
     *
     * NOTE: Has no effect when cleanupInput is false, because the entire clean up phase is skipped then.
     *
     * @param bool $value
     * @return Options
     */
    public function setPreserveLineBreaks(bool $value): self
    {
        $this->options['preserveLineBreaks'] = $value;

        return $this;
    }

    /**
     * Set this to false if you want to preserve whitespace inside of text nodes. It is set to true
     * by default.
     *
     * @param bool $value
     * @return Options
     */
    public function setRemoveDoubleSpace(bool $value): self
    {
        $this->options['removeDoubleSpace'] = $value;

        return $this;
    }

    /**
     * Set this to false if you want to preserve smarty script found in the html content. It is set
     * to true by default.
     *
     * @param bool $value
     * @return Options
     */
    public function setRemoveSmartyScripts(bool $value): self
    {
        $this->options['removeSmartyScripts'] = $value;

        return $this;
    }

    /**
     * By default this is set to false for legacy support. Setting this to true will change the behavior
     * of find to order elements by depth first. This will properly preserve the order of elements as they
     * were in the HTML.
     *
     * @param bool $value
     * @return Options
     * @deprecated This option will be removed in version 3.0.0 with the new behavior being as if it was set to true.
     */
    public function setDepthFirstSearch(bool $value): self
    {
        $this->options['depthFirstSearch'] = $value;

        return $this;
    }

    /**
     * By default this is set to false. Setting this to true will apply the php function
     * htmlspecialchars_decode to all attribute values and text nodes.
     *
     * @param bool $value
     * @return Options
     */
    public function setHtmlSpecialCharsDecode(bool $value): self
    {
        $this->options['htmlSpecialCharsDecode'] = $value;

        return $this;
    }

    /**
     * A magic get to call the get() method.
     *
     * @param string $key
     * @return mixed
     * @uses $this->get()
     */
    public function __get(string $key)
    {
        return $this->get($key);
    }

    /**
     * Magic isset so that isset(), empty() and the ?? operator work on the
     * options exposed through __get(). Without it PHP treats every magic
     * property as missing, regardless of the stored value.
     *
     * Follows isset() semantics: an option whose value is null (such as the
     * default enforceEncoding) is reported as not set.
     *
     * @param string $key
     * @return bool
     */
    public function __isset(string $key): bool
    {
        return isset($this->options[$key]);
    }

    /**
     * Overrides the current options with the given key/value pairs. Keys that
     * are not present in $options keep their current value; keys that are not
     * known yet are added as-is (no validation is performed).
     *
     * @param array $options
     * @return Options
     * @chainable
     */
    public function setOptions(array $options): self
    {
        // array_replace keeps existing keys in place and appends new ones,
        // exactly like assigning each pair in a loop.
        $this->options = array_replace($this->options, $options);

        return $this;
    }

    /**
     * Gets the value associated to the key, or null if the key is not
     * found (or its stored value is null).
     *
     * @param string $key
     * @return mixed
     */
    public function get(string $key)
    {
        return $this->options[$key] ?? null;
    }
}
227
|
|
|
|