Checks if the types of the passed arguments in a function/method call are compatible.
<?php

/**
 * Create wrapper P and BR elements in HTML depending on newlines. Useful when
 * users use newlines to signal line and paragraph breaks. In all cases output
 * should be well-formed markup.
 *
 * In DIV elements, Ps are only added when there would be at
 * least two of them.
 *
 * @package    Elgg.Core
 * @subpackage Output
 */
class ElggAutoP {

	/**
	 * @var string Character set declared to the HTML parser in the wrapper document
	 */
	public $encoding = 'UTF-8';

	/**
	 * @var DOMDocument
	 */
	protected $_doc = null;

	/**
	 * @var DOMXPath
	 */
	protected $_xpath = null;

	/**
	 * Names of elements treated as block-level (never moved into a paragraph).
	 *
	 * Declared as a whitespace-separated string; the constructor converts it
	 * into a list of names.
	 *
	 * @var string|string[]
	 */
	protected $_blocks = 'address article area aside blockquote caption col colgroup dd
		details div dl dt fieldset figure figcaption footer form h1 h2 h3 h4 h5 h6 header
		hr hgroup legend map math menu nav noscript p pre section select style summary
		table tbody td tfoot th thead tr ul ol option li';

	/**
	 * Names of inline elements.
	 *
	 * Declared as a whitespace-separated string; the constructor converts it
	 * into a list of names.
	 *
	 * @var string|string[]
	 */
	protected $_inlines = 'a abbr audio b button canvas caption cite code command datalist
		del dfn em embed i iframe img input ins kbd keygen label map mark meter object
		output progress q rp rt ruby s samp script select small source span strong style
		sub sup textarea time var video wbr';

	/**
	 * Descend into these elements to add Ps
	 *
	 * Declared as a whitespace-separated string; the constructor converts it
	 * into a list of names.
	 *
	 * @var string|string[]
	 */
	protected $_descendList = 'article aside blockquote body details div footer form
		header section';

	/**
	 * Add Ps inside these elements
	 *
	 * Declared as a whitespace-separated string; the constructor converts it
	 * into a list of names.
	 *
	 * @var string|string[]
	 */
	protected $_alterList = 'article aside blockquote body details div footer header
		section';

	/**
	 * @var string Token prefix used to build temporary placeholders (AMP, NL, BR)
	 */
	protected $_unique = '';

	/**
	 * Constructor
	 *
	 * Converts the element-name lists into arrays and computes the placeholder prefix.
	 */
	public function __construct() {
		$this->_blocks = $this->splitNames($this->_blocks);
		$this->_descendList = $this->splitNames($this->_descendList);
		$this->_alterList = $this->splitNames($this->_alterList);
		$this->_inlines = $this->splitNames($this->_inlines);
		$this->_unique = md5(__FILE__);
	}

	/**
	 * Turn a whitespace-separated list of element names into an array.
	 *
	 * A value that is already an array (e.g. a property overridden in a subclass)
	 * is returned unchanged instead of being passed to preg_split().
	 *
	 * @param string|string[] $names Whitespace-separated names, or a ready-made list
	 * @return string[]
	 */
	private function splitNames($names) {
		if (is_array($names)) {
			return $names;
		}

		return preg_split('@\\s+@', (string) $names, -1, PREG_SPLIT_NO_EMPTY);
	}

	/**
	 * Load markup into $this->_doc without allowing external entities to load.
	 *
	 * libxml_disable_entity_loader() is deprecated as of PHP 8.0 (external entity
	 * loading is disabled by default in the libxml versions PHP 8 requires), so
	 * it is only toggled on older runtimes. The previous setting is always restored.
	 *
	 * @param string $html Complete HTML document
	 * @return bool Result of DOMDocument::loadHTML()
	 */
	private function loadHtmlWithoutEntities($html) {
		$toggle = \PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader');

		$previous = false;
		if ($toggle) {
			// Do not load entities. May be unnecessary, better safe than sorry
			$previous = libxml_disable_entity_loader(true);
		}

		$loaded = $this->_doc->loadHTML($html);

		if ($toggle) {
			libxml_disable_entity_loader($previous);
		}

		return (bool) $loaded;
	}

	/**
	 * Create wrapper P and BR elements in HTML depending on newlines. Useful when
	 * users use newlines to signal line and paragraph breaks. In all cases output
	 * should be well-formed markup.
	 *
	 * In DIV elements, Ps are only added when there would be at
	 * least two of them.
	 *
	 * @param string $html snippet
	 * @return string|false output or false if parse error occurred
	 * @throws \RuntimeException if the BODY element cannot be located in the parsed document
	 */
	public function process($html) {
		// normalize whitespace
		$html = str_replace(["\r\n", "\r"], "\n", $html);

		// allows preserving entities untouched
		$html = str_replace('&', $this->_unique . 'AMP', $html);

		$this->_doc = new DOMDocument();

		// parse to DOM, suppressing loadHTML warnings
		// http://www.php.net/manual/en/domdocument.loadhtml.php#95463
		libxml_use_internal_errors(true);

		$wrapped = "<html><meta http-equiv='content-type' "
			. "content='text/html; charset={$this->encoding}'><body>{$html}</body>"
			. "</html>";
		if (!$this->loadHtmlWithoutEntities($wrapped)) {
			return false;
		}

		$this->_xpath = new DOMXPath($this->_doc);

		// start processing recursively at the BODY element
		$nodeList = $this->_xpath->query('//body[1]');
		if ($nodeList->item(0) instanceof DOMText) {
			// May be https://github.com/facebook/hhvm/issues/7745
			// Um... try again?
			$this->_xpath = new DOMXPath($this->_doc);
			$nodeList = $this->_xpath->query('//body[1]');

			if ($nodeList->item(0) instanceof DOMText) {
				// not going to work
				throw new \RuntimeException('DOMXPath::query for BODY element returned a text node');
			}
		}
		$this->addParagraphs($nodeList->item(0));

		// serialize back to HTML
		$html = $this->_doc->saveHTML();

		// Note: we create <autop> elements, which will later be converted to paragraphs

		// split AUTOPs into multiples at /\n\n+/
		$html = preg_replace('/(' . $this->_unique . 'NL){2,}/', '</autop><autop>', $html);
		$html = str_replace(
			[$this->_unique . 'BR', $this->_unique . 'NL', '<br>'],
			'<br />',
			$html
		);
		$html = str_replace('<br /></autop>', '</autop>', $html);

		// re-parse so we can handle new AUTOP elements
		if (!$this->loadHtmlWithoutEntities($html)) {
			return false;
		}

		// must re-create XPath object after DOM load
		$this->_xpath = new DOMXPath($this->_doc);

		// strip AUTOPs that only have comments/whitespace
		foreach ($this->_xpath->query('//autop') as $autop) {
			/* @var DOMElement $autop */
			$hasContent = (trim($autop->textContent) !== '');
			if (!$hasContent) {
				foreach ($autop->childNodes as $node) {
					if ($node->nodeType === XML_ELEMENT_NODE) {
						$hasContent = true;
						break;
					}
				}
			}
			if (!$hasContent) {
				// mark to be later replaced w/ preg_replace (faster than moving nodes out)
				$autop->setAttribute("r", "1");
			}
		}

		// If a DIV contains a single AUTOP, remove it
		foreach ($this->_xpath->query('//div') as $el) {
			/* @var DOMElement $el */
			$autops = $this->_xpath->query('./autop', $el);
			if ($autops->length === 1) {
				$firstAutop = $autops->item(0);
				/* @var DOMElement $firstAutop */
				$firstAutop->setAttribute("r", "1");
			}
		}

		$html = $this->_doc->saveHTML();

		// trim to the contents of BODY
		$bodyStart = strpos($html, '<body>');
		$bodyEnd = strpos($html, '</body>', $bodyStart + 6);
		$html = substr($html, $bodyStart + 6, $bodyEnd - $bodyStart - 6);

		// strip AUTOPs that should be removed; the "s" modifier lets the content
		// span newlines (e.g. a multi-line comment), otherwise the marked tag
		// would survive into the output
		$html = preg_replace('@<autop r="1">(.*?)</autop>@s', '\\1', $html);

		// commit to converting AUTOPs to Ps
		$html = str_replace('<autop>', "\n<p>", $html);
		$html = str_replace('</autop>', "</p>\n", $html);

		$html = str_replace('<br>', '<br />', $html);
		$html = str_replace($this->_unique . 'AMP', '&', $html);
		return $html;
	}

	/**
	 * Add P and BR elements as necessary
	 *
	 * Phase 1 walks the given element and every queued descendant from the
	 * descend list, moving runs of text/inline nodes into temporary AUTOP
	 * elements and replacing newlines in text with NL placeholders.
	 * Phase 2 walks the inline elements collected in phase 1 and replaces the
	 * newlines inside them with BR placeholders.
	 *
	 * @param DOMElement $el DOM element
	 * @return void
	 */
	protected function addParagraphs(DOMElement $el) {
		// no need to call recursively, just queue up
		$elsToProcess = [$el];
		$inlinesToProcess = [];
		while ($el = array_shift($elsToProcess)) {
			// if true, we can alter all child nodes, if not, we'll just call
			// addParagraphs on each element in the descendInto list
			$alterInline = in_array($el->nodeName, $this->_alterList, true);

			// inside affected elements, we want to trim leading whitespace from
			// the first text node
			$ltrimFirstTextNode = true;

			// should we open a new AUTOP element to move inline elements into?
			$openP = true;
			$autop = null;

			// after BR, ignore a newline
			$isFollowingBr = false;

			$node = $el->firstChild;
			while (null !== $node) {
				if ($alterInline && $openP) {
					$openP = false;
					// create a P to move inline content into (this may be removed later)
					$autop = $el->insertBefore($this->_doc->createElement('autop'), $node);
				}

				$isElement = ($node->nodeType === XML_ELEMENT_NODE);
				$isBlock = $isElement && in_array($node->nodeName, $this->_blocks, true);
				if ($isElement && !$isBlock) {
					// if we start with an inline element we don't need to do this
					$ltrimFirstTextNode = false;
				}

				if ($alterInline) {
					if ($isElement) {
						$isFollowingBr = ($node->nodeName === 'br');
					}

					if ($node->nodeType === XML_TEXT_NODE) {
						$next = $node->nextSibling;
						$isLastInline = (!$next
							|| ($next->nodeType === XML_ELEMENT_NODE
								&& in_array($next->nodeName, $this->_blocks, true)));

						$nodeText = $node->nodeValue;

						if ($ltrimFirstTextNode) {
							// we're at the beginning of a sequence of text/inline elements
							$nodeText = ltrim($nodeText);
							$ltrimFirstTextNode = false;
						}
						if ($isFollowingBr && preg_match('@^[ \\t]*\\n[ \\t]*@', $nodeText, $m)) {
							// if a user ends a line with <br>, don't add a second BR
							$nodeText = substr($nodeText, strlen($m[0]));
						}
						if ($isLastInline) {
							// we're at the end of a sequence of text/inline elements
							$nodeText = rtrim($nodeText);
						}
						$nodeText = str_replace("\n", $this->_unique . 'NL', $nodeText);

						// alter node in place, then move into AUTOP; the loop cursor is
						// advanced first because moving the node changes its siblings
						$tmpNode = $node;
						$node = $next;
						$tmpNode->nodeValue = $nodeText;
						$autop->appendChild($tmpNode);

						continue;
					}
				}

				if ($isBlock || !$node->nextSibling) {
					if ($isBlock && in_array($node->nodeName, $this->_descendList, true)) {
						$elsToProcess[] = $node;
					}
					$openP = true;
					$ltrimFirstTextNode = true;
				}

				if ($alterInline && !$isBlock) {
					$tmpNode = $node;
					if ($isElement && false !== strpos($tmpNode->textContent, "\n")) {
						$inlinesToProcess[] = $tmpNode;
					}
					$node = $node->nextSibling;
					$autop->appendChild($tmpNode);
					continue;
				}

				$node = $node->nextSibling;
			}
		}

		// handle inline nodes
		// no need to recurse, just queue up
		while ($el = array_shift($inlinesToProcess)) {
			$ignoreLeadingNewline = false;
			foreach ($el->childNodes as $node) {
				if ($node->nodeType === XML_ELEMENT_NODE) {
					// compare the tag name (the HTML parser reports it in lower case);
					// an element's nodeValue is its text content, never "BR"
					if ($node->nodeName === 'br') {
						$ignoreLeadingNewline = true;
					} else {
						$ignoreLeadingNewline = false;
						if (false !== strpos($node->textContent, "\n")) {
							$inlinesToProcess[] = $node;
						}
					}
					continue;
				}

				if ($node->nodeType === XML_TEXT_NODE) {
					$text = (string) $node->nodeValue;
					if ($ignoreLeadingNewline && $text !== '' && $text[0] === "\n") {
						// the user already ended this line with <br>; don't add a second BR
						$text = (string) substr($text, 1);
					}
					// only the text immediately following the BR is affected
					$ignoreLeadingNewline = false;
					$node->nodeValue = str_replace("\n", $this->_unique . 'BR', $text);
				}
			}
		}
	}
}
||||
337 |