HTMLPurifier_Injector_AutoParagraph - Code Metrics - XOOPS/XoopsCore25 - Measure and Improve Code Quality continuously with Scrutinizer

HTMLPurifier_Injector_AutoParagraph B
last analyzed 2025-06-05 02:27 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	344
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	45
eloc	108
dl	0
loc	344
rs	8.8
c	0
b	0
f	0

7 Methods

Rating	Name	Size	Complexity
C	handleElement()	85	12
A	_isInline()	3	1
A	_pLookAhead()	17	4
A	_checkNeedsP()	20	5
A	_pStart()	5	1
B	_splitText()	77	11
B	handleText()	67	11

How to fix Complexity

<?php

/**
 * Injector that auto paragraphs text in the root node based on
 * double-spacing.
 * @todo Ensure all states are unit tested, including variations as well.
 * @todo Make a graph of the flow control for this Injector.
 */
class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
{
    /**
     * Name under which this injector is identified.
     * NOTE(review): how the parent HTMLPurifier_Injector consumes this value
     * is not visible in this file -- confirm against the base class.
     * @type string
     */
    public $name = 'AutoParagraph';

    /**
     * Elements this injector relies on. The injector generates <p> tokens
     * itself (see _pStart()), hence the single entry 'p'.
     * NOTE(review): presumably inspected by the base class before the
     * injector is activated -- confirm against the base class.
     * @type array
     */
    public $needed = array('p');

    /**
     * Builds a fresh opening <p> token.
     *
     * The token's armor entry 'MakeWellFormed_TagClosedError' is switched on
     * before the token is handed back.
     * NOTE(review): presumably this keeps MakeWellFormed from reporting an
     * error when it later auto-closes the generated paragraph -- confirm.
     * @return HTMLPurifier_Token_Start
     */
    private function _pStart()
    {
        $paragraphOpen = new HTMLPurifier_Token_Start('p');
        $paragraphOpen->armor['MakeWellFormed_TagClosedError'] = true;

        return $paragraphOpen;
    }

    /**
     * Decides whether a text token needs paragraph handling and, if it does,
     * replaces $token (passed by reference) with an array of tokens.
     *
     * Cases, using this injector's traditional "State" labels:
     *  - State 1.x: the parent allows <p> and we are either in the anonymous
     *    root node or the text contains a double newline.
     *  - State 2.x: the parent allows <p>, we are nested inside an element and
     *    the text has no double newline; a <p> is opened only when
     *    _pLookAhead() asks for one.
     *  - State 3.x: the parent is itself a <p>; the text is split on double
     *    newlines.
     *  - State 4.x: any other parent (e.g. <b>); the token is left untouched.
     *
     * @param HTMLPurifier_Token_Text $token
     */
    public function handleText(&$token)
    {
        $text = $token->data;

        if (!$this->allowsElement('p')) {
            // The parent cannot contain a <p>. The only situation we act on
            // is the parent being a <p> itself.
            if (!empty($this->currentNesting)) {
                $parent = $this->currentNesting[count($this->currentNesting) - 1];
                if ($parent->name == 'p') {
                    // State 3.1: ...<p>PAR1
                    // State 3.2: ...<p>PAR1\n\nPAR2
                    // Start from an empty result so _splitText() knows no
                    // start tag was added by us.
                    $token = array();
                    $this->_splitText($text, $token);
                    return;
                }
            }
            // State 4.1: ...<b>PAR1
            // State 4.2: ...<b>PAR1\n\nPAR2
            return;
        }

        $inRootNode = empty($this->currentNesting);
        if (!$inRootNode && strpos($text, "\n\n") === false) {
            // State 2: <div>PAR1... (similar to 1.4)
            // We are inside an element that allows paragraphs, but we do not
            // know yet whether one is needed.
            if ($this->_pLookAhead()) {
                // State 2.1: <div>PAR1<b>PAR1\n\nPAR2
                // This will always be the first child, since any previous
                // inline element would have triggered this same routine and
                // found the double newline (a comment is a possible exception).
                $token = array($this->_pStart(), $token);
            }
            // State 2.2.1: <div>PAR1<div>
            // State 2.2.2: <div>PAR1<b>PAR1</b></div>
            return;
        }

        // From here on: text in the anonymous root node, or text (anywhere
        // <p> is allowed) that has a double newline. Both are treated alike.
        $i = $nesting = null;
        $hasFollowingToken = $this->forwardUntilEndToken($i, $current, $nesting);

        if (!$hasFollowingToken && $token->is_whitespace) {
            // State 1.1: whitespace, then document end -- a degenerate case.
            return;
        }

        if (!$token->is_whitespace || $this->_isInline($current)) {
            // State 1.2: PAR1
            // State 1.3: PAR1\n\nPAR2
            // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
            $token = array($this->_pStart());
            $this->_splitText($text, $token);
        }
        // Otherwise State 1.5: \n<hr /> -- whitespace before a non-inline
        // token; nothing to do.
    }

    /**
     * Decides whether an element token needs a <p> (and/or a separating
     * double newline) in front of it and, if so, replaces $token (passed by
     * reference) with an array of tokens.
     *
     * There is no check for "already inside a <p>" for block tokens: per the
     * original author's note, MakeWellFormed would already have autoclosed
     * that paragraph.
     *
     * @param HTMLPurifier_Token $token
     */
    public function handleElement(&$token)
    {
        if (!$this->allowsElement('p')) {
            // State 2.2: <ul><li>
            // State 2.4: <p><b>
            return;
        }

        $isInline = $this->_isInline($token);

        if (!empty($this->currentNesting)) {
            if (!$isInline) {
                // State 2.3: ...<div>
                return;
            }

            // State 1: <div>...<b>
            // Fetch the preceding token to see whether this one sits directly
            // after the parent's start tag.
            // NOTE(review): the historical comment spoke of seeking backwards
            // past whitespace, but only a single backward() step is taken.
            $i = null;
            $this->backward($i, $prev);

            if ($prev instanceof HTMLPurifier_Token_Start) {
                // State 1.2.1: <div><b> -- look ahead to see if <p> is needed.
                // State 1.3.1: <div><b>PAR1\n\nPAR2          => yes
                // State 1.3.2: <div><b>PAR1</b></div>         => no
                // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div> => no
                $wantsParagraph = $this->_pLookAhead();
            } else {
                // Token was not adjacent to the parent's start tag.
                // State 1.1.4: <div><p>PAR1</p>\n\n<b>        => yes
                //   (quite frankly, this should be handled by _splitText)
                // State 1.1.1: <div><p>PAR1</p><b>            => no
                // State 1.1.2: <div><br /><b>                 => no
                // State 1.1.3: <div>PAR<b>                    => no
                $wantsParagraph = $prev instanceof HTMLPurifier_Token_Text
                    && substr($prev->data, -2) === "\n\n";
            }

            if ($wantsParagraph) {
                $token = array($this->_pStart(), $token);
            }
            return;
        }

        // We are in the anonymous root node.
        if ($isInline) {
            // State 3.1: <b>
            // This is where the {p} tag is inserted; it is not reflected in
            // inputTokens yet, however.
            $token = array($this->_pStart(), $token);
        }
        // Otherwise State 3.2: <div>

        $i = null;
        if ($this->backward($i, $prev) && !($prev instanceof HTMLPurifier_Token_Text)) {
            // State 3.1.1: ...</p>{p}<b>
            // State 3.2.1: ...</p><div>
            // Put a double newline in front of whatever we emit.
            if (!is_array($token)) {
                $token = array($token);
            }
            array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
        }
        // Otherwise (previous token is text, or there is none):
        // State 3.1.2: ...</p>\n\n{p}<b>
        // State 3.2.2: ...</p>\n\n<div>
        // Note: PAR<ELEM> cannot occur because PAR would have been wrapped
        // in <p> tags.
    }

    /**
     * Breaks text into paragraphs on double newlines and appends the
     * corresponding text / </p> / "\n\n" / <p> tokens to $result, which
     * will replace the original token in the stream.
     *
     * Whether $result is empty on entry matters: an empty $result means the
     * caller did not add a start paragraph token, i.e. we have been inside a
     * paragraph for a while.
     *
     * @param string $data String text data that will be processed
     *    into paragraphs
     * @param HTMLPurifier_Token[] $result Reference to array of tokens that the
     *    tags will be appended onto
     */
    private function _splitText($data, &$result)
    {
        $chunks = explode("\n\n", $data);
        $lastIndex = count($chunks) - 1;

        if ($lastIndex == 0) {
            // No double newline at all; pass the text through unchanged.
            // In theory this should never happen.
            $result[] = new HTMLPurifier_Token_Text($data);
            return;
        }

        // Only the first and the last chunk carry meaning when blank (a
        // leading or trailing double newline); blank chunks in between are
        // simply dropped.
        $reopenParagraph = false;
        if (trim($chunks[0]) === '') {
            if (empty($result)) {
                // We were already inside a paragraph: close it. A new one is
                // reopened further down, but only if real paragraphs follow;
                // otherwise the next injector call takes care of it.
                $result[] = new HTMLPurifier_Token_End('p');
                $result[] = new HTMLPurifier_Token_Text("\n\n");
                $reopenParagraph = true;
            } else {
                // A paragraph was just started by the caller. Reinstate the
                // double newline for presentation's sake, since it was in
                // the source.
                array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
            }
        }

        // A trailing double newline means the final paragraph gets an
        // explicit </p>.
        $closeLastParagraph = (trim($chunks[$lastIndex]) === '');

        // Collect the chunks that actually contain something.
        $paragraphs = array();
        foreach ($chunks as $chunk) {
            if (trim($chunk) !== '') {
                $paragraphs[] = $chunk;
            }
        }

        if (empty($paragraphs)) {
            // Nothing but whitespace.
            return;
        }

        if ($reopenParagraph) {
            $result[] = $this->_pStart();
        }

        $finalIndex = count($paragraphs) - 1;
        foreach ($paragraphs as $index => $paragraph) {
            $isFinal = ($index === $finalIndex);

            $result[] = new HTMLPurifier_Token_Text($paragraph);

            // Every paragraph but the last is closed explicitly; the last one
            // only if the text ended in a double newline. Otherwise
            // MakeWellFormed closes it later.
            if (!$isFinal || $closeLastParagraph) {
                $result[] = new HTMLPurifier_Token_End('p');
                $result[] = new HTMLPurifier_Token_Text("\n\n");
            }

            // No start token after the last paragraph: the injector adds one
            // later if it turns out to be needed (a lookbehind then, instead
            // of a lookahead now).
            if (!$isFinal) {
                $result[] = $this->_pStart();
            }
        }
    }

    /**
     * Returns true if passed token is inline (and, ergo, allowed in
     * paragraph tags).
     *
     * Concretely: the token's name is looked up among the child elements the
     * HTML definition registers for <p>; the token counts as inline exactly
     * when an entry exists.
     * @param HTMLPurifier_Token $token
     * @return bool
     */
    private function _isInline($token)
    {
        // isset() keeps the lookup quiet: a missing link anywhere in the
        // property chain just yields false.
        return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);

    }

    /**
     * Scans the upcoming tokens to find out whether a <p> tag has to be
     * inserted.
     *
     * The scan starts at nesting level 1 when the current token is a start
     * tag and at 0 otherwise, and walks forward with forwardUntilEndToken().
     * Each token is handed to _checkNeedsP(); the first definite (non-null)
     * answer is returned. If the scan ends without one, the answer is false.
     * @return bool
     */
    private function _pLookAhead()
    {
        $nesting = ($this->currentToken instanceof HTMLPurifier_Token_Start) ? 1 : 0;
        $i = null;

        while ($this->forwardUntilEndToken($i, $current, $nesting)) {
            $verdict = $this->_checkNeedsP($current);
            if ($verdict !== null) {
                return $verdict;
            }
        }

        return false;
    }

    /**
     * Determines if a particular token requires an earlier inline token
     * to get a paragraph. Meant to be used while iterating with
     * forwardUntilEndToken() (see _pLookAhead()).
     *
     * The answer is tri-state:
     *  - false: $current is a start tag that is not inline (a block
     *    element), so no paragraph is needed and scanning can stop;
     *  - true:  $current is text containing a double newline, so a
     *    paragraph is needed;
     *  - null:  no decision yet, keep scanning.
     * @param HTMLPurifier_Token $current
     * @return bool|null
     */
    private function _checkNeedsP($current)
    {
        if ($current instanceof HTMLPurifier_Token_Start) {
            // <div>PAR1<div>   -- block element: terminate early with false.
            // An inline start tag decides nothing.
            return $this->_isInline($current) ? null : false;
        }

        if ($current instanceof HTMLPurifier_Token_Text
            && strpos($current->data, "\n\n") !== false
        ) {
            // <div>PAR1<b>PAR1\n\nPAR2
            return true;
        }

        // <div>PAR1<b>PAR1...  (or any other kind of token): undecided.
        return null;
    }
}

// vim: et sw=4 sts=4


1			<?php
2
3			/**
4			* Injector that auto paragraphs text in the root node based on
5			* double-spacing.
6			* @todo Ensure all states are unit tested, including variations as well.
7			* @todo Make a graph of the flow control for this Injector.
8			*/
9			class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
10			{
11			/**
12			* @type string
13			*/
14			public $name = 'AutoParagraph';
15
16			/**
17			* @type array
18			*/
19			public $needed = array('p');
20
21			/**
22			* @return HTMLPurifier_Token_Start
23			*/
24			private function _pStart()
25			{
26			$par = new HTMLPurifier_Token_Start('p');
27			$par->armor['MakeWellFormed_TagClosedError'] = true;
28			return $par;
29			}
30
31			/**
32			* @param HTMLPurifier_Token_Text $token
33			*/
34			public function handleText(&$token)
35			{
36			$text = $token->data;
37			// Does the current parent allow <p> tags?
38			if ($this->allowsElement('p')) {
39			if (empty($this->currentNesting) \|\| strpos($text, "\n\n") !== false) {
40			// Note that we have differing behavior when dealing with text
41			// in the anonymous root node, or a node inside the document.
42			// If the text as a double-newline, the treatment is the same;
43			// if it doesn't, see the next if-block if you're in the document.
44
45			$i = $nesting = null;
46			if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) {
47			// State 1.1: ... ^ (whitespace, then document end)
48			// ----
49			// This is a degenerate case
50			} else {
51			if (!$token->is_whitespace \|\| $this->_isInline($current)) {
52			// State 1.2: PAR1
53			// ----
54
55			// State 1.3: PAR1\n\nPAR2
56			// ------------
57
58			// State 1.4: <div>PAR1\n\nPAR2 (see State 2)
59			// ------------
60			$token = array($this->_pStart());
61			$this->_splitText($text, $token);
62			} else {
63			// State 1.5: \n<hr />
64			// --
65			}
66			}
67			} else {
68			// State 2: <div>PAR1... (similar to 1.4)
69			// ----
70
71			// We're in an element that allows paragraph tags, but we're not
72			// sure if we're going to need them.
73			if ($this->_pLookAhead()) {
74			// State 2.1: <div>PAR1<b>PAR1\n\nPAR2
75			// ----
76			// Note: This will always be the first child, since any
77			// previous inline element would have triggered this very
78			// same routine, and found the double newline. One possible
79			// exception would be a comment.
80			$token = array($this->_pStart(), $token);
81			} else {
82			// State 2.2.1: <div>PAR1<div>
83			// ----
84
85			// State 2.2.2: <div>PAR1<b>PAR1</b></div>
86			// ----
87			}
88			}
89			// Is the current parent a <p> tag?
90			} elseif (!empty($this->currentNesting) &&
91			$this->currentNesting[count($this->currentNesting) - 1]->name == 'p') {
92			// State 3.1: ...<p>PAR1
93			// ----
94
95			// State 3.2: ...<p>PAR1\n\nPAR2
96			// ------------
97			$token = array();
98			$this->_splitText($text, $token);
99			// Abort!
100			} else {
101			// State 4.1: ...<b>PAR1
102			// ----
103
104			// State 4.2: ...<b>PAR1\n\nPAR2
105			// ------------
106			}
107			}
108
109			/**
110			* @param HTMLPurifier_Token $token
111			*/
112			public function handleElement(&$token)
113			{
114			// We don't have to check if we're already in a <p> tag for block
115			// tokens, because the tag would have been autoclosed by MakeWellFormed.
116			if ($this->allowsElement('p')) {
117			if (!empty($this->currentNesting)) {
118			if ($this->_isInline($token)) {
119			// State 1: <div>...<b>
120			// ---
121			// Check if this token is adjacent to the parent token
122			// (seek backwards until token isn't whitespace)
123			$i = null;
124			$this->backward($i, $prev);
125
126			if (!$prev instanceof HTMLPurifier_Token_Start) {
127			// Token wasn't adjacent
128			if ($prev instanceof HTMLPurifier_Token_Text &&
129			substr($prev->data, -2) === "\n\n"
130			) {
131			// State 1.1.4: <div><p>PAR1</p>\n\n<b>
132			// ---
133			// Quite frankly, this should be handled by splitText
134			$token = array($this->_pStart(), $token);
135			} else {
136			// State 1.1.1: <div><p>PAR1</p><b>
137			// ---
138			// State 1.1.2: <div><br /><b>
139			// ---
140			// State 1.1.3: <div>PAR<b>
141			// ---
142			}
143			} else {
144			// State 1.2.1: <div><b>
145			// ---
146			// Lookahead to see if <p> is needed.
147			if ($this->_pLookAhead()) {
148			// State 1.3.1: <div><b>PAR1\n\nPAR2
149			// ---
150			$token = array($this->_pStart(), $token);
151			} else {
152			// State 1.3.2: <div><b>PAR1</b></div>
153			// ---
154
155			// State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div>
156			// ---
157			}
158			}
159			} else {
160			// State 2.3: ...<div>
161			// -----
162			}
163			} else {
164			if ($this->_isInline($token)) {
165			// State 3.1: <b>
166			// ---
167			// This is where the {p} tag is inserted, not reflected in
168			// inputTokens yet, however.
169			$token = array($this->_pStart(), $token);
170			} else {
171			// State 3.2: <div>
172			// -----
173			}
174
175			$i = null;
176			if ($this->backward($i, $prev)) {
177			if (!$prev instanceof HTMLPurifier_Token_Text) {
178			// State 3.1.1: ...</p>{p}<b>
179			// ---
180			// State 3.2.1: ...</p><div>
181			// -----
182			if (!is_array($token)) {
183			$token = array($token);
184			}
185			array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
186			} else {
187			// State 3.1.2: ...</p>\n\n{p}<b>
188			// ---
189			// State 3.2.2: ...</p>\n\n<div>
190			// -----
191			// Note: PAR<ELEM> cannot occur because PAR would have been
192			// wrapped in <p> tags.
193			}
194			}
195			}
196			} else {
197			// State 2.2: <ul><li>
198			// ----
199			// State 2.4: <p><b>
200			// ---
201			}
202			}
203
204			/**
205			* Splits up a text in paragraph tokens and appends them
206			* to the result stream that will replace the original
207			* @param string $data String text data that will be processed
208			* into paragraphs
209			* @param HTMLPurifier_Token[] $result Reference to array of tokens that the
210			* tags will be appended onto
211			*/
212			private function _splitText($data, &$result)
213			{
214			$raw_paragraphs = explode("\n\n", $data);
215			$paragraphs = array(); // without empty paragraphs
216			$needs_start = false;
217			$needs_end = false;
218
219			$c = count($raw_paragraphs);
220			if ($c == 1) {
221			// There were no double-newlines, abort quickly. In theory this
222			// should never happen.
223			$result[] = new HTMLPurifier_Token_Text($data);
224			return;
225			}
226			for ($i = 0; $i < $c; $i++) {
227			$par = $raw_paragraphs[$i];
228			if (trim($par) !== '') {
229			$paragraphs[] = $par;
230			} else {
231			if ($i == 0) {
232			// Double newline at the front
233			if (empty($result)) {
234			// The empty result indicates that the AutoParagraph
235			// injector did not add any start paragraph tokens.
236			// This means that we have been in a paragraph for
237			// a while, and the newline means we should start a new one.
238			$result[] = new HTMLPurifier_Token_End('p');
239			$result[] = new HTMLPurifier_Token_Text("\n\n");
240			// However, the start token should only be added if
241			// there is more processing to be done (i.e. there are
242			// real paragraphs in here). If there are none, the
243			// next start paragraph tag will be handled by the
244			// next call to the injector
245			$needs_start = true;
246			} else {
247			// We just started a new paragraph!
248			// Reinstate a double-newline for presentation's sake, since
249			// it was in the source code.
250			array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
251			}
252			} elseif ($i + 1 == $c) {
253			// Double newline at the end
254			// There should be a trailing </p> when we're finally done.
255			$needs_end = true;
256			}
257			}
258			}
259
260			// Check if this was just a giant blob of whitespace. Move this earlier,
261			// perhaps?
262			if (empty($paragraphs)) {
263			return;
264			}
265
266			// Add the start tag indicated by \n\n at the beginning of $data
267			if ($needs_start) {
			0 ignored issues – show introduced 2023-12-24 16:44 UTC by Report Bug Copy Issue Report The condition `$needs_start` is always `false`. Loading history...
268			$result[] = $this->_pStart();
269			}
270
271			// Append the paragraphs onto the result
272			foreach ($paragraphs as $par) {
273			$result[] = new HTMLPurifier_Token_Text($par);
274			$result[] = new HTMLPurifier_Token_End('p');
275			$result[] = new HTMLPurifier_Token_Text("\n\n");
276			$result[] = $this->_pStart();
277			}
278
279			// Remove trailing start token; Injector will handle this later if
280			// it was indeed needed. This prevents from needing to do a lookahead,
281			// at the cost of a lookbehind later.
282			array_pop($result);
283
284			// If there is no need for an end tag, remove all of it and let
285			// MakeWellFormed close it later.
286			if (!$needs_end) {
			0 ignored issues – show introduced 2023-12-24 16:44 UTC by Report Bug Copy Issue Report The condition `$needs_end` is always `false`. Loading history...
287			array_pop($result); // removes \n\n
288			array_pop($result); // removes </p>
289			}
290			}
291
292			/**
293			* Returns true if passed token is inline (and, ergo, allowed in
294			* paragraph tags)
295			* @param HTMLPurifier_Token $token
296			* @return bool
297			*/
298			private function _isInline($token)
299			{
300			return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
			0 ignored issues – show Bug Best Practice introduced 2018-06-11 05:57 UTC by Report Bug Copy Issue Report The property `name` does not exist on `HTMLPurifier_Token`. Since you implemented `__get`, consider adding a @property annotation. Loading history...
301			}
302
303			/**
304			* Looks ahead in the token list and determines whether or not we need
305			* to insert a <p> tag.
306			* @return bool
307			*/
308			private function _pLookAhead()
309			{
310			if ($this->currentToken instanceof HTMLPurifier_Token_Start) {
311			$nesting = 1;
312			} else {
313			$nesting = 0;
314			}
315			$ok = false;
316			$i = null;
317			while ($this->forwardUntilEndToken($i, $current, $nesting)) {
318			$result = $this->_checkNeedsP($current);
319			if ($result !== null) {
320			$ok = $result;
321			break;
322			}
323			}
324			return $ok;
325			}
326
327			/**
328			* Determines if a particular token requires an earlier inline token
329			* to get a paragraph. This should be used with _forwardUntilEndToken
330			* @param HTMLPurifier_Token $current
331			* @return bool
332			*/
333			private function _checkNeedsP($current)
334			{
335			if ($current instanceof HTMLPurifier_Token_Start) {
336			if (!$this->_isInline($current)) {
337			// <div>PAR1<div>
338			// ----
339			// Terminate early, since we hit a block element
340			return false;
341			}
342			} elseif ($current instanceof HTMLPurifier_Token_Text) {
343			if (strpos($current->data, "\n\n") !== false) {
344			// <div>PAR1<b>PAR1\n\nPAR2
345			// ----
346			return true;
347			} else {
348			// <div>PAR1<b>PAR1...
349			// ----
350			}
351			}
352			return null;
353			}
354			}
355
356			// vim: et sw=4 sts=4
357

XOOPS / XoopsCore25

HTMLPurifier_Injector_AutoParagraph B last analyzed 2025-06-05 02:27 UTC

Complexity

Size/Duplication

Importance

7 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like

HTMLPurifier_Injector_AutoParagraph B
last analyzed 2025-06-05 02:27 UTC