HTMLPurifier_Injector_AutoParagraph   B
last analyzed

Complexity

Total Complexity 45

Size/Duplication

Total Lines 344
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 45
eloc 108
dl 0
loc 344
rs 8.8
c 0
b 0
f 0

7 Methods

Rating   Name   Duplication   Size   Complexity  
A _isInline() 0 3 1
C handleElement() 0 85 12
A _pLookAhead() 0 17 4
A _checkNeedsP() 0 20 5
A _pStart() 0 5 1
B _splitText() 0 77 11
B handleText() 0 67 11

How to fix   Complexity   

Complex Class

Complex classes like HTMLPurifier_Injector_AutoParagraph often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use HTMLPurifier_Injector_AutoParagraph, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/**
4
 * Injector that auto paragraphs text in the root node based on
5
 * double-spacing.
6
 * @todo Ensure all states are unit tested, including variations as well.
7
 * @todo Make a graph of the flow control for this Injector.
8
 */
9
class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
10
{
11
    /**
12
     * @type string
13
     */
14
    public $name = 'AutoParagraph';
15
16
    /**
17
     * @type array
18
     */
19
    public $needed = array('p');
20
21
    /**
     * Builds the opening <p> token that this injector inserts.
     *
     * The token's 'MakeWellFormed_TagClosedError' armor entry is set to true
     * before it is returned.
     * NOTE(review): presumably this tells MakeWellFormed not to report an
     * error when it auto-closes the paragraph -- confirm against that strategy.
     *
     * @return HTMLPurifier_Token_Start
     */
    private function _pStart()
    {
        $start = new HTMLPurifier_Token_Start('p');
        $start->armor['MakeWellFormed_TagClosedError'] = true;
        return $start;
    }
30
31
    /**
     * Wraps text in paragraph tokens where the context calls for it.
     *
     * Depending on where the text sits, $token is either left untouched or
     * replaced (by reference) with an array of tokens:
     *  - parent allows <p>, and the text is in the root node or contains a
     *    double newline: a <p> start followed by the output of _splitText();
     *  - parent allows <p>, the text is nested and has no double newline:
     *    a <p> start is prepended only if _pLookAhead() says one is needed;
     *  - parent is itself a <p>: the output of _splitText() alone.
     *
     * @param HTMLPurifier_Token_Text $token Text token to inspect; may be
     *     overwritten with an array of replacement tokens.
     */
    public function handleText(&$token)
    {
        $text = $token->data;

        if (!$this->allowsElement('p')) {
            // The current parent does not allow <p>. Is it a <p> itself?
            if (empty($this->currentNesting)) {
                return;
            }
            $parent = $this->currentNesting[count($this->currentNesting) - 1];
            if ($parent->name == 'p') {
                // State 3.1: ...<p>PAR1
                //                  ----
                // State 3.2: ...<p>PAR1\n\nPAR2
                //                  ------------
                $token = array();
                $this->_splitText($text, $token);
            }
            // Otherwise there is nothing to do:
            // State 4.1: ...<b>PAR1
            //                  ----
            // State 4.2: ...<b>PAR1\n\nPAR2
            //                  ------------
            return;
        }

        // Text in the anonymous root node and text containing a double
        // newline are treated alike (handled further down). Nested text
        // without a double newline only gets a paragraph if a later
        // sibling turns out to require one.
        if (!empty($this->currentNesting) && strpos($text, "\n\n") === false) {
            // State 2:   <div>PAR1... (similar to 1.4)
            //                 ----
            if ($this->_pLookAhead()) {
                // State 2.1: <div>PAR1<b>PAR1\n\nPAR2
                //                 ----
                // This is always the first child: any earlier inline
                // element would have run this same routine and found the
                // double newline. A comment is one possible exception.
                $token = array($this->_pStart(), $token);
            }
            // No paragraph needed otherwise:
            // State 2.2.1: <div>PAR1<div>
            //                   ----
            // State 2.2.2: <div>PAR1<b>PAR1</b></div>
            //                   ----
            return;
        }

        $i = null;
        $nesting = null;
        $hasNext = $this->forwardUntilEndToken($i, $current, $nesting);

        if (!$hasNext && $token->is_whitespace) {
            // State 1.1: ...    ^ (whitespace, then document end)
            //               ----
            // Degenerate case: leave the token alone.
            return;
        }

        if ($token->is_whitespace && !$this->_isInline($current)) {
            // State 1.5: \n<hr />
            //            --
            return;
        }

        // State 1.2: PAR1
        //            ----
        // State 1.3: PAR1\n\nPAR2
        //            ------------
        // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
        //                 ------------
        $token = array($this->_pStart());
        $this->_splitText($text, $token);
    }
108
109
    /**
     * Decides whether an element token must be preceded by a <p> start
     * (and, in the root node, by a "\n\n" text token).
     *
     * When something has to be inserted, $token is replaced (by reference)
     * with an array whose last entry is the original token. There is no need
     * to check whether a block token is already inside a <p>: that tag would
     * have been auto-closed by MakeWellFormed.
     *
     * @param HTMLPurifier_Token $token Element token to inspect; may be
     *     overwritten with an array of replacement tokens.
     */
    public function handleElement(&$token)
    {
        if (!$this->allowsElement('p')) {
            // State 2.2: <ul><li>
            //                ----
            // State 2.4: <p><b>
            //               ---
            return;
        }

        $inline = $this->_isInline($token);

        if (empty($this->currentNesting)) {
            // We are in the root node.
            if ($inline) {
                // State 3.1: <b>
                //            ---
                // The {p} tag is inserted here; inputTokens does not
                // reflect it yet.
                $token = array($this->_pStart(), $token);
            }
            // (State 3.2: <div> -- a block element needs no paragraph.)

            $i = null;
            if (!$this->backward($i, $prev)) {
                return;
            }
            if ($prev instanceof HTMLPurifier_Token_Text) {
                // State 3.1.2: ...</p>\n\n{p}<b>
                //                            ---
                // State 3.2.2: ...</p>\n\n<div>
                //                         -----
                // Note: PAR<ELEM> cannot occur because PAR would have been
                // wrapped in <p> tags.
                return;
            }

            // State 3.1.1: ...</p>{p}<b>
            //                        ---
            // State 3.2.1: ...</p><div>
            //                     -----
            if (!is_array($token)) {
                $token = array($token);
            }
            array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
            return;
        }

        if (!$inline) {
            // State 2.3: ...<div>
            //               -----
            return;
        }

        // State 1: <div>...<b>
        //                  ---
        // Step backwards to find out whether this token is adjacent to the
        // parent's start tag.
        $i = null;
        $this->backward($i, $prev);

        if ($prev instanceof HTMLPurifier_Token_Start) {
            // State 1.2.1: <div><b>
            //                   ---
            // Look ahead to see if <p> is needed.
            if ($this->_pLookAhead()) {
                // State 1.3.1: <div><b>PAR1\n\nPAR2
                //                   ---
                $token = array($this->_pStart(), $token);
            }
            // No paragraph otherwise:
            // State 1.3.2: <div><b>PAR1</b></div>
            //                   ---
            // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div>
            //                   ---
            return;
        }

        // The token was not adjacent to the parent's start tag.
        $afterParagraphBreak = $prev instanceof HTMLPurifier_Token_Text
            && substr($prev->data, -2) === "\n\n";

        if ($afterParagraphBreak) {
            // State 1.1.4: <div><p>PAR1</p>\n\n<b>
            //                                  ---
            // Quite frankly, this should be handled by splitText
            $token = array($this->_pStart(), $token);
        }
        // Nothing to insert otherwise:
        // State 1.1.1: <div><p>PAR1</p><b>
        //                              ---
        // State 1.1.2: <div><br /><b>
        //                         ---
        // State 1.1.3: <div>PAR<b>
        //                      ---
    }
203
204
    /**
     * Splits text on double newlines into paragraph tokens and adds them to
     * the token array that will replace the original text token.
     *
     * Chunks that are empty after trim() are dropped. The last paragraph is
     * only closed with </p> when the text ends in a blank chunk; otherwise
     * it is left open for MakeWellFormed to close later. A start tag for
     * whatever follows is never appended here; the injector adds it later
     * if it turns out to be needed.
     *
     * @param string $data String text data that will be processed
     *     into paragraphs
     * @param HTMLPurifier_Token[] $result Reference to array of tokens that
     *     the generated tokens are added to
     */
    private function _splitText($data, &$result)
    {
        $chunks = explode("\n\n", $data);
        $total = count($chunks);

        if ($total == 1) {
            // No double newline at all; in theory this should never happen.
            // Pass the text through unchanged.
            $result[] = new HTMLPurifier_Token_Text($data);
            return;
        }

        $kept = array();            // chunks with real content
        $openAfterBreak = false;    // emit <p> before the first kept chunk
        $closeLast = false;         // emit </p> after the last kept chunk

        foreach ($chunks as $index => $chunk) {
            if (trim($chunk) !== '') {
                $kept[] = $chunk;
                continue;
            }

            if ($index == 0) {
                // Double newline at the front
                if (empty($result)) {
                    // An empty result means the injector added no start
                    // paragraph token, i.e. we have been inside a
                    // paragraph for a while and the newline ends it.
                    $result[] = new HTMLPurifier_Token_End('p');
                    $result[] = new HTMLPurifier_Token_Text("\n\n");
                    // The matching start token is only added below when
                    // real paragraphs follow; if none do, the next call to
                    // the injector takes care of it.
                    $openAfterBreak = true;
                } else {
                    // A paragraph was just started. Put the double newline
                    // from the source back in front of it for
                    // presentation's sake.
                    array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
                }
            } elseif ($index + 1 == $total) {
                // Double newline at the end: the final paragraph needs a
                // trailing </p>.
                $closeLast = true;
            }
        }

        if (empty($kept)) {
            // The text was nothing but whitespace.
            return;
        }

        // Both flags are assigned inside the loop above, so neither of the
        // checks below is constant.
        if ($openAfterBreak) {
            $result[] = $this->_pStart();
        }

        $finalIndex = count($kept) - 1;
        foreach ($kept as $position => $chunk) {
            $result[] = new HTMLPurifier_Token_Text($chunk);

            if ($position < $finalIndex) {
                // Close this paragraph and open the next one.
                $result[] = new HTMLPurifier_Token_End('p');
                $result[] = new HTMLPurifier_Token_Text("\n\n");
                $result[] = $this->_pStart();
            } elseif ($closeLast) {
                // Last paragraph, explicitly terminated in the source.
                $result[] = new HTMLPurifier_Token_End('p');
                $result[] = new HTMLPurifier_Token_Text("\n\n");
            }
            // Otherwise the last paragraph stays open and MakeWellFormed
            // closes it later.
        }
    }
291
292
    /**
     * Returns true if passed token is inline (and, ergo, allowed in
     * paragraph tags).
     *
     * A token counts as inline when its name is a key of the element list
     * of the <p> definition's child definition. Tokens without a name
     * (or a null token) are never inline.
     *
     * @param HTMLPurifier_Token|null $token
     * @return bool
     */
    private function _isInline($token)
    {
        // The base token class does not declare a name property, so callers
        // can pass tokens that have none. Check first instead of reading a
        // missing property and using the resulting null as an array offset.
        // NOTE(review): assumes tag tokens expose name as a declared
        // property; a name served only through __get without __isset would
        // fail this check -- confirm against the token classes.
        if (!isset($token->name)) {
            return false;
        }
        return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
    }
302
303
    /**
     * Looks ahead in the token list and determines whether or not we need
     * to insert a <p> tag.
     *
     * Tokens are pulled with forwardUntilEndToken() and passed to
     * _checkNeedsP() one by one; the first non-null verdict is returned. If
     * the scan ends without a verdict, no paragraph is needed.
     *
     * @return bool
     */
    private function _pLookAhead()
    {
        // A current start token counts as one level of nesting for the scan.
        $nesting = ($this->currentToken instanceof HTMLPurifier_Token_Start) ? 1 : 0;
        $i = null;

        while ($this->forwardUntilEndToken($i, $current, $nesting)) {
            $verdict = $this->_checkNeedsP($current);
            if ($verdict !== null) {
                return $verdict;
            }
        }

        return false;
    }
326
327
    /**
     * Determines if a particular token requires an earlier inline token
     * to get a paragraph. Intended to be fed tokens obtained through
     * forwardUntilEndToken().
     *
     * @param HTMLPurifier_Token $current
     * @return bool|null true when a paragraph is needed, false when it is
     *     definitely not needed, null when this token does not settle the
     *     question and the scan should continue.
     */
    private function _checkNeedsP($current)
    {
        if ($current instanceof HTMLPurifier_Token_Start) {
            // <div>PAR1<div>
            //      ----
            // A block element settles it: no paragraph. An inline start
            // tag decides nothing.
            return $this->_isInline($current) ? null : false;
        }

        if ($current instanceof HTMLPurifier_Token_Text
            && strpos($current->data, "\n\n") !== false
        ) {
            // <div>PAR1<b>PAR1\n\nPAR2
            //      ----
            return true;
        }

        // <div>PAR1<b>PAR1...
        //      ----
        // Text without a double newline, or any other token kind: undecided.
        return null;
    }
354
}
355
356
// vim: et sw=4 sts=4
357