Total Complexity | 45 |
Total Lines | 344 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like HTMLPurifier_Injector_AutoParagraph often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefix or suffix.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HTMLPurifier_Injector_AutoParagraph, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
9 | class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector |
||
10 | { |
||
11 | /** |
||
12 | * @type string |
||
13 | */ |
||
14 | public $name = 'AutoParagraph'; |
||
15 | |||
16 | /** |
||
17 | * @type array |
||
18 | */ |
||
19 | public $needed = array('p'); |
||
20 | |||
/**
 * Builds the opening <p> token that this injector inserts.
 *
 * The token's 'MakeWellFormed_TagClosedError' armor entry is set to true.
 * NOTE(review): presumably this keeps MakeWellFormed from reporting an
 * error when it closes the injected tag on its own -- confirm against
 * the MakeWellFormed strategy.
 *
 * @return HTMLPurifier_Token_Start
 */
private function _pStart()
{
    $start = new HTMLPurifier_Token_Start('p');
    $start->armor['MakeWellFormed_TagClosedError'] = true;

    return $start;
}
||
30 | |||
/**
 * Decides whether a text token has to be wrapped in, or split into,
 * paragraphs.
 *
 * $token is received by reference: when paragraph handling applies it is
 * replaced with an array of tokens, otherwise it is left untouched.
 *
 * @param HTMLPurifier_Token_Text $token
 */
public function handleText(&$token)
{
    $text = $token->data;

    if (!$this->allowsElement('p')) {
        // The current parent does not accept <p>. The only case that still
        // needs work is text that already sits directly inside a <p>.
        if (empty($this->currentNesting)) {
            return;
        }

        $parent = $this->currentNesting[count($this->currentNesting) - 1];
        if ($parent->name == 'p') {
            // State 3.1: ...<p>PAR1
            // State 3.2: ...<p>PAR1\n\nPAR2
            // An empty result array tells _splitText() that no start tag
            // was injected by this call.
            $token = array();
            $this->_splitText($text, $token);
        }

        // State 4.1: ...<b>PAR1
        // State 4.2: ...<b>PAR1\n\nPAR2
        // Any other parent: nothing to do.
        return;
    }

    // Text in the anonymous root node and text inside the document are
    // treated differently, unless the text has a double newline, in
    // which case both take the splitting path further down.
    $hasNesting = !empty($this->currentNesting);
    if ($hasNesting && strpos($text, "\n\n") === false) {
        // State 2: <div>PAR1... (similar to 1.4)
        // The element allows paragraphs, but it is not yet known whether
        // one is needed, so look ahead.
        if ($this->_pLookAhead()) {
            // State 2.1: <div>PAR1<b>PAR1\n\nPAR2
            // This will always be the first child, since any previous
            // inline element would have triggered this same routine and
            // found the double newline (a comment is one possible
            // exception).
            $token = array($this->_pStart(), $token);
        }

        // State 2.2.1: <div>PAR1<div>
        // State 2.2.2: <div>PAR1<b>PAR1</b></div>
        return;
    }

    // The forward scan runs unconditionally here, exactly once.
    $i = $nesting = null;
    $foundNext = $this->forwardUntilEndToken($i, $current, $nesting);

    if ($token->is_whitespace) {
        if (!$foundNext) {
            // State 1.1: ... ^ (whitespace, then document end)
            // Degenerate case: leave the token alone.
            return;
        }
        if (!$this->_isInline($current)) {
            // State 1.5: \n<hr />
            return;
        }
    }

    // State 1.2: PAR1
    // State 1.3: PAR1\n\nPAR2
    // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
    $token = array($this->_pStart());
    $this->_splitText($text, $token);
}
||
108 | |||
/**
 * Decides whether an element token needs an opening <p> (and, at the
 * document root, a separating double newline) inserted in front of it.
 *
 * $token is received by reference and is replaced with an array of tokens
 * whenever something is inserted; otherwise it is left untouched.
 *
 * There is no check for already being inside a <p> for block tokens,
 * because that tag would have been autoclosed by MakeWellFormed.
 *
 * @param HTMLPurifier_Token $token
 */
public function handleElement(&$token)
{
    if (!$this->allowsElement('p')) {
        // State 2.2: <ul><li>
        // State 2.4: <p><b>
        return;
    }

    $inline = $this->_isInline($token);

    if (empty($this->currentNesting)) {
        // Root level of the document.
        if ($inline) {
            // State 3.1: <b>
            // The {p} tag is inserted here; it is not reflected in
            // inputTokens yet, however.
            $token = array($this->_pStart(), $token);
        }
        // else State 3.2: <div>

        $i = null;
        if ($this->backward($i, $prev) && !($prev instanceof HTMLPurifier_Token_Text)) {
            // State 3.1.1: ...</p>{p}<b>
            // State 3.2.1: ...</p><div>
            // Separate from the previous token with a double newline.
            if (!is_array($token)) {
                $token = array($token);
            }
            array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
        }
        // Otherwise State 3.1.2: ...</p>\n\n{p}<b>
        //           State 3.2.2: ...</p>\n\n<div>
        // PAR<ELEM> cannot occur here because PAR would already have
        // been wrapped in <p> tags.
        return;
    }

    if (!$inline) {
        // State 2.3: ...<div>
        return;
    }

    // State 1: <div>...<b>
    // Inspect the preceding token to see whether this one is adjacent to
    // the parent's start tag.
    $i = null;
    $this->backward($i, $prev);

    if ($prev instanceof HTMLPurifier_Token_Start) {
        // State 1.2.1: <div><b>
        // Look ahead to see whether a <p> is needed:
        //   State 1.3.1: <div><b>PAR1\n\nPAR2            -> yes
        //   State 1.3.2: <div><b>PAR1</b></div>          -> no
        //   State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div> -> no
        $needsParagraph = $this->_pLookAhead();
    } else {
        // Not adjacent to the parent. A paragraph starts only when the
        // preceding text ends in a double newline:
        //   State 1.1.4: <div><p>PAR1</p>\n\n<b>  -> yes
        //   State 1.1.1: <div><p>PAR1</p><b>      -> no
        //   State 1.1.2: <div><br /><b>           -> no
        //   State 1.1.3: <div>PAR<b>              -> no
        $needsParagraph = $prev instanceof HTMLPurifier_Token_Text
            && substr($prev->data, -2) === "\n\n";
    }

    if ($needsParagraph) {
        $token = array($this->_pStart(), $token);
    }
}
||
203 | |||
/**
 * Breaks text on double newlines into paragraph tokens and appends them
 * to the token list that will replace the original text token.
 *
 * Chunks that are empty or whitespace-only produce no paragraph. A
 * blank chunk at the very start or very end only influences which
 * paragraph start/end tokens are emitted.
 *
 * @param string $data Text to be divided into paragraphs
 * @param HTMLPurifier_Token[] $result Token list, by reference, that the
 *        generated tokens are added to
 */
private function _splitText($data, &$result)
{
    $chunks = explode("\n\n", $data);
    $lastIndex = count($chunks) - 1;

    if ($lastIndex === 0) {
        // No double newline at all; in theory this should never happen.
        // Pass the text through unchanged.
        $result[] = new HTMLPurifier_Token_Text($data);
        return;
    }

    $paragraphs = array(); // chunks that contain real content
    $needs_start = false;
    $needs_end = false;

    foreach ($chunks as $index => $chunk) {
        if (trim($chunk) !== '') {
            $paragraphs[] = $chunk;
            continue;
        }

        if ($index === 0) {
            // The text began with a double newline.
            if (empty($result)) {
                // Nothing was injected before this call, which indicates
                // we have been inside a paragraph for a while: close it.
                // A new start tag is added further down, but only if
                // real paragraphs follow; otherwise the next call to the
                // injector takes care of it.
                $result[] = new HTMLPurifier_Token_End('p');
                $result[] = new HTMLPurifier_Token_Text("\n\n");
                $needs_start = true;
            } else {
                // A paragraph was just opened; put the double newline
                // from the source back in front of it for presentation.
                array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
            }
        } elseif ($index === $lastIndex) {
            // The text ended with a double newline, so the final
            // paragraph gets an explicit </p>.
            $needs_end = true;
        }
    }

    if (empty($paragraphs)) {
        // Nothing but whitespace.
        return;
    }

    if ($needs_start) {
        $result[] = $this->_pStart();
    }

    // Every paragraph except the last is closed, followed by a double
    // newline and the start tag of the next paragraph.
    $finalParagraph = array_pop($paragraphs);
    foreach ($paragraphs as $paragraph) {
        $result[] = new HTMLPurifier_Token_Text($paragraph);
        $result[] = new HTMLPurifier_Token_End('p');
        $result[] = new HTMLPurifier_Token_Text("\n\n");
        $result[] = $this->_pStart();
    }

    // The last paragraph never gets a trailing start tag (the injector
    // adds one later if it turns out to be needed). Without a trailing
    // double newline it is also left open for MakeWellFormed to close.
    $result[] = new HTMLPurifier_Token_Text($finalParagraph);
    if ($needs_end) {
        $result[] = new HTMLPurifier_Token_End('p');
        $result[] = new HTMLPurifier_Token_Text("\n\n");
    }
}
||
291 | |||
292 | /** |
||
293 | * Returns true if passed token is inline (and, ergo, allowed in |
||
294 | * paragraph tags) |
||
295 | * @param HTMLPurifier_Token $token |
||
296 | * @return bool |
||
297 | */ |
||
298 | private function _isInline($token) |
||
301 | } |
||
302 | |||
303 | /** |
||
304 | * Looks ahead in the token list and determines whether or not we need |
||
305 | * to insert a <p> tag. |
||
306 | * @return bool |
||
307 | */ |
||
308 | private function _pLookAhead() |
||
325 | } |
||
326 | |||
327 | /** |
||
328 | * Determines if a particular token requires an earlier inline token |
||
329 | * to get a paragraph. This should be used with forwardUntilEndToken |
||
330 | * @param HTMLPurifier_Token $current |
||
331 | * @return bool |
||
332 | */ |
||
333 | private function _checkNeedsP($current) |
||
353 | } |
||
354 | } |
||
355 | |||
356 | // vim: et sw=4 sts=4 |
||
357 |