HtmlParser::transform()   F
last analyzed

Complexity

Conditions 42
Paths 222

Size

Total Lines 209
Code Lines 105

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 84
CRAP Score 51.6991

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 42
eloc 105
c 3
b 0
f 0
nc 222
nop 1
dl 0
loc 209
ccs 84
cts 102
cp 0.8235
crap 51.6991
rs 2.4466

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * Created by PhpStorm.
4
 * @author domenico [email protected] / [email protected]
5
 * Date: 05/11/18
6
 * Time: 15.30
7
 *
8
 */
9
10
namespace Matecat\SubFiltering\Filters\Html;
11
12
use Matecat\SubFiltering\Commons\Pipeline;
13
use ReflectionException;
14
use ReflectionMethod;
15
use RuntimeException;
16
17
/**
 * Class HtmlParser
 *
 * Character-by-character state machine that separates plain text from markup
 * (tags, comments and script/style blocks) inside a segment.
 *
 * The parser itself never decides what to do with a piece of text: every
 * `_xxx()` call listed below is routed through {@see HtmlParser::__call()} to
 * the object registered with {@see HtmlParser::registerCallbacksHandler()},
 * and whatever that callback returns is appended to the output.
 *
 * Based on the code https://github.com/ericnorris/striptags
 * Rewritten/Improved and Changed for PHP
 *
 * @author  domenico [email protected] / [email protected]
 * @package SubFiltering
 *
 * @method _isTagValid( string $buffer )
 * @method _finalizeHTMLTag( string $buffer )
 * @method _fixWrongBuffer( string $buffer )
 * @method _finalizeScriptTag( string $buffer )
 * @method _finalizePlainText( string $plain_text_buffer )
 * @method _setSegmentContainsHtml()
 */
class HtmlParser {

    const STATE_PLAINTEXT = 0;
    const STATE_HTML      = 1;
    const STATE_COMMENT   = 2;
    const STATE_JS_CSS    = 3;

    /**
     * Optional pipeline. Inside this class it is only tested against null:
     * _setSegmentContainsHtml() is never invoked while it is null.
     *
     * @var Pipeline|null
     */
    private $pipeline;

    /**
     * Object that receives every `_xxx()` callback (see __call()).
     *
     * @var CallbacksHandler|null
     */
    protected $callbacksHandler;

    /**
     * HtmlParser constructor.
     *
     * @param Pipeline|null $pipeline
     */
    public function __construct( Pipeline $pipeline = null ) {
        $this->pipeline = $pipeline;
    }

    /**
     * Registers the object whose methods serve as parser callbacks.
     *
     * @param object|string $class must use the CallbacksHandler trait
     *
     * @throws RuntimeException when $class is not a loadable class or does not use the trait
     */
    public function registerCallbacksHandler( $class ) {

        // class_uses() returns false (and warns) for unknown classes; check first
        // so that the caller always gets the RuntimeException below.
        $isKnownClass = is_object( $class ) || ( is_string( $class ) && class_exists( $class ) );
        $usedTraits   = $isKnownClass ? class_uses( $class ) : false;

        //check: $class must use CallbacksHandler trait
        if ( $usedTraits === false || !in_array( CallbacksHandler::class, $usedTraits, true ) ) {
            throw new RuntimeException( "Class passed to " . __METHOD__ . " must use " . CallbacksHandler::class . " trait." );
        }

        $this->callbacksHandler = $class;
    }

    /**
     * Forwards an undefined method call to the registered callbacks handler.
     *
     * Exactly one argument is forwarded: the first one received, or null when
     * the call was made without arguments. Without a registered handler the
     * call is a no-op returning null.
     *
     * @param string $name
     * @param array  $arguments
     *
     * @return mixed
     * @throws ReflectionException when the handler has no method called $name
     */
    public function __call( $name, $arguments ) {

        if ( $this->callbacksHandler === null ) {
            return null;
        }

        //Reflection to allow protected/private methods to be set as callback
        $reflector = new ReflectionMethod( $this->callbacksHandler, $name );
        if ( !$reflector->isPublic() ) {
            $reflector->setAccessible( true );
        }

        // Callbacks such as _setSegmentContainsHtml() are invoked with no
        // arguments: reading $arguments[ 0 ] blindly raises an undefined-offset error.
        $firstArgument = isset( $arguments[ 0 ] ) ? $arguments[ 0 ] : null;

        return $reflector->invoke( $this->callbacksHandler, $firstArgument );

    }

    /**
     * Walks $segment one character at a time and rebuilds it from the values
     * returned by the callbacks:
     *  - runs of plain text are passed to _finalizePlainText();
     *  - a buffer closed by '>' is passed to _finalizeHTMLTag() when
     *    _isTagValid() accepts it, to _fixWrongBuffer() otherwise;
     *  - comments and script/style blocks are passed to _finalizeScriptTag();
     *  - stray '<' / '>' and unterminated markup are passed to _fixWrongBuffer().
     *
     * @param string $segment
     *
     * @return string the concatenation of the callbacks' return values
     * @throws ReflectionException see __call()
     */
    public function transform( $segment ) {

        $originalSplit = preg_split( '//u', $segment, -1, PREG_SPLIT_NO_EMPTY );

        if ( $originalSplit === false ) {
            // preg_split() fails on input that is not valid UTF-8. Every character
            // this parser reacts to is single-byte ASCII, so splitting by bytes
            // rebuilds the very same buffers instead of dropping the whole segment.
            $originalSplit = str_split( (string)$segment );
        }

        $lastIdx = count( $originalSplit ) - 1;

        $state             = static::STATE_PLAINTEXT;
        $html_buffer       = '';
        $plain_text_buffer = '';
        $output            = '';

        foreach ( $originalSplit as $idx => $char ) {

            if ( $state === static::STATE_PLAINTEXT ) {

                switch ( $char ) {
                    case '<':
                        // possible tag start: flush the pending text and start buffering markup
                        $output            .= $this->_finalizePlainText( $plain_text_buffer );
                        $plain_text_buffer = '';
                        $html_buffer       .= $char;
                        $state             = static::STATE_HTML;
                        break;

                    case '>':
                        // a lone greater-than sign in plain text (NOTE 2021-06-15):
                        // it is handed to _fixWrongBuffer() and stays part of the text
                        $plain_text_buffer .= $this->_fixWrongBuffer( $char );
                        break;

                    default:
                        $plain_text_buffer .= $char;
                        break;
                }

            } elseif ( $state === static::STATE_HTML ) {

                switch ( $char ) {
                    case '<':
                        // a second less-than sign means the first one did NOT open a tag:
                        // emit what was buffered as wrong markup and restart from here
                        $output      .= $this->_fixWrongBuffer( $html_buffer );
                        $html_buffer = $char;
                        break;

                    case '>':
                        if ( $this->isScriptOrStyleOpening( $html_buffer ) ) {
                            // keep buffering until the matching closing tag shows up
                            $html_buffer .= $char;
                            $state       = static::STATE_JS_CSS;
                            break;
                        }

                        // this is closing the tag in $html_buffer
                        $state       = static::STATE_PLAINTEXT;
                        $html_buffer .= $char;

                        $isValidTag = $this->_isTagValid( $html_buffer );

                        if ( $isValidTag ) {
                            $output .= $this->_finalizeHTMLTag( $html_buffer );
                        } else {
                            $output .= $this->_fixWrongBuffer( $html_buffer );
                        }

                        if ( $isValidTag && null !== $this->pipeline ) {
                            $this->_setSegmentContainsHtml();
                        }

                        $html_buffer = '';
                        break;

                    case '"':
                    case '\'':
                        // quotes are only buffered; this case is kept apart from the default
                        // one because it must not run the last-character check
                        $html_buffer .= $char;
                        break;

                    case '-':
                        // "<!-" followed by "-" opens a comment
                        if ( $html_buffer === '<!-' ) {
                            $state = static::STATE_COMMENT;
                        }

                        $html_buffer .= $char;
                        break;

                    case ' ': //0x20, is a space
                    case "\n": // line feed: must be double-quoted, '\n' is a backslash followed by "n"
                        if ( $html_buffer === '<' ) {
                            // "<" followed by whitespace is not a tag, but we work in XML text, so encode it
                            $state       = static::STATE_PLAINTEXT;
                            $output      .= $this->_fixWrongBuffer( $html_buffer . $char );
                            $html_buffer = '';
                            $this->flagSegmentIfTagIsValid( $html_buffer );
                            break;
                        }

                        $html_buffer .= $char;
                        break;

                    default:
                        $html_buffer .= $char;

                        // NOTE 2021-06-16
                        // When the segment ends inside a buffer that is not a valid tag
                        // (example: '<3 %}'), the buffer is converted to plain text.
                        if ( $idx === $lastIdx && !$this->_isTagValid( $html_buffer ) ) {
                            $state             = static::STATE_PLAINTEXT;
                            $plain_text_buffer .= $this->_fixWrongBuffer( $html_buffer );
                            $html_buffer       = '';
                            $this->flagSegmentIfTagIsValid( $html_buffer );
                        }
                        break;
                }

            } elseif ( $state === static::STATE_COMMENT ) {

                $html_buffer .= $char;

                if ( $char === '>' && substr( $html_buffer, -3 ) === '-->' ) {
                    // close the comment
                    $state       = static::STATE_PLAINTEXT;
                    $output      .= $this->_finalizeScriptTag( $html_buffer );
                    $html_buffer = '';
                    $this->flagSegmentIfTagIsValid( $html_buffer );
                }

            } elseif ( $state === static::STATE_JS_CSS ) {

                $html_buffer .= $char;

                if ( $char === '>' && in_array( substr( $html_buffer, -6 ), [ 'cript>', 'style>' ], true ) ) {
                    // close the script/style block
                    $state       = static::STATE_PLAINTEXT;
                    $output      .= $this->_finalizeScriptTag( $html_buffer );
                    $html_buffer = '';
                    $this->flagSegmentIfTagIsValid( $html_buffer );
                }

            }

        }

        //HTML Partial, add wrong HTML to preserve string content
        if ( $html_buffer !== '' ) {
            $this->flagSegmentIfTagIsValid( $html_buffer );
            $output .= $this->_fixWrongBuffer( $html_buffer );
        }

        //string ends with plain text, so no state change is triggered at the end of string
        if ( $plain_text_buffer !== '' ) {
            $output .= $this->_finalizePlainText( $plain_text_buffer );
        }

        return $output;

    }

    /**
     * Tells whether the buffered tag opening (without its closing '>') is a
     * <script> or <style> element, with or without attributes.
     *
     * @param string $buffer
     *
     * @return bool
     */
    private function isScriptOrStyleOpening( $buffer ) {

        foreach ( [ '<script', '<style' ] as $tagStart ) {
            // exact name, or name followed by a space: "<styles" must not match
            if ( $buffer === $tagStart || strpos( $buffer, $tagStart . ' ' ) === 0 ) {
                return true;
            }
        }

        return false;

    }

    /**
     * Calls _setSegmentContainsHtml() when $buffer is a valid tag and a
     * pipeline was provided. _isTagValid() is always consulted first.
     *
     * NOTE(review): most callers pass a buffer that has just been cleared, so
     * the flag is raised there only if the handler accepts an empty string as
     * a valid tag. That is what the code has always done and it is preserved
     * as is; confirm whether comments and script/style blocks were meant to
     * flag the segment.
     *
     * @param string $buffer
     */
    private function flagSegmentIfTagIsValid( $buffer ) {

        if ( $this->_isTagValid( $buffer ) && null !== $this->pipeline ) {
            $this->_setSegmentContainsHtml();
        }

    }

}
302