HTMLPurifier_Lexer_DirectLex::substrCount() - Code Metrics - Inspection of "travis-ci 테스트 구성 변경" - xpressengine/xe-core - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — develop ( baac3d...439f66 )

by gyeong-won

created 2016-06-27 10:19 UTC

HTMLPurifier_Lexer_DirectLex::substrCount() A

↳ Parent: HTMLPurifier_Lexer_DirectLex

Complexity

Conditions	3
Paths	4

Size

Total Lines	12
Code Lines	9

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	3
eloc	9
nc	4
nop	4
dl	0
loc	12
rs	9.4285
c	0
b	0
f	0

<?php

/**
 * Our in-house implementation of a parser.
 *
 * A pure PHP parser, DirectLex has absolutely no dependencies, making
 * it a reasonably good default for PHP4.  Written with efficiency in mind,
 * it can be four times faster than HTMLPurifier_Lexer_PEARSax3, although it
 * pales in comparison to HTMLPurifier_Lexer_DOMLex.
 *
 * @todo Reread XML spec and document differences.
 */
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
{

    public $tracksLineNumbers = true;

    /**
     * Whitespace characters for str(c)spn.
     */
    protected $_whitespace = "\x20\x09\x0D\x0A";

    /**
     * Callback function for script CDATA fudge
     * @param $matches, in form of array(opening tag, contents, closing tag)
     */
    protected function scriptCallback($matches) {
        // Escape only the script contents (capture group 2); the opening
        // and closing tags (groups 1 and 3) are passed through untouched.
        // The final "false" turns off double-encoding of existing entities.
        $escaped_contents = htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false);
        return $matches[1] . $escaped_contents . $matches[3];
    }

    /**
     * Splits a string of HTML into a flat array of tokens.
     *
     * The string is walked left to right with a byte cursor. Runs of text
     * become HTMLPurifier_Token_Text, "<!-- ... -->" becomes
     * HTMLPurifier_Token_Comment, "</x>" becomes HTMLPurifier_Token_End and
     * "<x ...>" / "<x ... />" become HTMLPurifier_Token_Start /
     * HTMLPurifier_Token_Empty. When line tracking is enabled, every token
     * that is appended to the result has rawPosition() called on it with
     * the current line and column.
     *
     * @param $html String of HTML to tokenize.
     * @param $config Configuration object; the directives HTML.Trusted,
     *        Core.MaintainLineNumbers, Core.CollectErrors and
     *        Core.DirectLexLineNumberSyncInterval are read from it.
     * @param $context Context object; 'CurrentLine' and 'CurrentCol' are
     *        registered while tokenizing and destroyed before returning,
     *        and 'ErrorCollector' is fetched when error collection is on.
     * @returns Array of token objects in document order.
     */
    public function tokenizeHTML($html, $config, $context) {

        // special normalization for script tags without any armor
        // our "armor" heuristic is a < sign any number of whitespaces after
        // the first script tag
        if ($config->get('HTML.Trusted')) {
            $html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
                array($this, 'scriptCallback'), $html);
        }

        $html = $this->normalize($html, $config, $context);

        $cursor = 0; // our location in the text
        $inside_tag = false; // whether or not we're parsing the inside of a tag
        $array = array(); // result array

        // This is also treated to mean maintain *column* numbers too
        $maintain_line_numbers = $config->get('Core.MaintainLineNumbers');

        if ($maintain_line_numbers === null) {
            // automatically determine line numbering by checking
            // if error collection is on
            $maintain_line_numbers = $config->get('Core.CollectErrors');
        }

        if ($maintain_line_numbers) {
            $current_line = 1;
            $current_col  = 0;
            $length = strlen($html);
        } else {
            $current_line = false;
            $current_col  = false;
            $length = false;
        }
        // NOTE(review): presumably register() takes these by reference so
        // that the context sees later updates -- verify against the
        // context class.
        $context->register('CurrentLine', $current_line);
        $context->register('CurrentCol',  $current_col);
        $nl = "\n";
        // how often to manually recalculate. This will ALWAYS be right,
        // but it's pretty wasteful. Set to 0 to turn off
        $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval');

        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        // for testing synchronization
        $loops = 0;

        // Every branch below ends in either "continue" or "break", so the
        // loop is only ever left through an explicit break.
        while(++$loops) {

            // $cursor is either at the start of a token, or inside of
            // a tag (i.e. there was a < immediately before it), as indicated
            // by $inside_tag

            if ($maintain_line_numbers) {

                // $rcursor, however, is always at the start of a token.
                $rcursor = $cursor - (int) $inside_tag;

                // Column number is cheap, so we calculate it every round.
                // We're interested at the *end* of the newline string, so
                // we need to add strlen($nl) == 1 to $nl_pos before subtracting it
                // from our "rcursor" position.
                $nl_pos = strrpos($html, $nl, $rcursor - $length);
                $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);

                // recalculate lines
                if (
                    $synchronize_interval &&  // synchronization is on
                    $cursor > 0 &&            // cursor is further than zero
                    $loops % $synchronize_interval === 0 // time to synchronize!
                ) {
                    $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
                }

            }

            $position_next_lt = strpos($html, '<', $cursor);
            $position_next_gt = strpos($html, '>', $cursor);

            // triggers on "<b>asdf</b>" but not "asdf <b></b>"
            // special case to set up context
            if ($position_next_lt === $cursor) {
                $inside_tag = true;
                $cursor++;
            }

            if (!$inside_tag && $position_next_lt !== false) {
                // We are not inside tag and there still is another tag to parse
                $token = new
                    HTMLPurifier_Token_Text(
                        $this->parseData(
                            substr(
                                $html, $cursor, $position_next_lt - $cursor
                            )
                        )
                    );
                if ($maintain_line_numbers) {
                    $token->rawPosition($current_line, $current_col);
                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
                }
                $array[] = $token;
                $cursor  = $position_next_lt + 1;
                $inside_tag = true;
                continue;
            } elseif (!$inside_tag) {
                // We are not inside tag but there are no more tags
                // If we're already at the end, break
                if ($cursor === strlen($html)) break;
                // Create Text of rest of string
                $token = new
                    HTMLPurifier_Token_Text(
                        $this->parseData(
                            substr(
                                $html, $cursor
                            )
                        )
                    );
                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
                $array[] = $token;
                break;
            } elseif ($inside_tag && $position_next_gt !== false) {
                // We are in tag and it is well formed
                // Grab the internals of the tag
                $strlen_segment = $position_next_gt - $cursor;

                if ($strlen_segment < 1) {
                    // there's nothing to process! ("<" immediately followed
                    // by ">"). No token is emitted for this pair and
                    // $inside_tag stays true; we only step past the ">".
                    // NOTE(review): earlier code built a Text('<') token
                    // here but never appended it to $array -- confirm
                    // whether the stray "<" should be emitted.
                    $cursor++;
                    continue;
                }

                $segment = substr($html, $cursor, $strlen_segment);

                if ($segment === false) {
                    // somehow, we attempted to access beyond the end of
                    // the string, defense-in-depth, reported by Nate Abele
                    break;
                }

                // Check if it's a comment
                if (
                    substr($segment, 0, 3) === '!--'
                ) {
                    // re-determine segment length, looking for -->
                    $position_comment_end = strpos($html, '-->', $cursor);
                    if ($position_comment_end === false) {
                        // uh oh, we have a comment that extends to
                        // infinity. Can't be helped: set comment
                        // end position to end of string
                        if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment');
                        $position_comment_end = strlen($html);
                        $end = true;
                    } else {
                        $end = false;
                    }
                    $strlen_segment = $position_comment_end - $cursor;
                    $segment = substr($html, $cursor, $strlen_segment);
                    // skip the leading "!--" when extracting the comment text
                    $token = new
                        HTMLPurifier_Token_Comment(
                            substr(
                                $segment, 3, $strlen_segment - 3
                            )
                        );
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
                        $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
                    }
                    $array[] = $token;
                    // step over the closing "-->" unless the comment ran
                    // to the end of the string
                    $cursor = $end ? $position_comment_end : $position_comment_end + 3;
                    $inside_tag = false;
                    continue;
                }

                // Check if it's an end tag
                $is_end_tag = (strpos($segment,'/') === 0);
                if ($is_end_tag) {
                    $type = substr($segment, 1);
                    $token = new HTMLPurifier_Token_End($type);
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $inside_tag = false;
                    $cursor = $position_next_gt + 1;
                    continue;
                }

                // Check leading character is alphabetic, if not, we may
                // have accidentally grabbed an emoticon. Translate into
                // text and go our merry way
                if (!ctype_alpha($segment[0])) {
                    // XML:  $segment[0] !== '_' && $segment[0] !== ':'
                    if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
                    $token = new HTMLPurifier_Token_Text('<');
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    // the cursor is deliberately left where it is: the
                    // characters after the "<" are re-read as text on the
                    // next iteration
                    $inside_tag = false;
                    continue;
                }

                // Check if it is explicitly self closing, if so, remove
                // trailing slash. Remember, we could have a tag like <br>, so
                // any later token processing scripts must convert improperly
                // classified EmptyTags from StartTags.
                $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
                if ($is_self_closing) {
                    $strlen_segment--;
                    $segment = substr($segment, 0, $strlen_segment);
                }

                // Check if there are any attributes
                $position_first_space = strcspn($segment, $this->_whitespace);

                if ($position_first_space >= $strlen_segment) {
                    // no whitespace in the segment: the tag is just a name
                    if ($is_self_closing) {
                        $token = new HTMLPurifier_Token_Empty($segment);
                    } else {
                        $token = new HTMLPurifier_Token_Start($segment);
                    }
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $inside_tag = false;
                    $cursor = $position_next_gt + 1;
                    continue;
                }

                // Grab out all the data
                $type = substr($segment, 0, $position_first_space);
                $attribute_string =
                    trim(
                        substr(
                            $segment, $position_first_space
                        )
                    );
                if ($attribute_string) {
                    $attr = $this->parseAttributeString(
                                    $attribute_string
                                  , $config, $context
                              );
                } else {
                    $attr = array();
                }

                if ($is_self_closing) {
                    $token = new HTMLPurifier_Token_Empty($type, $attr);
                } else {
                    $token = new HTMLPurifier_Token_Start($type, $attr);
                }
                if ($maintain_line_numbers) {
                    $token->rawPosition($current_line, $current_col);
                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
                }
                $array[] = $token;
                $cursor = $position_next_gt + 1;
                $inside_tag = false;
                continue;
            } else {
                // inside tag, but there's no ending > sign
                if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
                $token = new
                    HTMLPurifier_Token_Text(
                        '<' .
                        $this->parseData(
                            substr($html, $cursor)
                        )
                    );
                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
                // no cursor scroll? Hmm...
                $array[] = $token;
                break;
            }
        }

        $context->destroy('CurrentLine');
        $context->destroy('CurrentCol');
        return $array;
    }

    /**
     * PHP 5.0.x compatible substr_count that implements offset and length
     */
    protected function substrCount($haystack, $needle, $offset, $length) {
        // The version probe runs once; the result is cached in a static.
        static $needs_fallback = null;
        if (null === $needs_fallback) {
            $needs_fallback = version_compare(PHP_VERSION, '5.1', '<');
        }

        if (!$needs_fallback) {
            // Native offset/length arguments are available.
            return substr_count($haystack, $needle, $offset, $length);
        }

        // Older PHP: cut the window out first, then count inside it.
        $window = substr($haystack, $offset, $length);
        return substr_count($window, $needle);
    }

    /**
     * Takes the inside of an HTML tag and makes an assoc array of attributes.
     *
     * @param $string Inside of tag excluding name.
     * @returns Assoc array of attributes.
     */
    public function parseAttributeString($string, $config, $context) {
        $string = (string) $string; // quick typecast

        if ($string == '') return array(); // no attributes

        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        // let's see if we can abort as quickly as possible
        // one equal sign, no whitespace => one attribute
        $num_equal = substr_count($string, '=');
        // strcspn() returns the length of the leading run that contains no
        // whitespace; if that is shorter than the string, whitespace is
        // present somewhere. Unlike a truthiness test on strpos(), this
        // also catches whitespace at offset 0 and tab/CR/LF, matching the
        // whitespace set used by the full parser below.
        $has_space = strcspn($string, $this->_whitespace) < strlen($string);
        if ($num_equal === 0 && !$has_space) {
            // bool attribute
            return array($string => $string);
        } elseif ($num_equal === 1 && !$has_space) {
            // only one attribute
            list($key, $quoted_value) = explode('=', $string);
            $quoted_value = trim($quoted_value);
            if (!$key) {
                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
                return array();
            }
            // strict comparison: a bare "0" is a real value, not an
            // empty one
            if ($quoted_value === '') return array($key => '');
            $first_char = @$quoted_value[0];
            $last_char  = @$quoted_value[strlen($quoted_value)-1];

            $same_quote = ($first_char == $last_char);
            $open_quote = ($first_char == '"' || $first_char == "'");

            if ( $same_quote && $open_quote) {
                // well behaved
                $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
            } else {
                // not well behaved
                if ($open_quote) {
                    if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
                    $value = substr($quoted_value, 1);
                } else {
                    $value = $quoted_value;
                }
            }
            if ($value === false) $value = '';
            return array($key => $this->parseData($value));
        }

        // setup loop environment
        $array  = array(); // return assoc array of attributes
        $cursor = 0; // current position in string (moves forward)
        $size   = strlen($string); // size of the string (stays the same)

        // if we have unquoted attributes, the parser expects a terminating
        // space, so let's guarantee that there's always a terminating space.
        $string .= ' ';

        while(true) {

            if ($cursor >= $size) {
                break;
            }

            // scroll past any leading whitespace
            $cursor += strspn($string, $this->_whitespace, $cursor);

            // grab the key

            $key_begin = $cursor; //we're currently at the start of the key

            // scroll past all characters that are the key (not whitespace or =)
            $cursor += strcspn($string, $this->_whitespace . '=', $cursor);

            $key_end = $cursor; // now at the end of the key

            $key = substr($string, $key_begin, $key_end - $key_begin);

            if (!$key) {
                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
                $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
                continue; // empty key
            }

            // scroll past all whitespace
            $cursor += strspn($string, $this->_whitespace, $cursor);

            if ($cursor >= $size) {
                $array[$key] = $key;
                break;
            }

            // if the next character is an equal sign, we've got a regular
            // pair, otherwise, it's a bool attribute
            $first_char = @$string[$cursor];

            if ($first_char == '=') {
                // key="value"

                $cursor++;
                $cursor += strspn($string, $this->_whitespace, $cursor);

                // we might be in front of a quote right now

                $char = @$string[$cursor];

                if ($char == '"' || $char == "'") {
                    // it's quoted, end bound is $char
                    $cursor++;
                    $value_begin = $cursor;
                    $cursor = strpos($string, $char, $cursor);
                    $value_end = $cursor;
                } else {
                    // it's not quoted, end bound is whitespace
                    $value_begin = $cursor;
                    $cursor += strcspn($string, $this->_whitespace, $cursor);
                    $value_end = $cursor;
                }

                // we reached a premature end (strpos() found no closing quote)
                if ($cursor === false) {
                    $cursor = $size;
                    $value_end = $cursor;
                }

                $value = substr($string, $value_begin, $value_end - $value_begin);
                if ($value === false) $value = '';
                $array[$key] = $this->parseData($value);
                $cursor++;

            } else {
                // boolattr
                if ($key !== '') {
                    $array[$key] = $key;
                } else {
                    // purely theoretical
                    if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
                }

            }
        }
        return $array;
    }

}

// vim: et sw=4 sts=4


1		<?php
2
3		/**
4		* Our in-house implementation of a parser.
5		*
6		* A pure PHP parser, DirectLex has absolutely no dependencies, making
7		* it a reasonably good default for PHP4. Written with efficiency in mind,
8		* it can be four times faster than HTMLPurifier_Lexer_PEARSax3, although it
9		* pales in comparison to HTMLPurifier_Lexer_DOMLex.
10		*
11		* @todo Reread XML spec and document differences.
12		*/
13		class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
14		{
15
16		public $tracksLineNumbers = true;
17
18		/**
19		* Whitespace characters for str(c)spn.
20		*/
21		protected $_whitespace = "\x20\x09\x0D\x0A";
22
23		/**
24		* Callback function for script CDATA fudge
25		* @param $matches, in form of array(opening tag, contents, closing tag)
26		*/
27		protected function scriptCallback($matches) {
28		return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false) . $matches[3];
29		}
30
31		public function tokenizeHTML($html, $config, $context) {
32
33		// special normalization for script tags without any armor
34		// our "armor" heurstic is a < sign any number of whitespaces after
35		// the first script tag
36		if ($config->get('HTML.Trusted')) {
37		$html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
38		array($this, 'scriptCallback'), $html);
39		}
40
41		$html = $this->normalize($html, $config, $context);
42
43		$cursor = 0; // our location in the text
44		$inside_tag = false; // whether or not we're parsing the inside of a tag
45		$array = array(); // result array
46
47		// This is also treated to mean maintain column numbers too
48		$maintain_line_numbers = $config->get('Core.MaintainLineNumbers');
49
50		if ($maintain_line_numbers === null) {
51		// automatically determine line numbering by checking
52		// if error collection is on
53		$maintain_line_numbers = $config->get('Core.CollectErrors');
54		}
55
56		if ($maintain_line_numbers) {
57		$current_line = 1;
58		$current_col = 0;
59		$length = strlen($html);
60		} else {
61		$current_line = false;
62		$current_col = false;
63		$length = false;
64		}
65		$context->register('CurrentLine', $current_line);
66		$context->register('CurrentCol', $current_col);
67		$nl = "\n";
68		// how often to manually recalculate. This will ALWAYS be right,
69		// but it's pretty wasteful. Set to 0 to turn off
70		$synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval');
71
72		$e = false;
73		if ($config->get('Core.CollectErrors')) {
74		$e =& $context->get('ErrorCollector');
75		}
76
77		// for testing synchronization
78		$loops = 0;
79
80		while(++$loops) {
81
82		// $cursor is either at the start of a token, or inside of
83		// a tag (i.e. there was a < immediately before it), as indicated
84		// by $inside_tag
85
86		if ($maintain_line_numbers) {
87
88		// $rcursor, however, is always at the start of a token.
89		$rcursor = $cursor - (int) $inside_tag;
90
91		// Column number is cheap, so we calculate it every round.
92		// We're interested at the end of the newline string, so
93		// we need to add strlen($nl) == 1 to $nl_pos before subtracting it
94		// from our "rcursor" position.
95		$nl_pos = strrpos($html, $nl, $rcursor - $length);
96		$current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
97
98		// recalculate lines
99		if (
100		$synchronize_interval && // synchronization is on
101		$cursor > 0 && // cursor is further than zero
102		$loops % $synchronize_interval === 0 // time to synchronize!
103		) {
104		$current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
105		}
106
107		}
108
109		$position_next_lt = strpos($html, '<', $cursor);
110		$position_next_gt = strpos($html, '>', $cursor);
111
112		// triggers on "<b>asdf</b>" but not "asdf <b></b>"
113		// special case to set up context
114		if ($position_next_lt === $cursor) {
115		$inside_tag = true;
116		$cursor++;
117		}
118
119		if (!$inside_tag && $position_next_lt !== false) {
120		// We are not inside tag and there still is another tag to parse
121		$token = new
122		HTMLPurifier_Token_Text(
123		$this->parseData(
124		substr(
125		$html, $cursor, $position_next_lt - $cursor
126		)
127		)
128		);
129	View Code Duplication	if ($maintain_line_numbers) {
130		$token->rawPosition($current_line, $current_col);
131		$current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
132		}
133		$array[] = $token;
134		$cursor = $position_next_lt + 1;
135		$inside_tag = true;
136		continue;
137		} elseif (!$inside_tag) {
138		// We are not inside tag but there are no more tags
139		// If we're already at the end, break
140		if ($cursor === strlen($html)) break;
141		// Create Text of rest of string
142		$token = new
143		HTMLPurifier_Token_Text(
144		$this->parseData(
145		substr(
146		$html, $cursor
147		)
148		)
149		);
150		if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
151		$array[] = $token;
152		break;
153		} elseif ($inside_tag && $position_next_gt !== false) {
154		// We are in tag and it is well formed
155		// Grab the internals of the tag
156		$strlen_segment = $position_next_gt - $cursor;
157
158		if ($strlen_segment < 1) {
159		// there's nothing to process!
160		$token = new HTMLPurifier_Token_Text('<');
		0 ignored issues – show Unused Code introduced 2015-11-22 14:33 UTC by Report Bug Copy Issue Report `$token` is not used, you could remove the assignment. This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently. $myVar = 'Value'; $higher = false; if (rand(1, 6) > 3) { $higher = true; } else { $higher = false; } Both the `$myVar` assignment in line 1 and the `$higher` assignment in line 2 are dead. The first because `$myVar` is never used and the second because `$higher` is always overwritten on every possible execution path. Loading history...
161		$cursor++;
162		continue;
163		}
164
165		$segment = substr($html, $cursor, $strlen_segment);
166
167		if ($segment === false) {
168		// somehow, we attempted to access beyond the end of
169		// the string, defense-in-depth, reported by Nate Abele
170		break;
171		}
172
173		// Check if it's a comment
174		if (
175		substr($segment, 0, 3) === '!--'
176		) {
177		// re-determine segment length, looking for -->
178		$position_comment_end = strpos($html, '-->', $cursor);
179		if ($position_comment_end === false) {
180		// uh oh, we have a comment that extends to
181		// infinity. Can't be helped: set comment
182		// end position to end of string
183		if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment');
184		$position_comment_end = strlen($html);
185		$end = true;
186		} else {
187		$end = false;
188		}
189		$strlen_segment = $position_comment_end - $cursor;
190		$segment = substr($html, $cursor, $strlen_segment);
191		$token = new
192		HTMLPurifier_Token_Comment(
193		substr(
194		$segment, 3, $strlen_segment - 3
195		)
196		);
197		if ($maintain_line_numbers) {
198		$token->rawPosition($current_line, $current_col);
199		$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
200		}
201		$array[] = $token;
202		$cursor = $end ? $position_comment_end : $position_comment_end + 3;
203		$inside_tag = false;
204		continue;
205		}
206
207		// Check if it's an end tag
208		$is_end_tag = (strpos($segment,'/') === 0);
209		if ($is_end_tag) {
210		$type = substr($segment, 1);
211		$token = new HTMLPurifier_Token_End($type);
212	View Code Duplication	if ($maintain_line_numbers) {
213		$token->rawPosition($current_line, $current_col);
214		$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
215		}
216		$array[] = $token;
217		$inside_tag = false;
218		$cursor = $position_next_gt + 1;
219		continue;
220		}
221
222		// Check leading character is alnum, if not, we may
223		// have accidently grabbed an emoticon. Translate into
224		// text and go our merry way
225		if (!ctype_alpha($segment[0])) {
226		// XML: $segment[0] !== '_' && $segment[0] !== ':'
227		if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
228		$token = new HTMLPurifier_Token_Text('<');
229	View Code Duplication	if ($maintain_line_numbers) {
230		$token->rawPosition($current_line, $current_col);
231		$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
232		}
233		$array[] = $token;
234		$inside_tag = false;
235		continue;
236		}
237
238		// Check if it is explicitly self closing, if so, remove
239		// trailing slash. Remember, we could have a tag like <br>, so
240		// any later token processing scripts must convert improperly
241		// classified EmptyTags from StartTags.
242		$is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
243		if ($is_self_closing) {
244		$strlen_segment--;
245		$segment = substr($segment, 0, $strlen_segment);
246		}
247
248		// Check if there are any attributes
249		$position_first_space = strcspn($segment, $this->_whitespace);
250
251		if ($position_first_space >= $strlen_segment) {
252		if ($is_self_closing) {
253		$token = new HTMLPurifier_Token_Empty($segment);
254		} else {
255		$token = new HTMLPurifier_Token_Start($segment);
256		}
257	View Code Duplication	if ($maintain_line_numbers) {
258		$token->rawPosition($current_line, $current_col);
259		$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
260		}
261		$array[] = $token;
262		$inside_tag = false;
263		$cursor = $position_next_gt + 1;
264		continue;
265		}
266
267		// Grab out all the data
268		$type = substr($segment, 0, $position_first_space);
269		$attribute_string =
270		trim(
271		substr(
272		$segment, $position_first_space
273		)
274		);
275		if ($attribute_string) {
276		$attr = $this->parseAttributeString(
277		$attribute_string
278		, $config, $context
279		);
280		} else {
281		$attr = array();
282		}
283
284		if ($is_self_closing) {
285		$token = new HTMLPurifier_Token_Empty($type, $attr);
286		} else {
287		$token = new HTMLPurifier_Token_Start($type, $attr);
288		}
289	View Code Duplication	if ($maintain_line_numbers) {
290		$token->rawPosition($current_line, $current_col);
291		$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
292		}
293		$array[] = $token;
294		$cursor = $position_next_gt + 1;
295		$inside_tag = false;
296		continue;
297		} else {
298		// inside tag, but there's no ending > sign
299		if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
300		$token = new
301		HTMLPurifier_Token_Text(
302		'<' .
303		$this->parseData(
304		substr($html, $cursor)
305		)
306		);
307		if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
308		// no cursor scroll? Hmm...
309		$array[] = $token;
310		break;
311		}
312		break;
		0 ignored issues – show Unused Code introduced 2015-11-22 14:33 UTC by Report Bug Copy Issue Report `break;` does not seem to be reachable. This check looks for unreachable code. It uses sophisticated control flow analysis techniques to find statements which will never be executed. Unreachable code is most often the result of `return`, `die` or `exit` statements that have been added for debug purposes. function fx() { try { doSomething(); return true; } catch (\Exception $e) { return false; } return false; } In the above example, the last `return false` will never be executed, because a return statement has already been met in every possible execution path. Loading history...
313		}
314
315		$context->destroy('CurrentLine');
316		$context->destroy('CurrentCol');
317		return $array;
		0 ignored issues – show Bug Best Practice introduced 2015-11-22 14:33 UTC by Report Bug Copy Issue Report The return type of `return $array;` (`array`) is incompatible with the return type of the parent method `HTMLPurifier_Lexer::tokenizeHTML` of type `HTMLPurifier_Token|null`. If you return a value from a function or method, it should be a sub-type of the type that is given by the parent type, e.g. an interface or abstract method. This is more formally defined by the Liskov substitution principle, and guarantees that classes that depend on the parent type can use any instance of a child type interchangeably. This principle also belongs to the SOLID principles for object-oriented design. Let’s take a look at an example: class Author { private $name; public function __construct($name) { $this->name = $name; } public function getName() { return $this->name; } } abstract class Post { public function getAuthor() { return 'Johannes'; } } class BlogPost extends Post { public function getAuthor() { return new Author('Johannes'); } } class ForumPost extends Post { /* ... */ } function my_function(Post $post) { echo strtoupper($post->getAuthor()); } Our function `my_function` expects a `Post` object, and outputs the author of the post. The base class `Post` returns a simple string and outputting a simple string will work just fine. However, the child class `BlogPost` which is a sub-type of `Post` instead decided to return an `object`, and is therefore violating the SOLID principles. If a `BlogPost` were passed to `my_function`, PHP would not complain, but ultimately fail when executing the `strtoupper` call in its body. Loading history...
318		}
319
/**
 * Counts how often $needle occurs in the part of $haystack that starts
 * at $offset and spans $length bytes.
 *
 * Stand-in for substr_count() with offset/length that also runs on
 * PHP 5.0.x, where the native function lacks those two arguments.
 */
protected function substrCount($haystack, $needle, $offset, $length) {
    // the version probe is done once and remembered between calls
    static $needsSlice;
    if ($needsSlice === null) {
        $needsSlice = version_compare(PHP_VERSION, '5.1', '<');
    }
    if (!$needsSlice) {
        // native offset/length support is available
        return substr_count($haystack, $needle, $offset, $length);
    }
    // legacy runtime: cut the window out first, then count inside it
    return substr_count(substr($haystack, $offset, $length), $needle);
}
335
/**
 * Takes the inside of an HTML tag and makes an assoc array of attributes.
 *
 * A bare key (boolean attribute) maps to its own name; a key with a
 * value maps to that value after it has been run through parseData().
 * Surrounding single or double quotes are stripped from values.
 *
 * @param $string Inside of tag excluding name.
 * @param $config Config object; 'Core.CollectErrors' is read from it.
 * @param $context Context object; 'ErrorCollector' is fetched from it
 *                 when error collection is enabled.
 * @returns Assoc array of attributes.
 */
public function parseAttributeString($string, $config, $context) {
    $string = (string) $string; // quick typecast

    if ($string === '') return array(); // no attributes

    $e = false;
    if ($config->get('Core.CollectErrors')) {
        $e =& $context->get('ErrorCollector');
    }

    // let's see if we can abort as quickly as possible
    // one equal sign, no spaces => one attribute
    $num_equal = substr_count($string, '=');
    // compare against false explicitly: a space at offset 0 is still a space
    $has_space = (strpos($string, ' ') !== false);
    if ($num_equal === 0 && !$has_space) {
        // bool attribute
        return array($string => $string);
    } elseif ($num_equal === 1 && !$has_space) {
        // only one attribute
        list($key, $quoted_value) = explode('=', $string);
        $quoted_value = trim($quoted_value);
        // strict comparisons: "0" is a legitimate key/value, not a missing one
        if ($key === '') {
            if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
            return array();
        }
        if ($quoted_value === '') return array($key => '');
        // $quoted_value is non-empty here, so both offsets exist
        $first_char = $quoted_value[0];
        $last_char = $quoted_value[strlen($quoted_value) - 1];

        $same_quote = ($first_char == $last_char);
        $open_quote = ($first_char == '"' || $first_char == "'");

        if ($same_quote && $open_quote) {
            // well behaved
            $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
        } else {
            // not well behaved
            if ($open_quote) {
                if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
                $value = substr($quoted_value, 1);
            } else {
                $value = $quoted_value;
            }
        }
        // older PHP versions return false instead of '' for an empty slice
        if ($value === false) $value = '';
        return array($key => $this->parseData($value));
    }

    // setup loop environment
    $array = array(); // return assoc array of attributes
    $cursor = 0; // current position in string (moves forward)
    $size = strlen($string); // size of the string (stays the same)

    // if we have unquoted attributes, the parser expects a terminating
    // space, so let's guarantee that there's always a terminating space.
    $string .= ' ';

    while ($cursor < $size) {

        // scroll past whitespace in front of the key
        $cursor += strspn($string, $this->_whitespace, $cursor);

        if ($cursor >= $size) {
            // only trailing whitespace was left, nothing more to parse
            break;
        }

        // grab the key

        $key_begin = $cursor; // we're currently at the start of the key

        // scroll past all characters that are the key (not whitespace or =)
        $cursor += strcspn($string, $this->_whitespace . '=', $cursor);

        $key_end = $cursor; // now at the end of the key

        $key = (string) substr($string, $key_begin, $key_end - $key_begin);

        if ($key === '') {
            // we are sitting on a stray '='
            if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
            // step over the '=' itself plus everything up to the next
            // whitespace; the "1 +" guarantees forward progress even when
            // whitespace follows immediately (otherwise: infinite loop)
            $cursor += 1 + strcspn($string, $this->_whitespace, $cursor + 1);
            continue; // empty key
        }

        // scroll past all whitespace
        $cursor += strspn($string, $this->_whitespace, $cursor);

        if ($cursor >= $size) {
            $array[$key] = $key;
            break;
        }

        // if the next character is an equal sign, we've got a regular
        // pair, otherwise, it's a bool attribute
        if ($string[$cursor] !== '=') {
            // boolattr; the cursor already points at the next key
            $array[$key] = $key;
            continue;
        }

        // key="value"

        $cursor++;
        $cursor += strspn($string, $this->_whitespace, $cursor);

        if ($cursor >= $size) {
            // the input ended right after the equal sign
            $array[$key] = '';
            break;
        }

        // we might be in front of a quote right now

        $char = $string[$cursor];

        if ($char == '"' || $char == "'") {
            // it's quoted, end bound is $char
            $cursor++;
            $value_begin = $cursor;
            $cursor = strpos($string, $char, $cursor); // false if unterminated
            $value_end = $cursor;
        } else {
            // it's not quoted, end bound is whitespace
            $value_begin = $cursor;
            $cursor += strcspn($string, $this->_whitespace, $cursor);
            $value_end = $cursor;
        }

        // we reached a premature end (closing quote never found)
        if ($cursor === false) {
            $cursor = $size;
            $value_end = $cursor;
        }

        $value = substr($string, $value_begin, $value_end - $value_begin);
        if ($value === false) $value = '';
        $array[$key] = $this->parseData($value);
        $cursor++; // move past the closing quote / terminating whitespace
    }
    return $array;
}
487
488		}
489
490		// vim: et sw=4 sts=4
491

xpressengine / xe-core

GitHub Access Token became invalid

Push — develop ( baac3d...439f66 )

HTMLPurifier_Lexer_DirectLex::substrCount() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like