UStrObj::loadToArray() - Code Metrics - Inspection of "analysis cleanup" - garrettw/stringobject - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( a47b74...34d315 )

by Garrett

created 2016-03-20 22:55 UTC

UStrObj::loadToArray() C

↳ Parent: UStrObj

Complexity

Conditions	16
Paths	12

Size

Total Lines	78
Code Lines	44

Duplication

Lines	0
Ratio	0 %

Importance

Changes	4
Bugs	0	Features	0

Metric	Value
c	4
b	0
f	0
dl	0
loc	78
rs	5.142
cc	16
eloc	44
nc	12
nop	0

How to fix Long Method Complexity

<?php

namespace StringObject;

class UStrObj extends AnyStrObj

{
    // Values stored in $normform. Judging by the names these identify Unicode
    // normalization forms (TODO confirm against the code that sets $normform).
    // Numerically, NFKC and NFKD equal NFK combined bitwise with NFC and NFD
    // (4 | 1 and 4 | 2).
    const NOT_NORMALIZED = 0;
    const NFC = 1;
    const NFD = 2;
    const NFK = 4;
    const NFKC = 5;
    const NFKD = 6;

    // Decoded characters, filled lazily by loadToArray(). Each entry is a
    // two-element array: [UTF-8 encoded character, code point].
    protected $chars = [];
    // Not referenced by any method visible in this file section.
    protected $uhandler;
    // One of the constants above; starts out as NOT_NORMALIZED.
    protected $normform = self::NOT_NORMALIZED;

    // Per-length UTF-8 parameters, keyed by the sequence length in bytes:
    //   'mask'  - bits of the lead byte that carry code point payload
    //   'start' - lowest code point that needs this many bytes; a decoded
    //             value below it is treated as an overlong encoding
    protected static $spec = [
        2 => ['mask' => 0b00011111, 'start' => 0x80],
        3 => ['mask' => 0b00001111, 'start' => 0x800],
        4 => ['mask' => 0b00000111, 'start' => 0x10000],
        5 => ['mask' => 0b00000011, 'start' => 0x200000],
        6 => ['mask' => 0b00000001, 'start' => 0x4000000],
    ];
    // Replacement code points for the single bytes 0x80-0x9F, applied by
    // loadToArray() to bytes that are not part of a valid UTF-8 sequence
    // (treated there as cp1252). Bytes marked "invalid" map to U+FFFD.
    protected static $winc1umap = [
        0x80 => 0x20AC,
        0x81 => 0xFFFD, // invalid
        0x82 => 0x201A,
        0x83 => 0x0192,
        0x84 => 0x201E,
        0x85 => 0x2026,
        0x86 => 0x2020,
        0x87 => 0x2021,
        0x88 => 0x02C6,
        0x89 => 0x2030,
        0x8A => 0x0160,
        0x8B => 0x2039,
        0x8C => 0x0152,
        0x8D => 0xFFFD, // invalid
        0x8E => 0x017D,
        0x8F => 0xFFFD, // invalid
        0x90 => 0xFFFD, // invalid
        0x91 => 0x2018,
        0x92 => 0x2019,
        0x93 => 0x201C,
        0x94 => 0x201D,
        0x95 => 0x2022,
        0x96 => 0x2013,
        0x97 => 0x2014,
        0x98 => 0x02DC,
        0x99 => 0x2122,
        0x9A => 0x0161,
        0x9B => 0x203A,
        0x9C => 0x0153,
        0x9D => 0xFFFD, // invalid
        0x9E => 0x017E,
        0x9F => 0x0178,
    ];

    /**
     * Splits the string into an array.
     *
     * - With an empty delimiter, returns the decoded character list built by
     *   loadToArray(): one [UTF-8 character, code point] pair per character.
     * - With an integer delimiter, cuts the raw string into chunks of that
     *   many bytes via str_split() (byte-wise, so a multibyte character can
     *   be cut apart).
     * - Otherwise splits the raw string on $delim via explode(), passing
     *   $limit along when it is not null.
     *
     * @param string|int $delim delimiter string, or chunk size in bytes
     * @param int|null   $limit explode() limit; ignored for the other modes
     * @return array
     */
    public function toArray($delim = '', $limit = null)
    {
        $this->loadToArray();

        // empty('0') is true, so a bare empty() check made it impossible to
        // split on the literal delimiter "0"; exclude that one value.
        if (empty($delim) && $delim !== '0') {
            return $this->chars;
        }
        if (is_int($delim)) {
            return \str_split($this->raw, $delim);
        }
        if ($limit === null) {
            return \explode($delim, $this->raw);
        }
        return \explode($delim, $this->raw, $limit);
    }

    /**
     * Returns the UTF-8 encoded character at the given position of the
     * decoded character list (decoding the raw string first if needed).
     *
     * @param int $index zero-based position in the character list
     * @return string
     */
    public function charAt($index)
    {
        $this->loadToArray();

        // each entry is [character, code point]
        $entry = $this->chars[$index];

        return $entry[0];
    }

    /**
     * Returns the code point of the character at the given position of the
     * decoded character list (decoding the raw string first if needed).
     *
     * @param int $index zero-based position in the character list
     * @return int
     */
    public function charCodeAt($index)
    {
        $this->loadToArray();

        // each entry is [character, code point]
        $entry = $this->chars[$index];

        return $entry[1];
    }

    /**
     * Encodes a code point as a UTF-8 byte string.
     *
     * Special cases:
     * - code points below 0x80 are returned as the single matching byte
     * - U+FEFF (byte order mark) yields an empty string
     * - surrogates (U+D800..U+DFFF) and values above U+10FFFF yield the
     *   encoding of U+FFFD, the replacement character
     *
     * @param int $cpt code point to encode
     * @return string
     */
    protected static function cpToUtf8Char($cpt)
    {
        if ($cpt < self::$spec[2]['start']) {
            return \chr($cpt);
        }

        if ($cpt == 0xFEFF) {
            return '';
        }

        if (($cpt >= 0xD800 && $cpt <= 0xDFFF) || $cpt > 0x10FFFF) {
            return "\xEF\xBF\xBD"; // U+FFFD; invalid symbol
        }

        if ($cpt < self::$spec[3]['start']) {
            // two bytes: 110xxxxx 10xxxxxx
            $data = [
                0b11000000 | ($cpt >> 6),
                0b10000000 | ($cpt & 0b00111111),
            ];
        } elseif ($cpt < self::$spec[4]['start']) {
            // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            $data = [
                0b11100000 | ($cpt >> 12),
                0b10000000 | (($cpt >> 6) & 0b00111111),
                0b10000000 | ($cpt & 0b00111111),
            ];
        } else {
            // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            // Values above 0x10FFFF were rejected above, so this branch covers
            // exactly U+10000..U+10FFFF. The lead byte must carry bits 18-20
            // of the code point; a fixed 0xF4 lead byte is only correct for
            // U+100000 and up.
            $data = [
                0b11110000 | ($cpt >> 18),
                0b10000000 | (($cpt >> 12) & 0b00111111),
                0b10000000 | (($cpt >> 6) & 0b00111111),
                0b10000000 | ($cpt & 0b00111111),
            ];
        }

        return \implode('', \array_map('chr', $data));
    }
    /**
     * Reports how many bytes a UTF-8 sequence occupies, judging only by the
     * bit pattern of its first byte. Any byte that does not carry one of the
     * 2- to 6-byte lead markers is reported as length 1.
     *
     * @param int $byte value of the first byte
     * @return int length in bytes, 1 through 6
     */
    protected static function charLength($byte)
    {
        // sequence length => [mask selecting the marker bits, expected marker]
        $leadPatterns = [
            6 => [0b11111110, 0b11111100],
            5 => [0b11111100, 0b11111000],
            4 => [0b11111000, 0b11110000],
            3 => [0b11110000, 0b11100000],
            2 => [0b11100000, 0b11000000],
        ];

        foreach ($leadPatterns as $length => $pattern) {
            if (($byte & $pattern[0]) === $pattern[1]) {
                return $length;
            }
        }

        return 1;
    }

    /**
     * Decodes $this->raw into $this->chars, one [UTF-8 character, code point]
     * pair per character. Does nothing when $this->chars is already filled.
     *
     * The raw string is walked byte by byte as UTF-8. A multibyte sequence
     * that turns out to be malformed (bad continuation byte, overlong form,
     * encoded surrogate, or value above U+10FFFF) is rewound and its bytes
     * are re-read one at a time as single-byte characters: 0x80-0x9F go
     * through the $winc1umap table, other high bytes keep their byte value as
     * the code point. A byte order mark is dropped at offset 0 and stored as
     * U+2060 (word joiner) anywhere else.
     *
     * @return void
     */
    private function loadToArray()
    {
        if (!empty($this->chars)) {
            return;
        }

        $len = \strlen($this->raw);
        $inside = false; // are we "inside" of evaluating a valid UTF-8 char?
        $invalid = false; // set while re-reading the lead byte of a rejected sequence

        // State of the multibyte sequence being assembled. Initialised here so
        // every variable is defined on all paths through the loop.
        $bytes = 1;         // expected length of the current sequence
        $cache = '';        // raw bytes collected so far
        $ordcache = 0;      // code point assembled so far
        $originOffset = 0;  // offset of the sequence's lead byte

        for ($offset = 0; $offset < $len; $offset++) {
            // square brackets: the curly-brace offset syntax no longer exists in PHP 8
            $char = $this->raw[$offset];
            $ord = \ord($char);

            if ($inside === false) {
                $bytes = self::charLength($ord);

                if ($bytes > 1 && $offset + $bytes <= $len && $invalid === false) {
                    // valid UTF-8 multibyte start
                    $inside = true;
                    $cache = $char;
                    $ordcache = ($ord & self::$spec[$bytes]['mask']) << (6 * ($bytes - 1));
                    $originOffset = $offset;
                } elseif ($ord < self::$spec[2]['start']) {
                    // ASCII 7-bit char
                    $this->chars[] = [$char, $ord];
                } else {
                    // either C0/C1 block or higher; map from cp1252 to utf8 or just convert
                    $ord = (isset(self::$winc1umap[$ord])) ? self::$winc1umap[$ord] : $ord;
                    $this->chars[] = [self::cpToUtf8Char($ord), $ord];
                    $invalid = false;
                }
                continue;
            }

            // $inside === true, i.e. *should be* continuation character
            if (($ord & 0b11000000) !== 0b10000000) {
                // actually, it's not one, so now the whole UTF-8 char is invalid
                // go back and force it to parse as ISO or 1252
                $inside = false;
                $invalid = true;
                $offset = $originOffset - 1; // the loop's $offset++ lands on the lead byte again
                continue;
            }

            // put this byte's data where it needs to go
            $ordcache |= ($ord & 0b00111111) << (6 * ($bytes - 1 - ($offset - $originOffset)));
            $cache .= $char;

            if ($originOffset + ($bytes - 1) === $offset) {
                // we're done parsing this char, now let's verify
                $inside = false;

                // check for overlong, surrogate, too large, BOM, or C0/C1
                $overlong = ($ordcache < self::$spec[$bytes]['start']);
                // Parentheses are required: === binds tighter than &, so without
                // them the mask was ANDed with a boolean and the test never fired.
                $surrogate = (($ordcache & 0xFFFFF800) === 0xD800);
                $toobig = ($ordcache > 0x10FFFF);

                if ($overlong || $surrogate || $toobig) {
                    $invalid = true;
                    $offset = $originOffset - 1;
                    continue;
                }

                if ($ordcache === 0xFEFF) { // BOM
                    if ($originOffset !== 0) {
                        // if not at beginning, store as word joiner U+2060
                        $this->chars[] = [\chr(0xE2) . \chr(0x81) . \chr(0xA0), 0x2060];
                    }
                    // otherwise discard
                    continue;
                }

                // verification passed, now store it
                $this->chars[] = [$cache, $ordcache];
            }
        }
    }
}


1		<?php
2
3		namespace StringObject;
4
5		class UStrObj extends AnyStrObj
		0 ignored issues – show
		Bug – introduced 2016-03-20 22:38 UTC
		Report Bug · Copy Issue Report
		There is at least one abstract method in this class. Maybe declare it as abstract, or implement the remaining methods: compareTo, escape, isAscii, isEmpty, nextToken, remove, repeat, replace, resetToken, times, translate, trim, unescape, uuDecode, uuEncode.
		Loading history...
6		{
7		const NOT_NORMALIZED = 0;
8		const NFC = 1;
9		const NFD = 2;
10		const NFK = 4;
11		const NFKC = 5;
12		const NFKD = 6;
13
14		protected $chars = [];
15		protected $uhandler;
16		protected $normform = self::NOT_NORMALIZED;
17
18		protected static $spec = [
19		2 => ['mask' => 0b00011111, 'start' => 0x80],
20		3 => ['mask' => 0b00001111, 'start' => 0x800],
21		4 => ['mask' => 0b00000111, 'start' => 0x10000],
22		5 => ['mask' => 0b00000011, 'start' => 0x200000],
23		6 => ['mask' => 0b00000001, 'start' => 0x4000000],
24		];
25		protected static $winc1umap = [
26		0x80 => 0x20AC,
27		0x81 => 0xFFFD, // invalid
28		0x82 => 0x201A,
29		0x83 => 0x0192,
30		0x84 => 0x201E,
31		0x85 => 0x2026,
32		0x86 => 0x2020,
33		0x87 => 0x2021,
34		0x88 => 0x02C6,
35		0x89 => 0x2030,
36		0x8A => 0x0160,
37		0x8B => 0x2039,
38		0x8C => 0x0152,
39		0x8D => 0xFFFD, // invalid
40		0x8E => 0x017D,
41		0x8F => 0xFFFD, // invalid
42		0x90 => 0xFFFD, // invalid
43		0x91 => 0x2018,
44		0x92 => 0x2019,
45		0x93 => 0x201C,
46		0x94 => 0x201D,
47		0x95 => 0x2022,
48		0x96 => 0x2013,
49		0x97 => 0x2014,
50		0x98 => 0x02DC,
51		0x99 => 0x2122,
52		0x9A => 0x0161,
53		0x9B => 0x203A,
54		0x9C => 0x0153,
55		0x9D => 0xFFFD, // invalid
56		0x9E => 0x017E,
57		0x9F => 0x0178,
58		];
59
60	View Code Duplication	public function toArray($delim = '', $limit = null)
		0 ignored issues – show
		Duplication – introduced 2015-12-04 23:19 UTC
		Report Bug · Copy Issue Report
		This method seems to be duplicated in your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository.
		Loading history...
61		{
62		$this->loadToArray();
63
64		if (empty($delim)) {
65		return $this->chars;
66		}
67		if (is_int($delim)) {
68		return \str_split($this->raw, $delim);
69		}
70		if ($limit === null) {
71		return \explode($delim, $this->raw);
72		}
73		return \explode($delim, $this->raw, $limit);
74		}
75
76		/**
77		* @return string
78		*/
79		public function charAt($index)
80		{
81		$this->loadToArray();
82		return $this->chars[$index][0];
83		}
84
85		/**
86		* @return int
87		*/
88		public function charCodeAt($index)
89		{
90		$this->loadToArray();
91		return $this->chars[$index][1];
92		}
93
94		/**
95		*
96		*/
97		protected static function cpToUtf8Char($cpt)
98		{
99		if ($cpt < self::$spec[2]['start']) {
100		return \chr($cpt);
101		}
102
103		if ($cpt == 0xFEFF) {
104		return '';
105		}
106
107		if (($cpt >= 0xD800 && $cpt <= 0xDFFF) || $cpt > 0x10FFFF) {
108		return "\xEF\xBF\xBD"; // U+FFFD; invalid symbol
109		}
110
111		if ($cpt < self::$spec[3]['start']) {
112		$data = [
113		0b11000000 | ($cpt >> 6),
114		0b10000000 | ($cpt & 0b00111111)
115		];
116		} elseif ($cpt < self::$spec[4]['start']) {
117		$data = [
118		0b11100000 | ($cpt >> 12),
119		0b10000000 | (($cpt >> 6) & 0b00111111),
120		0b10000000 | ($cpt & 0b00111111),
121		];
122		} elseif ($cpt <= 0x10FFFF) {
123		$data = [
124		0b11110100,
125		0b10000000 | (($cpt >> 12) & 0b00111111),
126		0b10000000 | (($cpt >> 6) & 0b00111111),
127		0b10000000 | ($cpt & 0b00111111),
128		];
129		}
130
131		return implode(array_map('chr', $data));
		0 ignored issues – show
		Bug – introduced 2016-03-20 22:57 UTC
		Report Bug · Copy Issue Report

		The variable `$data` does not seem to be defined for all execution paths leading up to this point.

		If you define a variable conditionally, it can happen that it is not defined for all execution paths. Let’s take a look at an example:

		    function myFunction($a) {
		        switch ($a) {
		            case 'foo':
		                $x = 1;
		                break;

		            case 'bar':
		                $x = 2;
		                break;
		        }

		        // $x is potentially undefined here.
		        echo $x;
		    }

		In the above example, the variable `$x` is defined if you pass “foo” or “bar” as argument for `$a`. However, since the `switch` statement has no default case statement, if you pass any other value, the variable `$x` would be undefined.

		Available Fixes

		1. Check for existence of the variable explicitly:

		    function myFunction($a) {
		        switch ($a) {
		            case 'foo':
		                $x = 1;
		                break;

		            case 'bar':
		                $x = 2;
		                break;
		        }

		        if (isset($x)) { // Make sure it's always set.
		            echo $x;
		        }
		    }

		2. Define a default value for the variable:

		    function myFunction($a) {
		        $x = ''; // Set a default which gets overridden for certain paths.
		        switch ($a) {
		            case 'foo':
		                $x = 1;
		                break;

		            case 'bar':
		                $x = 2;
		                break;
		        }

		        echo $x;
		    }

		3. Add a value for the missing path:

		    function myFunction($a) {
		        switch ($a) {
		            case 'foo':
		                $x = 1;
		                break;

		            case 'bar':
		                $x = 2;
		                break;

		            // We add support for the missing case.
		            default:
		                $x = '';
		                break;
		        }

		        echo $x;
		    }

		Loading history...
132		}
133		/**
134		* @param integer $byte
135		*/
136		protected static function charLength($byte)
137		{
138		if (($byte & 0b11111110) === 0b11111100) {
139		return 6;
140		}
141		if (($byte & 0b11111100) === 0b11111000) {
142		return 5;
143		}
144		if (($byte & 0b11111000) === 0b11110000) {
145		return 4;
146		}
147		if (($byte & 0b11110000) === 0b11100000) {
148		return 3;
149		}
150		if (($byte & 0b11100000) === 0b11000000) {
151		return 2;
152		}
153		return 1;
154		}
155
156		private function loadToArray()
157		{
158		if (!empty($this->chars)) {
159		return;
160		}
161
162		$len = \strlen($this->raw);
163		$inside = false; // are we "inside" of evaluating a valid UTF-8 char?
164		$invalid = false;
165
166		for ($offset = 0; $offset < $len; $offset++) {
167		$char = $this->raw{$offset};
168		$ord = \ord($char);
169
170		if ($inside === false) {
171		$bytes = self::charLength($ord);
172
173		if ($bytes > 1 && $offset + $bytes <= $len && $invalid === false) {
174		// valid UTF-8 multibyte start
175		$inside = true;
176		$cache = $char;
177		$ordcache = ($ord & self::$spec[$bytes]['mask']) << (6 * ($bytes - 1));
178		$originOffset = $offset;
179		} elseif ($ord < self::$spec[2]['start']) {
180		// ASCII 7-bit char
181		$this->chars[] = [$char, $ord];
182		} else {
183		// either C0/C1 block or higher; map from cp1252 to utf8 or just convert
184		$ord = (isset(self::$winc1umap[$ord])) ? self::$winc1umap[$ord] : $ord;
185		$this->chars[] = [self::cpToUtf8Char($ord), $ord];
186		$invalid = false;
187		}
188		continue;
189		}
190
191		// $inside === true, i.e. should be continuation character
192		if (($ord & 0b11000000) !== 0b10000000) {
193		// actually, it's not one, so now the whole UTF-8 char is invalid
194		// go back and force it to parse as ISO or 1252
195		$inside = false;
196		$invalid = true;
197		$offset = $originOffset - 1;
198		continue;
199		}
200
201		// put this byte's data where it needs to go
202		$ordcache |= ($ord & 0b00111111) << (6 * ($bytes - 1 - ($offset - $originOffset)));
203		$cache .= $char;
204
205		if ($originOffset + ($bytes - 1) === $offset) {
206		// we're done parsing this char, now let's verify
207		$inside = false;
208
209		// check for overlong, surrogate, too large, BOM, or C0/C1
210		$overlong = ($ordcache < self::$spec[$bytes]['start']);
211		$surrogate = ($ordcache & 0xFFFFF800 === 0xD800);
212		$toobig = ($ordcache > 0x10FFFF);
213
214		if ($overlong || $surrogate || $toobig) {
215		$invalid = true;
216		$offset = $originOffset - 1;
217		continue;
218		}
219
220		if ($ordcache === 0xFEFF) { // BOM
221		if ($originOffset !== 0) {
222		// if not at beginning, store as word joiner U+2060
223		$this->chars[] = [\chr(0xE2) . \chr(0x81) . \chr(0xA0), 0x2060];
224		}
225		// otherwise discard
226		continue;
227		}
228
229		// verification passed, now store it
230		$this->chars[] = [$cache, $ordcache];
231		}
232		}
233		}
234		}
235

garrettw / stringobject

Push — master ( a47b74...34d315 )

UStrObj::loadToArray() C

Complexity

Size

Duplication

Importance

How to fix Long Method Complexity

Long Method

Available Fixes

Duplication Side-by-Side

Filter issues like