Completed
Push — master ( 891408...d32ecd )
by Garrett
02:56
created

UStrObj::loadToArray()   C

Complexity

Conditions 16
Paths 12

Size

Total Lines 78
Code Lines 44

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
c 3
b 0
f 0
dl 0
loc 78
rs 5.142
cc 16
eloc 44
nc 12
nop 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace StringObject;
4
5
/**
 * Unicode-aware string object.
 *
 * Lazily decodes the raw byte string held by the parent class into a list of
 * characters. Well-formed UTF-8 sequences are kept as they are; any byte that
 * is not part of a well-formed sequence is reinterpreted as a single-byte
 * character (Windows-1252 for 0x80-0x9F, otherwise the byte value is used as
 * the code point) and re-encoded as UTF-8.
 */
class UStrObj extends StrObj
{
    /**
     * Decoded characters, filled on demand by loadToArray().
     * Each entry is a two-element array: [UTF-8 encoded character, code point].
     */
    protected $chars = [];

    /** Not referenced by any code in this class. */
    protected $uhandler;

    /**
     * Per-length parameters for multibyte sequences, keyed by the total
     * sequence length in bytes.
     *
     * - datamask:  mask selecting the payload bits of the lead byte
     * - threshold: smallest code point that needs this many bytes; a decoded
     *              value below it means the sequence is overlong
     */
    protected static $spec = [
        2 => ['datamask' => 0b00011111, 'threshold' => 0x80],
        3 => ['datamask' => 0b00001111, 'threshold' => 0x800],
        4 => ['datamask' => 0b00000111, 'threshold' => 0x10000],
        5 => ['datamask' => 0b00000011, 'threshold' => 0x200000],
        6 => ['datamask' => 0b00000001, 'threshold' => 0x4000000],
    ];

    /**
     * Maps single bytes 0x80-0x9F (interpreted as Windows-1252) to Unicode
     * code points. Bytes with no assignment map to U+FFFD.
     */
    protected static $winc1umap = [
        0x80 => 0x20AC,
        0x81 => 0xFFFD, // invalid
        0x82 => 0x201A,
        0x83 => 0x0192,
        0x84 => 0x201E,
        0x85 => 0x2026,
        0x86 => 0x2020,
        0x87 => 0x2021,
        0x88 => 0x02C6,
        0x89 => 0x2030,
        0x8A => 0x0160,
        0x8B => 0x2039,
        0x8C => 0x0152,
        0x8D => 0xFFFD, // invalid
        0x8E => 0x017D,
        0x8F => 0xFFFD, // invalid
        0x90 => 0xFFFD, // invalid
        0x91 => 0x2018,
        0x92 => 0x2019,
        0x93 => 0x201C,
        0x94 => 0x201D,
        0x95 => 0x2022,
        0x96 => 0x2013,
        0x97 => 0x2014,
        0x98 => 0x02DC,
        0x99 => 0x2122,
        0x9A => 0x0161,
        0x9B => 0x203A,
        0x9C => 0x0153,
        0x9D => 0xFFFD, // invalid
        0x9E => 0x017E,
        0x9F => 0x0178,
    ];

    /**
     * Forwards the argument unchanged to the parent constructor.
     *
     * @param mixed $thing value accepted by StrObj::__construct()
     */
    public function __construct($thing)
    {
        parent::__construct($thing);
    }

    /**
     * Splits the string into an array.
     *
     * - Empty $delim (per PHP's empty()): returns the decoded character list,
     *   where each element is [UTF-8 character, code point].
     * - Integer $delim: returns \str_split() chunks of the raw string, i.e.
     *   chunks of $delim bytes (not characters).
     * - Any other $delim: returns \explode() of the raw string, passing
     *   $limit through when it is not null.
     *
     * @param string|int $delim delimiter string, chunk size in bytes, or empty
     * @param int|null   $limit optional limit handed to \explode()
     * @return array
     */
    public function toArray($delim = '', $limit = null)
    {
        $this->loadToArray();

        if (empty($delim)) {
            return $this->chars;
        }
        if (is_int($delim)) {
            return \str_split($this->raw, $delim);
        }
        if ($limit === null) {
            return \explode($delim, $this->raw);
        }
        return \explode($delim, $this->raw, $limit);
    }

    /**
     * Returns the character at the given character (not byte) index.
     *
     * No bounds checking is performed; an out-of-range index triggers PHP's
     * undefined-offset diagnostic and yields null.
     *
     * @param int $index zero-based index into the decoded character list
     * @return string|null the character, encoded as UTF-8
     */
    public function charAt($index)
    {
        $this->loadToArray();
        return $this->chars[$index][0];
    }

    /**
     * Returns the code point of the character at the given character index.
     *
     * No bounds checking is performed; an out-of-range index triggers PHP's
     * undefined-offset diagnostic and yields null.
     *
     * @param int $index zero-based index into the decoded character list
     * @return int|null the Unicode code point
     */
    public function charCodeAt($index)
    {
        $this->loadToArray();
        return $this->chars[$index][1];
    }

    /**
     * Decodes the raw byte string into $this->chars.
     *
     * Does nothing when $this->chars is already non-empty. The raw string is
     * scanned byte by byte:
     *
     * - ASCII bytes are stored directly.
     * - A multibyte lead byte whose full sequence fits in the string starts a
     *   tentative UTF-8 character. If a following byte is not a continuation
     *   byte, or the decoded value is overlong, a surrogate or above
     *   U+10FFFF, the scan rewinds to the lead byte and that byte is stored
     *   as a single-byte character instead.
     * - Any other byte >= 0x80 is mapped through $winc1umap when listed there,
     *   otherwise its byte value is used as the code point.
     * - A decoded U+FEFF is dropped at offset 0 and stored as U+2060
     *   (word joiner) anywhere else.
     *
     * @return void
     */
    private function loadToArray()
    {
        if (!empty($this->chars)) {
            return;
        }

        $len = \strlen($this->raw);
        $inside = false;   // true while consuming continuation bytes
        $invalid = false;  // true when the byte at the rewind point must not start a sequence

        // State of the multibyte sequence currently being decoded. Initialised
        // here so every path through the loop sees defined variables.
        $bytes = 1;
        $cache = '';
        $ordcache = 0;
        $originOffset = 0;

        for ($offset = 0; $offset < $len; $offset++) {
            $char = $this->raw[$offset];
            $ord = \ord($char);

            if ($inside === false) {
                $bytes = self::charLength($ord);

                if ($bytes > 1 && $offset + $bytes <= $len && $invalid === false) {
                    // plausible UTF-8 multibyte start: stash the lead byte's payload bits
                    $inside = true;
                    $cache = $char;
                    $ordcache = ($ord & self::$spec[$bytes]['datamask']) << (6 * ($bytes - 1));
                    $originOffset = $offset;
                } elseif ($ord < 0x80) {
                    // ASCII 7-bit char
                    $this->chars[] = [$char, $ord];
                } else {
                    // stray high byte: map from cp1252 when listed, else use the byte value
                    $ord = (isset(self::$winc1umap[$ord])) ? self::$winc1umap[$ord] : $ord;
                    $this->chars[] = [self::cpToUtf8Char($ord), $ord];
                    $invalid = false;
                }
                continue;
            }

            // $inside === true, i.e. this *should be* a continuation byte
            if (($ord & 0b11000000) !== 0b10000000) {
                // it is not, so the whole sequence is invalid:
                // rewind and force the lead byte to be parsed as a single byte
                $inside = false;
                $invalid = true;
                $offset = $originOffset - 1;
                continue;
            }

            // merge this byte's six payload bits at their position in the code point
            $ordcache |= ($ord & 0b00111111) << (6 * ($bytes - 1 - ($offset - $originOffset)));
            $cache .= $char;

            if ($originOffset + ($bytes - 1) !== $offset) {
                // more continuation bytes expected
                continue;
            }

            // the sequence is complete; verify the decoded value
            $inside = false;

            if (!self::isWellFormed($ordcache, $bytes)) {
                $invalid = true;
                $offset = $originOffset - 1;
                continue;
            }

            if ($ordcache === 0xFEFF) { // BOM
                if ($originOffset !== 0) {
                    // if not at beginning, store as word joiner U+2060
                    $this->chars[] = [\chr(0xE2) . \chr(0x81) . \chr(0xA0), 0x2060];
                }
                // otherwise discard
                continue;
            }

            // verification passed, now store it
            $this->chars[] = [$cache, $ordcache];
        }
    }

    /**
     * Tells whether a value decoded from a $bytes-long sequence is acceptable:
     * not overlong, not a UTF-16 surrogate, and not above U+10FFFF.
     *
     * @param int $codepoint decoded value
     * @param int $bytes     length of the sequence it was decoded from (2-6)
     * @return bool
     */
    private static function isWellFormed($codepoint, $bytes)
    {
        if ($codepoint < self::$spec[$bytes]['threshold']) {
            return false; // overlong encoding
        }
        if ($codepoint >= 0xD800 && $codepoint <= 0xDFFF) {
            return false; // surrogate
        }
        return $codepoint <= 0x10FFFF;
    }

    /**
     * Encodes a code point as a UTF-8 byte string.
     *
     * Values below 0x80 are returned as a single byte. Values above U+10FFFF
     * produce the encoding of U+FFFD.
     *
     * @param int $cpt code point
     * @return string UTF-8 encoded character (1 to 4 bytes)
     */
    protected static function cpToUtf8Char($cpt)
    {
        if ($cpt < 0x80) {
            return \chr($cpt);
        }

        if ($cpt < 0x800) {
            $data = [
                0b11000000 | ($cpt >> 6),
                0b10000000 | ($cpt & 0b00111111),
            ];
        } elseif ($cpt < 0x10000) {
            $data = [
                0b11100000 | ($cpt >> 12),
                0b10000000 | (($cpt >> 6) & 0b00111111),
                0b10000000 | ($cpt & 0b00111111),
            ];
        } elseif ($cpt <= 0x10FFFF) {
            $data = [
                // the lead byte carries bits 18-20 of the code point
                0b11110000 | ($cpt >> 18),
                0b10000000 | (($cpt >> 12) & 0b00111111),
                0b10000000 | (($cpt >> 6) & 0b00111111),
                0b10000000 | ($cpt & 0b00111111),
            ];
        } else {
            $data = [0xEF, 0xBF, 0xBD]; // U+FFFD
        }

        return implode(array_map('chr', $data));
    }

    /**
     * Returns the sequence length announced by a lead byte.
     *
     * Recognises the 2- to 6-byte lead patterns (110xxxxx through 1111110x).
     * Every other byte, including ASCII, continuation bytes (10xxxxxx) and
     * 0xFE/0xFF, yields 1.
     *
     * @param int $byte byte value, 0-255
     * @return int length in bytes, 1-6
     */
    protected static function charLength($byte)
    {
        if (($byte & 0b11111110) === 0b11111100) {
            return 6;
        }
        if (($byte & 0b11111100) === 0b11111000) {
            return 5;
        }
        if (($byte & 0b11111000) === 0b11110000) {
            return 4;
        }
        if (($byte & 0b11110000) === 0b11100000) {
            return 3;
        }
        if (($byte & 0b11100000) === 0b11000000) {
            return 2;
        }
        return 1;
    }
}
221