Completed
Push — master ( 132494...891408 )
by Garrett
02:18
created

UStrObj   A

Complexity

Total Complexity 34

Size/Duplication

Total Lines 226
Duplicated Lines 6.64 %

Coupling/Cohesion

Components 1
Dependencies 1

Importance

Changes 2
Bugs 0 Features 0
Metric Value
wmc 34
c 2
b 0
f 0
lcom 1
cbo 1
dl 15
loc 226
rs 9.2

7 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A toArray() 15 15 4
A charAt() 0 5 1
A charCodeAt() 0 5 1
D loadToArray() 0 88 16
B cpToUtf8Char() 0 31 5
B charLength() 0 19 6

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
3
namespace StringObject;
4
5
class UStrObj extends StrObj
6
{
7
    /**
     * Decoded characters, filled lazily by loadToArray().
     * Every entry is a two-element list: [UTF-8 encoded character, code point].
     *
     * @var array[]
     */
    protected $chars = [];

    /**
     * NOTE(review): not referenced anywhere in the visible code; confirm whether
     * the parent class or another subclass still relies on it.
     *
     * @var mixed
     */
    protected $uhandler;

    /**
     * Facts about multi-byte UTF-8 sequences, keyed by total sequence length in bytes.
     *
     * 'datamask'  selects the bits of the lead byte that carry code point data.
     * 'threshold' is compared against the decoded code point; a value below it
     *             is treated as an overlong encoding by loadToArray().
     *
     * @var array[]
     */
    protected static $spec = [
        2 => ['datamask' => 0b00011111, 'threshold' => 0x80],
        3 => ['datamask' => 0b00001111, 'threshold' => 0x800],
        4 => ['datamask' => 0b00000111, 'threshold' => 0x10000],
        5 => ['datamask' => 0b00000011, 'threshold' => 0x200000],
        6 => ['datamask' => 0b00000001, 'threshold' => 0x4000000],
    ];

    /**
     * Replacement code points for single bytes 0x80-0x9F, used by loadToArray()
     * when a byte has to be read as cp1252 rather than UTF-8. Entries marked
     * "invalid" map to U+FFFD (replacement character).
     *
     * @var int[]
     */
    protected static $winc1umap = [
        0x80 => 0x20AC,
        0x81 => 0xFFFD, // invalid
        0x82 => 0x201A,
        0x83 => 0x0192,
        0x84 => 0x201E,
        0x85 => 0x2026,
        0x86 => 0x2020,
        0x87 => 0x2021,
        0x88 => 0x02C6,
        0x89 => 0x2030,
        0x8A => 0x0160,
        0x8B => 0x2039,
        0x8C => 0x0152,
        0x8D => 0xFFFD, // invalid
        0x8E => 0x017D,
        0x8F => 0xFFFD, // invalid
        0x90 => 0xFFFD, // invalid
        0x91 => 0x2018,
        0x92 => 0x2019,
        0x93 => 0x201C,
        0x94 => 0x201D,
        0x95 => 0x2022,
        0x96 => 0x2013,
        0x97 => 0x2014,
        0x98 => 0x02DC,
        0x99 => 0x2122,
        0x9A => 0x0161,
        0x9B => 0x203A,
        0x9C => 0x0153,
        0x9D => 0xFFFD, // invalid
        0x9E => 0x017E,
        0x9F => 0x0178,
    ];
51
52
    /**
     * Create the object by forwarding $thing, unchanged, to the parent constructor.
     *
     * @param mixed $thing value handed straight to StrObj::__construct()
     */
    public function __construct($thing)
    {
        parent::__construct($thing);
    }
56
57 View Code Duplication
    /**
     * Break the string into an array.
     *
     * The result depends on $delim:
     *  - empty ('' / null / 0 / false): the decoded character list, where every
     *    element is a [UTF-8 character, code point] pair;
     *  - an integer: the raw string cut into chunks of that many bytes by str_split();
     *  - anything else: the raw string split on $delim by explode(), honouring $limit.
     *
     * NOTE(review): static analysis reported this method as duplicated elsewhere
     * in the project; consider sharing the delimiter handling.
     * NOTE(review): the integer and explode() branches work on raw bytes, so an
     * integer chunk size can cut a multi-byte character in half; confirm intent.
     *
     * @param string|int|null $delim delimiter, chunk size, or empty for per-character output
     * @param int|null        $limit passed to explode() when not null; ignored otherwise
     *
     * @return array
     */
    public function toArray($delim = '', $limit = null)
    {
        // empty() also rejects the string '0', which is a perfectly usable
        // explode() delimiter, so it is excluded from the "no delimiter" case.
        if (empty($delim) && $delim !== '0') {
            // Only the per-character form needs the (costly) UTF-8 parse.
            $this->loadToArray();
            return $this->chars;
        }

        if (is_int($delim)) {
            return \str_split($this->raw, $delim);
        }

        if ($limit === null) {
            return \explode($delim, $this->raw);
        }

        return \explode($delim, $this->raw, $limit);
    }
72
73
    /**
     * Fetch the character stored at position $index of the decoded character list.
     *
     * No bounds check is performed; $index is used directly as the array key.
     *
     * @param int $index zero-based character position
     *
     * @return string the UTF-8 encoded character held in that slot
     */
    public function charAt($index)
    {
        $this->loadToArray();

        // Slot layout is [character, code point].
        $entry = $this->chars[$index];

        return $entry[0];
    }
78
79
    /**
     * Fetch the code point stored at position $index of the decoded character list.
     *
     * No bounds check is performed; $index is used directly as the array key.
     *
     * @param int $index zero-based character position
     *
     * @return int the code point held in that slot
     */
    public function charCodeAt($index)
    {
        $this->loadToArray();

        // Slot layout is [character, code point].
        $entry = $this->chars[$index];

        return $entry[1];
    }
84
85
    /**
     * Decode $this->raw into $this->chars, once.
     *
     * The raw string is scanned byte by byte. Well-formed UTF-8 sequences are
     * stored as [bytes, code point]. A sequence that turns out to be malformed
     * (bad continuation byte, overlong form, surrogate, or above U+10FFFF) is
     * rewound to its lead byte, and that byte is then stored on its own: mapped
     * through $winc1umap when it is in 0x80-0x9F, otherwise taken as the code
     * point equal to its byte value. A U+FEFF at offset 0 is dropped; elsewhere
     * it is stored as U+2060 (word joiner).
     *
     * Does nothing when $this->chars is already non-empty.
     * Assumes $this->raw is a byte string provided by the parent class.
     *
     * @return void
     */
    private function loadToArray()
    {
        if (!empty($this->chars)) {
            return;
        }

        $len = \strlen($this->raw);
        $inside = false;    // true while consuming continuation bytes
        $invalid = false;   // true while re-reading the lead byte of a rejected sequence
        $cache = '';        // bytes of the sequence being assembled
        $ordcache = 0;      // code point being assembled
        $originOffset = 0;  // offset of the current sequence's lead byte
        $bytes = 0;         // declared length of the current sequence

        for ($offset = 0; $offset < $len; $offset++) {
            // Square brackets: the curly-brace offset form is gone as of PHP 8.
            $char = $this->raw[$offset];
            $ord = \ord($char);

            if ($inside === false) {
                $bytes = self::charLength($ord);

                if ($bytes > 1 && $offset + $bytes <= $len && $invalid === false) {
                    // plausible multi-byte lead with enough bytes left in the input
                    $inside = true;
                    $cache = $char;
                    $ordcache = ($ord & self::$spec[$bytes]['datamask']) << (6 * ($bytes - 1));
                    $originOffset = $offset;
                } elseif ($ord < 0x80) {
                    // ASCII 7-bit char
                    $this->chars[] = [$char, $ord];
                } else {
                    // stray high byte: map from cp1252 where defined, else use the byte value
                    $ord = isset(self::$winc1umap[$ord]) ? self::$winc1umap[$ord] : $ord;
                    $this->chars[] = [self::cpToUtf8Char($ord), $ord];
                    $invalid = false;
                }
                continue;
            }

            // $inside === true, so this byte *should be* a continuation byte
            if (($ord & 0b11000000) !== 0b10000000) {
                // it is not: rewind so the lead byte is re-read as a single byte
                $inside = false;
                $invalid = true;
                $offset = $originOffset - 1;
                continue;
            }

            // put this byte's six data bits where they belong
            $ordcache |= ($ord & 0b00111111) << (6 * ($bytes - 1 - ($offset - $originOffset)));
            $cache .= $char;

            if ($originOffset + ($bytes - 1) !== $offset) {
                continue; // more continuation bytes still to come
            }

            // sequence complete, now verify it
            $inside = false;

            $overlong = ($ordcache < self::$spec[$bytes]['threshold']);
            // Explicit range test. The earlier `$x & MASK === VALUE` form parsed as
            // `$x & (MASK === VALUE)` and therefore never flagged a surrogate.
            $surrogate = ($ordcache >= 0xD800 && $ordcache <= 0xDFFF);
            $toobig = ($ordcache > 0x10FFFF);

            if ($overlong || $surrogate || $toobig) {
                $invalid = true;
                $offset = $originOffset - 1;
                continue;
            }

            if ($ordcache === 0xFEFF) { // BOM
                if ($originOffset !== 0) {
                    // if not at beginning, store as word joiner U+2060
                    $this->chars[] = [\chr(0xE2) . \chr(0x81) . \chr(0xA0), 0x2060];
                }
                // otherwise discard
                continue;
            }

            // verification passed: store it and clear the temp vars
            $this->chars[] = [$cache, $ordcache];
            $invalid = false;
            $cache = '';
            $ordcache = 0;
        }
    }
173
174
    /**
     * Encode a code point as a UTF-8 byte string.
     *
     * Code points below 0x80 become a single byte; larger ones use the two-,
     * three- or four-byte form. Anything above U+10FFFF yields the encoding of
     * U+FFFD (replacement character).
     *
     * @param int $cpt code point to encode
     *
     * @return string UTF-8 bytes for $cpt, or for U+FFFD when $cpt is too large
     */
    protected static function cpToUtf8Char($cpt)
    {
        if ($cpt < 0x80) {
            return \chr($cpt);
        }

        if ($cpt < 0x800) {
            return \chr(0b11000000 | ($cpt >> 6))
                . \chr(0b10000000 | ($cpt & 0b00111111));
        }

        if ($cpt < 0x10000) {
            return \chr(0b11100000 | ($cpt >> 12))
                . \chr(0b10000000 | (($cpt >> 6) & 0b00111111))
                . \chr(0b10000000 | ($cpt & 0b00111111));
        }

        // U+10FFFF itself is valid, hence the inclusive bound.
        if ($cpt <= 0x10FFFF) {
            // The lead byte carries bits 18-20 of the code point; it must not be a
            // fixed value, or every plane below 16 is encoded incorrectly.
            return \chr(0b11110000 | ($cpt >> 18))
                . \chr(0b10000000 | (($cpt >> 12) & 0b00111111))
                . \chr(0b10000000 | (($cpt >> 6) & 0b00111111))
                . \chr(0b10000000 | ($cpt & 0b00111111));
        }

        return \chr(0xEF) . \chr(0xBF) . \chr(0xBD); // U+FFFD
    }
208
    /**
     * Report how many bytes a UTF-8 sequence claims to occupy, judged by its lead byte.
     *
     * The byte is matched against the lead-byte bit patterns for lengths 6 down
     * to 2; any byte matching none of them is reported as length 1.
     *
     * @param integer $byte value of the first byte of the sequence
     *
     * @return integer sequence length in bytes, from 1 to 6
     */
    protected static function charLength($byte)
    {
        // length => [mask, bits expected under that mask], longest form first
        $leadPatterns = [
            6 => [0b11111110, 0b11111100],
            5 => [0b11111100, 0b11111000],
            4 => [0b11111000, 0b11110000],
            3 => [0b11110000, 0b11100000],
            2 => [0b11100000, 0b11000000],
        ];

        foreach ($leadPatterns as $length => $pattern) {
            if (($byte & $pattern[0]) === $pattern[1]) {
                return $length;
            }
        }

        return 1;
    }
230
}
231