Completed
Push — master ( 1d9387...a47b74 )
by Garrett
08:57
created

UStrObj   B

Complexity

Total Complexity 36

Size/Duplication

Total Lines 233
Duplicated Lines 6.44 %

Coupling/Cohesion

Components 1
Dependencies 1

Importance

Changes 6
Bugs 0 Features 0
Metric Value
wmc 36
c 6
b 0
f 0
lcom 1
cbo 1
dl 15
loc 233
rs 8.8

6 Methods

Rating   Name   Duplication   Size   Complexity  
A toArray() 15 15 4
A charAt() 0 5 1
A charCodeAt() 0 5 1
C cpToUtf8Char() 0 38 8
B charLength() 0 19 6
C loadToArray() 0 78 16

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
3
namespace StringObject;
4
5
class UStrObj extends AnyStrObj
0 ignored issues
show
Bug introduced by
There is at least one abstract method in this class. Maybe declare it as abstract, or implement the remaining methods: compareTo, escape, isAscii, isEmpty, nextToken, remove, repeat, replace, resetToken, times, translate, trim, unescape, uuDecode, uuEncode
Loading history...
6
{
7
    const NOT_NORMALIZED = 0;
8
    const NFC = 1;
9
    const NFD = 2;
10
    const NFK = 4;
11
    const NFKC = 5;
12
    const NFKD = 6;
13
14
    protected $chars = [];
15
    protected $uhandler;
16
    protected $normform = self::NOT_NORMALIZED;
17
18
    // Per-length UTF-8 parameters, keyed by sequence length in bytes.
    //   'mask'  - bits of the lead byte that carry code point data
    //             (loadToArray() ANDs the lead byte with it)
    //   'start' - lower bound for a code point of this length; loadToArray()
    //             rejects a decoded value below it as overlong
    protected static $spec = [
19
        2 => ['mask' => 0b00011111, 'start' => 0x80],
20
        3 => ['mask' => 0b00001111, 'start' => 0x800],
21
        4 => ['mask' => 0b00000111, 'start' => 0x10000],
22
        5 => ['mask' => 0b00000011, 'start' => 0x200000],
23
        6 => ['mask' => 0b00000001, 'start' => 0x4000000],
24
    ];
25
    // Maps the byte values 0x80-0x9F to Unicode code points. loadToArray()
    // consults this table for bytes that are not part of a valid UTF-8
    // sequence (its own comment describes this as mapping from cp1252).
    // Entries flagged "invalid" map to U+FFFD.
    protected static $winc1umap = [
26
        0x80 => 0x20AC,
27
        0x81 => 0xFFFD, // invalid
28
        0x82 => 0x201A,
29
        0x83 => 0x0192,
30
        0x84 => 0x201E,
31
        0x85 => 0x2026,
32
        0x86 => 0x2020,
33
        0x87 => 0x2021,
34
        0x88 => 0x02C6,
35
        0x89 => 0x2030,
36
        0x8A => 0x0160,
37
        0x8B => 0x2039,
38
        0x8C => 0x0152,
39
        0x8D => 0xFFFD, // invalid
40
        0x8E => 0x017D,
41
        0x8F => 0xFFFD, // invalid
42
        0x90 => 0xFFFD, // invalid
43
        0x91 => 0x2018,
44
        0x92 => 0x2019,
45
        0x93 => 0x201C,
46
        0x94 => 0x201D,
47
        0x95 => 0x2022,
48
        0x96 => 0x2013,
49
        0x97 => 0x2014,
50
        0x98 => 0x02DC,
51
        0x99 => 0x2122,
52
        0x9A => 0x0161,
53
        0x9B => 0x203A,
54
        0x9C => 0x0153,
55
        0x9D => 0xFFFD, // invalid
56
        0x9E => 0x017E,
57
        0x9F => 0x0178,
58
    ];
59
60 View Code Duplication
    /**
     * Split the string into an array.
     *
     * - No delimiter (any value empty() accepts, except the string '0'):
     *   returns the decoded character list, each entry being
     *   [UTF-8 character, code point].
     * - Integer delimiter: returns str_split() chunks of the raw string,
     *   $delim bytes each.
     * - Otherwise: returns explode() of the raw string on $delim, capped at
     *   $limit pieces when $limit is not null.
     *
     * @param string|int $delim delimiter string, or chunk size in bytes
     * @param int|null   $limit passed to explode() when not null
     * @return array
     */
    public function toArray($delim = '', $limit = null)
    {
        $this->loadToArray();

        // empty('0') is true in PHP, yet '0' is a usable delimiter string,
        // so it must not fall into the "no delimiter" branch
        if ($delim !== '0' && empty($delim)) {
            return $this->chars;
        }
        if (is_int($delim)) {
            return \str_split($this->raw, $delim);
        }
        if ($limit === null) {
            return \explode($delim, $this->raw);
        }
        return \explode($delim, $this->raw, $limit);
    }
75
76
    /**
     * Fetch the UTF-8 character stored at a position of the decoded list.
     *
     * @param int $index zero-based position in the decoded character list
     * @return string
     */
    public function charAt($index)
    {
        $this->loadToArray();

        // each entry is a [character, code point] pair
        $entry = $this->chars[$index];

        return $entry[0];
    }
84
85
    /**
     * Fetch the code point stored at a position of the decoded list.
     *
     * @param int $index zero-based position in the decoded character list
     * @return int
     */
    public function charCodeAt($index)
    {
        $this->loadToArray();

        // each entry is a [character, code point] pair
        $entry = $this->chars[$index];

        return $entry[1];
    }
93
94
    /**
     * Encode a code point as a UTF-8 byte string.
     *
     * Values below self::$spec[2]['start'] are returned as a single chr()
     * byte. U+FEFF yields an empty string. Surrogates (U+D800-U+DFFF) and
     * values above U+10FFFF yield the encoding of U+FFFD.
     *
     * @param int $cpt code point to encode
     * @return string
     */
    protected static function cpToUtf8Char($cpt)
    {
        if ($cpt < self::$spec[2]['start']) {
            return \chr($cpt);
        }

        if ($cpt == 0xFEFF) {
            return '';
        }

        $invalid = [0xEF, 0xBF, 0xBD]; // U+FFFD

        if ($cpt < self::$spec[3]['start']) {
            // 110xxxxx 10xxxxxx
            $data = [
                0b11000000 | ($cpt >> 6),
                0b10000000 | ($cpt & 0b00111111),
            ];
        } elseif ($cpt >= 0xD800 && $cpt <= 0xDFFF) {
            $data = $invalid;
        } elseif ($cpt < self::$spec[4]['start']) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            $data = [
                0b11100000 | ($cpt >> 12),
                0b10000000 | (($cpt >> 6) & 0b00111111),
                0b10000000 | ($cpt & 0b00111111),
            ];
        } elseif ($cpt <= 0x10FFFF) {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            // the lead byte carries bits 18-20 of the code point; a fixed
            // lead byte would place every value in the wrong plane
            $data = [
                0b11110000 | ($cpt >> 18),
                0b10000000 | (($cpt >> 12) & 0b00111111),
                0b10000000 | (($cpt >> 6) & 0b00111111),
                0b10000000 | ($cpt & 0b00111111),
            ];
        } else {
            $data = $invalid;
        }

        return implode(array_map('chr', $data));
    }
135
    /**
     * Report the sequence length announced by the bit pattern of a byte.
     *
     * 1111110x gives 6, 111110xx gives 5, 11110xxx gives 4, 1110xxxx gives 3,
     * 110xxxxx gives 2; every other pattern gives 1.
     *
     * @param integer $byte
     * @return int
     */
    protected static function charLength($byte)
    {
        // length => [mask, expected pattern], longest prefix first
        $prefixes = [
            6 => [0b11111110, 0b11111100],
            5 => [0b11111100, 0b11111000],
            4 => [0b11111000, 0b11110000],
            3 => [0b11110000, 0b11100000],
            2 => [0b11100000, 0b11000000],
        ];

        foreach ($prefixes as $length => $pair) {
            if (($byte & $pair[0]) === $pair[1]) {
                return $length;
            }
        }

        return 1;
    }
157
158
    /**
     * Decode $this->raw into $this->chars, one [UTF-8 string, code point]
     * pair per character.
     *
     * Well-formed UTF-8 sequences are stored as they are. Bytes that do not
     * form an accepted sequence (bad continuation byte, overlong form,
     * surrogate, value above U+10FFFF, or too few bytes left) are re-read one
     * byte at a time: values found in self::$winc1umap are mapped through it,
     * any other value is used directly as the code point, and the result is
     * re-encoded with cpToUtf8Char().
     *
     * U+FEFF at offset 0 is dropped; anywhere else it is stored as U+2060.
     *
     * Does nothing when $this->chars is already populated.
     *
     * @return void
     */
    private function loadToArray()
    {
        if (!empty($this->chars)) {
            return;
        }

        $len = \strlen($this->raw);
        $inside = false; // are we "inside" of evaluating a valid UTF-8 char?
        $invalid = false; // true while re-reading the lead byte of a rejected sequence

        // state of the multibyte sequence currently being assembled
        $bytes = 1;
        $cache = '';
        $ordcache = 0;
        $originOffset = 0;

        for ($offset = 0; $offset < $len; $offset++) {
            // square brackets: the {} string-offset form was deprecated in
            // PHP 7.4 and removed in PHP 8.0
            $char = $this->raw[$offset];
            $ord = \ord($char);

            if ($inside === false) {
                $bytes = self::charLength($ord);

                if ($bytes > 1 && $offset + $bytes <= $len && $invalid === false) {
                    // valid UTF-8 multibyte start
                    $inside = true;
                    $cache = $char;
                    $ordcache = ($ord & self::$spec[$bytes]['mask']) << (6 * ($bytes - 1));
                    $originOffset = $offset;
                } elseif ($ord < self::$spec[2]['start']) {
                    // ASCII 7-bit char
                    $this->chars[] = [$char, $ord];
                } else {
                    // either C0/C1 block or higher; map from cp1252 to utf8 or just convert
                    $ord = (isset(self::$winc1umap[$ord])) ? self::$winc1umap[$ord] : $ord;
                    $this->chars[] = [self::cpToUtf8Char($ord), $ord];
                    $invalid = false;
                }
                continue;
            }

            // $inside === true, i.e. *should be* continuation character
            if (($ord & 0b11000000) !== 0b10000000) {
                // actually, it's not one, so now the whole UTF-8 char is invalid
                // go back and force it to parse as ISO or 1252
                $inside = false;
                $invalid = true;
                $offset = $originOffset - 1; // the loop's $offset++ lands back on the lead byte
                continue;
            }

            // put this byte's data where it needs to go
            $ordcache |= ($ord & 0b00111111) << (6 * ($bytes - 1 - ($offset - $originOffset)));
            $cache .= $char;

            if ($originOffset + ($bytes - 1) === $offset) {
                // we're done parsing this char, now let's verify
                $inside = false;

                // check for overlong, surrogate, too large, BOM, or C0/C1
                $overlong = ($ordcache < self::$spec[$bytes]['start']);
                // explicit range test: "===" binds tighter than "&", so a
                // mask-and-compare written without parentheses never matches
                $surrogate = ($ordcache >= 0xD800 && $ordcache <= 0xDFFF);
                $toobig = ($ordcache > 0x10FFFF);

                if ($overlong || $surrogate || $toobig) {
                    $invalid = true;
                    $offset = $originOffset - 1;
                    continue;
                }

                if ($ordcache === 0xFEFF) { // BOM
                    if ($originOffset !== 0) {
                        // if not at beginning, store as word joiner U+2060
                        $this->chars[] = [\chr(0xE2) . \chr(0x81) . \chr(0xA0), 0x2060];
                    }
                    // otherwise discard
                    continue;
                }

                // verification passed, now store it
                $this->chars[] = [$cache, $ordcache];
            }
        }
    }
236
237
}
238