Completed
Push — master ( 66f9bc...5c75e4 )
by Garrett
02:20
created

StrObj::findUtf8CharAt()   C

Complexity

Conditions 17
Paths 17

Size

Total Lines 67
Code Lines 33

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 67
rs 5.7713
cc 17
eloc 33
nc 17
nop 1

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
namespace StringObject;
4
5
/**
 * Immutable wrapper around a PHP byte string exposing the core string
 * functions as methods.
 *
 * Every "modifying" method returns a new instance and leaves the receiver
 * untouched; array-style writes throw a \LogicException.
 *
 * NOTE(review): derived instances are built with `new self($value)`, so they
 * carry the constructor's default encoding rather than the receiver's.
 */
class StrObj implements \ArrayAccess, \Countable, \Iterator
{
    // CONSTANTS

    // Mode flags. START/END/BOTH_ENDS are passed straight to str_pad() by
    // pad(), so their values must stay equal to STR_PAD_LEFT/RIGHT/BOTH.
    const NORMAL = 0;
    const START = 0;
    const END = 1;
    const BOTH_ENDS = 2;
    const CASE_INSENSITIVE = 4;
    const REVERSE = 8;
    const EXACT_POSITION = 16;
    const CURRENT_LOCALE = 32;
    const NATURAL_ORDER = 64;
    const FIRST_N = 128;
    const C_STYLE = 256;
    const META = 512;
    const LAZY = 1024;
    const GREEDY = 2048;
    const WINDOWS1252 = 4096;
    const UTF8 = 8192;

    // STATIC PROPERTIES

    /**
     * ASCII replacement => list of characters it stands in for.
     * asciify() applies the entries in declaration order.
     */
    protected static $asciimap = [
        'a' => ['à', 'á', 'ả', 'ã', 'ạ', 'ă', 'ắ', 'ằ', 'ẳ', 'ẵ', 'ặ', 'â', 'ấ',
                'ầ', 'ẩ', 'ẫ', 'ậ', 'ā', 'ą', 'å', 'α', 'ά', 'ἀ', 'ἁ', 'ἂ', 'ἃ',
                'ἄ', 'ἅ', 'ἆ', 'ἇ', 'ᾀ', 'ᾁ', 'ᾂ', 'ᾃ', 'ᾄ', 'ᾅ', 'ᾆ', 'ᾇ', 'ὰ',
                'ά', 'ᾰ', 'ᾱ', 'ᾲ', 'ᾳ', 'ᾴ', 'ᾶ', 'ᾷ', 'а', 'أ'],
        'b' => ['б', 'β', 'Ъ', 'Ь', 'ب'],
        'c' => ['ç', 'ć', 'č', 'ĉ', 'ċ'],
        'd' => ['ď', 'ð', 'đ', 'ƌ', 'ȡ', 'ɖ', 'ɗ', 'ᵭ', 'ᶁ', 'ᶑ', 'д', 'δ', 'د', 'ض'],
        'e' => ['é', 'è', 'ẻ', 'ẽ', 'ẹ', 'ê', 'ế', 'ề', 'ể', 'ễ', 'ệ', 'ë', 'ē',
                'ę', 'ě', 'ĕ', 'ė', 'ε', 'έ', 'ἐ', 'ἑ', 'ἒ', 'ἓ', 'ἔ', 'ἕ', 'ὲ',
                'έ', 'е', 'ё', 'э', 'є', 'ə'],
        'f' => ['ф', 'φ', 'ف'],
        'g' => ['ĝ', 'ğ', 'ġ', 'ģ', 'г', 'ґ', 'γ', 'ج'],
        'h' => ['ĥ', 'ħ', 'η', 'ή', 'ح', 'ه'],
        'i' => ['í', 'ì', 'ỉ', 'ĩ', 'ị', 'î', 'ï', 'ī', 'ĭ', 'į', 'ı', 'ι', 'ί',
                'ϊ', 'ΐ', 'ἰ', 'ἱ', 'ἲ', 'ἳ', 'ἴ', 'ἵ', 'ἶ', 'ἷ', 'ὶ', 'ί', 'ῐ',
                'ῑ', 'ῒ', 'ΐ', 'ῖ', 'ῗ', 'і', 'ї', 'и'],
        'j' => ['ĵ', 'ј', 'Ј'],
        'k' => ['ķ', 'ĸ', 'к', 'κ', 'Ķ', 'ق', 'ك'],
        'l' => ['ł', 'ľ', 'ĺ', 'ļ', 'ŀ', 'л', 'λ', 'ل'],
        'm' => ['м', 'μ', 'م'],
        'n' => ['ñ', 'ń', 'ň', 'ņ', 'ʼn', 'ŋ', 'ν', 'н', 'ن'],
        'o' => ['ó', 'ò', 'ỏ', 'õ', 'ọ', 'ô', 'ố', 'ồ', 'ổ', 'ỗ', 'ộ', 'ơ', 'ớ',
                'ờ', 'ở', 'ỡ', 'ợ', 'ø', 'ō', 'ő', 'ŏ', 'ο', 'ὀ', 'ὁ', 'ὂ', 'ὃ',
                'ὄ', 'ὅ', 'ὸ', 'ό', 'о', 'و', 'θ'],
        'p' => ['п', 'π'],
        'r' => ['ŕ', 'ř', 'ŗ', 'р', 'ρ', 'ر'],
        's' => ['ś', 'š', 'ş', 'с', 'σ', 'ș', 'ς', 'س', 'ص'],
        't' => ['ť', 'ţ', 'т', 'τ', 'ț', 'ت', 'ط'],
        'u' => ['ú', 'ù', 'ủ', 'ũ', 'ụ', 'ư', 'ứ', 'ừ', 'ử', 'ữ', 'ự', 'û', 'ū',
                'ů', 'ű', 'ŭ', 'ų', 'µ', 'у'],
        'v' => ['в'],
        'w' => ['ŵ', 'ω', 'ώ'],
        'x' => ['χ'],
        'y' => ['ý', 'ỳ', 'ỷ', 'ỹ', 'ỵ', 'ÿ', 'ŷ', 'й', 'ы', 'υ', 'ϋ', 'ύ', 'ΰ', 'ي'],
        'z' => ['ź', 'ž', 'ż', 'з', 'ζ', 'ز'],
        'aa' => ['ع'],
        'ae' => ['ä', 'æ'],
        'ch' => ['ч'],
        'dj' => ['ђ', 'đ'],
        'dz' => ['џ'],
        'gh' => ['غ'],
        'kh' => ['х', 'خ'],
        'lj' => ['љ'],
        'nj' => ['њ'],
        'oe' => ['ö', 'œ'],
        'ps' => ['ψ'],
        'sh' => ['ш'],
        'shch' => ['щ'],
        'ss' => ['ß'],
        'th' => ['þ', 'ث', 'ذ', 'ظ'],
        'ts' => ['ц'],
        'ue' => ['ü'],
        'ya' => ['я'],
        'yu' => ['ю'],
        'zh' => ['ж'],
        '(c)' => ['©'],
        'A' => ['Á', 'À', 'Ả', 'Ã', 'Ạ', 'Ă', 'Ắ', 'Ằ', 'Ẳ', 'Ẵ', 'Ặ', 'Â', 'Ấ',
                'Ầ', 'Ẩ', 'Ẫ', 'Ậ', 'Å', 'Ā', 'Ą', 'Α', 'Ά', 'Ἀ', 'Ἁ', 'Ἂ', 'Ἃ',
                'Ἄ', 'Ἅ', 'Ἆ', 'Ἇ', 'ᾈ', 'ᾉ', 'ᾊ', 'ᾋ', 'ᾌ', 'ᾍ', 'ᾎ', 'ᾏ', 'Ᾰ',
                'Ᾱ', 'Ὰ', 'Ά', 'ᾼ', 'А'],
        'B' => ['Б', 'Β'],
        'C' => ['Ç', 'Ć', 'Č', 'Ĉ', 'Ċ'],
        'D' => ['Ď', 'Ð', 'Đ', 'Ɖ', 'Ɗ', 'Ƌ', 'ᴅ', 'ᴆ', 'Д', 'Δ'],
        'E' => ['É', 'È', 'Ẻ', 'Ẽ', 'Ẹ', 'Ê', 'Ế', 'Ề', 'Ể', 'Ễ', 'Ệ', 'Ë', 'Ē',
                'Ę', 'Ě', 'Ĕ', 'Ė', 'Ε', 'Έ', 'Ἐ', 'Ἑ', 'Ἒ', 'Ἓ', 'Ἔ', 'Ἕ', 'Έ',
                'Ὲ', 'Е', 'Ё', 'Э', 'Є', 'Ə'],
        'F' => ['Ф', 'Φ'],
        'G' => ['Ğ', 'Ġ', 'Ģ', 'Г', 'Ґ', 'Γ'],
        'H' => ['Η', 'Ή'],
        'I' => ['Í', 'Ì', 'Ỉ', 'Ĩ', 'Ị', 'Î', 'Ï', 'Ī', 'Ĭ', 'Į', 'İ', 'Ι', 'Ί',
                'Ϊ', 'Ἰ', 'Ἱ', 'Ἳ', 'Ἴ', 'Ἵ', 'Ἶ', 'Ἷ', 'Ῐ', 'Ῑ', 'Ὶ', 'Ί', 'И',
                'І', 'Ї'],
        'K' => ['К', 'Κ'],
        'L' => ['Ĺ', 'Ł', 'Л', 'Λ', 'Ļ'],
        'M' => ['М', 'Μ'],
        'N' => ['Ń', 'Ñ', 'Ň', 'Ņ', 'Ŋ', 'Н', 'Ν'],
        'O' => ['Ó', 'Ò', 'Ỏ', 'Õ', 'Ọ', 'Ô', 'Ố', 'Ồ', 'Ổ', 'Ỗ', 'Ộ', 'Ơ', 'Ớ',
                'Ờ', 'Ở', 'Ỡ', 'Ợ', 'Ø', 'Ō', 'Ő', 'Ŏ', 'Ο', 'Ό', 'Ὀ', 'Ὁ', 'Ὂ',
                'Ὃ', 'Ὄ', 'Ὅ', 'Ὸ', 'Ό', 'О', 'Θ', 'Ө'],
        'P' => ['П', 'Π'],
        'R' => ['Ř', 'Ŕ', 'Р', 'Ρ'],
        'S' => ['Ş', 'Ŝ', 'Ș', 'Š', 'Ś', 'С', 'Σ'],
        'T' => ['Ť', 'Ţ', 'Ŧ', 'Ț', 'Т', 'Τ'],
        'U' => ['Ú', 'Ù', 'Ủ', 'Ũ', 'Ụ', 'Ư', 'Ứ', 'Ừ', 'Ử', 'Ữ', 'Ự', 'Û', 'Ū',
                'Ů', 'Ű', 'Ŭ', 'Ų', 'У'],
        'V' => ['В'],
        'W' => ['Ω', 'Ώ'],
        'X' => ['Χ'],
        'Y' => ['Ý', 'Ỳ', 'Ỷ', 'Ỹ', 'Ỵ', 'Ÿ', 'Ῠ', 'Ῡ', 'Ὺ', 'Ύ', 'Ы', 'Й', 'Υ', 'Ϋ'],
        'Z' => ['Ź', 'Ž', 'Ż', 'З', 'Ζ'],
        'AE' => ['Ä', 'Æ'],
        'CH' => ['Ч'],
        'DJ' => ['Ђ'],
        'DZ' => ['Џ'],
        'KH' => ['Х'],
        'LJ' => ['Љ'],
        'NJ' => ['Њ'],
        'OE' => ['Ö'],
        'PS' => ['Ψ'],
        'SH' => ['Ш'],
        'SHCH' => ['Щ'],
        'SS' => ['ẞ'],
        'TH' => ['Þ'],
        'TS' => ['Ц'],
        'UE' => ['Ü'],
        'YA' => ['Я'],
        'YU' => ['Ю'],
        'ZH' => ['Ж'],
        ' ' => ["\xC2\xA0", "\xE2\x80\x80", "\xE2\x80\x81", "\xE2\x80\x82",
                "\xE2\x80\x83", "\xE2\x80\x84", "\xE2\x80\x85", "\xE2\x80\x86",
                "\xE2\x80\x87", "\xE2\x80\x88", "\xE2\x80\x89", "\xE2\x80\x8A",
                "\xE2\x80\xAF", "\xE2\x81\x9F", "\xE3\x80\x80"],
    ];

    /**
     * Byte value (128-159) => code point used by parseUtf8CharAt() for stray
     * single bytes. Bytes 129, 141, 143, 144 and 157 have no entry.
     */
    protected static $winc1umap = [
        128 => 0x20AC,
        130 => 0x201A,
        131 => 0x0192,
        132 => 0x201E,
        133 => 0x2026,
        134 => 0x2020,
        135 => 0x2021,
        136 => 0x02C6,
        137 => 0x2030,
        138 => 0x0160,
        139 => 0x2039,
        140 => 0x0152,
        142 => 0x017D,
        145 => 0x2018,
        146 => 0x2019,
        147 => 0x201C,
        148 => 0x201D,
        149 => 0x2022,
        150 => 0x2013,
        151 => 0x2014,
        152 => 0x02DC,
        153 => 0x2122,
        154 => 0x0161,
        155 => 0x203A,
        156 => 0x0153,
        158 => 0x017E,
        159 => 0x0178,
    ];

    // PROPERTIES

    /** @var string the wrapped byte string */
    protected $raw;
    /** @var int self::WINDOWS1252 or self::UTF8 */
    protected $encoding;
    /** @var bool true once nextToken() has started a strtok() scan */
    protected $token = false;
    /** @var int current byte position for the Iterator implementation */
    protected $caret = 0;

    // MAGIC METHODS

    /**
     * @param mixed $thing value to wrap; cast to string
     * @param int   $enc   self::WINDOWS1252 (default) or self::UTF8
     *
     * @throws \InvalidArgumentException for arrays and for objects without __toString()
     */
    public function __construct($thing, $enc = self::WINDOWS1252)
    {
        self::testStringableObject($thing);

        if (\is_array($thing)) {
            throw new \InvalidArgumentException('Unsure of how to convert array to string');
        }

        $this->raw = (string) $thing;
        $this->encoding = $enc;
    }

    /**
     * Read-only access to the protected properties by name.
     *
     * @return mixed
     */
    public function __get($name)
    {
        return $this->$name;
    }

    /**
     * @return string
     */
    public function __toString()
    {
        return $this->raw;
    }

    /**
     * Splits the string into an array.
     *
     * @param string|int $delim empty: one byte per element; int: chunks of
     *                          that many bytes; otherwise an explode() delimiter
     * @param int|null   $limit optional explode() limit
     *
     * @return array
     */
    public function toArray($delim = '', $limit = null)
    {
        // '0' is a legitimate delimiter, but empty('0') is true.
        if ($delim !== '0' && empty($delim)) {
            return \str_split($this->raw);
        }
        if (\is_int($delim)) {
            return \str_split($this->raw, $delim);
        }
        if ($limit === null) {
            return \explode($delim, $this->raw);
        }
        return \explode($delim, $this->raw, $limit);
    }

    // INFORMATIONAL METHODS

    /**
     * @param int $offset byte offset
     *
     * @return self the single byte at $offset
     */
    public function charAt($offset)
    {
        return new self($this->raw[$offset]);
    }

    /**
     * Returns the byte value at $offset for Windows-1252 strings; for any
     * other encoding returns parseUtf8CharAt()'s [start, length, code point].
     *
     * NOTE(review): the two encodings yield different return shapes; confirm
     * with callers before unifying.
     *
     * @param int $offset byte offset
     *
     * @return int|array
     */
    public function charCodeAt($offset)
    {
        if ($this->encoding === self::WINDOWS1252) {
            return \ord($this->raw[$offset]);
        }

        return $this->parseUtf8CharAt($offset);
    }

    /**
     * Compares this string with $str using the str*cmp() function selected
     * by $mode. $length is only used with self::FIRST_N.
     *
     * @return int
     */
    public function compareTo($str, $mode = self::NORMAL, $length = 1)
    {
        $modemap = [
            self::NORMAL => 'strcmp',
            self::CASE_INSENSITIVE => 'strcasecmp',
            self::CURRENT_LOCALE => 'strcoll',
            self::NATURAL_ORDER => 'strnatcmp',
            (self::NATURAL_ORDER | self::CASE_INSENSITIVE) => 'strnatcasecmp',
            self::FIRST_N => 'strncmp',
            (self::FIRST_N | self::CASE_INSENSITIVE) => 'strncasecmp',
        ];

        if ($mode & self::FIRST_N) {
            return \call_user_func($modemap[$mode], $this->raw, $str, $length);
        }
        return \call_user_func($modemap[$mode], $this->raw, $str);
    }

    /**
     * Finds $needle via strpos/stripos/strrpos/strripos depending on the
     * REVERSE and CASE_INSENSITIVE bits of $mode; other bits are ignored.
     *
     * @return int|false
     */
    public function indexOf($needle, $offset = 0, $mode = self::NORMAL)
    {
        // strip out bits we don't understand
        $mode &= (self::REVERSE | self::CASE_INSENSITIVE);

        $modemap = [
            self::NORMAL => 'strpos',
            self::CASE_INSENSITIVE => 'stripos',
            self::REVERSE => 'strrpos',
            (self::REVERSE | self::CASE_INSENSITIVE) => 'strripos',
        ];
        return \call_user_func($modemap[$mode], $this->raw, $needle, $offset);
    }

    /**
     * @return int length in bytes
     */
    public function length()
    {
        return \strlen($this->raw);
    }

    // MODIFYING METHODS

    /**
     * @return self this string followed by $str
     */
    public function append($str)
    {
        return new self($this->raw . $str);
    }

    /**
     * Replaces every character listed in self::$asciimap with its ASCII
     * stand-in and, optionally, drops every byte outside 0x20-0x7E.
     *
     * @param bool $removeUnsupported strip whatever is still non-printable-ASCII
     *
     * @return self
     */
    public function asciify($removeUnsupported = true)
    {
        $str = $this->raw;
        foreach (self::$asciimap as $key => $value) {
            $str = \str_replace($value, $key, $str);
        }
        if ($removeUnsupported) {
            // Byte-wise on purpose: with the /u modifier preg_replace()
            // returns null for input that is not valid UTF-8, which would
            // wipe the whole string.
            $str = \preg_replace('/[^\x20-\x7E]/', '', $str);
        }
        return new self($str);
    }

    /**
     * @return self result of chunk_split()
     */
    public function chunk($length = 76, $ending = "\r\n")
    {
        return new self(\chunk_split($this->raw, $length, $ending));
    }

    /**
     * Alias of append().
     *
     * @return self
     */
    public function concat($str)
    {
        return $this->append($str);
    }

    /**
     * Escapes with addslashes (NORMAL), addcslashes (C_STYLE, using
     * $charlist) or quotemeta (META).
     *
     * @return self
     */
    public function escape($mode = self::NORMAL, $charlist = '')
    {
        $modemap = [
            self::NORMAL => 'addslashes',
            self::C_STYLE => 'addcslashes',
            self::META => 'quotemeta',
        ];
        if ($mode === self::C_STYLE) {
            return new self(\call_user_func($modemap[$mode], $this->raw, $charlist));
        }
        return new self(\call_user_func($modemap[$mode], $this->raw));
    }

    /**
     * @return self this string with $str inserted at byte $offset
     */
    public function insertAt($str, $offset)
    {
        return $this->replaceSubstr($str, $offset, 0);
    }

    /**
     * Returns the next strtok() token. The first call (or the first after
     * resetToken()) starts a new scan of this string.
     *
     * @return self
     */
    public function nextToken($delim)
    {
        if ($this->token) {
            return new self(\strtok($delim));
        }
        $this->token = true;
        return new self(\strtok($this->raw, $delim));
    }

    /**
     * @param int $mode self::START, self::END or self::BOTH_ENDS, handed to str_pad() as-is
     *
     * @return self
     */
    public function pad($newlength, $padding = ' ', $mode = self::END)
    {
        return new self(\str_pad($this->raw, $newlength, $padding, $mode));
    }

    /**
     * @return self $str followed by this string
     */
    public function prepend($str)
    {
        return new self($str . $this->raw);
    }

    /**
     * @return self this string with every occurrence of $str removed
     */
    public function remove($str, $mode = self::NORMAL)
    {
        return $this->replace($str, '', $mode);
    }

    /**
     * @return self this string without the given byte range
     */
    public function removeSubstr($start, $length = null)
    {
        return $this->replaceSubstr('', $start, $length);
    }

    /**
     * @return self this string repeated $times times
     */
    public function repeat($times)
    {
        return new self(\str_repeat($this->raw, $times));
    }

    /**
     * str_replace(), or str_ireplace() when $mode has CASE_INSENSITIVE set.
     *
     * @return self
     */
    public function replace($search, $replace, $mode = self::NORMAL)
    {
        if ($mode & self::CASE_INSENSITIVE) {
            return new self(\str_ireplace($search, $replace, $this->raw));
        }
        return new self(\str_replace($search, $replace, $this->raw));
    }

    /**
     * substr_replace(); a null $length means "through the end of the string".
     *
     * @return self
     */
    public function replaceSubstr($replacement, $start, $length = null)
    {
        if ($length === null) {
            $length = $this->length();
        }
        return new self(\substr_replace($this->raw, $replacement, $start, $length));
    }

    /**
     * Makes the next nextToken() call start a fresh scan.
     */
    public function resetToken()
    {
        $this->token = false;
    }

    /**
     * @return self bytes in reverse order
     */
    public function reverse()
    {
        return new self(\strrev($this->raw));
    }

    /**
     * @return self bytes in random order
     */
    public function shuffle()
    {
        return new self(\str_shuffle($this->raw));
    }

    /**
     * substr(); the string 'omitted' is the sentinel for "no length given".
     *
     * @return self
     */
    public function substr($start, $length = 'omitted')
    {
        if ($length === 'omitted') {
            return new self(\substr($this->raw, $start));
        }
        return new self(\substr($this->raw, $start, $length));
    }

    /**
     * Alias of repeat().
     *
     * @return self
     */
    public function times($times)
    {
        return $this->repeat($times);
    }

    /**
     * strtr() with either a replacement-pairs array or from/to strings.
     *
     * @return self
     */
    public function translate($search, $replace = '')
    {
        if (\is_array($search)) {
            return new self(\strtr($this->raw, $search));
        }
        return new self(\strtr($this->raw, $search, $replace));
    }

    /**
     * ltrim (START), rtrim (END) or trim (BOTH_ENDS) with the given mask.
     *
     * @return self
     */
    public function trim($mask = " \t\n\r\0\x0B", $mode = self::BOTH_ENDS)
    {
        $modemap = [
            self::START => 'ltrim',
            self::END => 'rtrim',
            self::BOTH_ENDS => 'trim',
        ];
        return new self(\call_user_func($modemap[$mode], $this->raw, $mask));
    }

    /**
     * Reverses escape(): stripcslashes for C_STYLE, stripslashes otherwise.
     *
     * @return self
     */
    public function unescape($mode = self::NORMAL)
    {
        $modemap = [
            self::NORMAL => 'stripslashes',
            self::C_STYLE => 'stripcslashes',
            self::META => 'stripslashes',
        ];
        return new self(\call_user_func($modemap[$mode], $this->raw));
    }

    /**
     * @return self result of convert_uudecode()
     */
    public function uuDecode()
    {
        return new self(\convert_uudecode($this->raw));
    }

    /**
     * @return self result of convert_uuencode()
     */
    public function uuEncode()
    {
        return new self(\convert_uuencode($this->raw));
    }

    /**
     * wordwrap() that never splits a word longer than $width.
     *
     * @return self
     */
    public function wordwrap($width = 75, $break = "\n")
    {
        return new self(\wordwrap($this->raw, $width, $break, false));
    }

    /**
     * wordwrap() that cuts words longer than $width.
     *
     * @return self
     */
    public function wordwrapBreaking($width = 75, $break = "\n")
    {
        return new self(\wordwrap($this->raw, $width, $break, true));
    }

    // TESTING METHODS

    /**
     * Tells whether $needle occurs at or after $offset; with EXACT_POSITION
     * in $mode it must be found exactly at $offset.
     *
     * @return bool
     */
    public function contains($needle, $offset = 0, $mode = self::NORMAL)
    {
        if ($mode & self::EXACT_POSITION) {
            return ($this->indexOf($needle, $offset, $mode) === $offset);
        }
        return ($this->indexOf($needle, $offset, $mode) !== false);
    }

    /**
     * @return int result of substr_count()
     */
    public function countSubstr($needle, $offset = 0, $length = null)
    {
        if ($length === null) {
            return \substr_count($this->raw, $needle, $offset);
        }
        return \substr_count($this->raw, $needle, $offset, $length);
    }

    /**
     * @return bool whether the string ends with $str
     */
    public function endsWith($str, $mode = self::NORMAL)
    {
        $mode &= self::CASE_INSENSITIVE;
        $offset = $this->length() - \strlen($str);
        return $this->contains($str, $offset, $mode | self::EXACT_POSITION | self::REVERSE);
    }

    /**
     * Byte-for-byte equality with the string form of $str.
     *
     * @throws \InvalidArgumentException if $str is an object without __toString()
     *
     * @return bool
     */
    public function equals($str)
    {
        self::testStringableObject($str);

        // Strict comparison: with == two numeric strings are compared as
        // numbers, so '1e3' would equal '1000'.
        return ((string) $str === $this->raw);
    }

    /**
     * @return bool true when no byte has its high bit set
     */
    public function isAscii()
    {
        // Works on raw bytes for every encoding; charCodeAt() returns an
        // array in UTF-8 mode and cannot be compared against 128.
        return \preg_match('/[\x80-\xFF]/', $this->raw) !== 1;
    }

    /**
     * @return bool true only for the zero-length string ('0' is not empty)
     */
    public function isEmpty()
    {
        return $this->raw === '';
    }

    /**
     * @return bool whether the string starts with $str
     */
    public function startsWith($str, $mode = self::NORMAL)
    {
        $mode &= self::CASE_INSENSITIVE;
        return $this->contains($str, 0, $mode | self::EXACT_POSITION);
    }

    // INTERFACE IMPLEMENTATION METHODS
    // `#[\ReturnTypeWillChange]` is an ordinary comment before PHP 8 and
    // silences the PHP 8.1 tentative-return-type deprecation afterwards.

    #[\ReturnTypeWillChange]
    public function count()
    {
        return \strlen($this->raw);
    }

    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->raw[$this->caret];
    }

    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->caret;
    }

    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->caret++;
    }

    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->caret = 0;
    }

    #[\ReturnTypeWillChange]
    public function valid()
    {
        return ($this->caret < \strlen($this->raw));
    }

    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        $offset = (int) $offset;
        return ($offset >= 0 && $offset < \strlen($this->raw));
    }

    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        return $this->raw[$offset];
    }

    /**
     * @throws \LogicException always; instances are immutable
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        throw new \LogicException('Cannot assign '.$value.' to immutable StrObj instance at index '.$offset);
    }

    /**
     * @throws \LogicException always; instances are immutable
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        throw new \LogicException('Cannot unset index '.$offset.' on immutable StrObj instance');
    }

    // PRIVATE STATIC FUNCTIONS

    /**
     * @throws \InvalidArgumentException if $thing is an object without __toString()
     */
    protected static function testStringableObject($thing)
    {
        if (\is_object($thing) && !\method_exists($thing, '__toString')) {
            throw new \InvalidArgumentException(
                'Parameter is an object that does not implement __toString() method'
            );
        }
    }

    /**
     * Decodes the character that the byte at $offset belongs to.
     *
     * @param int $offset byte offset
     *
     * @return array [start offset, length in bytes, code point or byte value]
     */
    protected function parseUtf8CharAt($offset)
    {
        list($start, $length, $valid, $current) = $this->findUtf8CharAt($offset);

        if ($length === 1) {
            if ($current > 0b01111111 && $current < 0b10100000) {
                // Bytes 128-159 go through the map; the few bytes it does
                // not list are passed through unchanged.
                $mapped = isset(self::$winc1umap[$current]) ? self::$winc1umap[$current] : $current;
                return [$start, $length, $mapped];
            }
            return [$start, $length, $current];
        }

        $byte = \ord($this->raw[$start]);

        if ($valid === false) {
            // NOTE(review): `$byte & 0b11000000` is non-zero for every lead
            // byte, not just 0xC0/0xC1; confirm the intended overlong test.
            if ($length === 2 && $byte & 0b11000000) {
                // overlong ascii
                return [$start + 1, 1, ($offset === $start) ? \ord($this->raw[$start + 1]) : $byte];
            }
            return [$offset, 1, $current];
        }

        // Payload bits carried by the lead byte, keyed by sequence length.
        $leadMasks = [2 => 0b00011111, 3 => 0b00001111, 4 => 0b00000111];
        if (!isset($leadMasks[$length])) {
            return [$offset, 1, $current];
        }

        $bigcode = $byte & $leadMasks[$length];
        for ($next = 1; $next < $length; $next++) {
            // each continuation byte contributes its low six bits
            $bigcode <<= 6;
            $bigcode += \ord($this->raw[$start + $next]) & 0b00111111;
        }

        if ($bigcode > 0x10FFFF) {
            return [$offset, 1, $current];
        }
        return [$start, $length, $bigcode];
    }

    /**
     * Determines if the byte at the given offset is part of a valid UTF8 char,
     * and returns its actual starting offset, length in bytes, validity,
     * and the byte at the original offset.
     *
     * @param int $offset byte offset
     *
     * @return array [start offset, length, bool valid, byte at $offset]
     */
    protected function findUtf8CharAt($offset)
    {
        $byte = \ord($this->raw[$offset]);

        if ($byte <= 0b01111111) {
            // ASCII passthru, 1 byte long
            return [$offset, 1, true, $byte];
        }

        if ($byte <= 0b10111111) {
            // either part of a UTF8 char, or an invalid UTF8 codepoint.
            // try to find start of UTF8 char
            $original = $offset;
            while ($offset > 0 && $original - $offset < 4) {
                $prev = \ord($this->raw[--$offset]);

                if ($prev <= 0b01111111) {
                    // prev is plain ASCII so current char can't be valid
                    return [$original, 1, false, $byte];
                }

                if ($prev <= 0b10111111) {
                    // prev is also part of a UTF8 char, so keep looking
                    continue;
                }

                if ($prev == 0xC0 || $prev == 0xC1) {
                    // prev is an invalid UTF8 starter for overlong ASCII
                    return [$offset, 2, false, $byte];
                }

                if ($prev <= 0b11110100) {
                    // prev is a valid start byte: the original offset must
                    // fall inside its sequence, and the sequence itself must
                    // be complete and well-formed.
                    $charLength = self::calcUtf8CharLength($prev);
                    if ($original < $offset + $charLength) {
                        $lead = $this->findUtf8CharAt($offset);
                        if ($lead[2] === true) {
                            return [$offset, $charLength, true, $byte];
                        }
                    }
                }
                return [$original, 1, false, $byte];
            }
            return [$original, 1, false, $byte];
        }

        if ($byte <= 0b11110100) {
            // valid UTF8 start byte, find the rest, determine if length is valid
            $actual = $length = self::calcUtf8CharLength($byte);
            $total = $this->length();

            for ($i = 1; $i < $length; $i++) {
                if ($offset + $i >= $total) {
                    // NOTE(review): truncated sequence reports one byte less
                    // than the bad-continuation case below; kept as-is.
                    $actual = $i - 1;
                    break;
                }
                $last = \ord($this->raw[$offset + $i]);
                if ($last < 0b10000000 || $last > 0b10111111) {
                    $actual = $i;
                    break;
                }
            }

            if ($actual !== $length) {
                return [$offset, $actual, false, $byte];
            }
            return [$offset, $length, true, $byte];
        }

        // if 245 to 255, Windows-1252 passthru
        return [$offset, 1, false, $byte];
    }

    /**
     * Sequence length announced by a UTF-8 lead byte.
     *
     * @param int $byte byte value 0-255
     *
     * @return int 2 for 110xxxxx, 3 for 1110xxxx, 4 for 11110xxx, otherwise 1
     */
    protected static function calcUtf8CharLength($byte)
    {
        if (($byte & 0b11100000) === 0b11000000) {
            return 2;
        }
        if (($byte & 0b11110000) === 0b11100000) {
            return 3;
        }
        if (($byte & 0b11111000) === 0b11110000) {
            return 4;
        }
        return 1;
    }
}
697