Utf8::substr()   A
last analyzed

Complexity

Conditions 2
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 1
c 1
b 0
f 0
dl 0
loc 3
rs 10
cc 2
nc 1
nop 3
1
<?php
2
3
/*
4
 * This file is part of Utf8.
5
 *     (c) Fabrice de Stefanis / https://github.com/fab2s/Utf8
6
 * This source file is licensed under the MIT license which you will
7
 * find in the LICENSE file or at https://opensource.org/licenses/MIT
8
 */
9
10
namespace fab2s\Utf8;
11
12
/**
 * UTF-8 string manipulations
 *
 * Static helpers wrapping the mbstring functions with the encoding always
 * forced to UTF-8, plus code point conversion (ord / chr), optional intl
 * normalization and a few byte-pattern based checks.
 *
 * Optional features (intl Normalizer, mb_ord / mb_chr) are only used once
 * support() has detected them; until then the fallbacks are used.
 */
class Utf8
{
    /**
     * utf8 charset name in mb dialect
     */
    const ENC_UTF8 = 'UTF-8';

    /**
     * Selector for the NFC canonical form.
     *
     * The value is the one \Normalizer::NFC historically had; newer intl builds
     * use different values, so normalize() translates this selector to the
     * runtime \Normalizer::NFC instead of forwarding it as is.
     */
    const NORMALIZE_NFC = 4;

    /**
     * Selector for the NFD canonical form.
     *
     * The value is the one \Normalizer::NFD historically had; normalize()
     * translates it to the runtime \Normalizer::NFD.
     */
    const NORMALIZE_NFD = 2;

    /**
     * Whether the intl normalizer was detected, see normalizerSupport()
     *
     * @var bool
     */
    protected static $normalizerSupport = false;

    /**
     * Whether mb_ord() (and thus mb_chr()) was detected, see support()
     *
     * @var bool
     */
    protected static $ordSupport = false;

    /**
     * strrpos
     *
     * Position (in characters) of the last occurrence of $needle in $haystack.
     *
     * @param string   $haystack
     * @param string   $needle
     * @param int|null $offset   null is treated as 0
     *
     * @return int|false false when the haystack is empty or the needle is not found
     */
    public static function strrpos(string $haystack, string $needle, ?int $offset = 0)
    {
        // Emulate strrpos behaviour (no warning) for an empty haystack.
        // A strict comparison is required here: empty() would also reject
        // the perfectly valid haystack "0".
        if ($haystack === '') {
            return false;
        }

        // mb_strrpos() expects an int offset, so a null offset becomes 0
        return mb_strrpos($haystack, $needle, $offset ?? 0, static::ENC_UTF8);
    }

    /**
     * strpos
     *
     * Position (in characters) of the first occurrence of $needle in $haystack.
     *
     * @param string   $haystack
     * @param string   $needle
     * @param int|null $offset   null is treated as 0
     *
     * @return int|false
     */
    public static function strpos(string $haystack, string $needle, $offset = 0)
    {
        // same null handling as strrpos(): mb_strpos() expects an int offset
        return mb_strpos($haystack, $needle, $offset ?? 0, static::ENC_UTF8);
    }

    /**
     * strtolower
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower(string $string): string
    {
        return mb_strtolower($string, static::ENC_UTF8);
    }

    /**
     * strtoupper
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper(string $string): string
    {
        return mb_strtoupper($string, static::ENC_UTF8);
    }

    /**
     * substr
     *
     * @param string   $string
     * @param int      $offset in characters
     * @param int|null $length in characters, null means "up to the end of the string"
     *
     * @return string
     */
    public static function substr(string $string, int $offset, ?int $length = null): string
    {
        if ($length === null) {
            // the full character count always reaches the end of the string,
            // whatever the offset is
            $length = mb_strlen($string, static::ENC_UTF8);
        }

        return mb_substr($string, $offset, $length, static::ENC_UTF8);
    }

    /**
     * strlen
     *
     * @param string $string
     *
     * @return int the number of characters (not bytes)
     */
    public static function strlen(string $string): int
    {
        return mb_strlen($string, static::ENC_UTF8);
    }

    /**
     * ucfirst
     *
     * Upper cases the first character and leaves the rest untouched.
     *
     * @param string $string
     *
     * @return string
     */
    public static function ucfirst(string $string): string
    {
        switch (static::strlen($string)) {
            case 0:
                return '';
            case 1:
                return static::strtoupper($string);
            default:
                return static::strtoupper(static::substr($string, 0, 1)) . static::substr($string, 1);
        }
    }

    /**
     * ucwords
     *
     * Delegates to mb_convert_case() with MB_CASE_TITLE.
     *
     * @param string $string
     *
     * @return string
     */
    public static function ucwords(string $string): string
    {
        return mb_convert_case($string, MB_CASE_TITLE, static::ENC_UTF8);
    }

    /**
     * ord
     *
     * Code point of the first character of $chr.
     *
     * @param string $chr
     *
     * @return int|false false for an empty string
     */
    public static function ord(string $chr)
    {
        if ($chr === '') {
            return false;
        }

        if (static::$ordSupport) {
            return mb_ord($chr, static::ENC_UTF8);
        }

        // mb_ord() only considers the first character, while ordCompat()
        // decodes exactly the bytes it is given: isolate the first character
        // so both code paths agree on multi-character input.
        $first = static::substr($chr, 0, 1);

        // \strlen is the global byte-wise function, not Utf8::strlen()
        return static::ordCompat($first, \strlen($first));
    }

    /**
     * chr
     *
     * UTF-8 character for the code point $num.
     *
     * @param int $num
     *
     * @return string|false false when the fallback could not convert $num
     */
    public static function chr(int $num)
    {
        // handled up front so the entity based fallback never sees "&#0;"
        if ($num === 0) {
            return "\0";
        }

        if (static::$ordSupport) {
            return mb_chr($num, static::ENC_UTF8);
        }

        // prolly the fastest
        $input  = '&#' . $num . ';';
        $result = mb_convert_encoding($input, static::ENC_UTF8, 'HTML-ENTITIES');

        // an entity that comes back unchanged was not converted
        return $result !== $input ? $result : false;
    }

    /**
     * normalize an utf8 string to canonical form
     * Default to NFC
     *
     * Pass this class's NORMALIZE_NFC / NORMALIZE_NFD constants: they are
     * translated to the \Normalizer constants of the running intl version.
     * Any other value is forwarded to \Normalizer::normalize() untouched.
     * Without normalizer support the string is returned unchanged.
     *
     * @see https://stackoverflow.com/a/7934397/7630496
     *
     * @param string $string
     * @param int    $canonicalForm
     *
     * @return string an empty string when \Normalizer::normalize() fails
     */
    public static function normalize(string $string, int $canonicalForm = self::NORMALIZE_NFC): string
    {
        if (!static::$normalizerSupport) {
            return $string;
        }

        // The selector values of this class are hard coded while the actual
        // \Normalizer constant values depend on the intl build: resolve the
        // selector at runtime rather than trusting the hard coded number.
        if ($canonicalForm === self::NORMALIZE_NFC) {
            $canonicalForm = \Normalizer::NFC;
        } elseif ($canonicalForm === self::NORMALIZE_NFD) {
            $canonicalForm = \Normalizer::NFD;
        }

        $normalized = \Normalizer::normalize($string, $canonicalForm);

        // make the previously implicit false => '' coercion explicit
        return $normalized === false ? '' : $normalized;
    }

    /**
     * tells if a string contains at least one well formed multi-byte utf8
     * sequence (the rest of the string may still not be valid utf8)
     *
     * @param string $string
     *
     * @return bool
     */
    public static function hasUtf8(string $string): bool
    {
        // From http://w3.org/International/questions/qa-forms-utf-8.html
        // non-overlong 2-byte|excluding overlong|straight 3-byte|excluding surrogates|planes 1-3|planes 4-15|plane 16
        return (bool) preg_match('%(?:[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF] |\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})+%xs', $string);
    }

    /**
     * tells if a string is valid utf8
     *
     * @param string $string
     *
     * @return bool
     */
    public static function isUtf8(string $string): bool
    {
        // with the u modifier, preg_match() fails on malformed utf8 subjects
        return (bool) preg_match('//u', $string);
    }

    /**
     * Remove any 4byte multi bit chars, useful to make sure we can insert in utf8-nonMb4 db tables
     *
     * @param string $string
     * @param string $replace what each 4 byte sequence is replaced with
     *
     * @return string
     */
    public static function replaceMb4(string $string, string $replace = ''): string
    {
        return preg_replace('%(?:
            \xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
            | [\xF1-\xF3][\x80-\xBF]{3}        # planes 4-15
            | \xF4[\x80-\x8F][\x80-\xBF]{2}    # plane 16
        )%xs', $replace, $string);
    }

    /**
     * Detects (or forcibly disables) normalizer support
     *
     * @param bool $disable true to turn normalization off regardless of what is installed
     *
     * @return bool the resulting support state
     */
    public static function normalizerSupport(bool $disable = false): bool
    {
        if ($disable) {
            return static::$normalizerSupport = false;
        }

        return static::$normalizerSupport = function_exists('normalizer_normalize');
    }

    /**
     * Performs the few compatibility operations
     *
     * Detects normalizer and mb_ord() availability and stores the result
     * in the static support flags.
     */
    public static function support()
    {
        static::normalizerSupport();
        static::$ordSupport = function_exists('mb_ord');
    }

    /**
     * Decodes the code point of a single utf8 sequence without mb_ord()
     *
     * The bytes are decoded as a sequence of exactly $strLen bytes,
     * no validation is performed on them.
     *
     * @param string $chr    a single character
     * @param int    $strLen length of $chr in bytes
     *
     * @return int|false false when $strLen is not in the 1 to 4 range
     */
    public static function ordCompat(string $chr, int $strLen)
    {
        switch ($strLen) {
            case 1:
                return ord($chr);
            case 2:
                return ((ord($chr[0]) & 0x1F) << 6) | (ord($chr[1]) & 0x3F);
            case 3:
                return ((ord($chr[0]) & 0x0F) << 12) | ((ord($chr[1]) & 0x3F) << 6) | (ord($chr[2]) & 0x3F);
            case 4:
                return ((ord($chr[0]) & 0x07) << 18) | ((ord($chr[1]) & 0x3F) << 12) | ((ord($chr[2]) & 0x3F) << 6) | (ord($chr[3]) & 0x3F);
            default:
                return false;
        }
    }
}
301
302
// Known anti-pattern: static state is initialised as a side effect of loading this file
303
// Probe for normalizer_normalize() and mb_ord() once and store the result in the class flags
Utf8::support();
304