Completed
Pull Request — master (#63)
by Lars
09:10 queued 06:50
created

UTF8::strtoupper()   C

Complexity

Conditions 8
Paths 29

Size

Total Lines 39
Code Lines 19

Duplication

Lines 39
Ratio 100 %

Code Coverage

Tests 5
CRAP Score 41.7233

Importance

Changes 0
Metric Value
dl 39
loc 39
ccs 5
cts 26
cp 0.1923
rs 5.3846
c 0
b 0
f 0
cc 8
eloc 19
nc 29
nop 4
crap 41.7233
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
26
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
27
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
28
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
29
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
30
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
31
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
32
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
33
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
34
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
35
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
36
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
   * Constructor.
   *
   * Triggers the (cached) environment feature detection, so that the
   * support table is populated as soon as an instance is created.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
212
213
  /**
   * Fetch the single character located at a given position, similar to $str[1]
   * but aware of multi-byte characters.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string for
   *                empty input or a negative position.</p>
   */
  public static function access(string $str, int $pos): string
  {
    // Nothing to look up in an empty string or at a negative position.
    if ($pos < 0 || !isset($str[0])) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
233 3
234
  /**
   * Make sure the string starts with the UTF-8 BOM.
   *
   * INFO: A string that already has a BOM is handed back unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string(string $str): string
  {
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251 1
252
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it represents.
   *
   * The digits are converted to hexadecimal via base_convert() and then packed
   * into raw bytes.
   *
   * NOTE(review): base_convert() is documented to lose precision on large
   * numbers, so long inputs may not round-trip - confirm the expected input size.
   *
   * @param mixed $bin 1|0
   *
   * @return string <p>The packed bytes, or an empty string for empty input or
   *                when the value converts to zero.</p>
   */
  public static function binary_to_str($bin): string
  {
    if (!isset($bin[0])) {
      return '';
    }

    $hex = \base_convert($bin, 2, 16);
    if ($hex === '0') {
      return '';
    }

    // pack('H*') reads two hex digits per byte (high nibble first) and pads a
    // trailing single digit on the right, e.g. "a" would become "\xa0".
    // Left-pad odd-length values so that "a" is packed as "\x0a".
    if (\strlen($hex) % 2 !== 0) {
      $hex = '0' . $hex;
    }

    return \pack('H*', $hex);
  }
272 1
273
  /**
   * Get the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: see UTF8::$BOM for other known BOM values, e.g. UTF-16 and UTF-32
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    return "\xef\xbb\xbf";
  }
284 2
285
  /**
   * Alias of UTF8::chr_map(): the arguments are forwarded unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
299 1
300
  /**
   * Detect which UTF-8 related extensions / features are available and
   * remember the result in self::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   *       The detection runs only once; later calls return immediately.
   */
  public static function checkForSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // the transliterator id list is only available with a usable intl extension
    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              \function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators ? transliterator_list_ids() : [];

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
336 1
337 19
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per "code point + encoding" for the lifetime of the
   * request. Code points up to 127 are always read from the lookup table;
   * higher code points use \IntlChar when it is available and are otherwise
   * assembled byte by byte from the lookup table.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    // use "simple"-char only until "\x80"
    $isSingleByte = $code_point <= 127;

    if (!$isSingleByte && self::$SUPPORT['intlChar'] === true) {
      $chr = \IntlChar::chr($code_point);
    } else {
      // the byte lookup table is loaded lazily, only when it is really needed
      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      if ($isSingleByte) {
        // 1 byte: 0xxxxxxx
        $chr = self::$CHR[$code_point];
      } elseif ($code_point <= 0x7FF) {
        // 2 bytes: 110xxxxx 10xxxxxx
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }
    }

    if ($encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
426 7
427
  /**
   * Run a callback on every character of a string and collect the results.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
441 1
442
  /**
   * Build a list with the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character.</p>
   */
  public static function chr_size_list(string $str): array
  {
    if (!isset($str[0])) {
      return [];
    }

    $sizes = [];
    foreach (self::split($str) as $index => $character) {
      // '8BIT' => count the raw bytes of the character
      $sizes[$index] = self::strlen($character, '8BIT');
    }

    return $sizes;
  }
467 4
468 4
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the character (1 - 4);
   * the payload bits of the lead byte and of every following continuation
   * byte are then combined into the code point.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The code point, or 0 for an empty string.</p>
   */
  public static function chr_to_decimal(string $char): int
  {
    // Guard against reading $char[0] from an empty string.
    // NOTE(review): 0 is assumed to be the expected result for empty input - confirm against UTF8::ord('').
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx => append the lower bits of every continuation byte
      $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
    }

    return $code;
  }
506 2
507 2
  /**
   * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix passed on to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
   */
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if (!isset($char[0])) {
      return '';
    }

    // the literal entity "&#0;" is processed like an empty character
    $input = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
527
528 1
  /**
   * Alias of UTF8::chr_to_decimal(): the argument is forwarded unchanged.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
541
542
  /**
   * Break a string into chunks and glue the chunks together with the given line ending.
   *
   * INFO: the line ending is placed between the chunks only (implode), so
   *       nothing is appended after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
555
556
  /**
   * Strip everything that is not valid UTF-8 from a string and optionally
   * apply further normalization steps.
   *
   * Always applied: byte-level filter, removal of the replacement character
   * and removal of invisible characters. Optional steps run in this order:
   * whitespace normalization, MS Word normalization, BOM removal.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean(string $str, bool $remove_bom = false, bool $normalize_whitespace = false, bool $normalize_msword = false, bool $keep_non_breaking_space = false): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences, groups 2 + 3 match stray
    // bytes; replacing every match with '$1' keeps only the well-formed runs.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = \preg_replace($pattern, '$1', $str);

    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
604
605 61
  /**
   * Clean up a string so that only printable UTF-8 chars remain, and fix the UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // clean() is called with:
    //   remove BOM                 => true
    //   normalize whitespace       => true
    //   normalize MS Word chars    => false
    //   keep non-breaking-spaces   => true
    // and it additionally removes non UTF-8 symbols, the diamond question
    // mark (�) and invisible characters (e.g. "\0")
    return self::clean($fixed, true, true, false, true);
  }
630
631
  /**
   * Turn a string (or an array of strings) into an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    // a string is split into its characters first, an array is used as it is
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);

    if (!$u_style) {
      return $codePoints;
    }

    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
668 1
669 1
  /**
   * Count how often every character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // split into chunks of exactly one character
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
682
683
  /**
   * Convert an int-value into a UTF-8 character by decoding the matching
   * numeric HTML entity ("&#<int>;").
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
694
695
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  Unlike "UTF8::utf8_encode()", this function also tries to fix broken / double encoding,
   *        so it can be called on a string that is already UTF-8 without messing it up.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string <p>The converted string; the input is returned unchanged when no
   *                conversion applies or the conversion yields an empty result.</p>
   */
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // convert only if the source encoding is known and (forced or different from the target)
    $needsConversion = $encodingDetected !== false
                       &&
                       ($force === true || $encodingDetected !== $encoding);
    if (!$needsConversion) {
      return $str;
    }

    // the dedicated UTF-8 / ISO-8859-1 converters are used when forced or
    // when the detected encoding is one of these three
    $useDedicatedConverter = $force === true
                             ||
                             \in_array($encodingDetected, ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'], true);

    if ($useDedicatedConverter) {
      if ($encoding === 'UTF-8') {
        return self::to_utf8($str);
      }

      if ($encoding === 'ISO-8859-1') {
        return self::to_iso8859($str);
      }
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // fall back to the input if the conversion produced an empty / falsy result
    return $strEncoded ? $strEncoded : $str;
  }
783 1
784 2
  /**
   * Read a whole file into a string, optionally converting the content to clean UTF-8.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename         <p>
   *                                        Name of the file to read. The value is passed
   *                                        through filter_var(FILTER_SANITIZE_STRING) first.
   *                                        </p>
   * @param bool          $use_include_path [optional] <p>
   *                                        Passed on to the native file_get_contents() to
   *                                        trigger the include path search.
   *                                        </p>
   * @param resource|null $context          [optional] <p>
   *                                        A valid context resource created with
   *                                        stream_context_create. If null and $timeout is set,
   *                                        a context with an "http" timeout is created.
   *                                        </p>
   * @param int|null      $offset           [optional] <p>
   *                                        The offset where the reading starts (null => 0).
   *                                        </p>
   * @param int|null      $maxLength        [optional] <p>
   *                                        Maximum length of data read. The default is to read until end
   *                                        of file is reached.
   *                                        </p>
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
   *
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
   *                                        images or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents(string $filename, bool $use_include_path = false, $context = null, int $offset = null, int $maxLength = null, int $timeout = 10, bool $convertToUtf8 = true)
  {
    // init
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // build a default context only if the caller did not provide one
    if ($timeout && $context === null) {
      $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
    }

    $offset = $offset ?? 0;

    // the length argument is only passed on when it was given explicitly
    $data = \is_int($maxLength) === true
        ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
        : \file_get_contents($filename, $use_include_path, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    return self::cleanup(self::encode('UTF-8', $data, false));
  }
858
859
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    // \file_get_contents() returns false on failure; do not hand a
    // non-string value to the string check, an unreadable file has no BOM.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
870
871
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element /
   * property by property), strings are normalized, every other type is
   * returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Normalization form passed on to \Normalizer.</p>
   * @param string $leading_combining  <p>Prefix that is added if the result starts with a
   *                                   combining mark (\p{Mn}).</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = \str_replace(["\r\n", "\r"], "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // marker value: already normalized
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // fall back to a forced UTF-8 re-encoding if the normalizer gave no usable result
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($n[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
934
935 1
  /**
936
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
937 1
   *
938
   * Gets a specific external variable by name and optionally filters it
939
   *
940
   * @link  http://php.net/manual/en/function.filter-input.php
941
   *
942
   * @param int    $type          <p>
943
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
944
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
945
   *                              <b>INPUT_ENV</b>.
946
   *                              </p>
947
   * @param string $variable_name <p>
948
   *                              Name of a variable to get.
949 9
   *                              </p>
950
   * @param int    $filter        [optional] <p>
951 9
   *                              The ID of the filter to apply. The
952 9
   *                              manual page lists the available filters.
953 3
   *                              </p>
954
   * @param mixed  $options       [optional] <p>
955 3
   *                              Associative array of options or bitwise disjunction of flags. If filter
956 3
   *                              accepts options, flags can be provided in "flags" field of array.
957 3
   *                              </p>
958 9
   *
959 2
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
960 2
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
961 2
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
962 2
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
963 9
   * @since 5.2.0
964
   */
965 8 View Code Duplication
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // "$options" is only forwarded when the caller really passed a fourth
    // argument; otherwise the native function is called with three arguments.
    $optionsGiven = \func_num_args() >= 4;

    $raw = $optionsGiven
        ? \filter_input($type, $variable_name, $filter, $options)
        : \filter_input($type, $variable_name, $filter);

    return self::filter($raw);
  }
975
976 6
  /**
977
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
978 6
   *
979 3
   * Gets external variables and optionally filters them
980 3
   *
981 5
   * @link  http://php.net/manual/en/function.filter-input-array.php
982
   *
983
   * @param int   $type       <p>
984
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
985
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
986 8
   *                          <b>INPUT_ENV</b>.
987 8
   *                          </p>
988 6
   * @param mixed $definition [optional] <p>
989 8
   *                          An array defining the arguments. A valid key is a string
990 5
   *                          containing a variable name and a valid value is either a filter type, or an array
991 8
   *                          optionally specifying the filter, flags and options. If the value is an
992
   *                          array, valid keys are filter which specifies the
993
   *                          filter type,
994 2
   *                          flags which specifies any flags that apply to the
995 2
   *                          filter, and options which specifies any options that
996 8
   *                          apply to the filter. See the example below for a better understanding.
997
   *                          </p>
998 8
   *                          <p>
999 9
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1000
   *                          input array are filtered by this filter.
1001 9
   *                          </p>
1002
   * @param bool  $add_empty  [optional] <p>
1003
   *                          Add missing keys as <b>NULL</b> to the return value.
1004
   *                          </p>
1005
   *
1006
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1007
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1008
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1009
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1010
   * fails.
1011
   * @since 5.2.0
1012
   */
1013 View Code Duplication
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only "$type" supplied, the native function is called with that
    // single argument; otherwise definition and add-empty flag are forwarded.
    $onlyTypeGiven = \func_num_args() < 2;

    $values = $onlyTypeGiven
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1023
1024
  /**
1025
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1026
   *
1027
   * Filters a variable with a specified filter
1028
   *
1029
   * @link  http://php.net/manual/en/function.filter-var.php
1030
   *
1031
   * @param mixed $variable <p>
1032
   *                        Value to filter.
1033
   *                        </p>
1034
   * @param int   $filter   [optional] <p>
1035
   *                        The ID of the filter to apply. The
1036
   *                        manual page lists the available filters.
1037
   *                        </p>
1038
   * @param mixed $options  [optional] <p>
1039
   *                        Associative array of options or bitwise disjunction of flags. If filter
1040
   *                        accepts options, flags can be provided in "flags" field of array. For
1041
   *                        the "callback" filter, callable type should be passed. The
1042
   *                        callback must accept one argument, the value to be filtered, and return
1043
   *                        the value after filtering/sanitizing it.
1044
   *                        </p>
1045
   *                        <p>
1046
   *                        <code>
1047
   *                        // for filters that accept options, use this format
1048
   *                        $options = array(
1049
   *                        'options' => array(
1050
   *                        'default' => 3, // value to return if the filter fails
1051
   *                        // other options here
1052
   *                        'min_range' => 0
1053
   *                        ),
1054
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1055
   *                        );
1056
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1057
   *                        // for filter that only accept flags, you can pass them directly
1058
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1059
   *                        // for filter that only accept flags, you can also pass as an array
1060
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1061
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1062
   *                        // callback validate filter
1063
   *                        function foo($value)
1064
   *                        {
1065
   *                        // Expected format: Surname, GivenNames
1066
   *                        if (strpos($value, ", ") === false) return false;
1067
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1068
   *                        $empty = (empty($surname) || empty($givennames));
1069
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1070
   *                        if ($empty || $notstrings) {
1071
   *                        return false;
1072
   *                        } else {
1073
   *                        return $value;
1074
   *                        }
1075
   *                        }
1076
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1077
   *                        </code>
1078
   *                        </p>
1079
   *
1080
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1081
   * @since 5.2.0
1082
   */
1083 View Code Duplication
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // "$options" is only forwarded when the caller really passed a third argument.
    $optionsGiven = \func_num_args() >= 3;

    $filtered = $optionsGiven
        ? \filter_var($variable, $filter, $options)
        : \filter_var($variable, $filter);

    return self::filter($filtered);
  }
1093
1094
  /**
1095
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1096
   *
1097
   * Gets multiple variables and optionally filters them
1098
   *
1099
   * @link  http://php.net/manual/en/function.filter-var-array.php
1100
   *
1101
   * @param array $data       <p>
1102
   *                          An array with string keys containing the data to filter.
1103
   *                          </p>
1104
   * @param mixed $definition [optional] <p>
1105
   *                          An array defining the arguments. A valid key is a string
1106
   *                          containing a variable name and a valid value is either a
1107
   *                          filter type, or an
1108
   *                          array optionally specifying the filter, flags and options.
1109
   *                          If the value is an array, valid keys are filter
1110
   *                          which specifies the filter type,
1111
   *                          flags which specifies any flags that apply to the
1112
   *                          filter, and options which specifies any options that
1113
   *                          apply to the filter. See the example below for a better understanding.
1114
   *                          </p>
1115
   *                          <p>
1116
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1117
   *                          input array are filtered by this filter.
1118
   *                          </p>
1119
   * @param bool  $add_empty  [optional] <p>
1120
   *                          Add missing keys as <b>NULL</b> to the return value.
1121
   *                          </p>
1122
   *
1123
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1124
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1125
   * the variable is not set.
1126
   * @since 5.2.0
1127
   */
1128 View Code Duplication
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only "$data" supplied, the native function is called with that
    // single argument; otherwise definition and add-empty flag are forwarded.
    $onlyDataGiven = \func_num_args() < 2;

    $values = $onlyDataGiven
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1138
1139
  /**
1140
   * Check if the number of unicode characters is not more than the specified integer.
1141
   *
1142
   * @param string $str      The original string to be checked.
1143
   * @param int    $box_size The size in number of chars to be checked against string.
1144
   *
1145
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1146
   */
1147
  public static function fits_inside(string $str, int $box_size): bool
  {
    // length as reported by the class' own "strlen()"
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1151
1152 1
  /**
1153
   * Try to fix simple broken UTF-8 strings.
1154 1
   *
1155 1
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1156 1
   *
1157 1
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1158
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1159
   * See: http://en.wikipedia.org/wiki/Windows-1252
1160 1
   *
1161
   * @param string $str <p>The input string</p>
1162
   *
1163
   * @return string
1164
   */
1165 View Code Duplication
  public static function fix_simple_utf8(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // Search / replace lists are built on the first call and reused afterwards.
    static $search = null;
    static $replace = null;

    if (null === $search) {

      // load the "broken => fixed" map lazily from the data directory
      if (null === self::$BROKEN_UTF8_FIX) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $search = \array_keys(self::$BROKEN_UTF8_FIX);
      $replace = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($search, $replace, $str);
  }
1186
1187
  /**
1188
   * Fix a double (or multiple) encoded UTF8 string.
1189
   *
1190
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1191
   *
1192
   * @return string|string[] <p>Will return the fixed input-"array" or
1193
   *                         the fixed input-"string".</p>
1194
   */
1195
  public static function fix_utf8($str)
  {
    // Arrays: fix every element (recursively), keeping keys and order.
    if (\is_array($str) === true) {
      $fixed = [];
      foreach ($str as $key => $item) {
        $fixed[$key] = self::fix_utf8($item);
      }

      return $fixed;
    }

    // Strings: decode and re-encode until another round changes nothing.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;

      $decoded = self::utf8_decode($str, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1215
1216 1
  /**
1217
   * Get the text direction of a specific character.
1218 1
   *
1219
   * @param string $char
1220
   *
1221
   * @return string <p>'RTL' or 'LTR'</p>
1222
   */
1223
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: ask "IntlChar" when the support table marks it as usable.
    if (self::$SUPPORT['intlChar'] === true) {
      // direction codes from the "IntlChar"-Class
      $ltrCodes = [0, 11, 12, 20];
      $rtlCodes = [1, 13, 14, 15, 21];

      $intlDirection = \IntlChar::charDirection($char);

      if (\in_array($intlDirection, $ltrCodes, true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, $rtlCodes, true)) {
        return 'RTL';
      }

      // any other code falls through to the code point tables below
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of 0x5be..0x10b7f is reported as left-to-right.
    $isCandidate = (0x5be <= $c && 0x10b7f >= $c);
    if (!$isCandidate) {
      return 'LTR';
    }

    // Single right-to-left code points (compared strictly).
    $rtlCodePoints = [
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    ];

    if (\in_array($c, $rtlCodePoints, true)) {
      return 'RTL';
    }

    // Inclusive [first, last] right-to-left ranges.
    $rtlRanges = [
        [0x5d0, 0x5ea],
        [0x5f0, 0x5f4],
        [0x61e, 0x64a],
        [0x66d, 0x66f],
        [0x671, 0x6d5],
        [0x6e5, 0x6e6],
        [0x6ee, 0x6ef],
        [0x6fa, 0x70d],
        [0x712, 0x72f],
        [0x74d, 0x7a5],
        [0x7c0, 0x7ea],
        [0x7f4, 0x7f5],
        [0x800, 0x815],
        [0x830, 0x83e],
        [0x840, 0x858],
        [0xfb1f, 0xfb28],
        [0xfb2a, 0xfb36],
        [0xfb38, 0xfb3c],
        [0xfb40, 0xfb41],
        [0xfb43, 0xfb44],
        [0xfb46, 0xfbc1],
        [0xfbd3, 0xfd3d],
        [0xfd50, 0xfd8f],
        [0xfd92, 0xfdc7],
        [0xfdf0, 0xfdfc],
        [0xfe70, 0xfe74],
        [0xfe76, 0xfefc],
        [0x10800, 0x10805],
        [0x1080a, 0x10835],
        [0x10837, 0x10838],
        [0x1083f, 0x10855],
        [0x10857, 0x1085f],
        [0x10900, 0x1091b],
        [0x10920, 0x10939],
        [0x10a10, 0x10a13],
        [0x10a15, 0x10a17],
        [0x10a19, 0x10a33],
        [0x10a40, 0x10a47],
        [0x10a50, 0x10a58],
        [0x10a60, 0x10a7f],
        [0x10b00, 0x10b35],
        [0x10b40, 0x10b55],
        [0x10b58, 0x10b72],
        [0x10b78, 0x10b7f],
    ];

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1337 1
1338 1
  /**
1339 1
   * Get data from "/data/*.php".
1340 1
   *
1341 1
   * @param string $file
1342 1
   *
1343 1
   * @return bool|string|array|int <p>Will return false on error.</p>
1344 1
   */
1345 1
  private static function getData(string $file)
  {
    // "$file" is deliberately reused for the full path: the required file is
    // evaluated in this function's scope and would see any local variable.
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
1355 1
1356 1
  /**
1357 1
   * Check for php-support.
1358 1
   *
1359 1
   * @param string|null $key
1360
   *
1361 1
   * @return mixed <p>Return the full support-"array", if $key === null<br>
1362 1
   *               return bool-value, if $key is used and available<br>
1363
   *               otherwise return null</p>
1364
   */
1365 1
  public static function getSupportInfo(string $key = null)
  {
    // make sure the support table has been filled
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no key => the complete table
    if (null === $key) {
      return self::$SUPPORT;
    }

    // unknown (or null-valued) keys yield null
    return self::$SUPPORT[$key] ?? null;
  }
1381 1
1382 1
  /**
1383 1
   * alias for "UTF8::string_has_bom()"
1384 1
   *
1385 1
   * @see        UTF8::string_has_bom()
1386 1
   *
1387 1
   * @param string $str
1388 1
   *
1389 1
   * @return bool
1390 1
   *
1391 1
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
1392 1
   */
1393 1
  public static function hasBom(string $str): bool
  {
    // deprecated alias: simply delegates to "string_has_bom()"
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1397 1
1398 1
  /**
1399 1
   * Converts a hexadecimal-value into an UTF-8 character.
1400 1
   *
1401 1
   * @param string $hexdec <p>The hexadecimal value.</p>
1402 1
   *
1403 1
   * @return string|false <p>One single UTF-8 character.</p>
1404 1
   */
1405
  public static function hex_to_chr(string $hexdec)
  {
    // hexadecimal string => number, number => character
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1409
1410
  /**
1411 1
   * Converts hexadecimal U+xxxx code point representation to integer.
1412
   *
1413
   * INFO: opposite to UTF8::int_to_hex()
1414
   *
1415
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
1416
   *
1417
   * @return int|false <p>The code point, or false on failure.</p>
1418
   */
1419
  public static function hex_to_int(string $hexDec)
  {
    if ($hexDec === '') {
      return false;
    }

    // Accepted input: an optional "\u" or "U+" prefix followed by 4 to 6
    // hexadecimal digits (case-insensitive).
    //
    // Only real hex digits are allowed: the former "[a-z0-9]" class also matched
    // strings like "zzzz" or "12zz", which intval() then silently turned into
    // 0 / 18 instead of reporting a failure.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1431
1432
  /**
1433
   * alias for "UTF8::html_entity_decode()"
1434
   *
1435
   * @see UTF8::html_entity_decode()
1436
   *
1437
   * @param string $str
1438
   * @param int    $flags
1439
   * @param string $encoding
1440
   *
1441 19
   * @return string
1442
   */
1443 19
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // alias: all arguments are passed through unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1447 19
1448 2
  /**
1449
   * Converts a UTF-8 string to a series of HTML numbered entities.
1450
   *
1451 18
   * INFO: opposite to UTF8::html_decode()
1452 1
   *
1453
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
1454
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
1455 17
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
1456
   *
1457
   * @return string <p>HTML numbered entities.</p>
1458
   */
1459
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // first code point that gets converted: skip 0x00-0x7f to keep ASCII
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The conversion map is a flat list of [start, end, offset, mask]
      // groups, so its size must be a multiple of four. The former map had a
      // stray fifth element, which PHP 8 rejects with a ValueError.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // Fallback without mbstring: encode the string character by character.
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1494
1495 1
  /**
1496
   * UTF-8 version of html_entity_decode()
1497 1
   *
1498
   * The reason we are not using html_entity_decode() by itself is because
1499 1
   * while it is not technically correct to leave out the semicolon
1500 1
   * at the end of an entity most browsers will still interpret the entity
1501
   * correctly. html_entity_decode() does not convert entities without
1502
   * semicolons, so we are left with our own little solution here. Bummer.
1503 1
   *
1504 1
   * Convert all HTML entities to their applicable characters
1505
   *
1506
   * INFO: opposite to UTF8::html_encode()
1507 1
   *
1508
   * @link http://php.net/manual/en/function.html-entity-decode.php
1509
   *
1510
   * @param string $str      <p>
1511
   *                         The input string.
1512
   *                         </p>
1513
   * @param int    $flags    [optional] <p>
1514
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1515
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1516
   *                         <table>
1517
   *                         Available <i>flags</i> constants
1518
   *                         <tr valign="top">
1519
   *                         <td>Constant Name</td>
1520
   *                         <td>Description</td>
1521 1
   *                         </tr>
1522
   *                         <tr valign="top">
1523 1
   *                         <td><b>ENT_COMPAT</b></td>
1524
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1525
   *                         </tr>
1526
   *                         <tr valign="top">
1527
   *                         <td><b>ENT_QUOTES</b></td>
1528
   *                         <td>Will convert both double and single quotes.</td>
1529
   *                         </tr>
1530
   *                         <tr valign="top">
1531
   *                         <td><b>ENT_NOQUOTES</b></td>
1532
   *                         <td>Will leave both double and single quotes unconverted.</td>
1533
   *                         </tr>
1534
   *                         <tr valign="top">
1535
   *                         <td><b>ENT_HTML401</b></td>
1536
   *                         <td>
1537 2
   *                         Handle code as HTML 4.01.
1538
   *                         </td>
1539
   *                         </tr>
1540 2
   *                         <tr valign="top">
1541
   *                         <td><b>ENT_XML1</b></td>
1542 2
   *                         <td>
1543 1
   *                         Handle code as XML 1.
1544
   *                         </td>
1545
   *                         </tr>
1546 2
   *                         <tr valign="top">
1547 1
   *                         <td><b>ENT_XHTML</b></td>
1548 1
   *                         <td>
1549
   *                         Handle code as XHTML.
1550
   *                         </td>
1551 2
   *                         </tr>
1552
   *                         <tr valign="top">
1553 2
   *                         <td><b>ENT_HTML5</b></td>
1554 2
   *                         <td>
1555 1
   *                         Handle code as HTML 5.
1556 1
   *                         </td>
1557
   *                         </tr>
1558 2
   *                         </table>
1559 2
   *                         </p>
1560 2
   * @param string $encoding [optional] <p>Encoding to use.</p>
1561
   *
1562 2
   * @return string <p>The decoded string.</p>
1563
   */
1564
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Nothing to decode without "&", or without both "&#" and ";".
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      // This file needs PHP >= 7 (strict_types, scalar type declarations), so
      // the former "PHP < 5.4" fallback without ENT_HTML5 could never run.
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252') {
      // Fill the support table before reading it, as the other methods of
      // this class do; otherwise the "mbstring" entry may not exist yet.
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // Decode repeatedly until a pass changes nothing (handles nested
    // encodings such as "&amp;lt;").
    do {
      $str_compare = $str;

      // decimal entities first, but leave encoded quotes untouched
      $str = \preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() appends the missing ";" to numeric entities)
      $str = \html_entity_decode(
          \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
1636
1637
  /**
1638
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
1639
   *
1640
   * @link http://php.net/manual/en/function.htmlentities.php
1641
   *
1642
   * @param string $str           <p>
1643
   *                              The input string.
1644
   *                              </p>
1645 16
   * @param int    $flags         [optional] <p>
1646
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1647
   *                              invalid code unit sequences and the used document type. The default is
1648 16
   *                              ENT_COMPAT | ENT_HTML401.
1649
   *                              <table>
1650 16
   *                              Available <i>flags</i> constants
1651 5
   *                              <tr valign="top">
1652
   *                              <td>Constant Name</td>
1653
   *                              <td>Description</td>
1654 16
   *                              </tr>
1655 9
   *                              <tr valign="top">
1656
   *                              <td><b>ENT_COMPAT</b></td>
1657
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1658
   *                              </tr>
1659 15
   *                              <tr valign="top">
1660 15
   *                              <td><b>ENT_QUOTES</b></td>
1661
   *                              <td>Will convert both double and single quotes.</td>
1662 15
   *                              </tr>
1663 15
   *                              <tr valign="top">
1664 9
   *                              <td><b>ENT_NOQUOTES</b></td>
1665 9
   *                              <td>Will leave both double and single quotes unconverted.</td>
1666 15
   *                              </tr>
1667 8
   *                              <tr valign="top">
1668
   *                              <td><b>ENT_IGNORE</b></td>
1669
   *                              <td>
1670 15
   *                              Silently discard invalid code unit sequences instead of returning
1671 2
   *                              an empty string. Using this flag is discouraged as it
1672 2
   *                              may have security implications.
1673
   *                              </td>
1674 15
   *                              </tr>
1675 5
   *                              <tr valign="top">
1676
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1677
   *                              <td>
1678 5
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1679
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
1680 5
   *                              </td>
1681
   *                              </tr>
1682
   *                              <tr valign="top">
1683
   *                              <td><b>ENT_DISALLOWED</b></td>
1684 15
   *                              <td>
1685
   *                              Replace invalid code points for the given document type with a
1686 15
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
1687 2
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1688 15
   *                              instance, to ensure the well-formedness of XML documents with
1689
   *                              embedded external content.
1690
   *                              </td>
1691
   *                              </tr>
1692
   *                              <tr valign="top">
1693 15
   *                              <td><b>ENT_HTML401</b></td>
1694
   *                              <td>
1695 15
   *                              Handle code as HTML 4.01.
1696 15
   *                              </td>
1697
   *                              </tr>
1698 13
   *                              <tr valign="top">
1699
   *                              <td><b>ENT_XML1</b></td>
1700 13
   *                              <td>
1701 13
   *                              Handle code as XML 1.
1702
   *                              </td>
1703
   *                              </tr>
1704 6
   *                              <tr valign="top">
1705 15
   *                              <td><b>ENT_XHTML</b></td>
1706
   *                              <td>
1707 15
   *                              Handle code as XHTML.
1708
   *                              </td>
1709
   *                              </tr>
1710 15
   *                              <tr valign="top">
1711 15
   *                              <td><b>ENT_HTML5</b></td>
1712 15
   *                              <td>
1713
   *                              Handle code as HTML 5.
1714 15
   *                              </td>
1715
   *                              </tr>
1716 15
   *                              </table>
1717
   *                              </p>
1718 15
   * @param string $encoding      [optional] <p>
1719
   *                              Like <b>htmlspecialchars</b>,
1720
   *                              <b>htmlentities</b> takes an optional third argument
1721
   *                              <i>encoding</i> which defines encoding used in
1722
   *                              conversion.
1723
   *                              Although this argument is technically optional, you are highly
1724
   *                              encouraged to specify the correct value for your code.
1725
   *                              </p>
1726
   * @param bool   $double_encode [optional] <p>
1727
   *                              When <i>double_encode</i> is turned off PHP will not
1728
   *                              encode existing html entities. The default is to convert everything.
1729
   *                              </p>
1730
   *
1731
   *
1732
   * @return string the encoded string.
1733
   * </p>
1734
   * <p>
1735
   * If the input <i>string</i> contains an invalid code unit
1736
   * sequence within the given <i>encoding</i> an empty string
1737
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1738
   * <b>ENT_SUBSTITUTE</b> flags are set.
1739
   */
1740
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Native htmlentities() leaves backslashes alone; encode them as well,
    // right after the native pass.
    //
    // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $str = \str_replace('\\', '&#92;', \htmlentities($str, $flags, $encoding, $double_encode));

    // The extra multi-byte pass below is only done for UTF-8.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every distinct character of three or more bytes together with
    // its encoded form, then replace them all in one go.
    $needles = [];
    $entities = [];
    foreach (self::chr_size_list($str) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($str, $position);
      if (isset($entities[$char])) {
        continue;
      }

      $needles[$char] = $char;
      $entities[$char] = self::html_encode($char);
    }

    return \str_replace($needles, $entities, $str);
  }
1778
1779
  /**
1780
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
1781
   *
1782
   * INFO: Take a look at "UTF8::htmlentities()"
1783
   *
1784
   * @link http://php.net/manual/en/function.htmlspecialchars.php
1785
   *
1786
   * @param string $str           <p>
1787
   *                              The string being converted.
1788
   *                              </p>
1789
   * @param int    $flags         [optional] <p>
1790
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1791
   *                              invalid code unit sequences and the used document type. The default is
1792
   *                              ENT_COMPAT | ENT_HTML401.
1793
   *                              <table>
1794
   *                              Available <i>flags</i> constants
1795
   *                              <tr valign="top">
1796
   *                              <td>Constant Name</td>
1797
   *                              <td>Description</td>
1798
   *                              </tr>
1799
   *                              <tr valign="top">
1800
   *                              <td><b>ENT_COMPAT</b></td>
1801
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1802
   *                              </tr>
1803
   *                              <tr valign="top">
1804
   *                              <td><b>ENT_QUOTES</b></td>
1805
   *                              <td>Will convert both double and single quotes.</td>
1806
   *                              </tr>
1807
   *                              <tr valign="top">
1808
   *                              <td><b>ENT_NOQUOTES</b></td>
1809
   *                              <td>Will leave both double and single quotes unconverted.</td>
1810
   *                              </tr>
1811
   *                              <tr valign="top">
1812
   *                              <td><b>ENT_IGNORE</b></td>
1813
   *                              <td>
1814
   *                              Silently discard invalid code unit sequences instead of returning
1815
   *                              an empty string. Using this flag is discouraged as it
1816
   *                              may have security implications.
1817
   *                              </td>
1818
   *                              </tr>
1819
   *                              <tr valign="top">
1820
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1821
   *                              <td>
1822
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1823
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
1824 2
   *                              </td>
1825
   *                              </tr>
1826 2
   *                              <tr valign="top">
1827 1
   *                              <td><b>ENT_DISALLOWED</b></td>
1828 1
   *                              <td>
1829
   *                              Replace invalid code points for the given document type with a
1830 2
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
1831
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1832
   *                              instance, to ensure the well-formedness of XML documents with
1833
   *                              embedded external content.
1834
   *                              </td>
1835
   *                              </tr>
1836
   *                              <tr valign="top">
1837
   *                              <td><b>ENT_HTML401</b></td>
1838
   *                              <td>
1839
   *                              Handle code as HTML 4.01.
1840 2
   *                              </td>
1841
   *                              </tr>
1842 2
   *                              <tr valign="top">
1843 1
   *                              <td><b>ENT_XML1</b></td>
1844
   *                              <td>
1845
   *                              Handle code as XML 1.
1846 2
   *                              </td>
1847 2
   *                              </tr>
1848 2
   *                              <tr valign="top">
1849 2
   *                              <td><b>ENT_XHTML</b></td>
1850 2
   *                              <td>
1851 1
   *                              Handle code as XHTML.
1852
   *                              </td>
1853 1
   *                              </tr>
1854 1
   *                              <tr valign="top">
1855 1
   *                              <td><b>ENT_HTML5</b></td>
1856 1
   *                              <td>
1857 1
   *                              Handle code as HTML 5.
1858 2
   *                              </td>
1859
   *                              </tr>
1860 2
   *                              </table>
1861
   *                              </p>
1862
   * @param string $encoding      [optional] <p>
1863
   *                              Defines encoding used in conversion.
1864
   *                              </p>
1865
   *                              <p>
1866
   *                              For the purposes of this function, the encodings
1867
   *                              ISO-8859-1, ISO-8859-15,
1868
   *                              UTF-8, cp866,
1869
   *                              cp1251, cp1252, and
1870
   *                              KOI8-R are effectively equivalent, provided the
1871
   *                              <i>string</i> itself is valid for the encoding, as
1872
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1873
   *                              the same positions in all of these encodings.
1874
   *                              </p>
1875
   * @param bool   $double_encode [optional] <p>
1876
   *                              When <i>double_encode</i> is turned off PHP will not
1877
   *                              encode existing html entities, the default is to convert everything.
1878
   *                              </p>
1879
   *
1880
   * @return string The converted string.
1881
   * </p>
1882
   * <p>
1883
   * If the input <i>string</i> contains an invalid code unit
1884
   * sequence within the given <i>encoding</i> an empty string
1885
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1886
   * <b>ENT_SUBSTITUTE</b> flags are set.
1887
   */
1888
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // Anything other than the literal "UTF-8" goes through the encoding normalizer first.
    $encoding = ($encoding !== 'UTF-8')
        ? self::normalize_encoding($encoding, 'UTF-8')
        : $encoding;

    return \htmlspecialchars($str, $flags, $encoding, $double_encode);
  }
1896
1897
  /**
   * Reports whether the "iconv" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already returns a bool.
    return \extension_loaded('iconv');
  }
1906
1907
  /**
   * Alias of "UTF8::decimal_to_chr()": forwards the argument unchanged and returns its result.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>Passed straight through to "UTF8::decimal_to_chr()".</p>
   *
   * @return string
   */
  public static function int_to_chr($int): string
  {
    return self::decimal_to_chr($int);
  }
1920
1921
  /**
   * Formats an integer as a prefixed hexadecimal code point, e.g. 241 => "U+00f1".
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Text put in front of the hex digits, "U+" by default.</p>
   *
   * @return string <p>
   *                The prefix followed by the lowercase hex digits, left-padded with zeros
   *                to at least four digits.
   *                </p>
   */
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Pads only when there are fewer than four digits; longer values pass through untouched.
    $digits = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
1939
1940
  /**
   * Reports whether the "IntlChar" class exists.
   *
   * @return bool <p><strong>true</strong> if the class exists, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded(): bool
  {
    $className = 'IntlChar';

    return \class_exists($className);
  }
1949
1950
  /**
   * Reports whether the "intl" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded(): bool
  {
    $extension = 'intl';

    return \extension_loaded($extension);
  }
1959
1960
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_ascii()" and returns its result.
   *
   * @see        UTF8::is_ascii()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii(string $str): bool
  {
    return self::is_ascii($str);
  }
1975 1
1976 1
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_base64()" and returns its result.
   *
   * @see        UTF8::is_base64()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64(string $str): bool
  {
    return self::is_base64($str);
  }
1991
1992 1
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_binary()" and returns its result.
   *
   * @see        UTF8::is_binary()
   *
   * @param mixed $str <p>The value to check; passed through unchanged.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str): bool
  {
    return self::is_binary($str);
  }
2007
2008
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_bom()" and returns its result.
   *
   * @see        UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The string to compare against the known byte order marks.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom(string $utf8_chr): bool
  {
    return self::is_bom($utf8_chr);
  }
2023
2024
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_html()" and returns its result.
   *
   * @see        UTF8::is_html()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml(string $str): bool
  {
    return self::is_html($str);
  }
2039
2040
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_json()" and returns its result.
   *
   * @see        UTF8::is_json()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson(string $str): bool
  {
    return self::is_json($str);
  }
2055
2056
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf16()" and returns its result.
   *
   * @see        UTF8::is_utf16()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16(string $str)
  {
    return self::is_utf16($str);
  }
2071
2072
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf32()" and returns its result.
   *
   * @see        UTF8::is_utf32()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32(string $str)
  {
    return self::is_utf32($str);
  }
2087
2088
  /**
   * Deprecated camelCase alias: forwards both arguments to "UTF8::is_utf8()" and returns its result.
   *
   * @see        UTF8::is_utf8()
   *
   * @param string|string[] $str    <p>The string (or array of strings) to be checked.</p>
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false): bool
  {
    return self::is_utf8($str, $strict);
  }
2104
2105
  /**
   * Checks whether a string consists only of the accepted ASCII bytes.
   *
   * Accepted are the printable range 0x20-0x7E plus the single bytes
   * 0x09, 0x0A, 0x0D, 0x10 and 0x13; any other byte makes the check fail.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if the string is empty or holds only accepted bytes<br>
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii(string $str): bool
  {
    if ($str === '') {
      return true;
    }

    // NOTE(review): "\x10" and "\x13" look like decimal 10 / 13 (LF / CR) written
    // as hex by mistake; both LF and CR are already listed as \x0A and \x0D.
    // Kept as-is because callers may rely on it - confirm before changing.
    $hasForbiddenByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$hasForbiddenByte;
  }
2123
2124
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The string counts as base64 when it decodes in strict mode and
   * re-encoding the decoded bytes reproduces the input exactly.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded. An empty string is never considered base64.</p>
   */
  public static function is_base64(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $decoded = \base64_decode($str, true);

    // Compare against false explicitly: a truthiness test would wrongly reject
    // valid input whose decoded payload is the string "0" (e.g. "MA==").
    return $decoded !== false && \base64_encode($decoded) === $str;
  }
2137
2138
  /**
   * Heuristic check whether the input looks like binary data.
   *
   * The input is cast to string and treated as binary when it either
   * consists solely of the characters "0" and "1", or contains at least
   * one NUL byte.
   *
   * @param mixed $input <p>The value to check; it is cast to string first.</p>
   *
   * @return bool <p><strong>false</strong> for an empty string or when neither rule matches.</p>
   */
  public static function is_binary($input): bool
  {
    $input = (string)$input;
    if ($input === '') {
      return false;
    }

    // A string made up only of "0" and "1" is taken as a bit string.
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary. The former "more than 30% NUL
    // bytes" test was dropped: whenever it matched, this check matched too.
    return \strpos($input, "\x00") !== false;
  }
2167
2168
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and passes them to
   * "UTF8::is_binary()". A file that cannot be opened or read is
   * reported as not binary.
   *
   * @param string $file <p>Path of the file to inspect.</p>
   *
   * @return bool
   */
  public static function is_binary_file($file): bool
  {
    // fopen() reports failure by returning false (plus a warning), not by
    // throwing. The catch only matters when an error handler turns that
    // warning into an exception.
    try {
      $fp = \fopen($file, 'rb');
    } catch (\Exception $e) {
      $fp = false;
    }

    if (\is_resource($fp) === false) {
      return false;
    }

    try {
      $block = \fread($fp, 512);
    } catch (\Exception $e) {
      $block = false;
    } finally {
      // Always release the handle, even when reading failed.
      \fclose($fp);
    }

    // A failed read (false) becomes '', which is_binary() reports as not binary.
    return self::is_binary((string)$block);
  }
2187
2188
  /**
   * Checks if the given string is exactly equal to one of the known "Byte Order Mark" sequences.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str): bool
  {
    // The BOM sequences are the keys of the lookup table; the values (byte lengths) are not needed here.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2207
2208
  /**
   * Check if the string contains at least one html-tag (opening, closing or self-closing).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if a tag was found, <strong>false</strong> otherwise (also for an empty string).</p>
   */
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // One match is enough, so the captured text itself is not needed.
    $found = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    return $found === 1;
  }
2228
2229
  /**
   * Try to check if "$str" is a json-string.
   *
   * Only input that decodes to an object or an array, without a JSON error, is accepted.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Scalars and null are rejected, even when they decoded without error.
    if (\is_object($decoded) !== true && \is_array($decoded) !== true) {
      return false;
    }

    return \json_last_error() === JSON_ERROR_NONE;
  }
2252
2253
  /**
   * Check if the string is UTF-16.
   *
   * After removing a BOM, only input that "UTF8::is_binary()" accepts is examined.
   * A score is computed for each byte order and the higher one wins; equal
   * scores mean "not UTF-16".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16(string $str)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    // Score for one byte order: convert to UTF-8, round-trip the result, and -
    // if the round trip is stable - count the characters of the converted text
    // that are found (strictly) in the character statistics of the raw input.
    $scoreFor = static function (string $encoding) use ($str): int {
      $score = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        return $score;
      }

      $backToSource = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToSource, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        return $score;
      }

      $rawChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $rawChars, true) === true) {
          $score++;
        }
      }

      return $score;
    };

    $littleEndian = $scoreFor('UTF-16LE');
    $bigEndian = $scoreFor('UTF-16BE');

    if ($littleEndian === $bigEndian) {
      return false;
    }

    return ($littleEndian > $bigEndian) ? 1 : 2;
  }
2312 1
2313
  /**
   * Check if the string is UTF-32.
   *
   * After removing a BOM, only input that "UTF8::is_binary()" accepts is examined.
   * A score is computed for each byte order and the higher one wins; equal
   * scores mean "not UTF-32".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32(string $str)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    // Score for one byte order: convert to UTF-8, round-trip the result, and -
    // if the round trip is stable - count the characters of the converted text
    // that are found (strictly) in the character statistics of the raw input.
    $scoreFor = static function (string $encoding) use ($str): int {
      $score = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        return $score;
      }

      $backToSource = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToSource, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        return $score;
      }

      $rawChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $rawChars, true) === true) {
          $score++;
        }
      }

      return $score;
    };

    $littleEndian = $scoreFor('UTF-32LE');
    $bigEndian = $scoreFor('UTF-32BE');

    if ($littleEndian === $bigEndian) {
      return false;
    }

    return ($littleEndian > $bigEndian) ? 1 : 2;
  }
2372 1
2373 1
  /**
2374 1
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2375 1
   *
2376 1
   * @see    http://hsivonen.iki.fi/php-utf8/
2377
   *
2378
   * @param string|string[] $str    <p>The string to be checked.</p>
2379 1
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2380
   *
2381
   * @return bool
2382
   */
2383
  public static function is_utf8($str, bool $strict = false): bool
2384
  {
2385
    if (\is_array($str) === true) {
2386
      foreach ($str as $k => $v) {
2387
        if (false === self::is_utf8($v, $strict)) {
2388
          return false;
2389
        }
2390
      }
2391
2392
      return true;
2393 5
    }
2394
2395 5
    if (!isset($str[0])) {
2396
      return true;
2397 5
    }
2398
2399 5
    if ($strict === true) {
2400 5
      if (self::is_utf16($str) !== false) {
2401 5
        return false;
2402 5
      }
2403 5
2404 5
      if (self::is_utf32($str) !== false) {
2405 5
        return false;
2406 5
      }
2407 4
    }
2408 2
2409 2
    if (self::pcre_utf8_support() !== true) {
2410 5
2411 5
      // If even just the first character can be matched, when the /u
2412 5
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
2413
      // invalid, nothing at all will match, even if the string contains
2414 5
      // some valid sequences
2415 5
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
2416 5
    }
2417 5
2418 5
    $mState = 0; // cached expected number of octets after the current octet
2419 5
    // until the beginning of the next UTF8 character sequence
2420 5
    $mUcs4 = 0; // cached Unicode character
2421 5
    $mBytes = 1; // cached expected number of octets in the current sequence
2422 4
2423 3
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
2424 3
      self::checkForSupport();
2425 5
    }
2426 5
2427 5
    if (self::$ORD === null) {
2428
      self::$ORD = self::getData('ord');
2429 5
    }
2430 3
2431 2
    $len = self::strlen_in_byte($str);
2432
    /** @noinspection ForeachInvariantsInspection */
2433
    for ($i = 0; $i < $len; $i++) {
2434 3
      $in = self::$ORD[$str[$i]];
2435
      if ($mState === 0) {
2436
        // When mState is zero we expect either a US-ASCII character or a
2437 3
        // multi-octet sequence.
2438
        if (0 === (0x80 & $in)) {
2439 3
          // US-ASCII, pass straight through.
2440
          $mBytes = 1;
2441 View Code Duplication
        } elseif (0xC0 === (0xE0 & $in)) {
2442
          // First octet of 2 octet sequence.
2443
          $mUcs4 = $in;
2444
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
2445
          $mState = 1;
2446
          $mBytes = 2;
2447
        } elseif (0xE0 === (0xF0 & $in)) {
2448
          // First octet of 3 octet sequence.
2449
          $mUcs4 = $in;
2450
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
2451
          $mState = 2;
2452
          $mBytes = 3;
2453 3 View Code Duplication
        } elseif (0xF0 === (0xF8 & $in)) {
2454
          // First octet of 4 octet sequence.
2455 3
          $mUcs4 = $in;
2456
          $mUcs4 = ($mUcs4 & 0x07) << 18;
2457 3
          $mState = 3;
2458
          $mBytes = 4;
2459 3
        } elseif (0xF8 === (0xFC & $in)) {
2460 3
          /* First octet of 5 octet sequence.
2461 3
          *
2462 2
          * This is illegal because the encoded codepoint must be either
2463 2
          * (a) not the shortest form or
2464 2
          * (b) outside the Unicode range of 0-0x10FFFF.
2465 2
          * Rather than trying to resynchronize, we will carry on until the end
2466 2
          * of the sequence and let the later error handling code catch it.
2467 2
          */
2468 1
          $mUcs4 = $in;
2469 1
          $mUcs4 = ($mUcs4 & 0x03) << 24;
2470 2
          $mState = 4;
2471 2
          $mBytes = 5;
2472 2 View Code Duplication
        } elseif (0xFC === (0xFE & $in)) {
2473
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
2474 3
          $mUcs4 = $in;
2475 3
          $mUcs4 = ($mUcs4 & 1) << 30;
2476 3
          $mState = 5;
2477 2
          $mBytes = 6;
2478 2
        } else {
2479 2
          /* Current octet is neither in the US-ASCII range nor a legal first
2480 2
           * octet of a multi-octet sequence.
2481 2
           */
2482 2
          return false;
2483 1
        }
2484 1
      } else {
2485 2
        // When mState is non-zero, we expect a continuation of the multi-octet
2486 2
        // sequence
2487 2
        if (0x80 === (0xC0 & $in)) {
2488
          // Legal continuation.
2489 3
          $shift = ($mState - 1) * 6;
2490 1
          $tmp = $in;
2491 1
          $tmp = ($tmp & 0x0000003F) << $shift;
2492
          $mUcs4 |= $tmp;
2493
          /**
2494 1
           * End of the multi-octet sequence. mUcs4 now contains the final
2495
           * Unicode code point to be output
2496
           */
2497 3
          if (0 === --$mState) {
2498
            /*
2499 3
            * Check for illegal sequences and code points.
2500
            */
2501
            // From Unicode 3.1, non-shortest form is illegal
2502
            if (
2503
                (2 === $mBytes && $mUcs4 < 0x0080) ||
2504
                (3 === $mBytes && $mUcs4 < 0x0800) ||
2505
                (4 === $mBytes && $mUcs4 < 0x10000) ||
2506
                (4 < $mBytes) ||
2507
                // From Unicode 3.2, surrogate characters are illegal.
2508
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
2509
                // Code points outside the Unicode range are illegal.
2510
                ($mUcs4 > 0x10FFFF)
2511
            ) {
2512 60
              return false;
2513
            }
2514 60
            // initialize UTF8 cache
2515
            $mState = 0;
2516 60
            $mUcs4 = 0;
2517 3
            $mBytes = 1;
2518
          }
2519
        } else {
2520 58
          /**
2521 1
           *((0xC0 & (*in) != 0x80) && (mState != 0))
2522 1
           * Incomplete multi-octet sequence.
2523
           */
2524
          return false;
2525
        }
2526
      }
2527
    }
2528
2529
    return true;
2530 58
  }
2531
2532
  /**
   * Decodes a JSON string.
   *
   * The input is first passed through UTF8::filter() and the filtered value is
   * then handed to PHP's native \json_decode().
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The <i>json</i> string being decoded.</p>
   * @param bool   $assoc   [optional] <p>When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.</p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>Bitmask of JSON decode options, forwarded unchanged to
   *                        \json_decode().</p>
   *
   * @return mixed <p>The value encoded in <i>json</i> in the appropriate PHP type, exactly as returned by
   *               \json_decode() (<b>NULL</b> if the input cannot be decoded or is nested deeper than
   *               <i>$depth</i>).</p>
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    return \json_decode(self::filter($json), $assoc, $depth, $options);
  }
2576 11
2577 11
  /**
   * Returns the JSON representation of a value.
   *
   * The value is first passed through UTF8::filter() and the filtered value is
   * then handed to PHP's native \json_encode().
   *
   * NOTE: \json_encode() returns <b>FALSE</b> on failure. This method therefore must not declare
   * a "string" return type: with strict types enabled the failure case would raise a TypeError
   * instead of returning the documented <b>FALSE</b>.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>The <i>value</i> being encoded. Can be any type except a resource.</p>
   * @param int   $options [optional] <p>Bitmask of JSON_* encode options, forwarded unchanged to
   *                       \json_encode().</p>
   * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    $value = self::filter($value);

    return \json_encode($value, $options, $depth);
  }
2621
2622
  /**
   * Lowercases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character; UTF8::substr() may report "false", which is treated as "nothing left".
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    $head = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $loweredHead = self::strtolower($head, $encoding, $cleanUtf8);

    return $loweredHead . ($tail === false ? '' : $tail);
  }
2646
2647
  /**
   * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $word
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function lcword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
2662
2663
  /**
   * Lowercases the first character of every word in the string.
   *
   * The string is split via UTF8::str_to_words(), each non-empty part that is not listed in
   * <i>$exceptions</i> is passed through UTF8::lcfirst(), and the parts are glued back together
   * without any separator.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words (compared strictly).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
   *                             word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against the empty string explicitly: a loose "!$str" check would also discard the string "0".
    if ($str === '') {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // Skip empty parts only; the word "0" is falsy in PHP but must be kept in the result.
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
2712
2713
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * Without an explicit character list, every leading character of the Unicode categories
   * "Separator" (\pZ) and "Other" (\pC, e.g. control characters) is removed.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left. If the regular expression
   *                engine fails (e.g. on invalid UTF-8 input), the input is returned unchanged.</p>
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ($str === '') {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP, but it is a perfectly valid character list, so it must not select the default pattern.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = '/^[\pZ\pC]+/u';
    } else {
      $pattern = '/^' . self::rxClass($chars) . '+/u';
    }

    // preg_replace() returns null on error, which would violate the declared "string" return type.
    return \preg_replace($pattern, '', $str) ?? $str;
  }
2734
2735
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array argument is concatenated into one string before its code points are inspected.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point, or an empty string if the input contains no
   *                characters.</p>
   */
  public static function max($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \max() must not be called with an empty array (warning + false on PHP 7, ValueError on PHP 8).
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(\max($codepoints));
  }
2750
2751
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>The largest entry of UTF8::chr_size_list() for the string, or 0 if that list is empty.</p>
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
2768 7
2769 7
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to "UTF-8".
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded(): bool
  {
    if (!\extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
2784
2785
  /**
   * Checks whether the "mbstring.func_overload" ini setting has the MB_OVERLOAD_STRING bit set.
   *
   * @return bool <p><strong>false</strong> if the MB_OVERLOAD_STRING constant is not defined or the bit is not
   *              set, <strong>true</strong> otherwise.</p>
   */
  private static function mbstring_overloaded(): bool
  {
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    return (bool)(\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
2791
2792
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array argument is concatenated into one string before its code points are inspected.
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string <p>The character with the lowest code point, or an empty string if the input contains no
   *                characters.</p>
   */
  public static function min($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \min() must not be called with an empty array (warning + false on PHP 7, ValueError on PHP 8).
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(\min($codepoints));
  }
2807
2808
  /**
   * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param string $encoding
   * @param mixed  $fallback
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
2824
2825
  /**
   * Normalize the encoding-"name" input.
   *
   * Resolution order: empty input => fallback; "UTF-8" / "UTF8" => "UTF-8"; a name already seen => cached
   * result; a name contained in the known encodings list => returned as-is; otherwise the name is upper-cased,
   * reduced to letters / digits / whitespace and looked up in a table of common aliases. If no alias matches,
   * the upper-cased name is returned.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by
   *                default)</p>
   */
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    if (!$encoding) {
      return $fallback;
    }

    // fast path for the by far most common input
    if ($encoding === 'UTF-8' || $encoding === 'UTF8') {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    // lazy-load the list of known encoding names
    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    if (\in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $upperEncoding = \strtoupper($encoding);
    // alias lookup key: only letters, digits and whitespace survive (e.g. "ISO-8859-1" => "ISO88591")
    $aliasKey = \preg_replace('/[^a-zA-Z0-9\s]/', '', $upperEncoding);

    $aliases = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // no alias found => keep the upper-cased input
    $normalized = !empty($aliases[$aliasKey]) ? $aliases[$aliasKey] : $upperEncoding;

    // remember the result under the name the caller originally passed in
    $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

    return $normalized;
  }
2942 1
2943
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "utf8_msword" data table found in the string is replaced by the corresponding value of
   * that table. The table is loaded lazily and its keys / values are cached for later calls.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    static $searchCache = null;
    static $replaceCache = null;

    if ($str === '') {
      return '';
    }

    if ($searchCache === null) {

      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $searchCache = \array_keys(self::$UTF8_MSWORD);
      $replaceCache = \array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($searchCache, $replaceCache, $str);
  }
2972 80
2973
  /**
   * Normalize the whitespace.
   *
   * Every value of the internal whitespace table is replaced by a plain space. Unless
   * <i>$keepBidiUnicodeControls</i> is set, every value of the internal bidirectional-control table is
   * removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    if ($str === '') {
      return '';
    }

    // one cached search list per "keep non-breaking space" variant (0 or 1)
    $variant = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$variant])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$variant] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$variant], ' ', $str);
  }
3015 5
3016 5
  /**
   * Strip all whitespace characters. This includes tabs and newline
   * characters, as well as multibyte whitespace such as the thin space
   * and ideographic space.
   *
   * Implemented by removing every match of the POSIX class "[[:space:]]" in UTF-8 mode.
   *
   * @param string $str
   *
   * @return string
   */
  public static function strip_whitespace(string $str): string
  {
    return $str === ''
        ? ''
        : (string)\preg_replace('/[[:space:]]+/u', '', $str);
  }
3033 5
3034 5
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are memoized per "original input + normalized encoding". If IntlChar support is flagged as
   * available it is tried first; otherwise (or if it yields a falsy result) the code point is computed
   * from the first (up to four) bytes of the character.
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    // the cache key is built from the untouched input, not from the converted one
    $input = $chr;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // convert only if the normalized name is still something other than UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $input . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        return $CHAR_CACHE[$cacheKey] = $intlCode;
      }
    }

    // fallback: decode by hand; unpack() yields a 1-based list of byte values
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // lead byte of a 4-byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // lead byte of a 3-byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // lead byte of a 2-byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or not enough bytes for the announced sequence length)
      $code = $lead;
    }

    return $CHAR_CACHE[$cacheKey] = $code;
  }
3099 1
3100 1
  /**
   * Parses the string into an array (into the the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never (re-)places variables in the current scope;
   *          the result is always written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    // success means: the parser did not fail AND it produced at least one entry
    return $parsed !== false && !empty($result);
  }
3125
3126 37
  /**
   * Checks if \u modifier is available that enables Unicode support in PCRE.
   *
   * The check runs an empty pattern with the "u" modifier against an empty string; errors are silenced.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support(): bool
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @\preg_match('//u', '');

    return (bool)$probe;
  }
3136 2
3137
  /**
3138 37
   * Create an array containing a range of UTF-8 characters.
3139 37
   *
3140
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
3141 37
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
3142 1
   *
3143 1
   * @return array
3144
   */
3145 37
  public static function range($var1, $var2): array
3146 37
  {
3147
    if (!$var1 || !$var2) {
3148 37
      return [];
3149
    }
3150
3151 View Code Duplication
    if (\ctype_digit((string)$var1)) {
3152
      $start = (int)$var1;
3153
    } elseif (\ctype_xdigit($var1)) {
3154
      $start = (int)self::hex_to_int($var1);
3155
    } else {
3156
      $start = self::ord($var1);
3157
    }
3158
3159
    if (!$start) {
3160 12
      return [];
3161
    }
3162 12
3163 View Code Duplication
    if (\ctype_digit((string)$var2)) {
3164 12
      $end = (int)$var2;
3165 1
    } elseif (\ctype_xdigit($var2)) {
3166
      $end = (int)self::hex_to_int($var2);
3167
    } else {
3168 11
      $end = self::ord($var2);
3169
    }
3170
3171
    if (!$end) {
3172
      return [];
3173
    }
3174
3175
    return \array_map(
3176
        [
3177
            self::class,
3178
            'chr',
3179
        ],
3180
        \range($start, $end)
3181
    );
3182
  }
3183
3184
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // "%uXXXX" style escapes are rewritten into hexadecimal html entities, so the loop below can decode them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = \preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // one pass: to UTF-8 => decode html entities => url-decode => fix simple UTF-8 problems;
    // with $multi_decode the pass is repeated until the string no longer changes
    do {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $before !== $str);

    return $str;
  }
3232 23
3233 2
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::remove_bom()" for the given string.</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3248
3249
  /**
   * Strip every byte order mark listed in "self::$BOM" from the start of the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string without a leading BOM.</p>
   */
  public static function remove_bom(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // Only a BOM at byte offset 0 is of interest.
      if (self::strpos($str, $bomString, 0, '8BIT') !== 0) {
        continue;
      }

      // Cut the BOM off byte-wise; a failed cut leaves an empty string.
      $withoutBom = self::substr($str, $bomByteLength, null, '8BIT');
      $str = ($withoutBom === false) ? '' : (string)$withoutBom;
    }

    return $str;
  }
3274 10
3275
  /**
   * Collapse consecutive repetitions of a string into a single occurrence.
   *
   * e.g.: remove_duplicates('a   b', ' ') => 'a b'
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String (or list of strings) whose repetitions are collapsed.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $needles = [$what];
    } elseif (\is_array($what) === true) {
      $needles = $what;
    } else {
      // Neither a string nor an array: there is nothing to collapse.
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      $repeatedNeedleRx = '/(' . \preg_quote($needle, '/') . ')+/';
      $str = \preg_replace($repeatedNeedleRx, $needle, $str);
    }

    return $str;
  }
3298 1
3299 1
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url encoded ("%XX") form of the control characters.</p>
   * @param string $replacement <p>The string that is inserted for every removed match.</p>
   *
   * @return string <p>The string without the matched control characters.</p>
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // The hex digits of a percent-encoding are case-insensitive ("%0B" === "%0b"),
      // so the patterns must match both spellings.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until nothing matches anymore: removing one match can join the
    // surrounding characters into a new match (e.g. "%%000" -> "%0").
    do {
      $str = \preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3332 1
3333 1
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Also substitute invalid UTF-8 sequences (via mbstring).</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // An empty replacement is passed to mbstring as the keyword "none".
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // Swap the substitute character only for this one conversion and
      // restore the previous setting right afterwards.
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $converted = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);

      $str = \is_string($converted) ? $converted : '';
    }

    $diamonds = [
        "\xEF\xBF\xBD",
        '�',
    ];

    return str_replace($diamonds, $replacementChar, $str);
  }
3382
3383 2
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>
   *                      Optional characters to be stripped. Without a list (INF or an empty value)
   *                      trailing separator and "other" characters (\pZ, \pC) are stripped.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is falsy in PHP but is a valid character list
    // (e.g. rtrim('1000', '0')), so it must not select the default branch.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return \preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return \preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3404 2
3405
  /**
   * Build a regex fragment (character class or alternation) that matches the chunks of "$s".
   *
   * The result is cached per "$s" + "$class" combination.
   *
   * @param string $s     <p>The characters that should be matched.</p>
   * @param string $class <p>Initial content of the character class, e.g. "\pL".</p>
   *
   * @return string <p>The regex fragment (without delimiters).</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;
    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // $parts[0] collects the content of the character class,
    // every further entry becomes an alternative of its own.
    $parts = [$class];

    foreach (self::str_split($s) as $chunk) {
      if ('-' === $chunk) {
        // A hyphen goes to the front of the class content.
        $parts[0] = '-' . $parts[0];

        continue;
      }

      if (!isset($chunk[2])) {
        // At most two bytes long: quote it and add it to the class.
        $parts[0] .= \preg_quote($chunk, '/');

        continue;
      }

      if (1 === self::strlen($chunk)) {
        // A single (longer) character: add it to the class as it is.
        $parts[0] .= $chunk;

        continue;
      }

      // More than one character: keep it as a separate alternative.
      $parts[] = $chunk;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === \count($parts)) ? $parts[0] : '(?:' . \implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
3453 5
3454 40
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes every entry of "self::$SUPPORT" as "key - value" inside a <pre> element.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    echo '<pre>';

    foreach (self::$SUPPORT as $feature => $state) {
      $line = $feature . ' - ' . \print_r($state, true) . "\n<br>";

      echo $line;
    }

    echo '</pre>';
  }
3469 1
3470 1
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($char[0])) {
      return '';
    }

    // ASCII input is returned untouched when the caller asked for it.
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
3499 62
3500
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if (!isset($str[0])) {
      return [];
    }

    // init
    $ret = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // PCRE with UTF-8 support: every match of "." is one character.
      \preg_match_all('/./us', $str, $matches);
      $ret = $matches[0] ?? [];
      unset($matches);

    } else {

      // fallback: walk over the bytes and collect well-formed sequences

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $byteCount; $i++) {

        $lead = $str[$i];

        // 0xxxxxxx: a single-byte (ASCII) character
        if (($lead & "\x80") === "\x00") {
          $ret[] = $lead;

          continue;
        }

        // The lead byte announces the length of the sequence.
        if (($lead & "\xE0") === "\xC0") {
          $sequenceLength = 2;
        } elseif (($lead & "\xF0") === "\xE0") {
          $sequenceLength = 3;
        } elseif (($lead & "\xF8") === "\xF0") {
          $sequenceLength = 4;
        } else {
          // not a valid lead byte: skip it
          continue;
        }

        // truncated sequence at the end of the string: skip the lead byte
        if (!isset($str[$i + $sequenceLength - 1])) {
          continue;
        }

        // Every following byte must be a continuation byte (10xxxxxx),
        // otherwise only the lead byte is skipped.
        $char = $lead;
        for ($k = 1; $k < $sequenceLength; $k++) {
          if (($str[$i + $k] & "\xC0") !== "\x80") {
            continue 2;
          }

          $char .= $str[$i + $k];
        }

        $ret[] = $char;
        $i += $sequenceLength - 1;
      }
    }

    if ($length > 1) {
      // Glue "$length" consecutive characters together per element.
      return \array_map(
          static function ($chunk) {
            return \implode('', $chunk);
          },
          \array_chunk($ret, $length)
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return [];
    }

    return $ret;
  }
3617
3618
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" and finally an "iconv()" round-trip per known encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // Evaluate each detector only once and reuse its result.
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // Iterate only over a real list of encodings.
    $encodings = \is_array(self::$ENCODINGS) ? self::$ENCODINGS : [];

    $md5 = \md5($str);
    foreach ($encodings as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $converted = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // "iconv()" returns false on failure; passing that to "md5()" would be
      // a TypeError under strict_types, so a failed conversion is skipped.
      if ($converted === false) {
        continue;
      }

      // An unchanged round-trip means the string is valid in this encoding.
      if (\md5($converted) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
3722
3723
  /**
   * Check if the string ends with the given substring.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Compare the last bytes of the haystack (as many as the needle has) with the needle.
    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
3743
3744
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // The tail is cut by the byte length of the needle and then compared case-insensitively.
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
3764
3765 2
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The needle(s). Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty needle never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement(s), handed over to "preg_replace()" as they are.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Turn every needle into a quoted, case-insensitive UTF-8 pattern.
    $patterns = \array_map(
        static function ($needle) {
          $needle .= '';

          if ('' === $needle) {
            // a pattern that can never match
            return '/^(?<=.)$/';
          }

          return '/' . \preg_quote($needle, '/') . '/ui';
        },
        (array)$search
    );

    $subject = \preg_replace($patterns, $replace, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
3808
3809
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // The needle has to be found right at the beginning.
    return self::stripos($haystack, $needle) === 0;
  }
3829
3830 2
  /**
   * Limit the number of characters in a string without cutting into the last word.
   *
   * A string that is not longer than "$length" is returned unchanged; otherwise the
   * string is cut at the last space within the limit and "$strAddOn" is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximum number of characters.</p>
   * @param string $strAddOn <p>The string that is appended to a shortened result.</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if (self::strlen($str) <= $length) {
      return $str;
    }

    $cutOff = $length - 1;

    // The limit ends exactly on a space: cut right before it.
    if (self::substr($str, $cutOff, 1) === ' ') {
      return (string)self::substr($str, 0, $cutOff) . $strAddOn;
    }

    // Otherwise drop the (possibly incomplete) last word inside the limit.
    $truncated = (string)self::substr($str, 0, $length);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ($withoutLastWord === '') {
      // There was only one word: fall back to a hard cut.
      return (string)self::substr($truncated, 0, $cutOff) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
3866 7
3867 7
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when "$pad_length" is not positive, is shorter
   * than the string, or when "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT): string
  {
    $str_length = self::strlen($str);

    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);
    if (!$ps_length) {
      // Nothing to pad with; this also avoids a division by zero below.
      return $str;
    }

    $diff = ($pad_length - $str_length);

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // For an odd difference the left side gets the smaller half.
        // Both halves are cast to int, as "$diff / 2" is a float for odd values.
        $leftLength = (int)\floor($diff / 2);
        $rightLength = $diff - $leftLength;

        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, $leftLength);
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, $rightLength);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
3921
3922 2
  /**
   * Repeat a string.
   *
   * The input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    $filtered = self::filter($str);

    return \str_repeat($filtered, $multiplier);
  }
3946 2
3947 1
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // Plain delegation: the native function works byte-wise.
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
3980
3981
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement for the first occurrence.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject, unchanged if the term was not found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $firstPosition = self::strpos($subject, $search);

    if ($firstPosition === false) {
      return $subject;
    }

    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $firstPosition, $searchLength);
  }
4000
4001
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split via "UTF8::split()" and the pieces are re-joined in random order.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle(string $str): string
  {
    $characters = self::split($str);
    \shuffle($characters);

    $shuffled = \implode('', $characters);

    return $shuffled;
  }
4016
4017
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice removes repeated values.
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    $desc ? \arsort($codePoints) : \asort($codePoints);

    return self::string($codePoints);
  }
4042 1
4043
  /**
   * Split a string into an array.
   *
   * The string is split along the matches of "self::GRAPHEME_CLUSTER_RX"; an array
   * input is processed element by element (recursively, keys are kept).
   *
   * @param string|string[] $str <p>The input string, or an array of input strings.</p>
   * @param int             $len <p>Number of matches per array element.</p>
   *
   * @return array
   */
  public static function str_split($str, int $len = 1): array
  {
    if (\is_array($str) === true) {
      return \array_map(
          static function ($element) use ($len) {
            return self::str_split($element, $len);
          },
          $str
      );
    }

    if (!isset($str[0])) {
      return [];
    }

    if ($len < 1) {
      // Invalid lengths are left to the native function.
      return \str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // Glue "$len" consecutive matches together per element.
    return \array_map(
        static function ($chunk) {
          return \implode('', $chunk);
        },
        \array_chunk($clusters, $len)
    );
  }
4091
4092 2
  /**
   * Check if the string starts with the given substring.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // The needle has to be found right at byte offset 0.
    $position = \strpos($haystack, $needle);

    return $position === 0;
  }
4112 2
4113 2
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big number and written with the digits
   * "0" and "1", without leading zeros ("0" for an empty string).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The binary digits, e.g. "110000" for "0".</p>
   */
  public static function str_to_binary(string $str): string
  {
    $unpacked = \unpack('H*', $str);
    $hex = \is_array($unpacked) ? (string)$unpacked[1] : '';

    if ($hex === '') {
      return '0';
    }

    // Convert digit by digit: "base_convert()" on the whole hex string goes
    // through a float and loses precision for everything beyond a few bytes.
    $bits = '';
    foreach (\str_split($hex) as $hexDigit) {
      $bits .= \sprintf('%04b', \hexdec($hexDigit));
    }

    // Leading zeros are dropped, like "base_convert()" does.
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4126
4127
  /**
4128
   * Convert a string into an array of words.
4129
   *
4130
   * @param string   $str
4131
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
4132
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
4133
   * @param null|int $removeShortValues
4134
   *
4135
   * @return array
4136
   */
4137
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // Empty input: either nothing at all or a single empty part.
    if (!isset($str[0])) {
      if ($removeEmptyValues === true) {
        return [];
      }

      return [''];
    }

    // Character class describing what belongs to a "word" (letters plus $charList).
    $charList = self::rxClass($charList, '\pL');

    // The capturing group makes preg_split() keep the words themselves, so the
    // result alternates between non-word parts and words.
    $return = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // preg_split() returns false on failure (e.g. invalid UTF-8 with the "u"
    // modifier); returning that would violate the declared array return type.
    if ($return === false) {
      return [];
    }

    // No filtering requested: hand back the raw split result.
    if (
        $removeShortValues === null
        &&
        $removeEmptyValues === false
    ) {
      return $return;
    }

    $tmpReturn = [];
    foreach ($return as $returnValue) {
      // Drop parts that are not longer than the requested minimum length.
      if (
          $removeShortValues !== null
          &&
          self::strlen($returnValue) <= $removeShortValues
      ) {
        continue;
      }

      // Drop parts that consist only of whitespace (or nothing).
      if (
          $removeEmptyValues === true
          &&
          \trim($returnValue) === ''
      ) {
        continue;
      }

      $tmpReturn[] = $returnValue;
    }

    return $tmpReturn;
  }
4182
4183
  /**
4184
   * alias for "UTF8::to_ascii()"
4185
   *
4186
   * @see UTF8::to_ascii()
4187
   *
4188
   * @param string $str
4189 1
   * @param string $unknown
4190
   * @param bool   $strict
4191 1
   *
4192
   * @return string
4193 1
   */
4194 1
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    // Pure alias: every argument is forwarded unchanged to to_ascii().
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4198
4199
  /**
4200
   * Counts number of words in the UTF-8 string.
4201
   *
4202
   * @param string $str      <p>The input string.</p>
4203
   * @param int    $format   [optional] <p>
4204
   *                         <strong>0</strong> => return a number of words (default)<br>
4205
   *                         <strong>1</strong> => return an array of words<br>
4206
   *                         <strong>2</strong> => return an array of words with word-offset as key
4207 1
   *                         </p>
4208
   * @param string $charlist [optional] <p>Additional characters that are treated as part of a word and do not start a new word.</p>
4209 1
   *
4210
   * @return array|int <p>The number of words in the string</p>
4211 1
   */
4212
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    // str_to_words() returns alternating parts (non-word, word, non-word, ...),
    // so the words are found at the odd indexes.
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    // Format 1: plain list of words.
    if ($format === 1) {
      $words = [];
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    // Format 2: words keyed by their character offset in the input.
    if ($format === 2) {
      $words = [];
      $position = self::strlen($parts[0]);
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $words[$position] = $parts[$idx];
        // Advance past the word and the non-word part that follows it.
        $position += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $words;
    }

    // Any other format: just the number of words.
    return ($partCount - 1) / 2;
  }
4242
4243
  /**
4244
   * Case-insensitive string comparison.
4245
   *
4246
   * INFO: Case-insensitive version of UTF8::strcmp()
4247
   *
4248
   * @param string $str1
4249
   * @param string $str2
4250 23
   *
4251
   * @return int <p>
4252 23
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
4253
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
4254 23
   *             <strong>0</strong> if they are equal.
4255 1
   *             </p>
4256
   */
4257
  public static function strcasecmp(string $str1, string $str2): int
  {
    // Case-fold both operands, then defer to the case-sensitive comparison.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4261
4262
  /**
4263
   * alias for "UTF8::strstr()"
4264
   *
4265 22
   * @see UTF8::strstr()
4266 22
   *
4267
   * @param string $haystack
4268 22
   * @param string $needle
4269 22
   * @param bool   $before_needle
4270
   * @param string $encoding
4271
   * @param bool   $cleanUtf8
4272 1
   *
4273 1
   * @return string|false
4274
   */
4275
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Pure alias: every argument is forwarded unchanged to strstr().
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4279 1
4280 1
  /**
4281
   * Case-sensitive string comparison.
4282 1
   *
4283
   * @param string $str1
4284 1
   * @param string $str2
4285
   *
4286
   * @return int  <p>
4287
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
4288
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
4289
   *              <strong>0</strong> if they are equal.
4290
   *              </p>
4291
   */
4292
  public static function strcmp(string $str1, string $str2): int
  {
    // Byte-identical strings are equal; no normalization needed.
    if ($str1 === $str2) {
      return 0;
    }

    // Otherwise compare the NFD-normalized forms byte by byte.
    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return \strcmp($normalized1, $normalized2);
  }
4300 2
4301 1
  /**
4302
   * Find length of initial segment not matching mask.
4303
   *
4304 2
   * @param string $str
4305 2
   * @param string $charList
4306
   * @param int    $offset
4307
   * @param int    $length
4308 2
   *
4309
   * @return int|null
4310
   */
4311
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    // Without a mask there is nothing to measure against.
    if ($charList === '') {
      return null;
    }

    // Restrict the subject to the requested window first.
    if ($offset !== 0 || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      if ($slice === false) {
        return null;
      }
      $str = (string)$slice;
    }

    if ($str === '') {
      return null;
    }

    // Lazily capture everything in front of the first character from the mask.
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (\preg_match($pattern, $str, $matches)) {
      return self::strlen($matches[1]);
    }

    // No mask character found: the whole string is the initial segment.
    return self::strlen($str);
  }
4335
4336
  /**
4337 10
   * alias for "UTF8::stristr()"
4338
   *
4339 10
   * @see UTF8::stristr()
4340
   *
4341 10
   * @param string $haystack
4342 1
   * @param string $needle
4343 1
   * @param bool   $before_needle
4344
   * @param string $encoding
4345 10
   * @param bool   $cleanUtf8
4346 2
   *
4347
   * @return string|false
4348
   */
4349
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Pure alias: every argument is forwarded unchanged to stristr().
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4353 10
4354
  /**
4355 10
   * Create a UTF-8 string from code points.
4356
   *
4357
   * INFO: opposite to UTF8::codepoints()
4358
   *
4359 10
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
4360
   *
4361 10
   * @return string <p>UTF-8 encoded string.</p>
4362 10
   */
4363
  public static function string(array $array): string
  {
    // Turn every code point into its character via self::chr(), keeping the order.
    $chars = \array_map([self::class, 'chr'], $array);

    // Glue the characters together into one string.
    return \implode('', $chars);
  }
4376
4377 1
  /**
4378 1
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
4379 1
   *
4380 1
   * @param string $str <p>The input string.</p>
4381
   *
4382
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
4383 1
   */
4384 1
  public static function string_has_bom(string $str): bool
  {
    // The BOM byte sequences are the keys of self::$BOM; the byte lengths
    // stored as values are not needed for this check.
    foreach (\array_keys(self::$BOM) as $bomString) {
      if (\strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
4394
4395
  /**
4396
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
4397
   *
4398
   * @link http://php.net/manual/en/function.strip-tags.php
4399
   *
4400 7
   * @param string $str             <p>
4401
   *                                The input string.
4402 7
   *                                </p>
4403
   * @param string $allowable_tags  [optional] <p>
4404
   *                                You can use the optional second parameter to specify tags which should
4405
   *                                not be stripped.
4406
   *                                </p>
4407
   *                                <p>
4408
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
4409
   *                                can not be changed with allowable_tags.
4410
   *                                </p>
4411
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
4412
   *
4413
   * @return string <p>The stripped string.</p>
4414
   */
4415
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    // Nothing to strip from an empty string.
    if ($str === '') {
      return '';
    }

    // Optionally remove invalid UTF-8 before handing the text to the native function.
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
4427 1
4428 1
  /**
4429 1
   * Finds position of first occurrence of a string within another, case insensitive.
4430
   *
4431 1
   * @link http://php.net/manual/en/function.mb-stripos.php
4432
   *
4433 1
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
4434 1
   * @param string $needle    <p>The string to find in haystack.</p>
4435 1
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
4436 1
   * @param string $encoding  [optional] <p>Set the charset.</p>
4437 1
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4438 1
   *
4439
   * @return int|false <p>
4440 1
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
4441
   *                   or false if needle is not found.
4442 1
   *                   </p>
4443
   */
4444
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // An empty haystack or needle can never produce a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // The native functions report wrong positions if invalid characters
      // occur in $haystack in front of $needle, so clean both strings first.
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // grapheme_stripos() has no encoding parameter, so it is only used for UTF-8.
    $useIntl = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // Everything else goes to the "mb_" function (possibly provided by a polyfill).
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4476
4477
  /**
4478
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
4479
   *
4480
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
4481 1
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
4482
   * @param bool   $before_needle  [optional] <p>
4483 1
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
4484
   *                               haystack before the first occurrence of the needle (excluding the needle).
4485
   *                               </p>
4486
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
4487
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
4488
   *
4489
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
4490
   */
4491
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // An empty haystack or needle can never produce a match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // The native functions report wrong positions if invalid characters
      // occur in $haystack in front of $needle, so clean both strings first.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may have emptied the needle. Compare against '' explicitly:
    // a loose "!$needle" would also treat the valid needle "0" as empty and
    // wrongly return the whole haystack.
    if ($needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Only mbstring can deal with encodings other than UTF-8.
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // Pure ASCII input can be handled by the byte-based native function.
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // Regex fallback: lazily capture everything in front of the first
    // case-insensitive occurrence of the needle.
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // Return the rest of the haystack, starting at the needle.
    return self::substr($haystack, self::strlen($match[1]));
  }
4552
4553
  /**
4554
   * Get the string length, not the byte-length!
4555
   *
4556
   * @link     http://php.net/manual/en/function.mb-strlen.php
4557 1
   *
4558
   * @param string $str       <p>The string being checked for length.</p>
4559 1
   * @param string $encoding  [optional] <p>Set the charset.</p>
4560
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4561
   *
4562
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
4563
   *             character counted as +1)</p>
4564
   */
4565
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // Nothing to count in an empty string.
    if (!isset($str[0])) {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // These encodings are measured in bytes. The comparison is deliberately
    // loose so that it matches exactly like the "switch" it replaces.
    if (\in_array($encoding, ['ASCII', 'CP850', '8BIT'], false)) {
      if (
          $encoding === 'CP850'
          &&
          self::$SUPPORT['mbstring_func_overload'] === false
      ) {
        return \strlen($str);
      }

      return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" return a wrong length
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    // Non-UTF-8 encodings without mbstring: warn if iconv is missing as well,
    // otherwise let iconv do the counting.
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$SUPPORT['iconv'] === true) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // grapheme_strlen() has no encoding parameter, so it is only used for UTF-8.
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strlen($str);
    }

    // Pure ASCII: one byte per character.
    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    \preg_match_all('/./us', $str, $chars);
    $length = \count($chars[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
4659
4660 10
  /**
4661 3
   * Get string length in byte.
4662
   *
4663
   * @param string $str
4664 9
   *
4665
   * @return int
4666
   */
4667 1
  public static function strlen_in_byte(string $str): int
  {
    // Make sure the feature flags are populated before reading them, exactly
    // like the other methods of this class that consult self::$SUPPORT.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // When the "mbstring_func_overload" flag is set, ask mb_strlen() for the
    // length in the "8BIT" encoding instead of calling plain strlen().
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = \strlen($str);
    }

    return $len;
  }
4677 9
4678 1
  /**
4679
   * Case insensitive string comparisons using a "natural order" algorithm.
4680
   *
4681 9
   * INFO: natural order version of UTF8::strcasecmp()
4682
   *
4683
   * @param string $str1 <p>The first string.</p>
4684
   * @param string $str2 <p>The second string.</p>
4685
   *
4686
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
4687 9
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
4688 9
   *             <strong>0</strong> if they are equal
4689 9
   */
4690 9
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    // Case-fold both operands, then defer to the natural-order comparison.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4694
4695
  /**
4696 9
   * String comparisons using a "natural order" algorithm
4697
   *
4698
   * INFO: natural order version of UTF8::strcmp()
4699
   *
4700
   * @link  http://php.net/manual/en/function.strnatcmp.php
4701
   *
4702
   * @param string $str1 <p>The first string.</p>
4703
   * @param string $str2 <p>The second string.</p>
4704
   *
4705
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
4706
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
4707
   *             <strong>0</strong> if they are equal
4708
   */
4709
  public static function strnatcmp(string $str1, string $str2): int
  {
    // Byte-identical strings are equal; no folding needed.
    if ($str1 === $str2) {
      return 0;
    }

    // Otherwise compare the strtonatfold() forms with the native natural-order comparison.
    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4713 17
4714
  /**
4715 17
   * Case-insensitive string comparison of the first n characters.
4716 17
   *
4717 17
   * @link  http://php.net/manual/en/function.strncasecmp.php
4718
   *
4719 17
   * @param string $str1 <p>The first string.</p>
4720 6
   * @param string $str2 <p>The second string.</p>
4721
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
4722
   *
4723 11
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
4724 1
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
4725 1
   *             <strong>0</strong> if they are equal
4726
   */
4727 11
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    // Case-fold both operands, then compare their first $len characters.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4731 1
4732 1
  /**
4733
   * String comparison of the first n characters.
4734 11
   *
4735
   * @link  http://php.net/manual/en/function.strncmp.php
4736
   *
4737
   * @param string $str1 <p>The first string.</p>
4738 11
   * @param string $str2 <p>The second string.</p>
4739
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
4740
   *
4741
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
4742
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
4743
   *             <strong>0</strong> if they are equal
4744 11
   */
4745 1
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // Cut both strings down to their first $len characters and compare those.
    // The (string) casts turn a failed substr() (false) into an empty string.
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
4752
4753
  /**
4754
   * Search a string for any of a set of characters.
4755
   *
4756
   * @link  http://php.net/manual/en/function.strpbrk.php
4757
   *
4758
   * @param string $haystack  <p>The string where char_list is looked for.</p>
4759
   * @param string $char_list <p>This parameter is case sensitive.</p>
4760
   *
4761
   * @return string|false <p>String starting from the character found, or false if it is not found.</p>
4762
   */
4763
  public static function strpbrk(string $haystack, string $char_list)
  {
    // An empty haystack or character list can never produce a match.
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // Find the first character of the haystack that belongs to the list.
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // Locate that character in the haystack and return everything from there on.
    $startPos = \strpos($haystack, $m[0]);

    return \substr($haystack, $startPos);
  }
4775
4776
  /**
4777
   * Find position of first occurrence of string in a string.
4778
   *
4779
   * @link http://php.net/manual/en/function.mb-strpos.php
4780
   *
4781
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
4782
   * @param string $needle    <p>The string to find in haystack.</p>
4783
   * @param int    $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
4784
   * @param string $encoding  [optional] <p>Set the charset.</p>
4785
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4786
   *
4787
   * @return int|false <p>
4788
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
4789
   *                   If needle is not found it returns false.
4790
   *                   </p>
4791
   */
4792
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // An empty haystack or needle can never produce a match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // NOTE: the former "integer needle" conversion was removed here; with the
    // "string $needle" type declaration "(int)$needle === $needle" can never be true.

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // For CP850 the native byte-based function is used directly
    // (unless the "mbstring_func_overload" flag is set).
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // Pure ASCII input can be handled by the byte-based native function.
    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // Resolve a negative offset ("count from the end") to an absolute character
    // position first. Otherwise the position computed below would be relative
    // to the sliced tail instead of the start of the original haystack.
    if ($offset < 0) {
      $offset = \max(0, self::strlen($haystack) + $offset);
    }

    // Skip the first $offset characters of the haystack.
    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // Byte position of the needle inside the remaining haystack.
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // Convert the byte position into a character position and add the skipped
    // characters. (The former "mb_strpos" fallback after this point was
    // unreachable, because this sum is always an integer.)
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
4915
4916
  /**
4917
   * Finds the last occurrence of a character in a string within another.
4918
   *
4919 1
   * @link http://php.net/manual/en/function.mb-strrchr.php
4920
   *
4921 1
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
4922
   * @param string $needle        <p>The string to find in haystack</p>
4923
   * @param bool   $before_needle [optional] <p>
4924
   *                              Determines which portion of haystack
4925
   *                              this function returns.
4926
   *                              If set to true, it returns all of haystack
4927
   *                              from the beginning to the last occurrence of needle.
4928
   *                              If set to false, it returns all of haystack
4929
   *                              from the last occurrence of needle to the end,
4930
   *                              </p>
4931
   * @param string $encoding      [optional] <p>
4932
   *                              Character encoding name to use.
4933
   *                              If it is omitted, internal character encoding is used.
4934
   *                              </p>
4935
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
4936
   *
4937
   * @return string|false The portion of haystack or false if needle is not found.
4938 2
   */
4939 View Code Duplication
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Only non-default encodings need to be normalized.
    $encoding = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // The native functions report wrong positions if invalid characters
      // occur in $haystack in front of $needle, so clean both strings first.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The actual search is done by the "mb_" function (possibly provided by a polyfill).
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
4955
4956 1
  /**
4957
   * Reverses characters order in the string.
4958 1
   *
4959
   * @param string $str The input string
4960
   *
4961
   * @return string The string with characters in the reverse sequence
4962
   */
4963
  public static function strrev(string $str): string
  {
    // An empty string is its own reverse.
    if ($str === '') {
      return '';
    }

    // Break the string into parts with self::split(), flip their order and join them again.
    $chars = self::split($str);
    $reversed = \array_reverse($chars);

    return \implode('', $reversed);
  }
4971
4972
  /**
   * Finds the last occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               <b>true</b>: return the part of haystack from its beginning
   *                               up to the last occurrence of needle.<br>
   *                               <b>false</b> (default): return the part of haystack from the
   *                               last occurrence of needle to the end.
   *                               </p>
   * @param string $encoding       [optional] <p>Character encoding name to use, default is "UTF-8".</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Only non-default charsets need to be mapped to their canonical name.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // Invalid byte sequences in front of the needle would make the
      // multibyte search report a wrong position, so sanitize both inputs.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrichr($haystack, $needle, $before_needle, $charset);
  }
5010
5011
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Backends are tried in this order: mbstring, intl (UTF-8 only) and finally a
   * pure PHP fallback that upper-cases both strings and delegates to UTF8::strrpos().
   *
   * @param string $haystack  <p>The string to look in.</p>
   * @param string $needle    <p>The string to look for.</p>
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found, it returns false.
   *                   </p>
   */
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // INFO: $needle is declared as "string", so the former code point check
    //       "(int)$needle === $needle" could never be true and was removed.

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php
    //
    // NOTE(review): upper-casing may change the number of characters for some
    //               inputs (e.g. "ß"), which could shift the reported position - verify.
    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
5073 15
5074 1
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Backends are tried in this order: mbstring, intl (UTF-8 only) and finally a
   * pure PHP fallback based on the byte-wise "\strrpos()".
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <b>null</b> (the default) is treated as <b>0</b>.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found, it returns false.</p>
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // A non-negative integer needle is interpreted as a code point.
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    // The default offset is "null", but "\mb_strrpos()" and "\grapheme_strrpos()"
    // declare a non-nullable int offset: under "strict_types" a null argument
    // is rejected with a TypeError, so normalize it to 0 up front.
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: the former "$encoding === true" fallback was removed, because
    //       $encoding is declared as "string" and can never be a bool here.
    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // Positive offset: search only the part behind the offset and add the
    // offset back to the result. Negative offset: cut characters from the end.
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // "\strrpos()" yields a byte position ...
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... which is converted by measuring the prefix in front of the match with UTF8::strlen().
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
5164
5165
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Passed to UTF8::substr() to select the part of $str to inspect.</p>
   * @param int    $length [optional] <p>Passed to UTF8::substr() to select the part of $str to inspect.</p>
   *
   * @return int <p>Length of the leading run as measured by UTF8::strlen(), 0 if there is none.</p>
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    $sliceRequested = ($offset !== 0 || $length !== null);
    if ($sliceRequested) {
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    }

    if ($str === '' || $mask === '') {
      return 0;
    }

    // Match the longest run of mask characters anchored at the start.
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!\preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5192
5193
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Backends are tried in this order: mbstring, intl (UTF-8 only) and finally a
   * regular expression based fallback.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to wrong positions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = ($encoding === 'UTF-8');

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: "grapheme_strstr()" can't handle other encodings
    if ($isUtf8 && self::$SUPPORT['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first needle
    $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
    \preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $prefix = $match[1];

    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
5260
5261
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is run through the replacement table UTF8::$COMMON_CASE_FOLD, optionally
   * through the "caseFolding_full" data set, and is finally passed to UTF8::strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, replace full case folding chars (default)<br>
   *                           <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // search / replace lists are built once and then reused between calls
    static $foldSearch = null;
    static $foldReplace = null;

    if ($foldSearch === null) {
      $foldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $foldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)\str_replace($foldSearch, $foldReplace, $str);

    if ($full) {
      // the full table is only loaded when it is requested for the first time
      static $fullFold = null;
      if ($fullFold === null) {
        $fullFold = self::getData('caseFolding_full');
      }

      $str = (string)\str_replace($fullFold[0], $fullFold[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5307 1
5308
  /**
   * Make a string lowercase.
   *
   * Without $lang the work is done by "\mb_strtolower()". With $lang and the intl
   * extension, "transliterator_transliterate()" is used with the ID "<lang>-Lower"
   * (or "Any-Lower" when that ID is not available).
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the string is handed to the backend
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] !== true) {
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $langCode = $lang . '-Lower';
    if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // unknown transliterator ID: use the generic one
      $langCode = 'Any-Lower';
    }

    return transliterator_transliterate($langCode, $str);
  }
5360 10
5361 2
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of characters matching
   * "\p{Mn}" is removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return \preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5373
5374
  /**
   * Make a string uppercase.
   *
   * Without $lang the work is done by "\mb_strtoupper()". With $lang and the intl
   * extension, "transliterator_transliterate()" is used with the ID "<lang>-Upper"
   * (or "Any-Upper" when that ID is not available).
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the string is handed to the backend
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // unknown transliterator ID: use the generic one
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // INFO: this warning used to name "UTF8::strtolower()" (copy & paste), which pointed to the wrong method
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
5425
5426
  /**
   * Translate characters or replace sub-strings.
   *
   * With $to given, $from and $to are split via UTF8::str_split(), cut to the same
   * number of elements and used as a replacement map for "\strtr()". Without $to,
   * an array $from is used as the replacement map directly, while a string $from
   * is removed from $str.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // nothing to translate if both sides are identical
    if ($from === $to) {
      return $str;
    }

    // INF is the "no $to given" marker
    if ($to !== INF) {
      $fromParts = self::str_split($from);
      $toParts = self::str_split($to);

      // use only as many pairs as both sides can provide
      $pairs = \min(\count($fromParts), \count($toParts));

      $from = \array_combine(
          \array_slice($fromParts, 0, $pairs),
          \array_slice($toParts, 0, $pairs)
      );
    }

    return \is_string($from) ? \str_replace($from, '', $str) : \strtr($str, $from);
  }
5471
5472
  /**
   * Return the width of a string, as reported by "\mb_strwidth()".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // Only non-default charsets need to be mapped to their canonical name.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    // Delegates to "\mb_strwidth()" (a polyfill may provide it).
    return \mb_strwidth($str, $charset);
  }
5496 1
5497
  /**
   * Changes the case of all keys in an array.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default). Every other value is
   *                     handled like <strong>CASE_LOWER</strong>.</p>
   *
   * @return array <p>An array with its keys lower or uppercased.</p>
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // everything except CASE_UPPER means "lowercase"
    $toUpper = ($case === CASE_UPPER);

    $result = [];
    foreach ($array as $key => $value) {
      $newKey = $toUpper ? self::strtoupper($key) : self::strtolower($key);

      // keys that become identical overwrite each other, the last one wins
      $result[$newKey] = $value;
    }

    return $result;
  }
5529
5530
  /**
   * Get part of a string.
   *
   * Backends are tried in this order: plain "\substr()" for "CP850", mbstring, intl
   * (UTF-8 only), iconv (non-negative length only), plain "\substr()" for ASCII input
   * and finally a pure PHP fallback based on UTF8::split().
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string $str       <p>The string being checked.</p>
   * @param int    $offset    <p>The first position used in str.</p>
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.</p><p>An empty <i>str</i>
   *                      or a <i>length</i> of 0 results in an empty string.</p>
   */
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($str[0])) {
      return '';
    }

    // Empty string
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if (!$offset && $length === null) {
      return $str;
    }

    // The string length is only needed for the offset check and as default length.
    $str_length = 0;
    if ($offset || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // Impossible
    if ($offset && $offset > $str_length) {
      return false;
    }

    // From here on $length is never null: the former "$length ?? $str_length"
    // and "$length === null" fallbacks below were unreachable and are gone.
    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_substr($str, $offset, $length);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
5644
5645
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * If an offset or a length is given, $str1 is reduced to that part via UTF8::substr()
   * and $str2 is cut to the resulting length of $str1 before both are compared.
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                     the length of the str compared to the length of main_str less the offset.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    $needsSlicing = ($offset !== 0 || $length !== null);

    if ($needsSlicing) {
      $part1 = self::substr($str1, $offset, $length);
      $str1 = ($part1 === false) ? '' : (string)$part1;

      // compare against an equally long prefix of the second string
      $part2 = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($part2 === false) ? '' : (string)$part2;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
5689 17
5690
  /**
   * Count the number of substring occurrences.
   *
   * Uses "\mb_substr_count()" if mbstring is available, otherwise the
   * occurrences are counted with a regular expression.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack   <p>The string to search in.</p>
   * @param string $needle     <p>The substring to search for.</p>
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
   * @param int    $length     [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring.
   *                           </p>
   * @param string $encoding   [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>This functions returns an integer or false if there isn't a string.</p>
   */
  public static function substr_count(string $haystack, string $needle, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($offset !== 0 || $length !== null) {

      if ($length === null) {
        $length = self::strlen($haystack);
      }

      // output from "substr_count()" have changed in PHP 7.1
      if (
          $length !== 0
          &&
          $offset !== 0
          &&
          $length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // continue with the requested part of the haystack only
      $part = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($part === false) ? '' : (string)$part;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters would lead to wrong results from the backends
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: every match is one occurrence
    $pattern = '/' . \preg_quote($needle, '/') . '/us';
    \preg_match_all($pattern, $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
5772
5773 1
  /**
   * Strips a prefix ($needle) from the beginning of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip, or the haystack does not begin with the needle
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5801 1
5802
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip, or the haystack does not end with the needle
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
5830
5831
  /**
   * Strips a prefix ($needle) from the beginning of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip, or the haystack does not begin with the needle
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5859 1
5860 1
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          <br><br>
   *                                          If an array of strings is given together with an array of offsets, every
   *                                          string without a matching (integer) offset uses the offset 0.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given for a single string, then it will
   *                                          default to strlen( string ); i.e. end the replacing at the end of string.
   *                                          Of course, if length is zero then this function will have the effect of
   *                                          inserting replacement into string at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // the replacement: at most one entry per input string
      if (\is_array($replacement) === true) {
        $replacement = \array_slice($replacement, 0, $num);
      } else {
        $replacement = \array_pad([$replacement], $num, $replacement);
      }

      // the offset: only real integers are accepted, everything else becomes 0
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);

        // a shorter offset list would make "array_map()" hand over "null" as offset,
        // which the native functions used below reject in strict mode, so use 0 instead
        $offset = \array_pad($offset, $num, 0);
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // the length
      if (null === $length) {
        // NOTE(review): in array mode a missing length is turned into 0 (pure insertion), while a
        // missing length for a single string means "up to the end of the string" - confirm that
        // this difference is intended before changing it.
        $length = \array_fill(0, $num, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (null !== $valueTmpV2) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call, one string at a time
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single string only takes the first replacement of a list
    if (\is_array($replacement) === true) {
      if (\count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // for an ASCII-only input, bytes and characters are the same, so the native function is enough
    if (self::is_ascii($str)) {
      return ($length === null) ?
          \substr_replace($str, $replacement, $offset) :
          \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice on the character level
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = self::strlen($str);
    }

    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
  }
5961
5962
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip, or the haystack does not end with the needle
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
5990
5991
  /**
   * Returns a case swapped version of the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped. If the string cannot be processed by the regex engine,
   *                it is returned without case changes.</p>
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences up front, so that the "u"-pattern below can match
      $str = self::clean($str);
    }

    $strSwappedCase = \preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $matchToUpper = UTF8::strtoupper($match[0], $encoding);

          // unchanged by "strtoupper" means it already was upper case (or has no case at all)
          if ($match[0] === $matchToUpper) {
            return UTF8::strtolower($match[0], $encoding);
          }

          return $matchToUpper;
        },
        $str
    );

    // "preg_replace_callback()" returns null on a PCRE error (e.g. malformed UTF-8 in combination
    // with the "u" modifier); returning that would violate the declared "string" return type
    if ($strSwappedCase === null) {
      return $str;
    }

    return $strSwappedCase;
  }
6032
6033
  /**
   * Alias of "UTF8::to_ascii()"; all arguments are passed through unchanged.
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str
   * @param string $subst_chr
   * @param bool   $strict
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
6050 1
6051 1
  /**
   * Alias of "UTF8::to_iso8859()"; the argument is passed through unchanged.
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6066
6067
  /**
   * Alias of "UTF8::to_latin1()"; the argument is passed through unchanged.
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6082
6083
  /**
   * Alias of "UTF8::to_utf8()"; the argument is passed through unchanged.
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6098
6099 1
  /**
   * Convert a string into ASCII.
   *
   * ASCII-only input is returned as it is. Everything else is cleaned first and then replaced character by
   * character via lookup tables; characters without a table entry are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // lookup tables, loaded lazily per "bank" (the code point shifted right by 8 bits)
    static $UTF8_TO_ASCII;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        // HACK for issue from "transliterator_transliterate()"
        $str = \str_replace(
            'ℌ',
            'H',
            $str
        );

        // "transliterator_transliterate()" returns false on failure: keep the current string in
        // that case, so that the table based conversion below can still do its job
        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);
        if (\is_string($transliterated)) {
          $str = $transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      // start without a code point for every character, otherwise the value of the
      // previous character would survive and defeat the "!isset($ord)" check below
      $ord = null;

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      $ordC1 = self::$ORD[$c[1]];

      // two bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // three bytes
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // four bytes
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // five bytes
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // six bytes
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no code point could be computed from the lead byte
      if (!isset($ord)) {
        $c = $unknown;
        continue;
      }

      // load the lookup table of this bank on first use
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      // position of the character within its bank
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {

        // keep for debugging
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {

        // keep for debugging missing chars
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $unknown;
      }
    }
    // break the reference to the last element of $chars
    unset($c);

    return \implode('', $chars);
  }
6256 1
6257
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), the keys are kept.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      // "array_map()" with a single array preserves the keys
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $str = (string)$str;

    return ($str === '') ? '' : self::utf8_decode($str);
  }
6281 5
6282
  /**
   * Alias of "UTF8::to_iso8859()"; the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6295
6296 1
  /**
   * Leaves byte sequences that look like UTF-8 untouched and converts almost everything else to UTF-8.
   *
   * <ul>
   * <li>It decode UTF-8 codepoints and unicode escape sequences.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element, the keys are kept
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $max = self::strlen_in_byte($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $lead = $str[$i];

      if ($lead >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

        // how many continuation bytes the lead byte announces (0 = no valid lead byte)
        if ($lead <= "\xDF") {
          $tailCount = 1; // looks like 2 bytes UTF8
        } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
          $tailCount = 2; // looks like 3 bytes UTF8
        } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
          $tailCount = 3; // looks like 4 bytes UTF8
        } else {
          $tailCount = 0; // doesn't look like UTF8, but should be converted
        }

        // collect the continuation bytes, a missing byte at the end of the string counts as "\x00"
        $sequence = $lead;
        $looksLikeUtf8 = $tailCount > 0;
        for ($k = 1; $looksLikeUtf8 && $k <= $tailCount; $k++) {
          $tail = $i + $k >= $max ? "\x00" : $str[$i + $k];

          if ($tail >= "\x80" && $tail <= "\xBF") {
            $sequence .= $tail;
          } else {
            $looksLikeUtf8 = false;
          }
        }

        if ($looksLikeUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= $sequence;
          $i += $tailCount;
        } else { // not valid UTF8 - convert it
          $buf .= self::to_utf8_convert($lead);
        }

      } elseif (($lead & "\xC0") === "\x80") { // needs conversion

        $buf .= self::to_utf8_convert($lead);

      } else { // it doesn't need conversion
        $buf .= $lead;
      }
    }

    // decode unicode escape sequences
    $buf = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
6404
6405
  /**
   * Convert one single byte into its UTF-8 byte sequence.
   *
   * Bytes listed in the WINDOWS-1252 special-case table are taken from there, every other byte is
   * encoded as a two byte UTF-8 sequence.
   *
   * @param string $int <p>The byte to convert, as a one-byte string (despite the name of the parameter).</p>
   *
   * @return string
   */
  private static function to_utf8_convert($int): string
  {
    // init
    $buf = '';

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      $buf .= self::$WIN1252_TO_UTF8[$ordC1];
    } else {
      // first byte: the two highest bits of the input; an integer shift is used here, because
      // "$ordC1 / 64" yields a fractional float, which is deprecated as array key since PHP 8.1
      $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
      // second byte: the six lowest bits of the input
      $cc2 = ($int & "\x3F") | "\x80";
      $buf .= $cc1 . $cc2;
    }

    return $buf;
  }
6438 21
6439
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string. If the string cannot be processed by the regex engine, it is
   *                returned untrimmed.</p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || !$chars) {
      $trimmed = \preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);

      // "preg_replace()" returns null on a PCRE error (e.g. malformed UTF-8 in combination with
      // the "u" modifier); returning that would violate the declared "string" return type
      return $trimmed === null ? $str : $trimmed;
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6465
6466
  /**
   * Converts the first character of the string to upper case, the rest is left as it is.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // invalid characters would distort the character positions used below
      $str = self::clean($str);
    }

    // everything behind the first character
    $tail = self::substr($str, 1, null, $encoding);
    if ($tail === false) {
      $tail = '';
    }

    // the first character, in upper case
    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
6496
6497 7
  /**
   * Alias of "UTF8::ucfirst()"; all arguments are passed through unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function ucword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $upperCasedWord = self::ucfirst($word, $encoding, $cleanUtf8);

    return $upperCasedWord;
  }
6512
6513 2
  /**
   * Uppercase for all words in the string.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare with the empty string: a truthiness check would also throw away the valid input "0"
    if ($str === '') {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is split into words
      $str = self::clean($str);
    }

    // the native function is only an option without a charlist and without exceptions
    // (compared with the empty string, so that e.g. a charlist of "0" is not ignored)
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty chunks only, the word "0" has to stay in the result
      if (!isset($word[0])) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
6581
6582
  /**
   * Decodes url-encoded strings and html entities (optionally several times) and fixes urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // turn "%uXXXX" sequences into hexadecimal html entities, they get decoded in the loop below
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapePattern, $str)) {
      $str = \preg_replace($unicodeEscapePattern, '&#x\\1;', \urldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // one decoding pass; repeated until the string stops changing if $multi_decode is set
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
6630
6631
  /**
   * Return an array that maps "urlencoded" Windows-1252 bytes ("%20" ... "%FF") to their UTF-8 characters.
   *
   * Notes on special entries:
   * - "%80" is the euro sign in Windows-1252 (the backtick is already covered by "%60").
   * - "%A0" (NO-BREAK SPACE) and "%AD" (SOFT HYPHEN) are invisible characters, so they are written
   *   as explicit UTF-8 byte escapes instead of literals.
   * - "%7F" (DEL) and the bytes that are undefined in Windows-1252 ("%81", "%8D", "%8F", "%90", "%9D")
   *   map to an empty string.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array <p>Percent-encoded byte (upper-case hex) as key, UTF-8 string as value.</p>
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '', // DEL (control character) is dropped
        '%80' => '€', // 0x80 is the euro sign in Windows-1252
        '%81' => '', // undefined in Windows-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '', // undefined in Windows-1252
        '%8E' => 'Ž',
        '%8F' => '', // undefined in Windows-1252
        '%90' => '', // undefined in Windows-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '', // undefined in Windows-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\xC2\xA0", // NO-BREAK SPACE (U+00A0)
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xC2\xAD", // SOFT HYPHEN (U+00AD)
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
6867
6868
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * Characters that do not fit into one byte (code point >= 256, as well as every
   * 3-byte and 4-byte sequence) are replaced by "?". A 2-byte lead byte that is
   * the last byte of the input (truncated sequence) is replaced by "?" as well.
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>
   *                              If true, and the decoded result is not shorter (compared via "UTF8::strlen()")
   *                              than the string before the byte-wise decoding, that un-decoded string is
   *                              returned instead.
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    // build the search / replace lists only once per request
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): the search list is the key set of self::$WIN1252_TO_UTF8 and the replacement list
    //               its values -- confirm against the data file that this is the intended direction here.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // The string is rewritten in place: $i is the read position, $j the write position ($j <= $i).
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      // the high nibble of the lead byte tells how long the sequence is
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // 2-byte sequence
          if ($i + 1 >= $len) {
            // truncated sequence: there is no continuation byte to read
            $str[$j] = $noCharFound;
            break;
          }

          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // 4-byte sequence: skip one byte more than the 3-byte case
          ++$i;
          // no break: intentional fall-through
        case "\xE0":
          // 3-byte (or 4-byte) sequence: never fits into one byte
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          // single byte: copy as it is
          $str[$j] = $str[$i];
      }
    }

    // only the first $j bytes contain the decoded result
    $return = (string)self::substr($str, 0, $j, '8BIT');

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
6949
6950 1
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * If the encoded string contains the byte "\xC2", it is additionally passed through a
   * search / replace built from the keys and values of the "win1252_to_utf8" data table.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string if the input is empty or the encoding failed.</p>
   */
  public static function utf8_encode(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill maybe return false
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a "\xC2" byte in the output, the replacement below is skipped
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // search / replace lists, built once and reused by later calls
    static $search = null;
    static $replace = null;

    if ($search === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $search = \array_keys(self::$WIN1252_TO_UTF8);
      $replace = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($search, $replace, $encoded);
  }
6990
6991
  /**
   * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()" for the given string.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7004
7005
  /**
   * Returns the internal table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7021
7022
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string contains more than $limit words,
   * it is cut after the last allowed word, trailing whitespace is removed and $strAddOn is appended.
   * Otherwise the string is returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer; values below 1 result in an empty string.</p>
   * @param string $strAddOn <p>Replacement for the stripped string.</p>
   *
   * @return string
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if ($str === '' || $limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by 1 to $limit words incl. their trailing whitespace
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $matches);

    // only shorten the string, if the match does not already cover the whole string
    if (
        isset($matches[0])
        &&
        self::strlen($str) !== self::strlen($matches[0])
    ) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7053
7054
  /**
   * Wraps a string to a given number of characters
   *
   * The string is translated into a single-byte "mask" ("#" for an existing break, " " for a space and
   * "?" for every other element returned by "UTF8::split()"), the mask is wrapped by the native
   * "wordwrap()" function and the break positions of the mask are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>
   *                The given string wrapped at the specified column,
   *                or an empty string if $str or $break is empty.
   *                </p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ($str === '' || $break === '') {
      return '';
    }

    // build the list of characters and the mask, one mask byte per list entry
    $mask = '';
    $chars = [];
    foreach (\explode($break, $str) as $index => $segment) {

      // re-insert the break that "explode()" removed between two segments
      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    $wrapped = \wordwrap($mask, $width, '#', $cut);

    $result = '';
    $next = 0; // index of the next not yet consumed entry in $chars
    $maskPos = -1; // last processed position in the wrapped mask
    $breakPos = -1; // position of the current "#" in the wrapped mask

    while (($breakPos = self::strpos($wrapped, '#', $breakPos + 1)) !== false) {

      // copy everything in front of the break
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $result .= $chars[$next];
        unset($chars[$next]);
        ++$next;
      }

      // the break replaces an original break or a space; a break inserted by "cut" consumes nothing
      if ($chars[$next] === $break || $chars[$next] === ' ') {
        unset($chars[$next]);
        ++$next;
      }

      $result .= $break;
    }

    // everything behind the last break
    return $result . \implode('', $chars);
  }
7118
7119
  /**
   * Returns the internal list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7128
7129
}
7130