Completed
Push — master ( b6b34a...efa664 )
by Lars
02:59
created

UTF8::hex_to_int()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 12
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 12
ccs 6
cts 6
cp 1
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 6
nc 3
nop 1
crap 3
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
   * Byte-order-mark byte sequence => length of that sequence in bytes.
   *
   * Every real BOM is followed by the byte sequence you get when that BOM was
   * read as "WINDOWS-1252" and stored as UTF-8 afterwards. All keys are written
   * as escape sequences, so they cannot be damaged by editors or tools which
   * strip or re-encode non-printable bytes; the value always has to be equal to
   * the byte length of its key.
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
206
   * __construct()
207
   */
208 16
  public function __construct()
209
  {
210 16
    self::checkForSupport();
211 16
  }
212
213
  /**
   * Fetch a single character by its position, comparable to $str[1] for byte strings.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string for an empty input / a negative position.</p>
   */
  public static function access(string $str, int $pos): string
  {
    // an empty string or a negative position never yields a character
    if ($str === '' || $pos < 0) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
233
234
  /**
   * Makes sure the string starts with a BOM: the UTF-8 BOM is prepended when no BOM is found.
   *
   * INFO: A string that already has a BOM is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string(string $str): string
  {
    return self::string_has_bom($str) === false
        ? self::bom() . $str
        : $str;
  }
251
252
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it describes.
   *
   * The digits are converted byte by byte, so the length of the input is not
   * limited by float precision. Leading zeros carry no value and are dropped,
   * the remaining digits are left-padded to whole bytes.
   *
   * @param mixed $bin <p>String of binary digits, e.g. "0110000101100010";
   *                   characters other than "0" and "1" are ignored.</p>
   *
   * @return string <p>The decoded bytes; an empty string for non-string input, empty input or a value of zero.</p>
   */
  public static function binary_to_str($bin): string
  {
    if (\is_string($bin) === false || $bin === '') {
      return '';
    }

    // like base_convert(): ignore invalid characters and leading zeros
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', $bin), '0');
    if ($bits === '') {
      return '';
    }

    // left-pad to a multiple of 8, so that every chunk below is exactly one byte
    $paddedLength = (int)(\ceil(\strlen($bits) / 8) * 8);
    $bits = \str_pad($bits, $paddedLength, '0', STR_PAD_LEFT);

    $str = '';
    foreach (\str_split($bits, 8) as $byte) {
      $str .= \chr((int)\bindec($byte));
    }

    return $str;
  }
272
273
  /**
   * Provides the three bytes of the UTF-8 Byte Order Mark.
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    return "\xef\xbb\xbf";
  }
284
285
  /**
   * Alias of UTF8::chr_map(): the arguments are passed through unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, string $str): array
  {
    $result = self::chr_map($callback, $str);

    return $result;
  }
299
300
  /**
   * Detects once which UTF-8 related extensions are available and stores the result in self::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection runs only once, later calls are no-ops
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              \function_exists('transliterator_list_ids') === true;

    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators
        ? transliterator_list_ids()
        : [];

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
336
337
  /**
   * Generates a character from the given Unicode code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are kept in a static cache, keyed by code point and encoding.
   *
   * @param int|string $code_point <p>The code point for which to generate a character;
   *                               numeric strings are converted to integers.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, null if the code point is not numeric
   *                     or lies outside of 0 - 0x10FFFF.</p>
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Numeric strings have to become integers: \IntlChar::chr() would otherwise
    // read a string argument as a character instead of a code point.
    if (\is_int($code_point) === false) {
      if (\is_numeric($code_point) === false) {
        return null;
      }

      $code_point = (int)$code_point;
    }

    // outside of the Unicode range there is no character (and no entry in the lookup table)
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 0x7F) { // use "simple"-char only until "\x80"

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      $chr = self::$CHR[$code_point];

    } elseif (self::$SUPPORT['intlChar'] === true) {

      $chr = \IntlChar::chr($code_point);

    } else {

      // fallback: build the UTF-8 byte sequence from the byte lookup table
      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      if ($code_point <= 0x7FF) {
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }
    }

    if ($encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
426
427
  /**
   * Runs the callback once for every character of the string and collects the results.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
441
442
  /**
   * Lists, for every character of a Unicode string, how many bytes it occupies.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character.</p>
   */
  public static function chr_size_list(string $str): array
  {
    if ($str === '') {
      return [];
    }

    $sizes = [];
    foreach (self::split($str) as $index => $char) {
      // "8BIT" => count bytes instead of characters
      $sizes[$index] = self::strlen($char, '8BIT');
    }

    return $sizes;
  }
467
468
  /**
   * Decode the first character of the given string into its decimal code point.
   *
   * The lead byte decides how many bytes are read; the payload bits of the
   * following bytes are appended six at a time.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal(string $char): int
  {
    $value = self::ord($char[0]);

    // 0xxxxxxx => single byte, nothing to decode
    if (($value & 0x80) === 0) {
      return $value;
    }

    // detect the sequence length and remove the length marker from the lead byte
    $length = 1;
    if (($value & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $value &= ~0xc0;
    } elseif (($value & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $value &= ~0xe0;
    } elseif (($value & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $value &= ~0xf0;
    }

    // 10xxxxxx => append the bits of every following byte
    for ($offset = 1; $offset < $length; ++$offset) {
      $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $value;
  }
506
507
  /**
   * Format the code point of a UTF-8 encoded character as a prefixed hexadecimal string (U+xxxx).
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional]
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
   */
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // the HTML entity of the NUL byte is handled like an empty character
    $input = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($input), $pfix);
  }
527
528
  /**
   * Alias of "UTF8::chr_to_decimal()": the argument is passed through unchanged.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
541
542
  /**
   * Cuts a string into chunks of a fixed character length and joins them with the given line ending.
   *
   * INFO: the line ending is placed between the chunks only, not after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
555
556
  /**
   * Strips every byte that is not part of a UTF-8 sequence and applies the requested extra clean-ups.
   *
   * Order of the steps: invalid bytes, diamond question marks, invisible characters,
   * then (optional) whitespace, MS Word chars and finally the BOM.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean(string $str, bool $remove_bom = false, bool $normalize_whitespace = false, bool $normalize_msword = false, bool $keep_non_breaking_space = false): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // only the first group (well-formed sequences) survives the replacement
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($pattern, '$1', $str);

    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
604
605
  /**
   * Repairs simple encoding errors first and cleans the result afterwards.
   *
   * The clean-up step removes non UTF-8 bytes and the BOM and normalizes
   * whitespace chars, but keeps non-breaking-spaces; MS Word chars are left alone.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // arguments: remove BOM, normalize whitespace, no MS Word normalization, keep non-breaking-spaces
    return self::clean($fixed, true, true, false, true);
  }
630
631
  /**
   * Maps a string (split into its characters) or an array of strings to Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    // a string is handled character by character, an array is used as it is
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);

    if ($u_style === false) {
      return $codePoints;
    }

    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
668
669
  /**
   * Counts how often every character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // one array element per character
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
682
683
  /**
   * Builds the numeric HTML entity for the given value and decodes it into a character.
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
694
695
  /**
   * Convert a string into another charset-encoding.
   *
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string <p>The converted string, or the input string if nothing could / had to be converted.</p>
   */
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // nothing to do: the detection failed, or the string already has the
    // target encoding and the conversion is not forced
    if (
        $encodingDetected === false
        ||
        (
            $force === false
            &&
            $encodingDetected === $encoding
        )
    ) {
      return $str;
    }

    // the dedicated converters below are used if forced or for these source encodings only
    $useDedicatedConverter = $force === true
                             ||
                             \in_array($encodingDetected, ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'], true);

    if ($encoding === 'UTF-8' && $useDedicatedConverter) {
      return self::to_utf8($str);
    }

    if ($encoding === 'ISO-8859-1' && $useDedicatedConverter) {
      return self::to_iso8859($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // a falsy conversion result falls back to the input string
    return $strEncoded ?: $str;
  }
783
784
  /**
   * Reads a file into a string and (by default) converts the content to clean UTF-8.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename         <p>
   *                                        Name of the file to read.
   *                                        </p>
   * @param bool          $use_include_path [optional] <p>
   *                                        Passed through to the native function: whether the
   *                                        include path is searched for the file.
   *                                        </p>
   * @param resource|null $context          [optional] <p>
   *                                        A valid context resource created with
   *                                        stream_context_create. Without a context, one with
   *                                        the given http-timeout is created (if $timeout is not 0).
   *                                        </p>
   * @param int|null      $offset           [optional] <p>
   *                                        The offset where the reading starts, null means 0.
   *                                        </p>
   * @param int|null      $maxLength        [optional] <p>
   *                                        Maximum length of data read. The default is to read until end
   *                                        of file is reached.
   *                                        </p>
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
   *
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
   *                                        images or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents(string $filename, bool $use_include_path = false, $context = null, int $offset = null, int $maxLength = null, int $timeout = 10, bool $convertToUtf8 = true)
  {
    // init
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and changes
    // file names that contain quotes or tags - confirm that this is still wanted
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // the timeout is only applied if the caller did not bring an own context
    if ($context === null && $timeout) {
      $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
    }

    $offset = $offset ?? 0;

    // the length argument is only passed on when it was given
    $data = $maxLength === null
        ? \file_get_contents($filename, $use_include_path, $context, $offset)
        : \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === false) {
      return $data;
    }

    return self::cleanup(self::encode('UTF-8', $data, false));
  }
858
859
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @throws \RuntimeException if the file could not be read
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function file_has_bom(string $file_path): bool
  {
    $file_content = \file_get_contents($file_path);

    // An unreadable file yields false, which must not be passed on as string:
    // fail with a clear message instead.
    if ($file_content === false) {
      throw new \RuntimeException('file_get_contents() returned false for: ' . $file_path);
    }

    return self::string_has_bom($file_content);
  }
870
871
  /**
   * Normalizes strings to the given normalization form (default: NFC), re-encoding to UTF-8 when that fails.
   *
   * Arrays and objects are processed recursively, element by element / property by
   * property; an object is modified in place. All other non-string values are
   * returned unchanged.
   *
   * @param mixed  $var
   * @param int    $normalization_form
   * @param string $leading_combining <p>Prefix for strings that would otherwise start with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = \str_replace(["\r\n", "\r"], "\n", $var);
    }

    // pure ASCII does not need any normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // an unusable normalization result => repair the encoding instead
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    if (
        $var[0] >= "\x80"
        &&
        isset($n[0], $leading_combining[0])
        &&
        \preg_match('/^\p{Mn}/u', $var)
    ) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
934
935
  /**
936
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
937
   *
938
   * Gets a specific external variable by name and optionally filters it
939
   *
940
   * @link  http://php.net/manual/en/function.filter-input.php
941
   *
942
   * @param int    $type          <p>
943
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
944
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
945
   *                              <b>INPUT_ENV</b>.
946
   *                              </p>
947
   * @param string $variable_name <p>
948
   *                              Name of a variable to get.
949
   *                              </p>
950
   * @param int    $filter        [optional] <p>
951
   *                              The ID of the filter to apply. The
952
   *                              manual page lists the available filters.
953
   *                              </p>
954
   * @param mixed  $options       [optional] <p>
955
   *                              Associative array of options or bitwise disjunction of flags. If filter
956
   *                              accepts options, flags can be provided in "flags" field of array.
957
   *                              </p>
958
   *
959
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
960
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
961
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
962
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
963
   * @since 5.2.0
964
   */
965 View Code Duplication
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually supplied a fourth argument.
    $raw = \func_num_args() >= 4
        ? \filter_input($type, $variable_name, $filter, $options)
        : \filter_input($type, $variable_name, $filter);

    return self::filter($raw);
  }
975
976
  /**
977
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
978
   *
979
   * Gets external variables and optionally filters them
980
   *
981
   * @link  http://php.net/manual/en/function.filter-input-array.php
982
   *
983
   * @param int   $type       <p>
984
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
985
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
986
   *                          <b>INPUT_ENV</b>.
987
   *                          </p>
988
   * @param mixed $definition [optional] <p>
989
   *                          An array defining the arguments. A valid key is a string
990
   *                          containing a variable name and a valid value is either a filter type, or an array
991
   *                          optionally specifying the filter, flags and options. If the value is an
992
   *                          array, valid keys are filter which specifies the
993
   *                          filter type,
994
   *                          flags which specifies any flags that apply to the
995
   *                          filter, and options which specifies any options that
996
   *                          apply to the filter. See the example below for a better understanding.
997
   *                          </p>
998
   *                          <p>
999
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1000
   *                          input array are filtered by this filter.
1001
   *                          </p>
1002
   * @param bool  $add_empty  [optional] <p>
1003
   *                          Add missing keys as <b>NULL</b> to the return value.
1004
   *                          </p>
1005
   *
1006
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1007
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1008
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1009
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1010
   * fails.
1011
   * @since 5.2.0
1012
   */
1013 View Code Duplication
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // Forward $definition / $add_empty only when at least two arguments were passed.
    $values = \func_num_args() >= 2
        ? \filter_input_array($type, $definition, $add_empty)
        : \filter_input_array($type);

    return self::filter($values);
  }
1023
1024
  /**
1025
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1026
   *
1027
   * Filters a variable with a specified filter
1028
   *
1029
   * @link  http://php.net/manual/en/function.filter-var.php
1030
   *
1031
   * @param mixed $variable <p>
1032
   *                        Value to filter.
1033
   *                        </p>
1034
   * @param int   $filter   [optional] <p>
1035
   *                        The ID of the filter to apply. The
1036
   *                        manual page lists the available filters.
1037
   *                        </p>
1038
   * @param mixed $options  [optional] <p>
1039
   *                        Associative array of options or bitwise disjunction of flags. If filter
1040
   *                        accepts options, flags can be provided in "flags" field of array. For
1041
   *                        the "callback" filter, callable type should be passed. The
1042
   *                        callback must accept one argument, the value to be filtered, and return
1043
   *                        the value after filtering/sanitizing it.
1044
   *                        </p>
1045
   *                        <p>
1046
   *                        <code>
1047
   *                        // for filters that accept options, use this format
1048
   *                        $options = array(
1049
   *                        'options' => array(
1050
   *                        'default' => 3, // value to return if the filter fails
1051
   *                        // other options here
1052
   *                        'min_range' => 0
1053
   *                        ),
1054
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1055
   *                        );
1056
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1057
   *                        // for filter that only accept flags, you can pass them directly
1058
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1059
   *                        // for filter that only accept flags, you can also pass as an array
1060
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1061
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1062
   *                        // callback validate filter
1063
   *                        function foo($value)
1064
   *                        {
1065
   *                        // Expected format: Surname, GivenNames
1066
   *                        if (strpos($value, ", ") === false) return false;
1067
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1068
   *                        $empty = (empty($surname) || empty($givennames));
1069
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1070
   *                        if ($empty || $notstrings) {
1071
   *                        return false;
1072
   *                        } else {
1073
   *                        return $value;
1074
   *                        }
1075
   *                        }
1076
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1077
   *                        </code>
1078
   *                        </p>
1079
   *
1080
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1081
   * @since 5.2.0
1082
   */
1083 1 View Code Duplication
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually supplied a third argument.
    $filtered = \func_num_args() >= 3
        ? \filter_var($variable, $filter, $options)
        : \filter_var($variable, $filter);

    return self::filter($filtered);
  }
1093
1094
  /**
1095
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1096
   *
1097
   * Gets multiple variables and optionally filters them
1098
   *
1099
   * @link  http://php.net/manual/en/function.filter-var-array.php
1100
   *
1101
   * @param array $data       <p>
1102
   *                          An array with string keys containing the data to filter.
1103
   *                          </p>
1104
   * @param mixed $definition [optional] <p>
1105
   *                          An array defining the arguments. A valid key is a string
1106
   *                          containing a variable name and a valid value is either a
1107
   *                          filter type, or an
1108
   *                          array optionally specifying the filter, flags and options.
1109
   *                          If the value is an array, valid keys are filter
1110
   *                          which specifies the filter type,
1111
   *                          flags which specifies any flags that apply to the
1112
   *                          filter, and options which specifies any options that
1113
   *                          apply to the filter. See the example below for a better understanding.
1114
   *                          </p>
1115
   *                          <p>
1116
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1117
   *                          input array are filtered by this filter.
1118
   *                          </p>
1119
   * @param bool  $add_empty  [optional] <p>
1120
   *                          Add missing keys as <b>NULL</b> to the return value.
1121
   *                          </p>
1122
   *
1123
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1124
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1125
   * the variable is not set.
1126
   * @since 5.2.0
1127
   */
1128 1 View Code Duplication
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // Forward $definition / $add_empty only when at least two arguments were passed.
    $values = \func_num_args() >= 2
        ? \filter_var_array($data, $definition, $add_empty)
        : \filter_var_array($data);

    return self::filter($values);
  }
1138
1139
  /**
   * Check that the length of a string, as reported by "UTF8::strlen()",
   * does not exceed the given limit.
   *
   * @param string $str      The string to be measured.
   * @param int    $box_size The maximum allowed length.
   *
   * @return bool true if the length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside(string $str, int $box_size): bool
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1151
1152
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * If you received a UTF-8 string that was converted from Windows-1252 as if it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * The replacement table is loaded from the "utf8_fix" data file on first use; its keys
   * and values are then kept in function-static caches for later calls.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  public static function fix_simple_utf8(string $str): string
  {
    static $search = null;
    static $replace = null;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      // Lazily load the shared replacement table.
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $search = \array_keys(self::$BROKEN_UTF8_FIX);
      $replace = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($search, $replace, $str);
  }
1186
1187
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * A string is passed through "UTF8::utf8_decode()" followed by "UTF8::to_utf8()"
   * again and again until a pass no longer changes it. Arrays are processed
   * element by element (recursively), keeping their keys.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return string|string[] <p>Will return the fixed input-"array" or
   *                         the fixed input-"string".</p>
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str) === true) {
      return \array_map(
          static function ($item) {
            return self::fix_utf8($item);
          },
          $str
      );
    }

    // Repeat the decode/encode round trip until the result is stable.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $decoded = self::utf8_decode($str, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1215
1216
  /**
   * Get the text direction of a specific character.
   *
   * When "IntlChar" support is flagged as available, its direction code is consulted
   * first; if that code is in neither known list, or "IntlChar" is unavailable, a
   * built-in table of right-to-left code points decides.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // direction codes from "IntlChar"-Class
      if (\in_array($direction, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($direction, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of the overall table bounds is left-to-right.
    if (0x5be > $c || 0x10b7f < $c) {
      return 'LTR';
    }

    // Membership test: exact code points first, then inclusive [low, high] ranges.
    $isListed = static function ($codePoint, array $singles, array $ranges): bool {
      if (\in_array($codePoint, $singles, true)) {
        return true;
      }

      foreach ($ranges as $range) {
        if ($range[0] <= $codePoint && $range[1] >= $codePoint) {
          return true;
        }
      }

      return false;
    };

    if (0x85e >= $c) {

      $singles = [
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      ];
      $ranges = [
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          [0x800, 0x815],
          [0x830, 0x83e],
          [0x840, 0x858],
      ];

      if ($isListed($c, $singles, $ranges)) {
        return 'RTL';
      }

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $singles = [
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      ];
      $ranges = [
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

      if ($isListed($c, $singles, $ranges)) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1337
1338
  /**
   * Get data from "/data/*.php".
   *
   * The file is looked up below this class' directory and, if it exists,
   * loaded via "require"; whatever the file returns is handed back.
   *
   * @param string $file <p>File name without directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>Will return false if the file does not exist.</p>
   */
  private static function getData(string $file)
  {
    $file = \sprintf('%s/data/%s.php', __DIR__, $file);

    if (!\file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
1355
1356
  /**
   * Check for php-support.
   *
   * The support table is populated via "checkForSupport()" on first use.
   *
   * @param string|null $key
   *
   * @return mixed <p>Return the full support-"array", if $key === null<br>
   *               return the stored value, if $key is used and available<br>
   *               otherwise return null</p>
   */
  public static function getSupportInfo(string $key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    return $key === null
        ? self::$SUPPORT
        : (self::$SUPPORT[$key] ?? null);
  }
1381
1382
  /**
   * Alias for "UTF8::string_has_bom()".
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str <p>The string to inspect.</p>
   *
   * @return bool <p>The result of "UTF8::string_has_bom()" for the given string.</p>
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1397
1398
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted with "hexdec()" and the resulting number is
   * passed on to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr(string $hexdec)
  {
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1409
1410
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted forms are 4 to 6 hexadecimal digits (case-insensitive), optionally
   * prefixed with "\u" or "U+", e.g. "00a7", "\u00a7" or "U+00a7".
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure (empty input,
   *                   non-hexadecimal digits, wrong length, trailing characters).</p>
   */
  public static function hex_to_int(string $hexDec)
  {
    if ($hexDec === '') {
      return false;
    }

    // Only real hex digits are allowed: with "[a-z0-9]" an input such as "zzzz"
    // matched and intval() silently turned it into 0 instead of reporting failure.
    // "\z" (instead of "$") additionally rejects a trailing newline.
    if (\preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})\z/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1431
1432
  /**
   * Alias for "UTF8::html_entity_decode()".
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>Passed through unchanged.</p>
   * @param string $encoding [optional] <p>Passed through unchanged, default is UTF-8.</p>
   *
   * @return string <p>The result of "UTF8::html_entity_decode()".</p>
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1447
1448
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * If "mb_encode_numericentity()" exists it does the work; otherwise the string is
   * split via "UTF8::split()" and each part is encoded with
   * "UTF8::single_chr_html_encode()".
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars the encoded range starts above the 7-bit range.
      $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

      // The convmap consists of groups of exactly four integers:
      // [start code, end code, offset, mask]. A stray fifth element was ignored
      // by older PHP versions, but PHP 8 rejects maps whose size is not a
      // multiple of four with a ValueError.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // Fallback without mbstring: encode the string piece by piece.
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1494
1495
  /**
1496
   * UTF-8 version of html_entity_decode()
1497
   *
1498
   * The reason we are not using html_entity_decode() by itself is because
1499
   * while it is not technically correct to leave out the semicolon
1500
   * at the end of an entity most browsers will still interpret the entity
1501
   * correctly. html_entity_decode() does not convert entities without
1502
   * semicolons, so we are left with our own little solution here. Bummer.
1503
   *
1504
   * Convert all HTML entities to their applicable characters
1505
   *
1506
   * INFO: opposite to UTF8::html_encode()
1507
   *
1508
   * @link http://php.net/manual/en/function.html-entity-decode.php
1509
   *
1510
   * @param string $str      <p>
1511
   *                         The input string.
1512
   *                         </p>
1513
   * @param int    $flags    [optional] <p>
1514
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1515
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1516
   *                         <table>
1517
   *                         Available <i>flags</i> constants
1518
   *                         <tr valign="top">
1519
   *                         <td>Constant Name</td>
1520
   *                         <td>Description</td>
1521
   *                         </tr>
1522
   *                         <tr valign="top">
1523
   *                         <td><b>ENT_COMPAT</b></td>
1524
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1525
   *                         </tr>
1526
   *                         <tr valign="top">
1527
   *                         <td><b>ENT_QUOTES</b></td>
1528
   *                         <td>Will convert both double and single quotes.</td>
1529
   *                         </tr>
1530
   *                         <tr valign="top">
1531
   *                         <td><b>ENT_NOQUOTES</b></td>
1532
   *                         <td>Will leave both double and single quotes unconverted.</td>
1533
   *                         </tr>
1534
   *                         <tr valign="top">
1535
   *                         <td><b>ENT_HTML401</b></td>
1536
   *                         <td>
1537
   *                         Handle code as HTML 4.01.
1538
   *                         </td>
1539
   *                         </tr>
1540
   *                         <tr valign="top">
1541
   *                         <td><b>ENT_XML1</b></td>
1542
   *                         <td>
1543
   *                         Handle code as XML 1.
1544
   *                         </td>
1545
   *                         </tr>
1546
   *                         <tr valign="top">
1547
   *                         <td><b>ENT_XHTML</b></td>
1548
   *                         <td>
1549
   *                         Handle code as XHTML.
1550
   *                         </td>
1551
   *                         </tr>
1552
   *                         <tr valign="top">
1553
   *                         <td><b>ENT_HTML5</b></td>
1554
   *                         <td>
1555
   *                         Handle code as HTML 5.
1556
   *                         </td>
1557
   *                         </tr>
1558
   *                         </table>
1559
   *                         </p>
1560
   * @param string $encoding [optional] <p>Encoding to use.</p>
1561
   *
1562
   * @return string <p>The decoded string.</p>
1563
   */
1564 17
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Strings shorter than four bytes are returned unchanged (examples: &; || &x;).
    if (!isset($str[3])) {
      return $str;
    }

    // Nothing to do without an ampersand, or when the string contains
    // neither a numeric entity prefix nor a semicolon.
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
    ) {
      // Make sure the support table is populated before reading from it,
      // as the other methods that consult self::$SUPPORT do.
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // Decode repeatedly until a pass no longer changes the string,
    // so that multiply-encoded entities are resolved as well.
    do {
      $str_compare = $str;

      // Decimal entities first; entities that decode to a plain quote character
      // are left as they are here, so that $flags decides about them below.
      $str = (string)\preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() appends the missing ";" to unterminated numeric entities)
      $str = \html_entity_decode(
          \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
1632
1633
  /**
1634
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
1635
   *
1636
   * @link http://php.net/manual/en/function.htmlentities.php
1637
   *
1638
   * @param string $str           <p>
1639
   *                              The input string.
1640
   *                              </p>
1641
   * @param int    $flags         [optional] <p>
1642
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1643
   *                              invalid code unit sequences and the used document type. The default is
1644
   *                              ENT_COMPAT | ENT_HTML401.
1645
   *                              <table>
1646
   *                              Available <i>flags</i> constants
1647
   *                              <tr valign="top">
1648
   *                              <td>Constant Name</td>
1649
   *                              <td>Description</td>
1650
   *                              </tr>
1651
   *                              <tr valign="top">
1652
   *                              <td><b>ENT_COMPAT</b></td>
1653
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1654
   *                              </tr>
1655
   *                              <tr valign="top">
1656
   *                              <td><b>ENT_QUOTES</b></td>
1657
   *                              <td>Will convert both double and single quotes.</td>
1658
   *                              </tr>
1659
   *                              <tr valign="top">
1660
   *                              <td><b>ENT_NOQUOTES</b></td>
1661
   *                              <td>Will leave both double and single quotes unconverted.</td>
1662
   *                              </tr>
1663
   *                              <tr valign="top">
1664
   *                              <td><b>ENT_IGNORE</b></td>
1665
   *                              <td>
1666
   *                              Silently discard invalid code unit sequences instead of returning
1667
   *                              an empty string. Using this flag is discouraged as it
1668
   *                              may have security implications.
1669
   *                              </td>
1670
   *                              </tr>
1671
   *                              <tr valign="top">
1672
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1673
   *                              <td>
1674
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1675
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1676
   *                              </td>
1677
   *                              </tr>
1678
   *                              <tr valign="top">
1679
   *                              <td><b>ENT_DISALLOWED</b></td>
1680
   *                              <td>
1681
   *                              Replace invalid code points for the given document type with a
1682
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1683
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1684
   *                              instance, to ensure the well-formedness of XML documents with
1685
   *                              embedded external content.
1686
   *                              </td>
1687
   *                              </tr>
1688
   *                              <tr valign="top">
1689
   *                              <td><b>ENT_HTML401</b></td>
1690
   *                              <td>
1691
   *                              Handle code as HTML 4.01.
1692
   *                              </td>
1693
   *                              </tr>
1694
   *                              <tr valign="top">
1695
   *                              <td><b>ENT_XML1</b></td>
1696
   *                              <td>
1697
   *                              Handle code as XML 1.
1698
   *                              </td>
1699
   *                              </tr>
1700
   *                              <tr valign="top">
1701
   *                              <td><b>ENT_XHTML</b></td>
1702
   *                              <td>
1703
   *                              Handle code as XHTML.
1704
   *                              </td>
1705
   *                              </tr>
1706
   *                              <tr valign="top">
1707
   *                              <td><b>ENT_HTML5</b></td>
1708
   *                              <td>
1709
   *                              Handle code as HTML 5.
1710
   *                              </td>
1711
   *                              </tr>
1712
   *                              </table>
1713
   *                              </p>
1714
   * @param string $encoding      [optional] <p>
1715
   *                              Like <b>htmlspecialchars</b>,
1716
   *                              <b>htmlentities</b> takes an optional third argument
1717
   *                              <i>encoding</i> which defines encoding used in
1718
   *                              conversion.
1719
   *                              Although this argument is technically optional, you are highly
1720
   *                              encouraged to specify the correct value for your code.
1721
   *                              </p>
1722
   * @param bool   $double_encode [optional] <p>
1723
   *                              When <i>double_encode</i> is turned off PHP will not
1724
   *                              encode existing html entities. The default is to convert everything.
1725
   *                              </p>
1726
   *
1727
   *
1728
   * @return string the encoded string.
1729
   * </p>
1730
   * <p>
1731
   * If the input <i>string</i> contains an invalid code unit
1732
   * sequence within the given <i>encoding</i> an empty string
1733
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1734
   * <b>ENT_SUBSTITUTE</b> flags are set.
1735
   */
1736 2
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // Anything other than the literal 'UTF-8' is passed through normalize_encoding() first.
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Native htmlentities() leaves backslashes untouched, so they are turned
    // into their numeric entity in a second step.
    // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $encoded = \str_replace(
        '\\',
        '&#92;',
        \htmlentities($str, $flags, $encoding, $double_encode)
    );

    // The extra multi-byte pass below is only run for UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect every distinct character whose reported size is >= 3 and map it
    // to the result of html_encode().
    // NOTE(review): chr_size_list() presumably yields one byte length per character,
    // indexed by character position — confirm against its definition.
    $needles = [];
    $entities = [];
    foreach (self::chr_size_list($encoded) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $character = self::access($encoded, $position);
      if (isset($entities[$character])) {
        continue;
      }

      $needles[$character] = $character;
      $entities[$character] = self::html_encode($character);
    }

    return \str_replace($needles, $entities, $encoded);
  }
1774
1775
  /**
1776
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
1777
   *
1778
   * INFO: Take a look at "UTF8::htmlentities()"
1779
   *
1780
   * @link http://php.net/manual/en/function.htmlspecialchars.php
1781
   *
1782
   * @param string $str           <p>
1783
   *                              The string being converted.
1784
   *                              </p>
1785
   * @param int    $flags         [optional] <p>
1786
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1787
   *                              invalid code unit sequences and the used document type. The default is
1788
   *                              ENT_COMPAT | ENT_HTML401.
1789
   *                              <table>
1790
   *                              Available <i>flags</i> constants
1791
   *                              <tr valign="top">
1792
   *                              <td>Constant Name</td>
1793
   *                              <td>Description</td>
1794
   *                              </tr>
1795
   *                              <tr valign="top">
1796
   *                              <td><b>ENT_COMPAT</b></td>
1797
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1798
   *                              </tr>
1799
   *                              <tr valign="top">
1800
   *                              <td><b>ENT_QUOTES</b></td>
1801
   *                              <td>Will convert both double and single quotes.</td>
1802
   *                              </tr>
1803
   *                              <tr valign="top">
1804
   *                              <td><b>ENT_NOQUOTES</b></td>
1805
   *                              <td>Will leave both double and single quotes unconverted.</td>
1806
   *                              </tr>
1807
   *                              <tr valign="top">
1808
   *                              <td><b>ENT_IGNORE</b></td>
1809
   *                              <td>
1810
   *                              Silently discard invalid code unit sequences instead of returning
1811
   *                              an empty string. Using this flag is discouraged as it
1812
   *                              may have security implications.
1813
   *                              </td>
1814
   *                              </tr>
1815
   *                              <tr valign="top">
1816
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1817
   *                              <td>
1818
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1819
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1820
   *                              </td>
1821
   *                              </tr>
1822
   *                              <tr valign="top">
1823
   *                              <td><b>ENT_DISALLOWED</b></td>
1824
   *                              <td>
1825
   *                              Replace invalid code points for the given document type with a
1826
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1827
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1828
   *                              instance, to ensure the well-formedness of XML documents with
1829
   *                              embedded external content.
1830
   *                              </td>
1831
   *                              </tr>
1832
   *                              <tr valign="top">
1833
   *                              <td><b>ENT_HTML401</b></td>
1834
   *                              <td>
1835
   *                              Handle code as HTML 4.01.
1836
   *                              </td>
1837
   *                              </tr>
1838
   *                              <tr valign="top">
1839
   *                              <td><b>ENT_XML1</b></td>
1840
   *                              <td>
1841
   *                              Handle code as XML 1.
1842
   *                              </td>
1843
   *                              </tr>
1844
   *                              <tr valign="top">
1845
   *                              <td><b>ENT_XHTML</b></td>
1846
   *                              <td>
1847
   *                              Handle code as XHTML.
1848
   *                              </td>
1849
   *                              </tr>
1850
   *                              <tr valign="top">
1851
   *                              <td><b>ENT_HTML5</b></td>
1852
   *                              <td>
1853
   *                              Handle code as HTML 5.
1854
   *                              </td>
1855
   *                              </tr>
1856
   *                              </table>
1857
   *                              </p>
1858
   * @param string $encoding      [optional] <p>
1859
   *                              Defines encoding used in conversion.
1860
   *                              </p>
1861
   *                              <p>
1862
   *                              For the purposes of this function, the encodings
1863
   *                              ISO-8859-1, ISO-8859-15,
1864
   *                              UTF-8, cp866,
1865
   *                              cp1251, cp1252, and
1866
   *                              KOI8-R are effectively equivalent, provided the
1867
   *                              <i>string</i> itself is valid for the encoding, as
1868
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1869
   *                              the same positions in all of these encodings.
1870
   *                              </p>
1871
   * @param bool   $double_encode [optional] <p>
1872
   *                              When <i>double_encode</i> is turned off PHP will not
1873
   *                              encode existing html entities, the default is to convert everything.
1874
   *                              </p>
1875
   *
1876
   * @return string The converted string.
1877
   * </p>
1878
   * <p>
1879
   * If the input <i>string</i> contains an invalid code unit
1880
   * sequence within the given <i>encoding</i> an empty string
1881
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1882
   * <b>ENT_SUBSTITUTE</b> flags are set.
1883
   */
1884 1
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // The literal 'UTF-8' is used as-is; every other value goes through
    // normalize_encoding() with 'UTF-8' as the fallback argument.
    $targetEncoding = $encoding === 'UTF-8'
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return \htmlspecialchars($str, $flags, $targetEncoding, $double_encode);
  }
1892
1893
  /**
1894
   * Checks whether iconv is available on the server.
1895
   *
1896
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
1897
   */
1898 1
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already returns a boolean.
    $available = \extension_loaded('iconv');

    return $available;
  }
1902
1903
  /**
1904
   * alias for "UTF8::decimal_to_chr()"
1905
   *
1906
   * @see UTF8::decimal_to_chr()
1907
   *
1908
   * @param mixed $int
1909
   *
1910
   * @return string
1911
   */
1912 2
  public static function int_to_chr($int): string
  {
    // Pure alias: the argument is forwarded unchanged to decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
1916
1917
  /**
1918
   * Converts Integer to hexadecimal U+xxxx code point representation.
1919
   *
1920
   * INFO: opposite to UTF8::hex_to_int()
1921
   *
1922
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
1923
   * @param string $pfix [optional]
1924
   *
1925
   * @return string <p>The code point, or empty string on failure.</p>
1926
   */
1927 3
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    $digits = \dechex($int);

    // Four or more hex digits are emitted as produced by dechex().
    if (\strlen($digits) >= 4) {
      return $pfix . $digits;
    }

    // Shorter values are left-padded with zeros to exactly four digits.
    return $pfix . \substr('0000' . $digits, -4);
  }
1935
1936
  /**
1937
   * Checks whether intl-char is available on the server.
1938
   *
1939
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
1940
   */
1941 1
  public static function intlChar_loaded(): bool
  {
    // Availability is decided solely by whether the IntlChar class can be resolved.
    $available = \class_exists('IntlChar');

    return $available;
  }
1945
1946
  /**
1947
   * Checks whether intl is available on the server.
1948
   *
1949
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
1950
   */
1951 4
  public static function intl_loaded(): bool
  {
    // Availability is decided solely by whether the "intl" extension is loaded.
    $available = \extension_loaded('intl');

    return $available;
  }
1955
1956
  /**
1957
   * alias for "UTF8::is_ascii()"
1958
   *
1959
   * @see        UTF8::is_ascii()
1960
   *
1961
   * @param string $str
1962
   *
1963
   * @return boolean
1964
   *
1965
   * @deprecated <p>use "UTF8::is_ascii()"</p>
1966
   */
1967 1
  public static function isAscii(string $str): bool
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_ascii().
    return self::is_ascii($str);
  }
1971
1972
  /**
1973
   * alias for "UTF8::is_base64()"
1974
   *
1975
   * @see        UTF8::is_base64()
1976
   *
1977
   * @param string $str
1978
   *
1979
   * @return bool
1980
   *
1981
   * @deprecated <p>use "UTF8::is_base64()"</p>
1982
   */
1983 1
  public static function isBase64(string $str): bool
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_base64().
    return self::is_base64($str);
  }
1987
1988
  /**
1989
   * alias for "UTF8::is_binary()"
1990
   *
1991
   * @see        UTF8::is_binary()
1992
   *
1993
   * @param mixed $str
1994
   *
1995
   * @return bool
1996
   *
1997
   * @deprecated <p>use "UTF8::is_binary()"</p>
1998
   */
1999 1
  public static function isBinary($str): bool
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_binary().
    return self::is_binary($str);
  }
2003
2004
  /**
2005
   * alias for "UTF8::is_bom()"
2006
   *
2007
   * @see        UTF8::is_bom()
2008
   *
2009
   * @param string $utf8_chr
2010
   *
2011
   * @return boolean
2012
   *
2013
   * @deprecated <p>use "UTF8::is_bom()"</p>
2014
   */
2015 1
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_bom().
    return self::is_bom($utf8_chr);
  }
2019
2020
  /**
2021
   * alias for "UTF8::is_html()"
2022
   *
2023
   * @see        UTF8::is_html()
2024
   *
2025
   * @param string $str
2026
   *
2027
   * @return boolean
2028
   *
2029
   * @deprecated <p>use "UTF8::is_html()"</p>
2030
   */
2031 1
  public static function isHtml(string $str): bool
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_html().
    return self::is_html($str);
  }
2035
2036
  /**
2037
   * alias for "UTF8::is_json()"
2038
   *
2039
   * @see        UTF8::is_json()
2040
   *
2041
   * @param string $str
2042
   *
2043
   * @return bool
2044
   *
2045
   * @deprecated <p>use "UTF8::is_json()"</p>
2046
   */
2047
  public static function isJson(string $str): bool
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_json().
    return self::is_json($str);
  }
2051
2052
  /**
2053
   * alias for "UTF8::is_utf16()"
2054
   *
2055
   * @see        UTF8::is_utf16()
2056
   *
2057
   * @param string $str
2058
   *
2059
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2060
   *
2061
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2062
   */
2063 1
  public static function isUtf16(string $str)
  {
    // Deprecated camelCase alias: returns whatever is_utf16() returns (int|false).
    return self::is_utf16($str);
  }
2067
2068
  /**
2069
   * alias for "UTF8::is_utf32()"
2070
   *
2071
   * @see        UTF8::is_utf32()
2072
   *
2073
   * @param string $str
2074
   *
2075
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2076
   *
2077
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2078
   */
2079 1
  public static function isUtf32(string $str)
  {
    // Deprecated camelCase alias: returns whatever is_utf32() returns (int|false).
    return self::is_utf32($str);
  }
2083
2084
  /**
2085
   * alias for "UTF8::is_utf8()"
2086
   *
2087
   * @see        UTF8::is_utf8()
2088
   *
2089
   * @param string $str
2090
   * @param bool   $strict
2091
   *
2092
   * @return bool
2093
   *
2094
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2095
   */
2096 16
  public static function isUtf8($str, $strict = false): bool
  {
    // Deprecated camelCase alias: both arguments are forwarded unchanged to is_utf8().
    return self::is_utf8($str, $strict);
  }
2100
2101
  /**
2102
   * Checks if a string is 7 bit ASCII.
2103
   *
2104
   * @param string $str <p>The string to check.</p>
2105
   *
2106
   * @return bool <p>
2107
   *              <strong>true</strong> if it is ASCII<br>
2108
   *              <strong>false</strong> otherwise
2109
   *              </p>
2110
   */
2111 56
  public static function is_ascii(string $str): bool
  {
    // The empty string is treated as ASCII.
    if ($str === '') {
      return true;
    }

    // Any byte outside the allowed set makes the string non-ASCII. Allowed are
    // \x09, \x0A, \x0D, \x10, \x13 and the printable range \x20-\x7E.
    // NOTE(review): \x10 and \x13 are DLE/DC3; they may have been meant as
    // decimal 10/13 (LF/CR) — confirm before changing, callers may rely on it.
    $hasForbiddenByte = (bool)\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$hasForbiddenByte;
  }
2119
2120
  /**
2121
   * Returns true if the string is base64 encoded, false otherwise.
2122
   *
2123
   * @param string $str <p>The input string.</p>
2124
   *
2125
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2126
   */
2127 1
  public static function is_base64(string $str): bool
  {
    // An empty string is never reported as base64.
    if ($str === '') {
      return false;
    }

    // Strict mode makes base64_decode() return false for characters outside
    // the base64 alphabet.
    $decoded = \base64_decode($str, true);

    // Compare against false explicitly: a truthiness test would wrongly reject
    // valid input whose decoded payload is the string "0" (e.g. "MA==").
    if ($decoded === false) {
      return false;
    }

    // Only the canonical encoding of the decoded payload counts as base64.
    return \base64_encode($decoded) === $str;
  }
2133
2134
  /**
2135
   * Check if the input is binary (heuristic check; this looks like a hack).
2136
   *
2137
   * @param mixed $input
2138
   *
2139
   * @return bool
2140
   */
2141 18
  public static function is_binary($input): bool
  {
    $input = (string)$input;

    // Empty input is never considered binary.
    if ($input === '') {
      return false;
    }

    // A string made up only of the characters "0" and "1" counts as binary.
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Otherwise the presence of at least one NUL byte decides. This also covers
    // the case where NUL bytes make up more than 30% of the input, so no
    // separate ratio check is needed.
    return \substr_count($input, "\x00") > 0;
  }
2163
2164
  /**
2165
   * Check if the file is binary.
2166
   *
2167
   * @param string $file
2168
   *
2169
   * @return boolean
2170
   */
2171 1
  public static function is_binary_file($file): bool
  {
    // Start with an empty block: is_binary('') is false, so a file that cannot
    // be opened or read is reported as "not binary".
    $block = '';

    try {
      $fp = \fopen($file, 'rb');

      // fopen() reports failure by returning false. Passing that on to fread()
      // would raise a TypeError, which the catch below does not cover.
      if ($fp !== false) {
        try {
          // Only the first 512 bytes are inspected.
          $chunk = \fread($fp, 512);
          if ($chunk !== false) {
            $block = $chunk;
          }
        } finally {
          // Always release the handle, even if reading throws.
          \fclose($fp);
        }
      }
    } catch (\Exception $e) {
      // Presumably here for error handlers that turn warnings into exceptions;
      // in that case the file is treated as empty.
      $block = '';
    }

    return self::is_binary($block);
  }
2183
2184
  /**
2185
   * Checks if the given string is equal to any "Byte Order Mark".
2186
   *
2187
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2188
   *
2189
   * @param string $str <p>The input string.</p>
2190
   *
2191
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2192
   */
2193 1
  public static function is_bom($str): bool
  {
    // Strict comparison against every key of the BOM table; the byte-length
    // values stored in the table are not needed here.
    $matched = false;
    foreach (self::$BOM as $candidate => $unusedLength) {
      if ($candidate === $str) {
        $matched = true;
        break;
      }
    }

    return $matched;
  }
2203
2204
  /**
2205
   * Check if the string contains any html-tags <lall>.
2206
   *
2207
   * @param string $str <p>The input string.</p>
2208
   *
2209
   * @return boolean
2210
   */
2211 1
  public static function is_html(string $str): bool
  {
    // The empty string cannot contain a tag.
    if ($str === '') {
      return false;
    }

    // Look for one opening, closing or self-closing tag, optionally carrying
    // attributes with quoted or unquoted values.
    $found = [];
    \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str, $found);

    // A non-empty match array means at least one tag was found.
    return \count($found) !== 0;
  }
2224
2225
  /**
2226
   * Try to check if "$str" is an json-string.
2227
   *
2228
   * @param string $str <p>The input string.</p>
2229
   *
2230
   * @return bool
2231
   */
2232 1
  public static function is_json(string $str): bool
  {
    // The empty string is never JSON.
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only decoded objects and arrays qualify; scalar results are rejected.
    if (\is_object($decoded) !== true && \is_array($decoded) !== true) {
      return false;
    }

    // The decoder must also not have recorded an error.
    return \json_last_error() === JSON_ERROR_NONE;
  }
2248
2249
  /**
2250
   * Check if the string is UTF-16.
2251
   *
2252
   * @param string $str <p>The input string.</p>
2253
   *
2254
   * @return int|false <p>
2255
   *                   <strong>false</strong> if it is not UTF-16,<br>
2256
   *                   <strong>1</strong> for UTF-16LE,<br>
2257
   *                   <strong>2</strong> for UTF-16BE.
2258
   *                   </p>
2259
   */
2260 6 View Code Duplication
  public static function is_utf16(string $str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() accepts is examined further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // Score both byte orders with the same procedure, little endian first.
    $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // Round trip: UTF-8 -> candidate -> UTF-8 has to reproduce the first conversion.
      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTripped = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTripped !== $asUtf8) {
        continue;
      }

      // NOTE(review): count_chars() is not visible here. Each key of its result
      // for the round-tripped text is looked up (strictly) among the values of
      // its result for $str — confirm that this is the intended comparison.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTripped, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$candidate]++;
        }
      }
    }

    // Equal scores (including 0/0) give no decision.
    if ($score['UTF-16BE'] === $score['UTF-16LE']) {
      return false;
    }

    // 1 = UTF-16LE, 2 = UTF-16BE
    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
  }
2308
2309
  /**
2310
   * Check if the string is UTF-32.
2311
   *
2312
   * @param string $str
2313
   *
2314
   * @return int|false <p>
2315
   *                   <strong>false</strong> if it is not UTF-32,<br>
2316
   *                   <strong>1</strong> for UTF-32LE,<br>
2317
   *                   <strong>2</strong> for UTF-32BE.
2318
   *                   </p>
2319
   */
2320 3 View Code Duplication
  public static function is_utf32(string $str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() accepts is examined further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // Score both byte orders with the same procedure, little endian first.
    $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
    foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // Round trip: UTF-8 -> candidate -> UTF-8 has to reproduce the first conversion.
      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTripped = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTripped !== $asUtf8) {
        continue;
      }

      // NOTE(review): count_chars() is not visible here. Each key of its result
      // for the round-tripped text is looked up (strictly) among the values of
      // its result for $str — confirm that this is the intended comparison.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTripped, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$candidate]++;
        }
      }
    }

    // Equal scores (including 0/0) give no decision.
    if ($score['UTF-32BE'] === $score['UTF-32LE']) {
      return false;
    }

    // 1 = UTF-32LE, 2 = UTF-32BE
    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
  }
2368
2369
  /**
2370
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2371
   *
2372
   * @see    http://hsivonen.iki.fi/php-utf8/
2373
   *
2374
   * @param string|string[] $str    <p>The string to be checked.</p>
2375
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2376
   *
2377
   * @return bool
2378
   */
2379 61
  public static function is_utf8($str, bool $strict = false): bool
2380
  {
2381 61
    if (\is_array($str) === true) {
2382 1
      foreach ($str as $k => $v) {
2383 1
        if (false === self::is_utf8($v, $strict)) {
2384 1
          return false;
2385
        }
2386
      }
2387
2388
      return true;
2389
    }
2390
2391 61
    if (!isset($str[0])) {
2392 3
      return true;
2393
    }
2394
2395 59
    if ($strict === true) {
2396 1
      if (self::is_utf16($str) !== false) {
2397 1
        return false;
2398
      }
2399
2400
      if (self::is_utf32($str) !== false) {
2401
        return false;
2402
      }
2403
    }
2404
2405 59
    if (self::pcre_utf8_support() !== true) {
2406
2407
      // If even just the first character can be matched, when the /u
2408
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
2409
      // invalid, nothing at all will match, even if the string contains
2410
      // some valid sequences
2411
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
2412
    }
2413
2414 59
    $mState = 0; // cached expected number of octets after the current octet
2415
    // until the beginning of the next UTF8 character sequence
2416 59
    $mUcs4 = 0; // cached Unicode character
2417 59
    $mBytes = 1; // cached expected number of octets in the current sequence
2418
2419 59
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
2420
      self::checkForSupport();
2421
    }
2422
2423 59
    if (self::$ORD === null) {
2424 1
      self::$ORD = self::getData('ord');
2425
    }
2426
2427 59
    $len = self::strlen_in_byte($str);
2428
    /** @noinspection ForeachInvariantsInspection */
2429 59
    for ($i = 0; $i < $len; $i++) {
2430 59
      $in = self::$ORD[$str[$i]];
2431 59
      if ($mState === 0) {
2432
        // When mState is zero we expect either a US-ASCII character or a
2433
        // multi-octet sequence.
2434 59
        if (0 === (0x80 & $in)) {
2435
          // US-ASCII, pass straight through.
2436 54
          $mBytes = 1;
2437 56 View Code Duplication
        } elseif (0xC0 === (0xE0 & $in)) {
2438
          // First octet of 2 octet sequence.
2439 49
          $mUcs4 = $in;
2440 49
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
2441 49
          $mState = 1;
2442 49
          $mBytes = 2;
2443 46
        } elseif (0xE0 === (0xF0 & $in)) {
2444
          // First octet of 3 octet sequence.
2445 30
          $mUcs4 = $in;
2446 30
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
2447 30
          $mState = 2;
2448 30
          $mBytes = 3;
2449 23 View Code Duplication
        } elseif (0xF0 === (0xF8 & $in)) {
2450
          // First octet of 4 octet sequence.
2451 13
          $mUcs4 = $in;
2452 13
          $mUcs4 = ($mUcs4 & 0x07) << 18;
2453 13
          $mState = 3;
2454 13
          $mBytes = 4;
2455 11
        } elseif (0xF8 === (0xFC & $in)) {
2456
          /* First octet of 5 octet sequence.
2457
          *
2458
          * This is illegal because the encoded codepoint must be either
2459
          * (a) not the shortest form or
2460
          * (b) outside the Unicode range of 0-0x10FFFF.
2461
          * Rather than trying to resynchronize, we will carry on until the end
2462
          * of the sequence and let the later error handling code catch it.
2463
          */
2464 4
          $mUcs4 = $in;
2465 4
          $mUcs4 = ($mUcs4 & 0x03) << 24;
2466 4
          $mState = 4;
2467 4
          $mBytes = 5;
2468 8 View Code Duplication
        } elseif (0xFC === (0xFE & $in)) {
2469
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
2470 4
          $mUcs4 = $in;
2471 4
          $mUcs4 = ($mUcs4 & 1) << 30;
2472 4
          $mState = 5;
2473 4
          $mBytes = 6;
2474
        } else {
2475
          /* Current octet is neither in the US-ASCII range nor a legal first
2476
           * octet of a multi-octet sequence.
2477
           */
2478 59
          return false;
2479
        }
2480
      } else {
2481
        // When mState is non-zero, we expect a continuation of the multi-octet
2482
        // sequence
2483 54
        if (0x80 === (0xC0 & $in)) {
2484
          // Legal continuation.
2485 50
          $shift = ($mState - 1) * 6;
2486 50
          $tmp = $in;
2487 50
          $tmp = ($tmp & 0x0000003F) << $shift;
2488 50
          $mUcs4 |= $tmp;
2489
          /**
2490
           * End of the multi-octet sequence. mUcs4 now contains the final
2491
           * Unicode code point to be output
2492
           */
2493 50
          if (0 === --$mState) {
2494
            /*
2495
            * Check for illegal sequences and code points.
2496
            */
2497
            // From Unicode 3.1, non-shortest form is illegal
2498
            if (
2499 50
                (2 === $mBytes && $mUcs4 < 0x0080) ||
2500 50
                (3 === $mBytes && $mUcs4 < 0x0800) ||
2501 50
                (4 === $mBytes && $mUcs4 < 0x10000) ||
2502 50
                (4 < $mBytes) ||
2503
                // From Unicode 3.2, surrogate characters are illegal.
2504 50
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
2505
                // Code points outside the Unicode range are illegal.
2506 50
                ($mUcs4 > 0x10FFFF)
2507
            ) {
2508 7
              return false;
2509
            }
2510
            // initialize UTF8 cache
2511 50
            $mState = 0;
2512 50
            $mUcs4 = 0;
2513 50
            $mBytes = 1;
2514
          }
2515
        } else {
2516
          /**
2517
           *((0xC0 & (*in) != 0x80) && (mState != 0))
2518
           * Incomplete multi-octet sequence.
2519
           */
2520 26
          return false;
2521
        }
2522
      }
2523
    }
2524
2525 29
    return true;
2526
  }
2527
2528
  /**
   * Decodes a JSON string after running it through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The JSON string to decode. It is passed to UTF8::filter() before decoding.</p>
   * @param bool   $assoc   [optional] <p>Forwarded to \json_decode(): when <b>TRUE</b>, objects are returned
   *                        as associative arrays.</p>
   * @param int    $depth   [optional] <p>Forwarded to \json_decode(): the maximum nesting depth.</p>
   * @param int    $options [optional] <p>Forwarded to \json_decode(): bitmask of JSON decode options
   *                        (e.g. <b>JSON_BIGINT_AS_STRING</b>).</p>
   *
   * @return mixed <p>The result of \json_decode(): the decoded value in the appropriate PHP type, or
   *               <b>NULL</b> if the input cannot be decoded or is nested deeper than $depth.</p>
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filtered = self::filter($json);

    return \json_decode($filtered, $assoc, $depth, $options);
  }
2572
2573
  /**
   * Returns the JSON representation of a value after running it through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>The value to encode. It is passed to UTF8::filter() before encoding.</p>
   * @param int   $options [optional] <p>Forwarded to \json_encode(): bitmask of JSON encode options
   *                       (e.g. <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_UNICODE</b>).</p>
   * @param int   $depth   [optional] <p>Forwarded to \json_encode(): the maximum depth, must be greater
   *                       than zero.</p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    $filtered = self::filter($value);

    // No ": string" return type on purpose: \json_encode() yields false on failure and this file runs
    // with strict_types=1, so a declared string return type would raise a TypeError instead of
    // handing the documented FALSE back to the caller.
    return \json_encode($filtered, $options, $depth);
  }
2617
2618
  /**
   * Lowercases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Charset handed to UTF8::substr() and UTF8::strtolower().</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The lowercased first character followed by the unchanged remainder.</p>
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // UTF8::substr() may hand back false; the string cast turns that into ''
    $remainder = (string)self::substr($str, 1, null, $encoding, $cleanUtf8);
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);

    return self::strtolower($firstChar, $encoding, $cleanUtf8) . $remainder;
  }
2642
2643
  /**
   * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $word      <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The result of UTF8::lcfirst().</p>
   */
  public static function lcword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
2658
2659
  /**
   * Lowercases the first character of every word in the string.
   *
   * The string is split with UTF8::str_to_words(); every non-empty piece that is not listed in
   * $exceptions is passed through UTF8::lcfirst(), then all pieces are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that are kept as they are (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare against '' explicitly: a loose "!$str" would also swallow the valid input '0'
    if ($str === '') {
      return '';
    }

    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty pieces only; the piece '0' is falsy but must stay in the output
      if ((string)$word === '') {
        continue;
      }

      // with an empty exception list \in_array() is always false, so every word gets lowercased
      if (!\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
2708
2709
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. When omitted (INF) or empty, Unicode
   *                      separators and "other" characters (\pZ, \pC) are stripped. The string '0' is
   *                      treated as a real character list.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left. If the regular
   *                expression fails, the input is returned unchanged.</p>
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ($str === '') {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // '0' is falsy in PHP but is a legitimate character list, so it must not select the default pattern.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = '/^[\pZ\pC]+/u';
    } else {
      // NOTE(review): rxClass() is assumed to return a ready-to-use character class for $chars — confirm.
      $pattern = '/^' . self::rxClass($chars) . '+/u';
    }

    // \preg_replace() returns null on failure, which would break the declared string return type
    // under strict_types; fall back to the untouched input instead.
    return \preg_replace($pattern, '', $str) ?? $str;
  }
2730
2731
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (the array is concatenated).</p>
   *
   * @return string <p>The character with the highest code point, or an empty string if the data
   *                contains no code points.</p>
   */
  public static function max($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // \max() must not be called with an empty list (warning on PHP 7, ValueError on PHP 8)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(\max($codePoints));
  }
2746
2747
  /**
   * Returns the largest per-character byte count reported by UTF8::chr_size_list() for the string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if the size list is empty.</p>
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    if (\count($sizes) === 0) {
      return 0;
    }

    return (int)\max($sizes);
  }
2764
2765
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded(): bool
  {
    if (!\extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
2780
2781 1
  /**
   * Checks whether the "mbstring.func_overload" ini setting has the MB_OVERLOAD_STRING bit set.
   *
   * @return bool <p><strong>false</strong> if the MB_OVERLOAD_STRING constant is not defined or the
   *              bit is not set, <strong>true</strong> otherwise.</p>
   */
  private static function mbstring_overloaded(): bool
  {
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    return (bool)(\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
2787
2788
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (the array is concatenated).</p>
   *
   * @return string <p>The character with the lowest code point, or an empty string if the data
   *                contains no code points.</p>
   */
  public static function min($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // \min() must not be called with an empty list (warning on PHP 7, ValueError on PHP 8)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(\min($codePoints));
  }
2803
2804
  /**
   * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   * @param mixed  $fallback <p>Value returned for an empty encoding name.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
2820
2821
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty name => $fallback; "UTF-8" / "UTF8" => "UTF-8"; per-request cache; exact match in
   * the known-encodings list; finally the upper-cased name with everything except letters, digits and
   * whitespace removed is looked up in an alias table. Names without an alias are returned upper-cased.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by
   *                default)</p>
   */
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    static $cache = [];

    if (!$encoding) {
      return $fallback;
    }

    if ($encoding === 'UTF-8' || $encoding === 'UTF8') {
      return 'UTF-8';
    }

    if (isset($cache[$encoding])) {
      return $cache[$encoding];
    }

    // the list of known encodings is loaded lazily
    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    if (\in_array($encoding, self::$ENCODINGS, true)) {
      return $cache[$encoding] = $encoding;
    }

    $normalized = \strtoupper($encoding);
    $aliasKey = \preg_replace('/[^a-zA-Z0-9\s]/', '', $normalized);

    $aliases = [
        // ISO-8859 family
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        // Windows code pages
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        // Unicode transformation formats
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        // raw bytes
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (isset($aliases[$aliasKey])) {
      $normalized = $aliases[$aliasKey];
    }

    // cached under the name exactly as it was passed in
    return $cache[$encoding] = $normalized;
  }
2938
2939
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "utf8_msword" data table found in the string is replaced by the matching value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    static $searchCache = null;
    static $replaceCache = null;

    if ($str === '') {
      return '';
    }

    if ($searchCache === null) {

      // the replacement table is loaded lazily and then split once into search / replace lists
      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $searchCache = \array_keys(self::$UTF8_MSWORD);
      $replaceCache = \array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($searchCache, $replaceCache, $str);
  }
2968
2969
  /**
   * Normalize the whitespace.
   *
   * Every entry of the whitespace table is replaced by a plain space; unless requested otherwise, the
   * entries of the bidirectional-control table are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    static $whitespaceLists = [];
    static $bidiControls = null;

    if ($str === '') {
      return '';
    }

    // one cached search list per variant: 0 = replace NBSP too, 1 = keep NBSP
    $variant = (int)$keepNonBreakingSpace;

    if (!isset($whitespaceLists[$variant])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaceLists[$variant] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiControls === null) {
        $bidiControls = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($bidiControls, '', $str);
    }

    return \str_replace($whitespaceLists[$variant], ' ', $str);
  }
3011
3012
  /**
   * Removes every run of characters matched by the POSIX class [[:space:]] (evaluated with the
   * /u modifier), e.g. spaces, tabs and newlines.
   *
   * @param string $str
   *
   * @return string <p>The string without whitespace; an empty string if the replacement fails.</p>
   */
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    return (string)$stripped;
  }
3029
3030
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are memoized per "input + normalized encoding". IntlChar::ord() is used when available
   * and yields a non-zero result; otherwise the code point is computed from the first (up to four)
   * bytes of the character.
   *
   * @param string $chr      <p>The character of which to calculate code point.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 if no byte could be read from the input (e.g. an empty string).
   *             </p>
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    // the cache key is built from the input as it was passed in, before any conversion
    $input = $chr;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // only convert if the normalized name is still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $input . $encoding;
    if (isset($CHAR_CACHE[$cacheKey])) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        $CHAR_CACHE[$cacheKey] = $intlCode;

        return $intlCode;
      }
    }

    // manual decoding: unpack() returns the byte values with 1-based keys
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // 4 byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // 3 byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // 2 byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing readable at all)
      $codePoint = $lead;
    }

    $CHAR_CACHE[$cacheKey] = $codePoint;

    return $codePoint;
  }
3095
3096
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never places variables in the current scope;
   *          the result is only available through the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or if $result
   *              is empty afterwards.</p>
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    if ($parsed === false) {
      return false;
    }

    return !empty($result);
  }
3121
3122
  /**
   * Checks if the /u modifier is available that enables Unicode support in PCRE.
   *
   * An empty pattern with the /u modifier is matched against an empty string; errors are silenced.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support(): bool
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @\preg_match('//u', '');

    return (bool)$matched;
  }
3132
3133
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point in this order: a string of decimal digits is used as
   * the number itself, a string of hexadecimal digits is passed to UTF8::hex_to_int(), anything else
   * (including hex-looking input that hex_to_int() does not resolve) is treated as a character and
   * passed to UTF8::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters from start to end; an empty array if a boundary is empty or
   *               resolves to code point 0.</p>
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    // shared resolver for both boundaries
    $toCodePoint = static function ($value): int {
      // the ctype functions and UTF8::ord() expect strings
      $value = (string)$value;

      if (\ctype_digit($value)) {
        return (int)$value;
      }

      if (\ctype_xdigit($value)) {
        $fromHex = (int)self::hex_to_int($value);
        if ($fromHex !== 0) {
          return $fromHex;
        }
        // not resolved as hex (e.g. the single character "a"): fall through and treat it as a character
      }

      return self::ord($value);
    };

    $start = $toCodePoint($var1);
    if ($start === 0) {
      return [];
    }

    $end = $toCodePoint($var2);
    if ($end === 0) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
3179
3180
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the string stops changing).</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal html entities, which the loop below decodes
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    do {
      $previous = $str;

      $step = self::to_utf8($str);
      $step = self::html_entity_decode($step, $flags);
      $step = \rawurldecode($step);
      $str = self::fix_simple_utf8($step);

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
3228
3229
  /**
   * Alias for "UTF8::remove_bom()": the argument is forwarded unchanged.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3245
  /**
   * Strip a leading byte order mark (BOM) from a string.
   *
   * Every entry of the internal BOM table is tested, in table order, against the
   * start of the string (byte-wise); each entry that matches is cut off.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string without a leading BOM.</p>
   */
  public static function remove_bom(string $str): string
  {
    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $bom => $byteCount) {
      // only a BOM at byte offset 0 is of interest
      if (self::strpos($str, $bom, 0, '8BIT') !== 0) {
        continue;
      }

      $remainder = self::substr($str, $byteCount, null, '8BIT');

      // "substr()" may report false (e.g. nothing left after the BOM)
      $str = ($remainder === false) ? '' : (string)$remainder;
    }

    return $str;
  }
3270
3271
  /**
   * Collapse consecutive repetitions of a string into a single occurrence.
   *
   * e.g.: remove_duplicates('a  b   c') => 'a b c'
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String (or list of strings) whose consecutive repetitions are collapsed.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // an empty needle can not repeat, nothing to collapse
        if ($item === '') {
          continue;
        }

        // A callback is used on purpose: as a plain replacement string, an item
        // such as '$0' or '\1' would be interpreted as a back-reference.
        $str = (string)\preg_replace_callback(
            '/(?:' . \preg_quote($item, '/') . ')+/',
            static function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
3294
3295
  /**
   * Remove invisible (control) characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09) are kept.
   *
   * copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url encoded form (e.g. "%00", "%0B") of these characters.</p>
   * @param string $replacement <p>
   *                            Inserted in place of every removed sequence; it must not itself contain
   *                            anything that gets removed, because the replacement is repeated until
   *                            nothing matches anymore.
   *                            </p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" === "%0B"), so the patterns have to be as well
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until stable: removing one sequence can form a new one (e.g. "%0%000" => "%00")
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3328
3329
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement; an empty string removes the characters.</p>
   * @param bool   $processInvalidUtf8 <p>Also replace byte sequences that are not valid UTF-8.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // "mb_substitute_character()" accepts a code point or a keyword, not an
      // arbitrary string. So invalid bytes are first turned into U+FFFD and
      // the single "str_replace()" below swaps in the real replacement.
      $save = \mb_substitute_character();
      \mb_substitute_character(0xFFFD);
      try {
        $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      } finally {
        // always restore the global mbstring setting
        \mb_substitute_character($save);
      }

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
3378
3379
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>
   *                      Optional characters to be stripped. When omitted (or null, false, an empty
   *                      string / array) separator and control characters are stripped.
   *                      </p>
   *
   * @return string <p>
   *                The string with unwanted characters stripped from the right;
   *                the unchanged input if the regular expression fails (e.g. invalid UTF-8).
   *                </p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if (
        $chars === INF
        ||
        $chars === null
        ||
        $chars === false
        ||
        $chars === ''
        ||
        $chars === []
    ) {
      $pattern = '/[\pZ\pC]+$/u';
    } else {
      // "0" is a valid character list, so no loose truthiness check here
      $charList = \is_scalar($chars) ? (string)$chars : $chars;
      $pattern = '/' . self::rxClass($charList) . '+$/u';
    }

    $trimmed = \preg_replace($pattern, '', $str);

    // "preg_replace()" returns null on error, which must not leak through the string return type
    return \is_string($trimmed) ? $trimmed : $str;
  }
3400
3401
  /**
   * Build a regular expression fragment that matches any one of the given characters.
   *
   * The characters of "$s" are split via "UTF8::str_split()": short elements are
   * collected in a bracket expression (prefixed with "$class"), elements that
   * consist of more than one character become alternatives of a non-capturing group.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Extra content for the bracket expression, e.g. "\pL".</p>
   *
   * @return string <p>The fragment, without delimiters and without quantifier.</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    // the length prefix keeps e.g. ("ab", "") and ("a", "b") from sharing one cache entry
    $cacheKey = \strlen($s) . ':' . $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    $charClass = $class;
    $alternatives = [];

    foreach (self::str_split($s) as $cluster) {
      if ('-' === $cluster) {
        // a leading "-" is a literal inside of a bracket expression
        $charClass = '-' . $charClass;
      } elseif (!isset($cluster[2])) {
        // at most two bytes
        $charClass .= \preg_quote($cluster, '/');
      } elseif (1 === self::strlen($cluster)) {
        // a single multi-byte character
        $charClass .= $cluster;
      } else {
        // more than one character: can not live in a bracket expression,
        // quoted so that meta characters in it stay literal
        $alternatives[] = \preg_quote($cluster, '/');
      }
    }

    if ($charClass !== '') {
      $charClass = '[' . $charClass . ']';
    }

    if ($alternatives === []) {
      $return = $charClass;
    } else {
      if ($charClass !== '') {
        \array_unshift($alternatives, $charClass);
      }

      $return = '(?:' . \implode('|', $alternatives) . ')';
    }

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
3449
3450
  /**
   * WARNING: Prints the detected native UTF-8 support (libs) as HTML, e.g. for debugging.
   *
   * One "key - value" line is printed per entry of the internal support table.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $report = '';
    foreach (self::$SUPPORT as $feature => $state) {
      $report .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }

    echo '<pre>' . $report . '</pre>';
  }
3465
3466
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to return ASCII input unchanged.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ($char === '') {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // anything but the default is normalized first
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    $codePoint = self::ord($char, $charset);

    return '&#' . $codePoint . ';';
  }
3495
3496
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($str === '') {
      return [];
    }

    // init
    $chars = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: decode the UTF-8 byte sequences by hand

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $byteCount; $i++) {

        $lead = $str[$i];

        if (($lead & "\x80") === "\x00") {

          // 0xxxxxxx: single byte
          $chars[] = $lead;

        } elseif (
            isset($str[$i + 1])
            &&
            ($lead & "\xE0") === "\xC0"
        ) {

          // 110xxxxx: lead byte of a two byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($lead & "\xF0") === "\xE0"
        ) {

          // 1110xxxx: lead byte of a three byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($lead & "\xF8") === "\xF0"
        ) {

          // 11110xxx: lead byte of a four byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
        // bytes that match none of the branches are not added to the result
      }
    }

    if ($length > 1) {
      // glue "$length" characters together per element
      return \array_map(
          function ($chunk) {
            return \implode('', $chunk);
          },
          \array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
3613
3614
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: UTF-16 / UTF-32 (only for binary strings), ASCII,
   * UTF-8, "\mb_detect_encoding()" with a fixed candidate list and finally an
   * "iconv()" round-trip against the internal list of encodings.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // NOTE(review): each check is called only once now, assuming "is_utf16()" / "is_utf32()" have no side effects
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    if (\is_array(self::$ENCODINGS) === true) {
      foreach (self::$ENCODINGS as $encodingTmp) {
        # INFO: //IGNORE and //TRANSLIT still throw notice
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $roundTrip = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

        // "iconv()" returns false on failure; a direct comparison also
        // replaces the former md5 detour
        if ($roundTrip === $str) {
          return $encodingTmp;
        }
      }
    }

    return false;
  }
3718
3719
  /**
   * Check if the string ends with the given substring (byte-wise, case sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always false if one of the strings is empty.</p>
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
3739
3740
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always false if one of the strings is empty.</p>
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Cut the tail by characters, not by bytes: upper and lower case variants of
    // a character can differ in their byte length, so a byte based cut may split
    // a multi-byte character.
    $needleLength = self::strlen($needle);
    if (\is_int($needleLength) === false || $needleLength < 1) {
      return false;
    }

    $tail = self::substr($haystack, -$needleLength);
    if ($tail === false) {
      return false;
    }

    return self::strcasecmp((string)$tail, $needle) === 0;
  }
3760
3761
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty search string never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement(s), inserted literally
   *                       ("$" and "\" have no special meaning).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one case-insensitive unicode pattern per search term
    $patterns = [];
    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      if ($term === '') {
        // can never match: start of the string with a character in front of it
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($term, '/') . '/ui';
      }
    }

    // The search is done via "preg_replace()", where "$1" or "\1" in the
    // replacement would be back-references; escape them so that the
    // replacement is used literally, like in "str_ireplace()".
    $escape = static function ($value): string {
      return \strtr((string)$value, ['\\' => '\\\\', '$' => '\\$']);
    };
    $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

    return $subject;
  }
3804
3805
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always false if one of the strings is empty.</p>
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a match at offset 0 means the haystack starts with the needle
    return self::stripos($haystack, $needle) === 0;
  }
3825
3826
  /**
   * Limit the number of characters in a string without cutting a word in half.
   *
   * A string that is longer than "$length" is cut at the last space within the
   * limit and "$strAddOn" is appended; without such a space the string is cut
   * one character before the limit.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Max. number of characters, the add-on not included.</p>
   * @param string $strAddOn <p>Appended to a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…'): string
  {
    if ($str === '') {
      return '';
    }

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls right behind a word
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $cut = (string)self::substr($str, 0, $length);

    // everything in front of the last space, i.e. without the (maybe partial) last word
    $lastSpace = \strrpos($cut, ' ');
    $words = ($lastSpace === false || $lastSpace === 0) ? '' : \substr($cut, 0, $lastSpace);

    if ($words === '') {
      return (string)self::substr($cut, 0, $length - 1) . $strAddOn;
    }

    return $words . $strAddOn;
  }
3862
3863
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <p>
   *                The padded string; the unchanged input if "$pad_length" is not positive,
   *                smaller than the length of the input, or if "$pad_string" is empty.
   *                </p>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT): string
  {
    $str_length = (int)self::strlen($str);

    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = (int)self::strlen($pad_string);
    if ($ps_length < 1) {
      // nothing to pad with (and the guard against a division by zero below)
      return $str;
    }

    $diff = $pad_length - $str_length;

    // repeats the pad string often enough and cuts it to exactly "$width" characters
    $buildPadding = static function (int $width) use ($pad_string, $ps_length): string {
      if ($width < 1) {
        return '';
      }

      $repeated = \str_repeat($pad_string, (int)\ceil($width / $ps_length));

      return (string)self::substr($repeated, 0, $width);
    };

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = $buildPadding($diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // for an odd difference the extra character goes to the right side;
        // explicit integer widths (no float from "$diff / 2")
        $pre = $buildPadding((int)\floor($diff / 2));
        $post = $buildPadding((int)\ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $pre = '';
        $post = $buildPadding($diff);
    }

    return $pre . $str . $post;
  }
3917
3918
  /**
   * Repeat a string.
   *
   * The input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
3942
3943
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The needle; an array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The value that replaces found search values;
   *                       an array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The haystack: the string or array being searched and replaced on.
   *                       </p>
   *                       <p>
   *                       For an array the search and replace is performed with every
   *                       entry, and the return value is an array as well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
3976
3977
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement for the first occurrence.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject, unchanged if the search term was not found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $firstHit = self::strpos($subject, $search);

    if ($firstHit === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $firstHit, self::strlen($search));
  }
3996
3997
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split via "UTF8::split()", so multi-byte characters stay intact.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle(string $str): string
  {
    $characters = self::split($str);

    \shuffle($characters);

    return \implode('', $characters);
  }
4012
4013
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicate values
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
4038
4039
  /**
   * Split a string into an array of grapheme clusters.
   *
   * For an array as input every element is split on its own (recursively).
   *
   * @param string|string[] $str <p>The input string, or an array of input strings.</p>
   * @param int             $len <p>Number of grapheme clusters per array element.</p>
   *
   * @return array
   */
  public static function str_split($str, int $len = 1): array
  {
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if (!isset($str[0])) {
      return [];
    }

    if ($len < 1) {
      // an invalid length is handed over to the native function
      return \str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // glue "$len" clusters together per element
    $chunks = [];
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if ($position % $len === 0) {
        $chunks[++$chunkIndex] = $cluster;
      } else {
        $chunks[$chunkIndex] .= $cluster;
      }
    }

    return $chunks;
  }
4087
4088
  /**
   * Check if the string starts with the given substring (byte-wise, case sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always false if one of the strings is empty.</p>
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // compare only the leading bytes of the haystack with the needle
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
4108
4109
  /**
   * Get a binary representation ("0" / "1" digits) of a specific string.
   *
   * The bytes of the string are read as one big number, so leading zero bits are
   * not part of the result; an empty string results in "0".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary(string $str): string
  {
    $bytes = \unpack('C*', $str);
    if (\is_array($bytes) === false) {
      $bytes = [];
    }

    // Converted byte by byte: "base_convert()" works with floats internally
    // and loses precision once the input is longer than a few bytes.
    $bits = '';
    foreach ($bytes as $byte) {
      $bits .= \sprintf('%08b', $byte);
    }

    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4122
4123
  /**
   * Convert a string into an array of words.
   *
   * The words and the text in between them are both part of the result,
   * unless they are filtered out via the optional parameters.
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove values that are empty after trimming.</p>
   * @param null|int $removeShortValues <p>Remove values with at most this number of characters.</p>
   *
   * @return array
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if (!isset($str[0])) {
      return ($removeEmptyValues === true) ? [] : [''];
    }

    $wordChars = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $filterShort = ($removeShortValues !== null);
    if ($filterShort === false && $removeEmptyValues === false) {
      return $parts;
    }

    $kept = [];
    foreach ($parts as $part) {
      $tooShort = $filterShort && self::strlen($part) <= $removeShortValues;
      if ($tooShort) {
        continue;
      }

      $blank = ($removeEmptyValues === true) && \trim($part) === '';
      if ($blank) {
        continue;
      }

      $kept[] = $part;
    }

    return $kept;
  }
4178
4179
  /**
4180
   * alias for "UTF8::to_ascii()"
4181
   *
4182
   * @see UTF8::to_ascii()
4183
   *
4184
   * @param string $str
4185
   * @param string $unknown
4186
   * @param bool   $strict
4187
   *
4188
   * @return string
4189
   */
4190 7
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    // Pure alias: every argument is handed to to_ascii() unchanged.
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4194
4195
  /**
4196
   * Counts number of words in the UTF-8 string.
4197
   *
4198
   * @param string $str      <p>The input string.</p>
4199
   * @param int    $format   [optional] <p>
4200
   *                         <strong>0</strong> => return a number of words (default)<br>
4201
   *                         <strong>1</strong> => return an array of words<br>
4202
   *                         <strong>2</strong> => return an array of words with word-offset as key
4203
   *                         </p>
4204
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
4205
   *
4206
   * @return array|int <p>The number of words in the string</p>
4207
   */
4208 1
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    // The split result alternates separator / word, so the words sit on the odd indexes.
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    if ($format === 1) {
      // plain list of words
      $words = [];
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    if ($format === 2) {
      // words keyed by their character offset inside the input
      $words = [];
      $position = self::strlen($parts[0]);
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $words[$position] = $parts[$idx];
        $position += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $words;
    }

    // any other format: only the number of words
    return ($partCount - 1) / 2;
  }
4238
4239
  /**
4240
   * Case-insensitive string comparison.
4241
   *
4242
   * INFO: Case-insensitive version of UTF8::strcmp()
4243
   *
4244
   * @param string $str1
4245
   * @param string $str2
4246
   *
4247
   * @return int <p>
4248
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
4249
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
4250
   *             <strong>0</strong> if they are equal.
4251
   *             </p>
4252
   */
4253 11
  public static function strcasecmp(string $str1, string $str2): int
  {
    // Case-fold both operands first, then reuse the case-sensitive comparison.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4257
4258
  /**
4259
   * alias for "UTF8::strstr()"
4260
   *
4261
   * @see UTF8::strstr()
4262
   *
4263
   * @param string $haystack
4264
   * @param string $needle
4265
   * @param bool   $before_needle
4266
   * @param string $encoding
4267
   * @param bool   $cleanUtf8
4268
   *
4269
   * @return string|false
4270
   */
4271 1
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Pure alias: every argument is handed to strstr() unchanged.
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4275
4276
  /**
4277
   * Case-sensitive string comparison.
4278
   *
4279
   * @param string $str1
4280
   * @param string $str2
4281
   *
4282
   * @return int  <p>
4283
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
4284
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
4285
   *              <strong>0</strong> if they are equal.
4286
   *              </p>
4287
   */
4288 14
  public static function strcmp(string $str1, string $str2): int
  {
    // Byte-identical strings are equal; no normalization needed.
    if ($str1 === $str2) {
      return 0;
    }

    // Otherwise compare the canonically decomposed (NFD) forms byte-wise.
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return \strcmp($decomposed1, $decomposed2);
  }
4296
4297
  /**
4298
   * Find length of initial segment not matching mask.
4299
   *
4300
   * @param string $str
4301
   * @param string $charList
4302
   * @param int    $offset
4303
   * @param int    $length
4304
   *
4305
   * @return int|null
4306
   */
4307 15
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    // Without a mask there is nothing to measure against.
    if ($charList === '') {
      return null;
    }

    // Restrict the subject to the requested window first.
    if ($offset !== 0 || $length !== null) {
      $window = self::substr($str, $offset, $length);
      if ($window === false) {
        return null;
      }
      $str = (string)$window;
    }

    if ($str === '') {
      return null;
    }

    // Lazily capture everything in front of the first character that belongs to the mask.
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (\preg_match($pattern, $str, $matches)) {
      return self::strlen($matches[1]);
    }

    // No masked character at all: the whole (windowed) string counts.
    return self::strlen($str);
  }
4331
4332
  /**
4333
   * alias for "UTF8::stristr()"
4334
   *
4335
   * @see UTF8::stristr()
4336
   *
4337
   * @param string $haystack
4338
   * @param string $needle
4339
   * @param bool   $before_needle
4340
   * @param string $encoding
4341
   * @param bool   $cleanUtf8
4342
   *
4343
   * @return string|false
4344
   */
4345 1
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Pure alias: every argument is handed to stristr() unchanged.
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4349
4350
  /**
4351
   * Create a UTF-8 string from code points.
4352
   *
4353
   * INFO: opposite to UTF8::codepoints()
4354
   *
4355
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
4356
   *
4357
   * @return string <p>UTF-8 encoded string.</p>
4358
   */
4359 2
  public static function string(array $array): string
  {
    // Turn every code point into its character via chr(), then glue the pieces together.
    $toChar = [self::class, 'chr'];
    $chars = \array_map($toChar, $array);

    return \implode('', $chars);
  }
4372
4373
  /**
4374
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
4375
   *
4376
   * @param string $str <p>The input string.</p>
4377
   *
4378
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
4379
   */
4380 3
  public static function string_has_bom(string $str): bool
  {
    // Only the BOM byte sequences (the keys of the table) matter here, not their lengths.
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
4390
4391
  /**
4392
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
4393
   *
4394
   * @link http://php.net/manual/en/function.strip-tags.php
4395
   *
4396
   * @param string $str             <p>
4397
   *                                The input string.
4398
   *                                </p>
4399
   * @param string $allowable_tags  [optional] <p>
4400
   *                                You can use the optional second parameter to specify tags which should
4401
   *                                not be stripped.
4402
   *                                </p>
4403
   *                                <p>
4404
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
4405
   *                                can not be changed with allowable_tags.
4406
   *                                </p>
4407
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
4408
   *
4409
   * @return string <p>The stripped string.</p>
4410
   */
4411 2
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // Optionally clean the input via clean() before the native call strips the tags.
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
4423
4424
  /**
4425
   * Finds position of first occurrence of a string within another, case insensitive.
4426
   *
4427
   * @link http://php.net/manual/en/function.mb-stripos.php
4428
   *
4429
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
4430
   * @param string $needle    <p>The string to find in haystack.</p>
4431
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
4432
   * @param string $encoding  [optional] <p>Set the charset.</p>
4433
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4434
   *
4435
   * @return int|false <p>
4436
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
4437
   *                   or false if needle is not found.
4438
   *                   </p>
4439
   */
4440 10
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift
      // the position reported by the search functions below
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
    if ($useGrapheme) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4472
4473
  /**
4474
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
4475
   *
4476
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
4477
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
4478
   * @param bool   $before_needle  [optional] <p>
4479
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
4480
   *                               haystack before the first occurrence of the needle (excluding the needle).
4481
   *                               </p>
4482
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
4483
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
4484
   *
4485
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
4486
   */
4487 17
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may have emptied the needle. Compare against '' explicitly:
    // a loose "!$needle" would also treat the valid needle "0" as empty
    // and wrongly return the whole haystack.
    if ((string)$needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // pure ASCII input can safely use the native byte-wise function
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // last resort: lazily capture everything in front of the first
    // case-insensitive occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // skip the part in front of the needle and return the rest
    return self::substr($haystack, self::strlen($match[1]));
  }
4548
4549
  /**
4550
   * Get the string length, not the byte-length!
4551
   *
4552
   * @link     http://php.net/manual/en/function.mb-strlen.php
4553
   *
4554
   * @param string $str       <p>The string being checked for length.</p>
4555
   * @param string $encoding  [optional] <p>Set the charset.</p>
4556
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4557
   *
4558
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
4559
   *             character counted as +1)</p>
4560
   */
4561 89
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Byte-oriented encodings are answered right away, before any UTF-8 clean-up.
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
      case '8BIT':
        $plainStrlen = $encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false;

        return $plainStrlen ? \strlen($str) : \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // invalid characters would make "\mb_strlen" and "\iconv_strlen" miscount
      $str = self::clean($str);
    }

    $isUtf8 = $encoding === 'UTF-8';

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false) {
      \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // non-UTF-8 input without mbstring: iconv is asked first
    if (!$isUtf8 && self::$SUPPORT['iconv'] === true && self::$SUPPORT['mbstring'] === false) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($isUtf8 && self::$SUPPORT['intl'] === true) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    \preg_match_all('/./us', $str, $chars);
    $length = \count($chars[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
4655
4656
  /**
4657
   * Get string length in byte.
4658
   *
4659
   * @param string $str
4660
   *
4661
   * @return int
4662
   */
4663 70
  public static function strlen_in_byte(string $str): int
  {
    // Make sure the feature flags are populated before reading them, exactly
    // like the other methods of this class do; otherwise the very first call
    // would read a flag that has not been set yet.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // With "mbstring.func_overload" active, \strlen() is replaced by a
    // multibyte-aware version, so the byte count must be requested explicitly.
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      return \mb_strlen($str, '8BIT');
    }

    return \strlen($str);
  }
4673
4674
  /**
4675
   * Case insensitive string comparisons using a "natural order" algorithm.
4676
   *
4677
   * INFO: natural order version of UTF8::strcasecmp()
4678
   *
4679
   * @param string $str1 <p>The first string.</p>
4680
   * @param string $str2 <p>The second string.</p>
4681
   *
4682
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
4683
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
4684
   *             <strong>0</strong> if they are equal
4685
   */
4686 1
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    // Case-fold both operands first, then reuse the natural-order comparison.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4690
4691
  /**
4692
   * String comparisons using a "natural order" algorithm
4693
   *
4694
   * INFO: natural order version of UTF8::strcmp()
4695
   *
4696
   * @link  http://php.net/manual/en/function.strnatcmp.php
4697
   *
4698
   * @param string $str1 <p>The first string.</p>
4699
   * @param string $str2 <p>The second string.</p>
4700
   *
4701
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
4702
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
4703
   *             <strong>0</strong> if they are equal
4704
   */
4705 2
  public static function strnatcmp(string $str1, string $str2): int
  {
    // Byte-identical strings are equal; no folding needed.
    if ($str1 === $str2) {
      return 0;
    }

    // Otherwise compare the folded forms (strtonatfold()) with the native natural-order function.
    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return \strnatcmp($folded1, $folded2);
  }
4709
4710
  /**
4711
   * Case-insensitive string comparison of the first n characters.
4712
   *
4713
   * @link  http://php.net/manual/en/function.strncasecmp.php
4714
   *
4715
   * @param string $str1 <p>The first string.</p>
4716
   * @param string $str2 <p>The second string.</p>
4717
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
4718
   *
4719
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
4720
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
4721
   *             <strong>0</strong> if they are equal
4722
   */
4723 1
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    // Case-fold both operands first, then compare their first $len characters.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4727
4728
  /**
4729
   * String comparison of the first n characters.
4730
   *
4731
   * @link  http://php.net/manual/en/function.strncmp.php
4732
   *
4733
   * @param string $str1 <p>The first string.</p>
4734
   * @param string $str2 <p>The second string.</p>
4735
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
4736
   *
4737
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
4738
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
4739
   *             <strong>0</strong> if they are equal
4740
   */
4741 2
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // Cut both operands down to their first $len characters and compare the heads.
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
4748
4749
  /**
4750
   * Search a string for any of a set of characters.
4751
   *
4752
   * @link  http://php.net/manual/en/function.strpbrk.php
4753
   *
4754
   * @param string $haystack  <p>The string where char_list is looked for.</p>
4755
   * @param string $char_list <p>This parameter is case sensitive.</p>
4756
   *
4757
   * @return string|false <p>String starting from the character found, or false if it is not found.</p>
4758
   */
4759 1
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // Find the first character of the haystack that belongs to the list.
    $found = \preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m);
    if (!$found) {
      return false;
    }

    // Return the haystack from the byte position of that character on.
    $bytePos = \strpos($haystack, $m[0]);

    return \substr($haystack, $bytePos);
  }
4771
4772
  /**
4773
   * Find position of first occurrence of string in a string.
4774
   *
4775
   * @link http://php.net/manual/en/function.mb-strpos.php
4776
   *
4777
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
4778
   * @param string $needle    <p>The string to find in haystack.</p>
4779
   * @param int    $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
4780
   * @param string $encoding  [optional] <p>Set the charset.</p>
4781
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4782
   *
4783
   * @return int|false <p>
4784
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
4785
   *                   If needle is not found it returns false.
4786
   *                   </p>
4787
   */
4788 57
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // NOTE: the former "integer needle" conversion was removed here. $needle is
    // declared as string, so "(int)$needle === $needle" could never be true.

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // single-byte encoding: the native byte-wise function is sufficient
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // A negative offset counts from the end of the haystack. Translate it into
    // the equivalent non-negative offset first, so that the position computed
    // below stays relative to the start of the haystack and not to the slice.
    if ($offset < 0) {
      $offset = \max(0, self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // byte position of the needle inside the slice
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // skipped characters + characters in front of the needle inside the slice
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
4911
4912
  /**
4913
   * Finds the last occurrence of a character in a string within another.
4914
   *
4915
   * @link http://php.net/manual/en/function.mb-strrchr.php
4916
   *
4917
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
4918
   * @param string $needle        <p>The string to find in haystack</p>
4919
   * @param bool   $before_needle [optional] <p>
4920
   *                              Determines which portion of haystack
4921
   *                              this function returns.
4922
   *                              If set to true, it returns all of haystack
4923
   *                              from the beginning to the last occurrence of needle.
4924
   *                              If set to false, it returns all of haystack
4925
   *                              from the last occurrence of needle to the end,
4926
   *                              </p>
4927
   * @param string $encoding      [optional] <p>
4928
   *                              Character encoding name to use.
4929
   *                              If it is omitted, internal character encoding is used.
4930
   *                              </p>
4931
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
4932
   *
4933
   * @return string|false The portion of haystack or false if needle is not found.
4934
   */
4935 1 View Code Duplication
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // anything but the default charset name is normalized first
    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would
      // make the search below report a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strrchr($haystack, $needle, $before_needle, $charset);
  }
4951
4952
  /**
4953
   * Reverses characters order in the string.
4954
   *
4955
   * @param string $str The input string
4956
   *
4957
   * @return string The string with characters in the reverse sequence
4958
   */
4959 4
  public static function strrev(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // Split into pieces via split(), flip their order and join them again.
    $chars = self::split($str);

    return \implode('', \array_reverse($chars));
  }
4967
4968
  /**
   * Case-insensitive search for the last occurrence of a needle, returning a portion of the haystack.
   *
   * Thin wrapper around "\mb_strrichr()" that normalizes the encoding name and can
   * optionally clean both strings first.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string to search in.</p>
   * @param string $needle         <p>The string to search for.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               <b>true</b>: return the part of the haystack before the last occurrence
   *                               of the needle.<br>
   *                               <b>false</b> (default): return the part from the last occurrence of the
   *                               needle to the end.
   *                               </p>
   * @param string $encoding       [optional] <p>Character encoding name, default is "UTF-8".</p>
   * @param bool   $cleanUtf8      [optional] <p>Run self::clean() on needle and haystack before searching.</p>
   *
   * @return string|false <p>The requested portion of the haystack, or<br>false if the needle is not found.</p>
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // only non-default encoding names need to be normalized
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // invalid byte sequences are removed before the strings are handed to mbstring
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrichr($haystack, $needle, $before_needle, $charset);
  }
5006
5007
  /**
   * Find the position of the last occurrence of a string in another string, case-insensitively.
   *
   * Delegation order: "\mb_strripos()" when mbstring is available, then "\grapheme_strripos()"
   * (only for UTF-8 with intl), and finally self::strrpos() on upper-cased copies of both strings.
   *
   * @param string $haystack  <p>The string to look in.</p>
   * @param string $needle    <p>The string to look for.</p>
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>
   *                   If needle is not found, or haystack / needle is empty, it returns false.
   *                   </p>
   */
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // INFO: $needle is declared as "string", so the former code-point check
    //       "(int)$needle === $needle" could never be true and was removed as dead code.

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare upper-cased copies with the case-sensitive search

    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
5069
5070
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Delegation order: "\mb_strrpos()" when mbstring is available, then "\grapheme_strrpos()"
   * (only for UTF-8 with intl), and finally a byte-based "\strrpos()" whose result is
   * converted back into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <b>null</b> (the default) is treated as 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found, or haystack / needle is empty, it returns false.</p>
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // "\mb_strrpos()" and "\grapheme_strrpos()" declare a non-nullable int offset,
    // so the "null" default must never be forwarded to them
    $offset = $offset ?? 0;

    // a non-negative int needle is interpreted as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: the former "$encoding === true" fallback was removed, because
    //       $encoding is declared as "string" and can never be a bool here
    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // restrict the haystack according to the offset before the byte-based search
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position and re-add the skipped prefix
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
5160
5161
  /**
   * Get the length of the leading segment of a string that consists only of characters from a mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The characters that are allowed in the leading segment.</p>
   * @param int    $offset [optional] <p>Start position, forwarded to self::substr().</p>
   * @param int    $length [optional] <p>Maximum length to inspect, forwarded to self::substr().</p>
   *
   * @return int <p>Length of the matching segment; 0 if the string or the mask is empty or nothing matches.</p>
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    // narrow the subject down first, if a window was requested
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    }

    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // anchor a character class built from the mask at the start of the string
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (\preg_match($pattern, $str, $matches)) {
      return self::strlen($matches[0]);
    }

    return 0;
  }
5188
5189
  /**
   * Return the part of the haystack starting at (or, optionally, before) the first occurrence of the needle.
   *
   * Delegation order: "\mb_strstr()" when mbstring is available, then "\grapheme_strstr()"
   * (only for UTF-8 with intl), and finally a regular-expression based fallback.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle is returned (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      or haystack / needle is empty.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences would make the search functions report wrong results
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: the grapheme functions can't handle other encodings than UTF-8
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $prefix = $found[1];

    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
5256
5257
  /**
   * Unicode transformation for case-less matching.
   *
   * Steps, in order: replace the entries of self::$COMMON_CASE_FOLD, optionally replace the
   * "caseFolding_full" data set, optionally clean the string, then lower-case the result.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, additionally replace full case folding chars (default)<br>
   *                           <b>false</b>, use only the limited static array [self::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The case-folded string, or an empty string for empty input.</p>
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // search / replace lists are derived once and kept for later calls
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    $folded = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      // the full folding table is only loaded when it is actually requested
      static $fullFoldTable = null;
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      $folded = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    if ($cleanUtf8 === true) {
      $folded = self::clean($folded);
    }

    return self::strtolower($folded);
  }
5303
5304
  /**
   * Make a string lowercase.
   *
   * Without $lang the work is done by "\mb_strtolower()". With $lang and intl available, the
   * "<lang>-Lower" transliterator is used ("Any-Lower" if that id is unknown); if the
   * transliteration itself fails, the method falls back to "\mb_strtolower()".
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before any case conversion takes place
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Lower';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Lower';
        }

        // "transliterator_transliterate()" returns false on failure, which must not
        // leak through the "string" return type -> fall back to mbstring in that case
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }
      } else {
        \trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
      }
    }

    return \mb_strtolower($str, $encoding);
  }
5356
5357
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) and all non-spacing marks (\p{Mn}) are removed afterwards.
   * If the normalization or the replacement fails, the last successfully computed
   * value is returned instead of a non-string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if ($decomposed === false) {
      // normalization reported an error: keep the input instead of passing "false" on
      return $str;
    }

    // "\preg_replace()" returns null on error, which would violate the return type
    $stripped = \preg_replace('/\p{Mn}+/u', '', $decomposed);

    return $stripped ?? $decomposed;
  }
5369
5370
  /**
   * Make a string uppercase.
   *
   * Without $lang the work is done by "\mb_strtoupper()". With $lang and intl available, the
   * "<lang>-Upper" transliterator is used ("Any-Upper" if that id is unknown); if the
   * transliteration itself fails, the method falls back to "\mb_strtoupper()".
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before any case conversion takes place
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        // "transliterator_transliterate()" returns false on failure, which must not
        // leak through the "string" return type -> fall back to mbstring in that case
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }
      } else {
        // the message used to name "UTF8::strtolower()" by mistake
        \trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
      }
    }

    return \mb_strtoupper($str, $encoding);
  }
5421
5422
  /**
   * Translate characters or replace sub-strings.
   *
   * Modes, as implemented below:
   * - $to given: $from and $to are split via self::str_split(), cut to the same number of
   *   elements and used as a "from => to" replacement map.
   * - $to omitted and $from is an array: $from is used directly as the replacement map.
   * - $to omitted and $from is a string: every occurrence of $from is removed from $str.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string <p>
   *                A copy of str with the replacements applied; str itself if $from is identical to $to,
   *                and an empty string for empty input.
   *                </p>
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // nothing to translate if source and target are identical
    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // only as many pairs as both sides can provide
      $pairs = \min(\count($fromChars), \count($toChars));

      $from = \array_combine(
          \array_slice($fromChars, 0, $pairs),
          \array_slice($toChars, 0, $pairs)
      );
    }

    return \is_string($from) ? \str_replace($from, '', $str) : \strtr($str, $from);
  }
5467
5468
  /**
   * Return the width of a string, as reported by "\mb_strwidth()".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // only non-default encoding names need to be normalized
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // iconv and mbstring are not tolerant to invalid encoding
    $subject = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($subject, $charset);
  }
5492
5493
  /**
   * Change the case of all keys in an array.
   *
   * Keys are converted with self::strtoupper() for <strong>CASE_UPPER</strong> and with
   * self::strtolower() for every other value of $case.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default)</p>
   *
   * @return array <p>An array with its keys lower or uppercased.</p>
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // every value other than CASE_UPPER is treated like CASE_LOWER
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $key => $value) {
      $newKey = $toUpper ? self::strtoupper($key) : self::strtolower($key);
      $converted[$newKey] = $value;
    }

    return $converted;
  }
5525
5526
  /**
   * Get part of a string.
   *
   * Delegation order: a plain "\substr()" for CP850 (without mbstring function overloading),
   * "\mb_substr()" when mbstring is available, "\grapheme_substr()" (UTF-8 + intl),
   * "\iconv_substr()" (non-negative length + iconv), a plain "\substr()" for ASCII-only
   * input, and finally a slice of the character array returned by self::split().
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string $str       <p>The string being checked.</p>
   * @param int    $offset    <p>The first position used in str.</p>
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>An empty string is returned for an empty
   *                      <i>str</i> or a <i>length</i> of 0.</p><p>If <i>offset</i> is greater than
   *                      the length of <i>str</i>, <b>FALSE</b> will be returned.</p>
   */
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // empty input or an explicit zero length can only produce an empty string
    if (!isset($str[0]) || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // whole string requested
    if (!$offset && $length === null) {
      return $str;
    }

    // the string length is only needed for the offset check and the default length
    $strLength = ($offset || $length === null) ? self::strlen($str, $encoding) : 0;

    // an offset behind the end of the string is impossible
    if ($offset && $offset > $strLength) {
      return false;
    }

    if ($length === null) {
      $length = $strLength;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return \substr($str, $offset, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // INFO: the grapheme functions can't handle other encodings than UTF-8
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_substr($str, $offset, $length);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      $viaIconv = \iconv_substr($str, $offset, $length);
      if ($viaIconv !== false) {
        return $viaIconv;
      }
    }

    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, extract the relevant part and join it to a string again
    $chars = self::split($str);

    return \implode('', \array_slice($chars, $offset, $length));
  }
5640
5641
  /**
   * Compare two strings from an offset, up to length characters.
   *
   * If an offset or a length is given, $str1 is reduced to that window via self::substr()
   * and $str2 is cut to the length of the reduced $str1 before both are compared.
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if ($offset !== 0 || $length !== null) {
      // window of the main string
      $window1 = self::substr($str1, $offset, $length);
      $str1 = ($window1 === false) ? '' : (string)$window1;

      // the secondary string is cut to the same number of characters
      $window2 = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($window2 === false) ? '' : (string)$window2;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
5685
5686
  /**
   * Count the number of substring occurrences.
   *
   * With mbstring available the counting is done by "\mb_substr_count()", otherwise by a
   * regular expression built from the quoted needle.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack   <p>The string to search in.</p>
   * @param string $needle     <p>The substring to search for.</p>
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
   * @param int    $length     [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. Defaults to the length of the haystack in the given encoding.
   *                           </p>
   * @param string $encoding   <p>Set the charset.</p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>This functions returns an integer or false if there isn't a string.</p>
   */
  public static function substr_count(string $haystack, string $needle, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure in the same encoding that "self::substr()" uses below, otherwise
        // a non UTF-8 haystack could be cut short
        $length = self::strlen($haystack, $encoding);
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences would make the counting functions report wrong results
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: every match of the quoted needle is one occurrence
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
5768
5769
  /**
   * Remove a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>
   *                The haystack without the prefix; the unchanged haystack if the needle is empty
   *                or is not a prefix, and an empty string for an empty haystack.
   *                </p>
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    if ($needle === '') {
      return $haystack;
    }

    // nothing to strip unless the haystack starts with the needle
    if (self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5797
5798
  /**
   * Strips a trailing $needle from $haystack, comparing case-insensitively.
   *
   * The haystack is handed back untouched when the needle is empty or when the
   * haystack does not end with it.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix.</p>
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the matched suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return $remainder === false ? '' : (string)$remainder;
  }
5826
5827
  /**
   * Strips a leading $needle from $haystack (case-sensitive).
   *
   * The haystack is handed back untouched when the needle is empty or when the
   * haystack does not begin with it.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix.</p>
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // cut off as many characters as the needle is long
    $remainder = self::substr($haystack, self::strlen($needle));

    return $remainder === false ? '' : (string)$remainder;
  }
5855
5856
  /**
   * Replace text within a portion of a string (multi-byte aware).
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * When $str is an array, every element is processed on its own and the other
   * arguments are broadcast (scalars) or consumed position by position (arrays).
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given for a single string, the replacing
   *                                          ends at the end of the string. If length is zero then this function
   *                                          will have the effect of inserting replacement into string at the given
   *                                          start offset.<br><br>
   *                                          NOTE: for an array of strings an omitted length is treated as zero
   *                                          (insert), which is the historical behavior of this method.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // nothing to process: without this guard the padding below would create
      // one phantom element and the result would be "[$replacement]"
      if ($num === 0) {
        return [];
      }

      // the replacement
      if (\is_array($replacement) === true) {
        $replacement = \array_slice($replacement, 0, $num);
      } else {
        $replacement = \array_pad([$replacement], $num, $replacement);
      }

      // the offset
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);

        // missing offsets start at 0 (like the native function); otherwise
        // "array_map()" would pass null, which is a TypeError in strict mode
        $offset = \array_pad($offset, $num, 0);
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // the length
      if (null === $length) {
        $length = \array_fill(0, $num, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (null !== $valueTmpV2) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single string only ever uses the first replacement
    if (\is_array($replacement) === true) {
      // "reset()" also works for arrays that are not 0-indexed
      $replacement = $replacement === [] ? '' : \reset($replacement);
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // byte offsets equal character offsets for ASCII, so the native function is safe
    if (self::is_ascii($str)) {
      return ($length === null) ?
          \substr_replace($str, $replacement, $offset) :
          \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into single characters
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    $strChars = $smatches[0] ?? [];
    $replacementChars = $rmatches[0] ?? [];

    if ($length === null) {
      // replace up to the end of the string
      $length = \count($strChars);
    }

    \array_splice($strChars, $offset, $length, $replacementChars);

    return \implode('', $strChars);
  }
5957
5958
  /**
   * Strips a trailing $needle from $haystack (case-sensitive).
   *
   * The haystack is handed back untouched when the needle is empty or when the
   * haystack does not end with it.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix.</p>
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the matched suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return $remainder === false ? '' : (string)$remainder;
  }
5986
5987
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is upper-cased; if that does not change the
   * character (it already was upper case, or has no case) it is lower-cased instead.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped. An empty string is returned if the regex engine fails,
   *                e.g. for invalid UTF-8 input that was not cleaned.</p>
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // the "/u" pattern below fails on invalid byte sequences, so remove them first
      $str = self::clean($str);
    }

    $strSwappedCase = \preg_replace_callback(
        '/[\S]/u',
        static function (array $match) use ($encoding) {
          $matchToUpper = UTF8::strtoupper($match[0], $encoding);

          if ($match[0] === $matchToUpper) {
            return UTF8::strtolower($match[0], $encoding);
          }

          return $matchToUpper;
        },
        $str
    );

    // "preg_replace_callback()" returns null on error; returning that would
    // violate the declared return type and throw a TypeError
    return (string)$strSwappedCase;
  }
6028
6029
  /**
   * Deprecated alias that forwards all arguments to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the second argument of "to_ascii()".</p>
   * @param bool   $strict    <p>Passed on as the third argument of "to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
6046
6047
  /**
   * Deprecated alias that forwards to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6062
6063
  /**
   * Deprecated alias that forwards to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6078
6079
  /**
   * Deprecated alias that forwards to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6094
6095
  /**
   * Convert a string into ASCII.
   *
   * Processing order: pure-ASCII input is returned as is; otherwise the string is cleaned,
   * optionally transliterated via PHP-Intl ($strict), and finally every remaining non-ASCII
   * character is looked up in the bundled per-"bank" tables (code point >> 8). Characters
   * without a table entry are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // lazily filled cache: bank number => lookup table
    static $UTF8_TO_ASCII;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        // HACK for issue from "transliterator_transliterate()"
        $str = \str_replace(
            'ℌ',
            'H',
            $str
        );

        $transliterated = \transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure; keep the
        // current string in that case and let the table lookup below handle it
        if (\is_string($transliterated)) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    // split the string into single characters
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $lead = self::$ORD[$c[0]];

      // ASCII - next please
      if ($lead >= 0 && $lead <= 127) {
        continue;
      }

      // The lead byte determines the sequence length and carries the highest
      // payload bits. "$ord" is assigned for every character that reaches the
      // table lookup, so no value can leak over from a previous iteration.
      if ($lead >= 192 && $lead <= 223) {
        $seqLength = 2;
        $ord = $lead - 192;
      } elseif ($lead >= 224 && $lead <= 239) {
        $seqLength = 3;
        $ord = $lead - 224;
      } elseif ($lead >= 240 && $lead <= 247) {
        $seqLength = 4;
        $ord = $lead - 240;
      } elseif ($lead >= 248 && $lead <= 251) {
        $seqLength = 5;
        $ord = $lead - 248;
      } elseif ($lead >= 252 && $lead <= 253) {
        $seqLength = 6;
        $ord = $lead - 252;
      } else {
        // stray continuation byte (128-191) or invalid lead byte (254 / 255)
        $c = $unknown;
        continue;
      }

      // truncated sequence: not enough bytes to decode
      if (!isset($c[$seqLength - 1])) {
        $c = $unknown;
        continue;
      }

      // every continuation byte contributes its lower 6 bits
      for ($i = 1; $i < $seqLength; ++$i) {
        $ord = $ord * 64 + (self::$ORD[$c[$i]] - 128);
      }

      // the tables are split into banks of 256 code points
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      $newchar = $ord & 255;

      $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
    }
    // break the reference left behind by the foreach
    unset($c);

    return \implode('', $chars);
  }
6252
6253
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (keys are kept); every other value
   * is cast to string and passed through "UTF8::utf8_decode()".
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      // with a single input array "array_map()" preserves the keys
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $input = (string)$str;

    return $input === '' ? '' : self::utf8_decode($input);
  }
6277
6278
  /**
   * Alias that forwards to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6291
6292
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>Byte sequences that look like 2-, 3- or 4-byte UTF-8 are copied unchanged.</li>
   * <li>Every other byte >= 0x80 is converted via "to_utf8_convert()", i.e. the original string is assumed to be
   * either WINDOWS-1252 or ISO-8859.</li>
   * <li>Unicode escape sequences like "\u00fc" are decoded afterwards.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $result = '';

    for ($pos = 0; $pos < $byteCount; $pos++) {
      $byte = $str[$pos];

      if ($byte < "\xC0") {
        // a lone continuation byte (10xxxxxx) needs conversion, everything else is copied
        $result .= (($byte & "\xC0") === "\x80") ? self::to_utf8_convert($byte) : $byte;
        continue;
      }

      // number of continuation bytes the lead byte announces (0 = no valid lead byte)
      if ($byte <= "\xDF") {
        $expected = 1; // looks like 2 bytes UTF8
      } elseif ($byte <= "\xEF") {
        $expected = 2; // looks like 3 bytes UTF8
      } elseif ($byte <= "\xF7") {
        $expected = 3; // looks like 4 bytes UTF8
      } else {
        $expected = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the continuation bytes; bytes beyond the end count as "\x00"
      $tail = '';
      for ($k = 1; $k <= $expected; $k++) {
        $next = $pos + $k >= $byteCount ? "\x00" : $str[$pos + $k];
        if ($next < "\x80" || $next > "\xBF") {
          $tail = null;
          break;
        }
        $tail .= $next;
      }

      if ($expected > 0 && $tail !== null) { // yeah, almost sure it's UTF8 already
        $result .= $byte . $tail;
        $pos += $expected;
      } else { // not valid UTF8 - convert it
        $result .= self::to_utf8_convert($byte);
      }
    }

    // decode unicode escape sequences
    $result = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($hit) {
          return \mb_convert_encoding(pack('H*', $hit[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
6400
6401
  /**
   * Converts one non-UTF-8 byte into a UTF-8 byte sequence.
   *
   * Bytes listed in the WINDOWS-1252 table are mapped via that table; every other
   * byte is encoded as the two-byte UTF-8 sequence of the code point with the same
   * numeric value (i.e. it is treated as ISO-8859-1).
   *
   * @param string $int <p>A single byte. Despite its name this is a one-character string, not an
   *                    integer: it is used as a key of the "ord" table and in string bit-operations.</p>
   *
   * @return string
   */
  private static function to_utf8_convert($int): string
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // lead byte: 110000xx - shift instead of "/ 64", because the division yields
    // a float array key (implicit float-to-int conversion is deprecated since PHP 8.1)
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    // continuation byte: 10xxxxxx with the lower six bits
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
6434
6435
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * Without $chars (or with any falsy value, e.g. '' or '0') all leading and trailing
   * characters of the Unicode categories "Z" (separators) and "C" (control / other)
   * are removed.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ($str === '') {
      return '';
    }

    $hasCustomChars = $chars !== INF && $chars;
    if ($hasCustomChars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return \preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6461
6462
  /**
   * Makes string's first char uppercase.
   *
   * The string is split into its first character and the rest; only the first
   * character is passed through "UTF8::strtoupper()".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // everything after the first character stays as it is
    $tail = self::substr($str, 1, null, $encoding);
    if ($tail === false) {
      $tail = '';
    }

    $head = (string)self::substr($str, 0, 1, $encoding);

    return self::strtoupper($head, $encoding, $cleanUtf8) . $tail;
  }
6492
6493
  /**
   * Alias that forwards all arguments to "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word      <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
6508
6509
  /**
   * Uppercase for all words in the string.
   *
   * INFO: "mb_convert_case($str, MB_CASE_TITLE)" is not used here, because it
   * also lowercases all other letters of a word.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare against the empty string: a plain truthiness check would also
    // swallow the valid input "0"
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // the native function is only equivalent if neither a charlist nor
    // exceptions are given ("0" is a valid charlist / exception)
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $newWords = [];

    $useExceptions = \count($exceptions) > 0;

    foreach ($words as $word) {

      // skip empty parts only; the word "0" must be kept
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
6577
6578
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * Each round converts the string to UTF-8, decodes html-entities, url-decodes
   * the result and repairs simple broken UTF-8; with $multi_decode the rounds are
   * repeated until the string no longer changes.
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal html-entities
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
6626
6627
  /**
   * Return a array with "urlencoded"-win1252 -> UTF-8
   *
   * The table covers the bytes %20 - %FF. Keys are the upper-case percent-encoded bytes,
   * values are the matching characters as UTF-8 strings. Some bytes (e.g. %7F, %81, %8D,
   * %8F, %90, %9D) are mapped to an empty string.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array <p>Map of percent-encoded byte => UTF-8 string.</p>
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the Euro sign in Windows-1252 (this entry used to be a backtick)
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
6863
6864
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The string is first passed through the "win1252_to_utf8" replacement table (keys are
   * replaced by their values). Afterwards the bytes are walked one by one:
   * - a 2-byte sequence whose combined value is below 256 becomes the matching single byte,
   * - any other 2-byte, 3-byte or 4-byte sequence becomes "?",
   * - a 2-byte lead byte that is cut off at the end of the input becomes "?",
   * - every other byte is copied unchanged.
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>
   *                              If true, and the decoded result is not shorter (measured via self::strlen())
   *                              than the string before the byte-wise decoding, that not-decoded string is
   *                              returned instead.
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    // build the search / replace lists only once per request
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';
    // $i is the read position, $j the write position; the string is rewritten in place
    // and $j never overtakes $i.
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      // the high nibble of the current byte tells the length of the sequence
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          if ($i + 1 >= $len) {
            // truncated 2-byte sequence: there is no continuation byte to read
            $str[$j] = $noCharFound;
            break;
          }

          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // 4-byte sequence: skip one extra byte, then ...
          ++$i;
          // ... intentional fall-through: handle the rest like a 3-byte sequence
        case "\xE0":
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          $str[$j] = $str[$i];
      }
    }

    // only the first $j bytes were (re)written, the rest is left-over input
    $return = (string)self::substr($str, 0, $j, '8BIT');

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
6945
6946
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the plain conversion via \utf8_encode(), the "win1252_to_utf8" replacement table
   * is applied (keys are replaced by their values), but only if the converted string
   * contains a "\xC2" byte at all.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string if the input is empty or the conversion failed.</p>
   */
  public static function utf8_encode(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // a polyfill implementation may return false
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a "\xC2" byte there is nothing for the replacement table to do
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    static $searchCache = null;
    static $replaceCache = null;

    // build the search / replace lists only once per request
    if ($searchCache === null) {
      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($searchCache, $replaceCache, $encoded);
  }
6986
6987
  /**
   * fix -> utf8-win1252 chars
   *
   * Plain alias: the call is forwarded unchanged to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()".</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7000
7001
  /**
   * Returns an array with all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    // the table itself is kept in a static property of this class
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7017
7018
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Replacement for the stripped string.</p>
   *
   * @return string <p>
   *                An empty string for empty input or a limit below 1, the unchanged input if nothing
   *                had to be cut off, otherwise the right-trimmed first words followed by $strAddOn.
   *                </p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0]) || $limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by 1 to $limit groups of "word + trailing whitespace"
    $firstWordsPattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($firstWordsPattern, $str, $found);

    // no match at all: hand the input back untouched
    if (!isset($found[0])) {
      return $str;
    }

    // the match already covers the whole input: nothing was cut off
    if (self::strlen($str) === self::strlen($found[0])) {
      return $str;
    }

    return self::rtrim($found[0]) . $strAddOn;
  }
7049
7050
  /**
   * Wraps a string to a given number of characters
   *
   * The native \wordwrap() is not run on the text itself but on a single-byte "mask" of it
   * ("#" for an existing break, " " for a space, "?" for any other character); the positions
   * of the breaks in the wrapped mask are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if (!isset($str[0], $break[0])) {
      return '';
    }

    // step 1: collect the characters and build the mask, one mask byte per collected element
    $mask = '';
    $chars = [];
    foreach (\explode($break, $str) as $lineIndex => $line) {
      if ($lineIndex !== 0) {
        // an already existing break is kept as one single element
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($line) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    // step 2: let the native function decide where the breaks belong
    $mask = \wordwrap($mask, $width, '#', $cut);

    // step 3: copy the real characters, inserting $break at every "#" of the wrapped mask
    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (($breakPos = self::strpos($mask, '#', $breakPos + 1)) !== false) {
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // a break / space that was turned into "#" is dropped; a cut inside a word drops nothing
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . \implode('', $chars);
  }
7114
7115
  /**
   * Returns an array of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws(): array
  {
    // the list itself is kept in a static property of this class
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7124
7125
}
7126