Completed
Push — master ( 162de9...6022ca )
by Lars
05:42
created

UTF8::str_split()   C

Complexity

Conditions 8
Paths 8

Size

Total Lines 40
Code Lines 21

Duplication

Lines 7
Ratio 17.5 %

Code Coverage

Tests 26
CRAP Score 8

Importance

Changes 0
Metric Value
dl 7
loc 40
ccs 26
cts 26
cp 1
rs 5.3846
c 0
b 0
f 0
cc 8
eloc 21
nc 8
nop 2
crap 8
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // The three BOM bytes EF BB BF read as WINDOWS-1252 are "ï»¿"; re-encoded as
      // UTF-8 that is C3 AF C2 BB C2 BF (6 bytes). Written with escapes so the key
      // cannot be silently lost: an empty-string key would match every input.
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      // NOTE(review): the two leading characters of the next key (and the two trailing
      // ones of the UTF-32 LE variant) are presumably NUL bytes in the real file - confirm.
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
206 16
   * __construct()
207
   */
208 16
  public function __construct()
  {
    // Creating an instance runs the environment detection; checkForSupport()
    // guards itself with a flag, so repeated construction is cheap.
    self::checkForSupport();
  }
212
213
  /**
214
   * Return the character at the specified position: $str[1] like functionality.
215
   *
216
   * @param string $str <p>A UTF-8 string.</p>
217
   * @param int    $pos <p>The position of character to return.</p>
218
   *
219 3
   * @return string <p>Single Multi-Byte character.</p>
220
   */
221 3
  public static function access(string $str, int $pos): string
  {
    // Neither an empty string nor a negative position can address a character.
    if ($str === '' || $pos < 0) {
      return '';
    }

    // Delegate to the multi-byte aware substr(); cast because it may not return a string.
    $character = self::substr($str, $pos, 1);

    return (string)$character;
  }
233 3
234
  /**
235
   * Prepends UTF-8 BOM character to the string and returns the whole string.
236
   *
237
   * INFO: If BOM already existed there, the Input string is returned.
238
   *
239
   * @param string $str <p>The input string.</p>
240
   *
241
   * @return string <p>The output string that contains BOM.</p>
242
   */
243
  public static function add_bom_to_string(string $str): string
  {
    // Leave the input untouched unless the BOM check explicitly reports "false".
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251 1
252
  /**
253
   * Convert a binary string (a sequence of "0" and "1" characters) into a string.
254
   *
255
   * @param mixed $bin 1|0
256
   *
257
   * @return string
258
   */
259
  public static function binary_to_str($bin): string
  {
    // Anything without a first offset (empty string, int, null, ...) yields an empty result.
    if (!isset($bin[0])) {
      return '';
    }

    // Binary digits -> hexadecimal digits (leading zeros are dropped by base_convert()).
    // NOTE(review): base_convert() works on floats internally and may lose precision
    // for very long bit strings - see the PHP manual.
    $hex = base_convert($bin, 2, 16);
    if ($hex === '0') {
      return '';
    }

    // pack('H*') consumes two hex digits per byte, high nibble first. With an odd
    // number of digits the last nibble would be treated as a HIGH nibble
    // (e.g. "a" => "\xa0"), so restore the leading zero that base_convert() removed.
    if (\strlen($hex) % 2 !== 0) {
      $hex = '0' . $hex;
    }

    return pack('H*', $hex);
  }
272 1
273
  /**
274
   * Returns the UTF-8 Byte Order Mark Character.
275
   *
276
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
277
   *
278
   * @return string UTF-8 Byte Order Mark
279
   */
280
  public static function bom(): string
  {
    // The three bytes EF BB BF, i.e. the first entry of the $BOM table.
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
284 2
285
  /**
286
   * @alias of UTF8::chr_map()
287
   *
288
   * @see   UTF8::chr_map()
289
   *
290
   * @param string|array $callback
291
   * @param string       $str
292
   *
293
   * @return array
294
   */
295
  public static function callback($callback, string $str): array
  {
    // Pure alias: every argument is forwarded unchanged to chr_map().
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
299 1
300
  /**
301
   * This method will auto-detect your server environment for UTF-8 support.
302
   *
303
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
304
   */
305
  public static function checkForSupport()
  {
    // The detection only has to run once per request.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    $intlAvailable = self::intl_loaded();
    self::$SUPPORT['intl'] = $intlAvailable;

    // The transliterator id list is only queried when intl is loaded AND exposes the function.
    $canListTransliterators = $intlAvailable === true
                              &&
                              \function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators ? transliterator_list_ids() : [];

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
336 1
337 19
  /**
338
   * Generates a UTF-8 encoded character from the given code point.
339
   *
340
   * INFO: opposite to UTF8::ord()
341
   *
342
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
343
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
344
   *
345
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
346
   */
347
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // Per-request memo: "<code point><encoding>" => character.
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // The documented input type is int|string. \IntlChar::chr() interprets a string
    // argument as a UTF-8 *character*, not as a number, so a code point given as an
    // integer-like string (e.g. "8364") is converted to a real int first.
    if (\is_string($code_point) === true && (string)(int)$code_point === $code_point) {
      $code_point = (int)$code_point;
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 127) { // use "simple"-char only until "\x80"

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      $chr = self::$CHR[$code_point];

    } elseif (self::$SUPPORT['intlChar'] === true) {

      $chr = \IntlChar::chr($code_point);

    } else {

      // Manual UTF-8 encoding via the byte lookup table; the one-byte range was
      // already handled above, so only 2-, 3- and 4-byte sequences remain.
      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      if ($code_point <= 0x7FF) {
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }
    }

    // Failure (e.g. \IntlChar::chr() rejected the value): return null as documented,
    // instead of handing null to mb_convert_encoding().
    if ($chr === null) {
      return null;
    }

    if ($encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
426 7
427
  /**
428
   * Applies callback to all characters of a string.
429
   *
430
   * @param string|array $callback <p>The callback function.</p>
431
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
432
   *
433
   * @return array <p>The outcome of callback.</p>
434
   */
435
  public static function chr_map($callback, string $str): array
  {
    // Split into characters first, then run the callback once per character.
    return \array_map($callback, self::split($str));
  }
441 1
442
  /**
443
   * Generates an array of byte length of each character of a Unicode string.
444
   *
445
   * 1 byte => U+0000  - U+007F
446
   * 2 byte => U+0080  - U+07FF
447
   * 3 byte => U+0800  - U+FFFF
448
   * 4 byte => U+10000 - U+10FFFF
449
   *
450
   * @param string $str <p>The original unicode string.</p>
451
   *
452
   * @return array <p>An array of byte lengths of each character.</p>
453
   */
454
  public static function chr_size_list(string $str): array
  {
    if ($str === '') {
      return [];
    }

    // Measuring each character with the "8BIT" encoding yields its length in bytes.
    $sizes = [];
    foreach (self::split($str) as $index => $character) {
      $sizes[$index] = self::strlen($character, '8BIT');
    }

    return $sizes;
  }
467 4
468 4
  /**
469 4
   * Get a decimal code representation of a specific character.
470
   *
471
   * @param string $char <p>The input character.</p>
472
   *
473
   * @return int
474
   */
475
  public static function chr_to_decimal(string $char): int
  {
    $code = self::ord($char[0]);

    // 0xxxxxxx: the high bit is clear, the value is already the result.
    if (($code & 0x80) === 0) {
      return $code;
    }

    // [mask, expected marker bits, total sequence length]
    $layouts = [
        [0xe0, 0xc0, 2], // 110xxxxx
        [0xf0, 0xe0, 3], // 1110xxxx
        [0xf8, 0xf0, 4], // 11110xxx
    ];

    $bytes = 1;
    foreach ($layouts as list($mask, $marker, $length)) {
      if (($code & $mask) === $marker) {
        $bytes = $length;
        $code &= ~$marker; // drop the marker bits of the lead byte
        break;
      }
    }

    // 10xxxxxx: shift in the low bits of every following byte.
    for ($offset = 1; $offset < $bytes; ++$offset) {
      $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $code;
  }
506 2
507 2
  /**
508
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
509 2
   *
510
   * @param string $char <p>The input character</p>
511
   * @param string $pfix [optional]
512
   *
513
   * @return string <p>The code point encoded as U+xxxx<p>
514
   */
515
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // The literal entity "&#0;" is handled like an empty character.
    $input = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($input), $pfix);
  }
527
528 1
  /**
529
   * alias for "UTF8::chr_to_decimal()"
530
   *
531
   * @see UTF8::chr_to_decimal()
532 1
   *
533
   * @param string $chr
534
   *
535
   * @return int
536
   */
537
  public static function chr_to_int(string $chr): int
  {
    // Pure alias of chr_to_decimal().
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
541
542
  /**
543
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
544 1
   *
545
   * @param string $body     <p>The original string to be split.</p>
546 1
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
547
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
548
   *
549
   * @return string <p>The chunked string</p>
550
   */
551
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    // implode() only places $end BETWEEN chunks, so nothing follows the last chunk.
    return implode($end, $chunks);
  }
555
556
  /**
557
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
558 1
   *
559
   * @param string $str                     <p>The string to be sanitized.</p>
560 1
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
561
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
562
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
563
   *                                        => "..."</p>
564
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
565
   *                                        $normalize_whitespace</p>
566
   *
567
   * @return string <p>Clean UTF-8 encoded string.</p>
568
   */
569
  public static function clean(string $str, bool $remove_bom = false, bool $normalize_whitespace = false, bool $normalize_msword = false, bool $keep_non_breaking_space = false): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences; stray bytes match one of the
    // other alternatives and are dropped because only "$1" is kept.
    $regx = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // preg_replace() returns null when PCRE fails; the cast keeps the value a string
    // so the string-typed helpers below do not raise a TypeError under strict_types.
    $str = (string)preg_replace($regx, '$1', $str);

    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
604
605 61
  /**
606 35
   * Clean up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
607 35
   *
608
   * @param string $str <p>The input string.</p>
609 61
   *
610
   * @return string
611
   */
612
  public static function cleanup(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // Step 1: fix ISO <-> UTF-8 errors.
    $repaired = self::fix_simple_utf8($str);

    // Step 2: clean() with remove_bom = true, normalize_whitespace = true,
    // normalize_msword = false and keep_non_breaking_space = true.
    return self::clean($repaired, true, true, false, true);
  }
630
631
  /**
632
   * Accepts a string or a array of strings and returns an array of Unicode code points.
633
   *
634
   * INFO: opposite to UTF8::string()
635 21
   *
636
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
637 21
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
638
   *                                    default, code points will be returned as integers.</p>
639
   *
640
   * @return array <p>The array of code points.</p>
641
   */
642
  public static function codepoints($arg, bool $u_style = false): array
  {
    // A single string is split into characters; an array is used as given.
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    // Character => integer code point.
    $points = \array_map([self::class, 'ord'], $chars);

    if (!$u_style) {
      return $points;
    }

    // Integer code point => hexadecimal notation via int_to_hex().
    return \array_map([self::class, 'int_to_hex'], $points);
  }
668 1
669 1
  /**
670 1
   * Returns count of characters used in a string.
671
   *
672 1
   * @param string $str       <p>The input string.</p>
673 1
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
674
   *
675 7
   * @return array <p>An associative array of Character as keys and
676
   *               their count as values.</p>
677
   */
678
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // One array element per character, then tally identical elements.
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
682
683
  /**
684
   * Converts a int-value into an UTF-8 character.
685
   *
686
   * @param mixed $int
687 7
   *
688
   * @return string
689 7
   */
690
  public static function decimal_to_chr($int): string
  {
    // This file declares strict_types and scalar type hints, so it cannot run on
    // PHP < 7; the former "PHP >= 5.4" check was always true and ENT_HTML5 is
    // always available.
    $flags = ENT_QUOTES | ENT_HTML5;

    // Decode the numeric character reference "&#<int>;" into the character itself.
    return self::html_entity_decode('&#' . $int . ';', $flags);
  }
700
701 5
  /**
702
   * Encode a string with a new charset-encoding.
703
   *
704 5
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
705
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
706
   *
707 5
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
708
   * @param string $str      <p>The input string</p>
709
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
710
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
711
   *
712
   * @return string
713
   */
714
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    // Nothing to do without input or without a target encoding.
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $detected = self::str_detect_encoding($str);

    // Detection failed: hand the input back untouched.
    if ($detected === false) {
      return $str;
    }

    // Without $force, a string that is already in the target encoding is left alone.
    if ($force !== true && $detected === $encoding) {
      return $str;
    }

    // The dedicated converters are used when forced, or when the source is one of
    // the three encodings they are written for.
    $useDedicatedConverter = $force === true
                             ||
                             \in_array($detected, ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'], true);

    if ($useDedicatedConverter) {
      if ($encoding === 'UTF-8') {
        return self::to_utf8($str);
      }

      if ($encoding === 'ISO-8859-1') {
        return self::to_iso8859($str);
      }
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $detected);

    // A falsy conversion result falls back to the original input.
    return $converted ? $converted : $str;
  }
789 2
790 2
  /**
791
   * Reads entire file into a string.
792 2
   *
793
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
794 2
   *
795 2
   * @link http://php.net/manual/en/function.file-get-contents.php
796
   *
797
   * @param string        $filename         <p>
798
   *                                        Name of the file to read.
799 1
   *                                        </p>
800
   * @param bool          $use_include_path [optional] <p>
801
   *                                        Prior to PHP 5, this parameter is called
802
   *                                        use_include_path and is a bool.
803
   *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
804
   *                                        to trigger include path
805
   *                                        search.
806
   *                                        </p>
807
   * @param resource|null $context          [optional] <p>
808
   *                                        A valid context resource created with
809
   *                                        stream_context_create. If you don't need to use a
810
   *                                        custom context, you can skip this parameter by &null;.
811
   *                                        </p>
812
   * @param int|null      $offset           [optional] <p>
813
   *                                        The offset where the reading starts.
814
   *                                        </p>
815
   * @param int|null      $maxLength        [optional] <p>
816
   *                                        Maximum length of data read. The default is to read until end
817
   *                                        of file is reached.
818
   *                                        </p>
819
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
820
   *
821
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
822
   *                                        images or pdf, because they used non default utf-8 chars</p>
823
   *
824
   * @return string|false <p>The function returns the read data or false on failure.</p>
825
   */
826
  public static function file_get_contents(string $filename, bool $use_include_path = false, $context = null, int $offset = null, int $maxLength = null, int $timeout = 10, bool $convertToUtf8 = true)
  {
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm the
    // intended sanitizing before upgrading.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // Only build a context when the caller supplied none and a timeout is set.
    if ($context === null && $timeout) {
      $context = stream_context_create(['http' => ['timeout' => $timeout]]);
    }

    // Arguments for the native function (the unqualified call below is not this method).
    $readArguments = [
        $filename,
        $use_include_path,
        $context,
        $offset === null ? 0 : $offset,
    ];
    if (\is_int($maxLength) === true) {
      // The length argument is only passed when it really is an integer.
      $readArguments[] = $maxLength;
    }

    $data = file_get_contents(...$readArguments);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // Convert to UTF-8 (without forcing) and then clean up the result.
    return self::cleanup(self::encode('UTF-8', $data, false));
  }
864
865
  /**
866
   * Checks if a file starts with BOM (Byte Order Mark) character.
867
   *
868
   * @param string $file_path <p>Path to a valid file.</p>
869
   *
870
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
871
   */
872
  public static function file_has_bom(string $file_path): bool
  {
    // Native file_get_contents() (not the wrapper of this class) - it returns false
    // when the file cannot be read.
    $content = file_get_contents($file_path);

    // An unreadable file cannot start with a BOM; without this guard the boolean
    // would be passed to the string-typed string_has_bom() and raise a TypeError.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
876
877
  /**
878
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
879
   *
880
   * @param mixed  $var
881
   * @param int    $normalization_form
882
   * @param string $leading_combining
883
   *
884 3
   * @return mixed
885
   */
886
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    // Arrays: filter every element recursively.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property recursively.
    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Every other non-string type is returned as it is.
    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(["\r\n", "\r"], "\n", $var);
    }

    // Only strings that is_ascii() reports as non-ASCII (false) need normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker: "already normalized" (checked via isset($n[0]) below).
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // Use the normalized text when there is one, otherwise re-encode as UTF-8.
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($n[0], $leading_combining[0])
                               &&
                               preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
940
941
  /**
942
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
943
   *
944
   * Gets a specific external variable by name and optionally filters it
945
   *
946
   * @link  http://php.net/manual/en/function.filter-input.php
947
   *
948
   * @param int    $type          <p>
949 9
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
950
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
951 9
   *                              <b>INPUT_ENV</b>.
952 9
   *                              </p>
953 3
   * @param string $variable_name <p>
954
   *                              Name of a variable to get.
955 3
   *                              </p>
956 3
   * @param int    $filter        [optional] <p>
957 3
   *                              The ID of the filter to apply. The
958 9
   *                              manual page lists the available filters.
959 2
   *                              </p>
960 2
   * @param mixed  $options       [optional] <p>
961 2
   *                              Associative array of options or bitwise disjunction of flags. If filter
962 2
   *                              accepts options, flags can be provided in "flags" field of array.
963 9
   *                              </p>
964
   *
965 8
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
966
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
967 2
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
968 2
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
969
   * @since 5.2.0
970 8
   */
971 View Code Duplication
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options to the native function only when the caller actually
    // supplied a 4th argument, so the native default stays in effect otherwise.
    $value = (\func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
981 5
982
  /**
983
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
984
   *
985
   * Gets external variables and optionally filters them
986 8
   *
987 8
   * @link  http://php.net/manual/en/function.filter-input-array.php
988 6
   *
989 8
   * @param int   $type       <p>
990 5
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
991 8
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
992
   *                          <b>INPUT_ENV</b>.
993
   *                          </p>
994 2
   * @param mixed $definition [optional] <p>
995 2
   *                          An array defining the arguments. A valid key is a string
996 8
   *                          containing a variable name and a valid value is either a filter type, or an array
997
   *                          optionally specifying the filter, flags and options. If the value is an
998 8
   *                          array, valid keys are filter which specifies the
999 9
   *                          filter type,
1000
   *                          flags which specifies any flags that apply to the
1001 9
   *                          filter, and options which specifies any options that
1002
   *                          apply to the filter. See the example below for a better understanding.
1003
   *                          </p>
1004
   *                          <p>
1005
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1006
   *                          input array are filtered by this filter.
1007
   *                          </p>
1008
   * @param bool  $add_empty  [optional] <p>
1009
   *                          Add missing keys as <b>NULL</b> to the return value.
1010
   *                          </p>
1011
   *
1012
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1013
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1014
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1015
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1016
   * fails.
1017
   * @since 5.2.0
1018
   */
1019 View Code Duplication
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only $type given, let the native function use its own defaults;
    // otherwise pass both optional arguments through.
    $values = (\func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1029
1030
  /**
1031
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1032
   *
1033
   * Filters a variable with a specified filter
1034
   *
1035
   * @link  http://php.net/manual/en/function.filter-var.php
1036
   *
1037
   * @param mixed $variable <p>
1038
   *                        Value to filter.
1039
   *                        </p>
1040
   * @param int   $filter   [optional] <p>
1041
   *                        The ID of the filter to apply. The
1042
   *                        manual page lists the available filters.
1043
   *                        </p>
1044
   * @param mixed $options  [optional] <p>
1045
   *                        Associative array of options or bitwise disjunction of flags. If filter
1046
   *                        accepts options, flags can be provided in "flags" field of array. For
1047
   *                        the "callback" filter, callable type should be passed. The
1048
   *                        callback must accept one argument, the value to be filtered, and return
1049
   *                        the value after filtering/sanitizing it.
1050
   *                        </p>
1051
   *                        <p>
1052
   *                        <code>
1053
   *                        // for filters that accept options, use this format
1054
   *                        $options = array(
1055
   *                        'options' => array(
1056
   *                        'default' => 3, // value to return if the filter fails
1057
   *                        // other options here
1058
   *                        'min_range' => 0
1059
   *                        ),
1060
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1061
   *                        );
1062
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1063
   *                        // for filter that only accept flags, you can pass them directly
1064
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1065
   *                        // for filter that only accept flags, you can also pass as an array
1066
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1067
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1068
   *                        // callback validate filter
1069
   *                        function foo($value)
1070
   *                        {
1071
   *                        // Expected format: Surname, GivenNames
1072
   *                        if (strpos($value, ", ") === false) return false;
1073
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1074
   *                        $empty = (empty($surname) || empty($givennames));
1075
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1076
   *                        if ($empty || $notstrings) {
1077
   *                        return false;
1078
   *                        } else {
1079
   *                        return $value;
1080
   *                        }
1081
   *                        }
1082
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1083
   *                        </code>
1084
   *                        </p>
1085
   *
1086
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1087
   * @since 5.2.0
1088
   */
1089 View Code Duplication
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options to the native function only when the caller actually
    // supplied a 3rd argument.
    $filtered = (\func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1099
1100
  /**
1101
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1102
   *
1103
   * Gets multiple variables and optionally filters them
1104
   *
1105
   * @link  http://php.net/manual/en/function.filter-var-array.php
1106
   *
1107
   * @param array $data       <p>
1108
   *                          An array with string keys containing the data to filter.
1109
   *                          </p>
1110
   * @param mixed $definition [optional] <p>
1111
   *                          An array defining the arguments. A valid key is a string
1112
   *                          containing a variable name and a valid value is either a
1113
   *                          filter type, or an
1114
   *                          array optionally specifying the filter, flags and options.
1115
   *                          If the value is an array, valid keys are filter
1116
   *                          which specifies the filter type,
1117
   *                          flags which specifies any flags that apply to the
1118
   *                          filter, and options which specifies any options that
1119
   *                          apply to the filter. See the example below for a better understanding.
1120
   *                          </p>
1121
   *                          <p>
1122
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1123
   *                          input array are filtered by this filter.
1124
   *                          </p>
1125
   * @param bool  $add_empty  [optional] <p>
1126
   *                          Add missing keys as <b>NULL</b> to the return value.
1127
   *                          </p>
1128
   *
1129
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1130
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1131
   * the variable is not set.
1132
   * @since 5.2.0
1133
   */
1134 View Code Duplication
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only $data given, let the native function use its own defaults;
    // otherwise pass both optional arguments through.
    $values = (\func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1144
1145
  /**
1146
   * Check if the number of unicode characters are not more than the specified integer.
1147
   *
1148
   * @param string $str      The original string to be checked.
1149
   * @param int    $box_size The size in number of chars to be checked against string.
1150
   *
1151
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1152 1
   */
1153
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Length as reported by self::strlen(), compared against the box size.
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1157 1
1158
  /**
1159
   * Try to fix simple broken UTF-8 strings.
1160 1
   *
1161
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1162
   *
1163
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1164
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1165
   * See: http://en.wikipedia.org/wiki/Windows-1252
1166
   *
1167
   * @param string $str <p>The input string</p>
1168
   *
1169
   * @return string
1170
   */
1171 View Code Duplication
  public static function fix_simple_utf8(string $str): string
  {
    // Per-request cache of the search/replace lists for str_replace().
    static $search = null;
    static $replace = null;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      // Load the broken => fixed mapping once via self::getData().
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $search = array_keys(self::$BROKEN_UTF8_FIX);
      $replace = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($search, $replace, $str);
  }
1192
1193
  /**
1194
   * Fix a double (or multiple) encoded UTF8 string.
1195
   *
1196
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1197 1
   *
1198
   * @return string|string[] <p>Will return the fixed input-"array" or
1199 1
   *                         the fixed input-"string".</p>
1200 1
   */
1201 1
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element (recursively); keys are preserved.
    if (\is_array($str) === true) {
      return \array_map(
          static function ($item) {
            return self::fix_utf8($item);
          },
          $str
      );
    }

    // Decode and re-encode repeatedly until a pass no longer changes the
    // string, i.e. every extra encoding layer has been removed.
    $previous = '';
    while ($str !== $previous) {
      $previous = $str;
      $decoded = self::utf8_decode($previous, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1221
1222
  /**
1223
   * Get the text direction of a specific character.
1224
   *
1225
   * @param string $char
1226
   *
1227
   * @return string <p>'RTL' or 'LTR'</p>
1228
   */
1229
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: ask "IntlChar" when it is available.
    if (self::$SUPPORT['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // from "IntlChar"-Class
      $ltrClasses = [0, 11, 12, 20];
      $rtlClasses = [1, 13, 14, 15, 21];

      if (\in_array($direction, $ltrClasses, true)) {
        return 'LTR';
      }

      if (\in_array($direction, $rtlClasses, true)) {
        return 'RTL';
      }

      // Any other direction class falls through to the table lookup below.
    }

    $c = self::chr_to_decimal($char);

    // Everything outside of [0x5be, 0x10b7f] is treated as left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Select the lookup tables (single code points + inclusive ranges)
    // for the zone the code point falls into.
    if (0x85e >= $c) {

      $singles = [
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      ];
      $ranges = [
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          [0x800, 0x815],
          [0x830, 0x83e],
          [0x840, 0x858],
      ];

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $singles = [
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      ];
      $ranges = [
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // Between the two zones (and not 0x200f): left-to-right.
      return 'LTR';

    }

    if (\in_array($c, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as list($first, $last)) {
      if ($first <= $c && $last >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1343 1
1344 1
  /**
1345 1
   * Get data from "/data/*.php"
1346 1
   *
1347 1
   * @param string $file
1348 1
   *
1349 1
   * @return bool|string|array|int <p>Will return false on error.</p>
1350 1
   */
1351 1
  private static function getData(string $file)
  {
    // Data files live next to this class in "data/<name>.php".
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    // The value handed back is whatever the data file itself returns.
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1361 1
1362 1
  /**
1363
   * Check for php-support.
1364
   *
1365 1
   * @param string|null $key
1366
   *
1367
   * @return mixed <p>Return the full support-"array", if $key === null<br>
1368
   *               return bool-value, if $key is used and available<br>
1369 1
   *               otherwise return null</p>
1370
   */
1371 1
  public static function getSupportInfo(string $key = null)
  {
    // The support table is filled lazily on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // No key: hand out the complete support table.
    if ($key === null) {
      return self::$SUPPORT;
    }

    // Missing (or null) entries yield null.
    return self::$SUPPORT[$key] ?? null;
  }
1387 1
1388 1
  /**
1389 1
   * alias for "UTF8::string_has_bom()"
1390 1
   *
1391 1
   * @see        UTF8::string_has_bom()
1392 1
   *
1393 1
   * @param string $str
1394 1
   *
1395 1
   * @return bool
1396 1
   *
1397 1
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
1398 1
   */
1399 1
  public static function hasBom(string $str): bool
  {
    // Deprecated alias: simply delegates to string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1403 1
1404 1
  /**
1405
   * Converts a hexadecimal-value into an UTF-8 character.
1406 1
   *
1407 1
   * @param string $hexdec <p>The hexadecimal value.</p>
1408
   *
1409
   * @return string|false <p>One single UTF-8 character.</p>
1410
   */
1411 1
  public static function hex_to_chr(string $hexdec)
  {
    // Hexadecimal string => number, then number => character.
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1415
1416
  /**
1417
   * Converts hexadecimal U+xxxx code point representation to integer.
1418
   *
1419
   * INFO: opposite to UTF8::int_to_hex()
1420
   *
1421 6
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
1422
   *
1423 6
   * @return int|false <p>The code point, or false on failure.</p>
1424 6
   */
1425
  public static function hex_to_int(string $hexDec)
  {
    if ($hexDec === '') {
      return false;
    }

    // Accepted notations: "\uXXXX", "U+XXXX" or the bare digits, with 4 to 6
    // hexadecimal digits (case-insensitive).
    //
    // Only real hex digits (0-9, a-f) are allowed: letters beyond "f" would
    // otherwise pass the check and intval() would silently turn them into a
    // bogus code point (e.g. "zzzz" => 0, "12zz" => 18) instead of failing.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1437
1438
  /**
1439
   * alias for "UTF8::html_entity_decode()"
1440
   *
1441 19
   * @see UTF8::html_entity_decode()
1442
   *
1443 19
   * @param string $str
1444
   * @param int    $flags
1445
   * @param string $encoding
1446
   *
1447 19
   * @return string
1448 2
   */
1449
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // Alias: all arguments are passed through unchanged to html_entity_decode().
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1453
1454
  /**
1455 17
   * Converts a UTF-8 string to a series of HTML numbered entities.
1456
   *
1457
   * INFO: opposite to UTF8::html_decode()
1458
   *
1459
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
1460
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
1461
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
1462
   *
1463
   * @return string <p>HTML numbered entities.</p>
1464
   */
1465
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // When ASCII chars are kept, only code points from 0x80 upwards are encoded.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // The conversion map consists of groups of exactly four integers:
      // [start, end, offset, mask]. A stray fifth element makes PHP 8 throw
      // a ValueError ("must have a multiple of 4 elements"), so the map
      // holds exactly one complete group.
      return mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // Fallback without mbstring: encode the string character by character.
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1500 1
1501
  /**
1502
   * UTF-8 version of html_entity_decode()
1503 1
   *
1504 1
   * The reason we are not using html_entity_decode() by itself is because
1505
   * while it is not technically correct to leave out the semicolon
1506
   * at the end of an entity most browsers will still interpret the entity
1507 1
   * correctly. html_entity_decode() does not convert entities without
1508
   * semicolons, so we are left with our own little solution here. Bummer.
1509
   *
1510
   * Convert all HTML entities to their applicable characters
1511
   *
1512
   * INFO: opposite to UTF8::html_encode()
1513
   *
1514
   * @link http://php.net/manual/en/function.html-entity-decode.php
1515
   *
1516
   * @param string $str      <p>
1517
   *                         The input string.
1518
   *                         </p>
1519
   * @param int    $flags    [optional] <p>
1520
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1521 1
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1522
   *                         <table>
1523 1
   *                         Available <i>flags</i> constants
1524
   *                         <tr valign="top">
1525
   *                         <td>Constant Name</td>
1526
   *                         <td>Description</td>
1527
   *                         </tr>
1528
   *                         <tr valign="top">
1529
   *                         <td><b>ENT_COMPAT</b></td>
1530
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1531
   *                         </tr>
1532
   *                         <tr valign="top">
1533
   *                         <td><b>ENT_QUOTES</b></td>
1534
   *                         <td>Will convert both double and single quotes.</td>
1535
   *                         </tr>
1536
   *                         <tr valign="top">
1537 2
   *                         <td><b>ENT_NOQUOTES</b></td>
1538
   *                         <td>Will leave both double and single quotes unconverted.</td>
1539
   *                         </tr>
1540 2
   *                         <tr valign="top">
1541
   *                         <td><b>ENT_HTML401</b></td>
1542 2
   *                         <td>
1543 1
   *                         Handle code as HTML 4.01.
1544
   *                         </td>
1545
   *                         </tr>
1546 2
   *                         <tr valign="top">
1547 1
   *                         <td><b>ENT_XML1</b></td>
1548 1
   *                         <td>
1549
   *                         Handle code as XML 1.
1550
   *                         </td>
1551 2
   *                         </tr>
1552
   *                         <tr valign="top">
1553 2
   *                         <td><b>ENT_XHTML</b></td>
1554 2
   *                         <td>
1555 1
   *                         Handle code as XHTML.
1556 1
   *                         </td>
1557
   *                         </tr>
1558 2
   *                         <tr valign="top">
1559 2
   *                         <td><b>ENT_HTML5</b></td>
1560 2
   *                         <td>
1561
   *                         Handle code as HTML 5.
1562 2
   *                         </td>
1563
   *                         </tr>
1564
   *                         </table>
1565
   *                         </p>
1566
   * @param string $encoding [optional] <p>Encoding to use.</p>
1567
   *
1568
   * @return string <p>The decoded string.</p>
1569
   */
1570
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Fast exit: without "&", or without both "&#" and ";", there is
    // nothing this method would decode.
    if (
        strpos($str, '&') === false
        ||
        (
            strpos($str, '&#') === false
            &&
            strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_QUOTES | ENT_HTML5;
      } else {
        $flags = ENT_QUOTES;
      }
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
    ) {
      // The support table is filled lazily; make sure it is initialised
      // before reading from it, otherwise the lookup below hits an undefined
      // index and the warning is silently skipped.
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // Decode repeatedly until a pass no longer changes the string, so that
    // multiply-encoded entities are resolved as well.
    do {
      $str_compare = $str;

      // Decimal numeric entities: convert them, but leave entities that
      // would decode to a quote character untouched at this step.
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() first appends the missing ";" to numeric
      // entities that were written without one)
      $str = html_entity_decode(
          preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
1642
1643
  /**
1644
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
1645 16
   *
1646
   * @link http://php.net/manual/en/function.htmlentities.php
1647
   *
1648 16
   * @param string $str           <p>
1649
   *                              The input string.
1650 16
   *                              </p>
1651 5
   * @param int    $flags         [optional] <p>
1652
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1653
   *                              invalid code unit sequences and the used document type. The default is
1654 16
   *                              ENT_COMPAT | ENT_HTML401.
1655 9
   *                              <table>
1656
   *                              Available <i>flags</i> constants
1657
   *                              <tr valign="top">
1658
   *                              <td>Constant Name</td>
1659 15
   *                              <td>Description</td>
1660 15
   *                              </tr>
1661
   *                              <tr valign="top">
1662 15
   *                              <td><b>ENT_COMPAT</b></td>
1663 15
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1664 9
   *                              </tr>
1665 9
   *                              <tr valign="top">
1666 15
   *                              <td><b>ENT_QUOTES</b></td>
1667 8
   *                              <td>Will convert both double and single quotes.</td>
1668
   *                              </tr>
1669
   *                              <tr valign="top">
1670 15
   *                              <td><b>ENT_NOQUOTES</b></td>
1671 2
   *                              <td>Will leave both double and single quotes unconverted.</td>
1672 2
   *                              </tr>
1673
   *                              <tr valign="top">
1674 15
   *                              <td><b>ENT_IGNORE</b></td>
1675 5
   *                              <td>
1676
   *                              Silently discard invalid code unit sequences instead of returning
1677
   *                              an empty string. Using this flag is discouraged as it
1678 5
   *                              may have security implications.
1679
   *                              </td>
1680 5
   *                              </tr>
1681
   *                              <tr valign="top">
1682
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1683
   *                              <td>
1684 15
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1685
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
1686 15
   *                              </td>
1687 2
   *                              </tr>
1688 15
   *                              <tr valign="top">
1689
   *                              <td><b>ENT_DISALLOWED</b></td>
1690
   *                              <td>
1691
   *                              Replace invalid code points for the given document type with a
1692
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
1693 15
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1694
   *                              instance, to ensure the well-formedness of XML documents with
1695 15
   *                              embedded external content.
1696 15
   *                              </td>
1697
   *                              </tr>
1698 13
   *                              <tr valign="top">
1699
   *                              <td><b>ENT_HTML401</b></td>
1700 13
   *                              <td>
1701 13
   *                              Handle code as HTML 4.01.
1702
   *                              </td>
1703
   *                              </tr>
1704 6
   *                              <tr valign="top">
1705 15
   *                              <td><b>ENT_XML1</b></td>
1706
   *                              <td>
1707 15
   *                              Handle code as XML 1.
1708
   *                              </td>
1709
   *                              </tr>
1710 15
   *                              <tr valign="top">
1711 15
   *                              <td><b>ENT_XHTML</b></td>
1712 15
   *                              <td>
1713
   *                              Handle code as XHTML.
1714 15
   *                              </td>
1715
   *                              </tr>
1716 15
   *                              <tr valign="top">
1717
   *                              <td><b>ENT_HTML5</b></td>
1718 15
   *                              <td>
1719
   *                              Handle code as HTML 5.
1720
   *                              </td>
1721
   *                              </tr>
1722
   *                              </table>
1723
   *                              </p>
1724
   * @param string $encoding      [optional] <p>
1725
   *                              Like <b>htmlspecialchars</b>,
1726
   *                              <b>htmlentities</b> takes an optional third argument
1727
   *                              <i>encoding</i> which defines encoding used in
1728
   *                              conversion.
1729
   *                              Although this argument is technically optional, you are highly
1730
   *                              encouraged to specify the correct value for your code.
1731
   *                              </p>
1732
   * @param bool   $double_encode [optional] <p>
1733
   *                              When <i>double_encode</i> is turned off PHP will not
1734
   *                              encode existing html entities. The default is to convert everything.
1735
   *                              </p>
1736
   *
1737
   *
1738
   * @return string the encoded string.
1739
   * </p>
1740
   * <p>
1741
   * If the input <i>string</i> contains an invalid code unit
1742
   * sequence within the given <i>encoding</i> an empty string
1743
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1744
   * <b>ENT_SUBSTITUTE</b> flags are set.
1745
   */
1746
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    /**
     * PHP's htmlentities() leaves the backslash alone, because it is mostly used to
     * escape characters for database queries. That escaping is not needed here, so
     * every backslash is additionally turned into its numeric html entity.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $str = str_replace(
        '\\',
        '&#92;',
        htmlentities($str, $flags, $encoding, $double_encode)
    );

    // The extra pass below is only applied to UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every distinct character of three or more bytes that is still present
    // and map it to the result of html_encode().
    $needles = [];
    $entities = [];
    foreach (self::chr_size_list($str) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($str, $position);
      if (isset($entities[$char])) {
        continue;
      }

      $needles[$char] = $char;
      $entities[$char] = self::html_encode($char);
    }

    return str_replace($needles, $entities, $str);
  }
1784
1785
  /**
1786
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
1787
   *
1788
   * INFO: Take a look at "UTF8::htmlentities()"
1789
   *
1790
   * @link http://php.net/manual/en/function.htmlspecialchars.php
1791
   *
1792
   * @param string $str           <p>
1793
   *                              The string being converted.
1794
   *                              </p>
1795
   * @param int    $flags         [optional] <p>
1796
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1797
   *                              invalid code unit sequences and the used document type. The default is
1798
   *                              ENT_COMPAT | ENT_HTML401.
1799
   *                              <table>
1800
   *                              Available <i>flags</i> constants
1801
   *                              <tr valign="top">
1802
   *                              <td>Constant Name</td>
1803
   *                              <td>Description</td>
1804
   *                              </tr>
1805
   *                              <tr valign="top">
1806
   *                              <td><b>ENT_COMPAT</b></td>
1807
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1808
   *                              </tr>
1809
   *                              <tr valign="top">
1810
   *                              <td><b>ENT_QUOTES</b></td>
1811
   *                              <td>Will convert both double and single quotes.</td>
1812
   *                              </tr>
1813
   *                              <tr valign="top">
1814
   *                              <td><b>ENT_NOQUOTES</b></td>
1815
   *                              <td>Will leave both double and single quotes unconverted.</td>
1816
   *                              </tr>
1817
   *                              <tr valign="top">
1818
   *                              <td><b>ENT_IGNORE</b></td>
1819
   *                              <td>
1820
   *                              Silently discard invalid code unit sequences instead of returning
1821
   *                              an empty string. Using this flag is discouraged as it
1822
   *                              may have security implications.
1823
   *                              </td>
1824 2
   *                              </tr>
1825
   *                              <tr valign="top">
1826 2
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1827 1
   *                              <td>
1828 1
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1829
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
1830 2
   *                              </td>
1831
   *                              </tr>
1832
   *                              <tr valign="top">
1833
   *                              <td><b>ENT_DISALLOWED</b></td>
1834
   *                              <td>
1835
   *                              Replace invalid code points for the given document type with a
1836
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
1837
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1838
   *                              instance, to ensure the well-formedness of XML documents with
1839
   *                              embedded external content.
1840 2
   *                              </td>
1841
   *                              </tr>
1842 2
   *                              <tr valign="top">
1843 1
   *                              <td><b>ENT_HTML401</b></td>
1844
   *                              <td>
1845
   *                              Handle code as HTML 4.01.
1846 2
   *                              </td>
1847 2
   *                              </tr>
1848 2
   *                              <tr valign="top">
1849 2
   *                              <td><b>ENT_XML1</b></td>
1850 2
   *                              <td>
1851 1
   *                              Handle code as XML 1.
1852
   *                              </td>
1853 1
   *                              </tr>
1854 1
   *                              <tr valign="top">
1855 1
   *                              <td><b>ENT_XHTML</b></td>
1856 1
   *                              <td>
1857 1
   *                              Handle code as XHTML.
1858 2
   *                              </td>
1859
   *                              </tr>
1860 2
   *                              <tr valign="top">
1861
   *                              <td><b>ENT_HTML5</b></td>
1862
   *                              <td>
1863
   *                              Handle code as HTML 5.
1864
   *                              </td>
1865
   *                              </tr>
1866
   *                              </table>
1867
   *                              </p>
1868
   * @param string $encoding      [optional] <p>
1869
   *                              Defines encoding used in conversion.
1870
   *                              </p>
1871
   *                              <p>
1872
   *                              For the purposes of this function, the encodings
1873
   *                              ISO-8859-1, ISO-8859-15,
1874
   *                              UTF-8, cp866,
1875
   *                              cp1251, cp1252, and
1876
   *                              KOI8-R are effectively equivalent, provided the
1877
   *                              <i>string</i> itself is valid for the encoding, as
1878
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1879
   *                              the same positions in all of these encodings.
1880
   *                              </p>
1881
   * @param bool   $double_encode [optional] <p>
1882
   *                              When <i>double_encode</i> is turned off PHP will not
1883
   *                              encode existing html entities, the default is to convert everything.
1884
   *                              </p>
1885
   *
1886
   * @return string The converted string.
1887
   * </p>
1888
   * <p>
1889
   * If the input <i>string</i> contains an invalid code unit
1890
   * sequence within the given <i>encoding</i> an empty string
1891
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1892
   * <b>ENT_SUBSTITUTE</b> flags are set.
1893
   */
1894
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" is passed through as is; any other encoding name is normalized first.
    $targetEncoding = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return htmlspecialchars($str, $flags, $targetEncoding, $double_encode);
  }
1902
1903
  /**
   * Checks whether iconv is available on the server.
   *
   * On PHP versions before 5.6 the iconv input-, output- and internal-encoding are
   * additionally set to "UTF-8", but only if the extension is actually loaded.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded(): bool
  {
    $loaded = \extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Only touch the iconv settings when the extension exists, otherwise the calls
    // below would abort with an "undefined function" error.
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
1923
1924
  /**
   * Alias of "UTF8::decimal_to_chr()".
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>Passed through unchanged to "UTF8::decimal_to_chr()".</p>
   *
   * @return string
   */
  public static function int_to_chr($int): string
  {
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
1937
1938
  /**
   * Converts an integer into its hexadecimal code point notation, e.g. "U+00f1".
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Prefix that is put in front of the hex digits.</p>
   *
   * @return string <p>The prefix followed by at least four (lowercase) hex digits.</p>
   */
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Left-pad with zeros up to four digits; longer values are kept as they are.
    $digits = \str_pad(dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
1956
1957
  /**
   * Checks whether intl-char is available on the server.
   *
   * Requires PHP >= 7.0 (as reported by "Bootup::is_php()") and an existing "IntlChar" class.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded(): bool
  {
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
1970
1971
  /**
   * Checks whether the "intl" extension is available on the server.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded(): bool
  {
    // extension_loaded() already yields a boolean
    return \extension_loaded('intl');
  }
1980
1981
  /**
   * Alias of "UTF8::is_ascii()".
   *
   * @see        UTF8::is_ascii()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii(string $str): bool
  {
    $result = self::is_ascii($str);

    return $result;
  }
1996 1
1997 1
  /**
   * Alias of "UTF8::is_base64()".
   *
   * @see        UTF8::is_base64()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64(string $str): bool
  {
    $result = self::is_base64($str);

    return $result;
  }
2012
2013 2
  /**
   * Alias of "UTF8::is_binary()".
   *
   * @see        UTF8::is_binary()
   *
   * @param mixed $str <p>Passed through unchanged to "UTF8::is_binary()".</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str): bool
  {
    $result = self::is_binary($str);

    return $result;
  }
2028 3
2029 3
  /**
   * Alias of "UTF8::is_bom()".
   *
   * @see        UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom(string $utf8_chr): bool
  {
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2044 1
2045
  /**
   * Alias of "UTF8::is_html()".
   *
   * @see        UTF8::is_html()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml(string $str): bool
  {
    $result = self::is_html($str);

    return $result;
  }
2060 4
2061
  /**
   * Alias of "UTF8::is_json()".
   *
   * @see        UTF8::is_json()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson(string $str): bool
  {
    $result = self::is_json($str);

    return $result;
  }
2076
2077
  /**
   * Alias of "UTF8::is_utf16()".
   *
   * @see        UTF8::is_utf16()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <strong>false</strong> if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16(string $str)
  {
    $result = self::is_utf16($str);

    return $result;
  }
2092
2093
  /**
   * Alias of "UTF8::is_utf32()".
   *
   * @see        UTF8::is_utf32()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <strong>false</strong> if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32(string $str)
  {
    $result = self::is_utf32($str);

    return $result;
  }
2108
2109
  /**
   * Alias of "UTF8::is_utf8()".
   *
   * @see        UTF8::is_utf8()
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Passed through unchanged to "UTF8::is_utf8()".</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false): bool
  {
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2125
2126
  /**
   * Checks if a string consists only of the ASCII bytes accepted by this library.
   *
   * Accepted are the printable range 0x20-0x7E plus the bytes 0x09, 0x0A, 0x0D,
   * 0x10 and 0x13; the empty string counts as ASCII.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if it is ASCII<br>
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii(string $str): bool
  {
    if ($str === '') {
      return true;
    }

    // NOTE(review): "\x10" and "\x13" are hexadecimal (DLE / DC3); possibly the decimal
    // values 10 / 13 (LF / CR) were meant, which "\x0A\x0D" already cover - confirm
    // before changing, callers may rely on the current behavior.
    $foundOtherByte = preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$foundOtherByte;
  }
2144
2145
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input is decoded in strict mode and encoded again; it only counts as base64
   * when that round trip reproduces the input exactly. The empty string is not
   * considered to be base64.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded.</p>
   */
  public static function is_base64(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $decoded = base64_decode($str, true);

    // Compare against false / '' explicitly: a plain truthiness check would wrongly
    // reject valid input whose decoded payload is the string "0" (e.g. "MA==").
    if ($decoded === false || $decoded === '') {
      return false;
    }

    return base64_encode($decoded) === $str;
  }
2158
2159
  /**
   * Check if the input is binary (a heuristic).
   *
   * The input is cast to string and is reported as binary when it either consists
   * of the characters "0" and "1" only, or contains at least one NUL byte.
   * An empty input is never binary.
   *
   * @param mixed $input
   *
   * @return bool
   */
  public static function is_binary($input): bool
  {
    $input = (string)$input;
    if ($input === '') {
      return false;
    }

    // a string of bits, e.g. "01100001"
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary; this also covers inputs in which
    // NUL bytes make up a large share of the data.
    return \substr_count($input, "\x00") > 0;
  }
2188
2189
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and passes them to "UTF8::is_binary()".
   * A file that cannot be opened or read is treated like an empty one.
   *
   * @param string $file <p>Path of the file to inspect.</p>
   *
   * @return bool
   */
  public static function is_binary_file($file): bool
  {
    $block = '';

    try {
      $fp = \fopen($file, 'rb');

      // fopen() signals failure by returning false (it does not throw on its own),
      // so the handle has to be checked before it is used.
      if ($fp !== false) {
        try {
          $chunk = \fread($fp, 512);
        } finally {
          // always release the handle, even if reading fails
          \fclose($fp);
        }

        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // e.g. an error handler that converts warnings into exceptions
      $block = '';
    }

    return self::is_binary($block);
  }
2208
2209
  /**
   * Checks if the given string is equal to any "Byte Order Mark".
   *
   * The input has to be identical to one of the keys of the internal BOM list.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str): bool
  {
    $isBom = false;

    foreach (self::$BOM as $bom => $unusedByteLength) {
      if ($bom === $str) {
        $isBom = true;

        break;
      }
    }

    return $isBom;
  }
2228
2229
  /**
   * Check if the string contains any html-tags, e.g. "<lall>".
   *
   * An opening, closing or self-closing tag (with or without attributes) is enough.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // one match is sufficient, the matched text itself is not needed
    return \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str) === 1;
  }
2249 1
2250
  /**
   * Try to check if "$str" is a json-string.
   *
   * Only input that decodes into an object or an array, without a json error,
   * is accepted; the empty string is not.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // anything that is neither an object nor an array is rejected
    if (\is_object($decoded) !== true && \is_array($decoded) !== true) {
      return false;
    }

    return \json_last_error() === JSON_ERROR_NONE;
  }
2273 5
2274
  /**
   * Check if the string is UTF-16.
   *
   * After removing a BOM, only input that "UTF8::is_binary()" reports as binary is
   * examined. For both byte orders the input is converted to UTF-8 and back; when
   * that round trip is stable, the characters of the result are compared against
   * the characters of the input and the hits are counted. The byte order with more
   * hits wins, a tie means "not UTF-16".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16(string $str)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    $hits = [];
    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
      $hits[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // the conversion has to survive a full round trip
      $backAgain = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): in_array() searches the values of $strChars, while $char is a key
      // of the second count_chars() result - confirm against count_chars() that this is intended.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $hits[$candidate]++;
        }
      }
    }

    if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
      return false;
    }

    return ($hits['UTF-16LE'] > $hits['UTF-16BE']) ? 1 : 2;
  }
2333
2334 1
  /**
   * Check if the string is UTF-32.
   *
   * After removing a BOM, only input that "UTF8::is_binary()" reports as binary is
   * examined. For both byte orders the input is converted to UTF-8 and back; when
   * that round trip is stable, the characters of the result are compared against
   * the characters of the input and the hits are counted. The byte order with more
   * hits wins, a tie means "not UTF-32".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32(string $str)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    $hits = [];
    foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
      $hits[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // the conversion has to survive a full round trip
      $backAgain = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): in_array() searches the values of $strChars, while $char is a key
      // of the second count_chars() result - confirm against count_chars() that this is intended.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $hits[$candidate]++;
        }
      }
    }

    if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
      return false;
    }

    return ($hits['UTF-32LE'] > $hits['UTF-32BE']) ? 1 : 2;
  }
2393 5
2394
  /**
2395 5
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2396
   *
2397 5
   * @see    http://hsivonen.iki.fi/php-utf8/
2398
   *
2399 5
   * @param string|string[] $str    <p>The string to be checked.</p>
2400 5
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2401 5
   *
2402 5
   * @return bool
2403 5
   */
2404 5
  public static function is_utf8($str, bool $strict = false): bool
  {
    // An array is valid only if every element is valid (checked recursively).
    if (\is_array($str) === true) {
      foreach ($str as $v) {
        if (false === self::is_utf8($v, $strict)) {
          return false;
        }
      }

      return true;
    }

    // Nothing to check: an empty value contains no invalid byte sequence.
    if (!isset($str[0])) {
      return true;
    }

    // In strict mode, input detected as UTF-16 / UTF-32 is rejected up front.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    if (self::pcre_utf8_support() === true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences.
      //
      // NOTE: this shortcut must only be taken when PCRE really supports /u;
      // the check used to be inverted, so it ran exactly when /u was unavailable.
      return \preg_match('/^.{1}/us', $str) === 1;
    }

    // Fallback without PCRE-UTF-8 support: validate byte by byte.
    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Lazy-load the byte => ordinal lookup table.
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation: merge its 6 payload bits into the code point.
          $shift = ($mState - 1) * 6;
          $mUcs4 |= ($in & 0x0000003F) << $shift;
          /**
           * End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode code point to be output
           */
          if (0 === --$mState) {
            /*
            * Check for illegal sequences and code points.
            */
            // From Unicode 3.1, non-shortest form is illegal
            if (
                (2 === $mBytes && $mUcs4 < 0x0080) ||
                (3 === $mBytes && $mUcs4 < 0x0800) ||
                (4 === $mBytes && $mUcs4 < 0x10000) ||
                (4 < $mBytes) ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }
            // initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          /**
           *((0xC0 & (*in) != 0x80) && (mState != 0))
           * Incomplete multi-octet sequence.
           */
          return false;
        }
      }
    }

    // A non-zero state here means the input ended in the middle of a
    // multi-octet sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
2552 58
2553
  /**
2554 58
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
2555 58
   * Decodes a JSON string
2556 58
   *
2557
   * @link http://php.net/manual/en/function.json-decode.php
2558
   *
2559 58
   * @param string $json    <p>
2560
   *                        The <i>json</i> string being decoded.
2561 52
   *                        </p>
2562 58
   *                        <p>
2563
   *                        This function only works with UTF-8 encoded strings.
2564 48
   *                        </p>
2565 48
   *                        <p>PHP implements a superset of
2566 48
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
2567 48
   *                        only supports these values when they are nested inside an array or an object.
2568 55
   *                        </p>
2569
   * @param bool   $assoc   [optional] <p>
2570 28
   *                        When <b>TRUE</b>, returned objects will be converted into
2571 28
   *                        associative arrays.
2572 28
   *                        </p>
2573 28
   * @param int    $depth   [optional] <p>
2574 45
   *                        User specified recursion depth.
2575
   *                        </p>
2576 11
   * @param int    $options [optional] <p>
2577 11
   *                        Bitmask of JSON decode options. Currently only
2578 11
   *                        <b>JSON_BIGINT_AS_STRING</b>
2579 11
   *                        is supported (default is to cast large integers as floats)
2580 22
   *                        </p>
2581
   *
2582
   * @return mixed the value encoded in <i>json</i> in appropriate
2583
   * PHP type. Values true, false and
2584
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
2585
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
2586
   * <i>json</i> cannot be decoded or if the encoded
2587
   * data is deeper than the recursion limit.
2588
   */
2589 4 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // Pass the input through self::filter() (defined elsewhere in this class)
    // and force the result to a string before decoding.
    $json = (string)self::filter($json);

    // This file itself requires PHP >= 7 (scalar type hints / return types),
    // so the 4-argument form of json_decode() (PHP >= 5.4) is always available;
    // the former version switch could never take its "else" branch.
    return \json_decode($json, $assoc, $depth, $options);
  }
2601
2602
  /**
2603 6
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
2604
   * Returns the JSON representation of a value.
2605 57
   *
2606
   * @link http://php.net/manual/en/function.json-encode.php
2607
   *
2608 52
   * @param mixed $value   <p>
2609
   *                       The <i>value</i> being encoded. Can be any type except
2610 48
   *                       a resource.
2611 48
   *                       </p>
2612 48
   *                       <p>
2613 48
   *                       All string data must be UTF-8 encoded.
2614
   *                       </p>
2615
   *                       <p>PHP implements a superset of
2616
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
2617
   *                       only supports these values when they are nested inside an array or an object.
2618 48
   *                       </p>
2619
   * @param int   $options [optional] <p>
2620
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
2621
   *                       <b>JSON_HEX_TAG</b>,
2622
   *                       <b>JSON_HEX_AMP</b>,
2623
   *                       <b>JSON_HEX_APOS</b>,
2624 48
   *                       <b>JSON_NUMERIC_CHECK</b>,
2625 48
   *                       <b>JSON_PRETTY_PRINT</b>,
2626 48
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
2627 48
   *                       <b>JSON_FORCE_OBJECT</b>,
2628
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
2629 48
   *                       constants is described on
2630
   *                       the JSON constants page.
2631 48
   *                       </p>
2632 48
   * @param int   $depth   [optional] <p>
2633 7
   *                       Set the maximum depth. Must be greater than zero.
2634
   *                       </p>
2635
   *
2636 48
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
2637 48
   */
2638 48 View Code Duplication
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    // Pass the value through self::filter() (defined elsewhere in this class)
    // before encoding.
    $value = self::filter($value);

    // No ": string" return type on purpose: \json_encode() returns false on
    // failure (as documented for this method), and with strict_types a declared
    // string return type would turn that into a TypeError instead.
    //
    // The $depth argument exists since PHP 5.5; this file requires PHP >= 7,
    // so no version switch is needed.
    return \json_encode($value, $options, $depth);
  }
2650 27
2651
  /**
2652
   * Makes string's first char lowercase.
2653
   *
2654
   * @param string $str       <p>The input string</p>
2655
   * @param string $encoding  [optional] <p>Set the charset.</p>
2656
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
2657
   *
2658
   * @return string <p>The resulting string</p>
2659
   */
2660
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character is kept as it is; a failed
    // substr() call (false) counts as "no remainder".
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // Only the leading character gets lower-cased.
    $first = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $first = self::strtolower($first, $encoding, $cleanUtf8);

    return $first . ($rest === false ? '' : $rest);
  }
2675
2676
  /**
2677
   * alias for "UTF8::lcfirst()"
2678
   *
2679
   * @see UTF8::lcfirst()
2680
   *
2681
   * @param string $word
2682
   * @param string $encoding
2683
   * @param bool   $cleanUtf8
2684
   *
2685
   * @return string
2686
   */
2687
  public static function lcword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: all arguments are forwarded unchanged to lcfirst().
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
2691 2
2692
  /**
2693 2
   * Lowercase for all words in the string.
2694
   *
2695
   * @param string   $str        <p>The input string.</p>
2696 2
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
2697
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
2698
   * @param string   $encoding   [optional] <p>Set the charset.</p>
2699 2
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
2700
   *
2701
   * @return string
2702
   */
2703
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a loose "!$str" check would also
    // swallow the perfectly valid input "0".
    if ($str === '') {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {
      // Skip only truly empty chunks; the chunk "0" must stay in the result,
      // otherwise it silently disappears from the re-assembled string.
      if ((string)$word === '') {
        continue;
      }

      // Words listed in $exceptions (exact, case-sensitive match) are left as they are.
      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    // The chunks are glued back together without any separator.
    return \implode('', $newWords);
  }
2741
2742 2
  /**
2743
   * Strip whitespace or other characters from beginning of a UTF-8 string.
2744
   *
2745 2
   * @param string $str   <p>The string to be trimmed</p>
2746
   * @param mixed  $chars <p>Optional characters to be stripped</p>
2747
   *
2748 2
   * @return string <p>The string with unwanted characters stripped from the left.</p>
2749
   */
2750 View Code Duplication
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ($str === '') {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // Without a usable char list, strip leading Unicode separators (\pZ) and
    // "other" characters such as controls (\pC). The string "0" is a valid
    // char list and must not be mistaken for "no list given".
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = '/^[\pZ\pC]+/u';
    } else {
      $pattern = '/^' . self::rxClass($chars) . '+/u';
    }

    $trimmed = \preg_replace($pattern, '', $str);

    // preg_replace() yields null on failure (e.g. $str is not valid UTF-8);
    // hand back the input untouched instead of violating the string return type.
    return $trimmed ?? $str;
  }
2763 7
2764
  /**
2765
   * Returns the UTF-8 character with the maximum code point in the given data.
2766
   *
2767 7
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
2768 7
   *
2769 7
   * @return string <p>The character with the highest code point.</p>
2770
   */
2771 7 View Code Duplication
  public static function max($arg): string
  {
    // Several strings are treated as one long string.
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \max() must not be called with an empty array (warning + false on PHP 7,
    // ValueError on PHP 8), so empty input yields an empty string.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(\max($codepoints));
  }
2779
2780
  /**
2781
   * Calculates and returns the maximum number of bytes taken by any
2782
   * UTF-8 encoded character in the given string.
2783
   *
2784
   * @param string $str <p>The original Unicode string.</p>
2785
   *
2786
   * @return int <p>Max byte lengths of the given chars.</p>
2787 1
   */
2788
  public static function max_chr_width(string $str): int
  {
    // chr_size_list() supplies the per-character sizes; the largest one is
    // the result, or 0 when the list is empty.
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
2797
2798
  /**
2799
   * Checks whether mbstring is available on the server.
2800
   *
2801
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2802
   */
2803 1
  public static function mbstring_loaded(): bool
  {
    if (!\extension_loaded('mbstring')) {
      return false;
    }

    // Side effect: when mbstring is present, its internal encoding is switched to UTF-8.
    \mb_internal_encoding('UTF-8');

    return true;
  }
2813 1
2814 1
  private static function mbstring_overloaded(): bool
  {
    // Without the constant there is nothing to test the ini flag against.
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    // True when the MB_OVERLOAD_STRING bit is set in "mbstring.func_overload".
    return (bool)(ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
2820 1
2821 1
  /**
2822
   * Returns the UTF-8 character with the minimum code point in the given data.
2823
   *
2824
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
2825
   *
2826 1
   * @return string <p>The character with the lowest code point.</p>
2827
   */
2828 View Code Duplication
  public static function min($arg): string
  {
    // Several strings are treated as one long string.
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \min() must not be called with an empty array (warning + false on PHP 7,
    // ValueError on PHP 8), so empty input yields an empty string.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(\min($codepoints));
  }
2836 1
2837 1
  /**
2838
   * alias for "UTF8::normalize_encoding()"
2839 1
   *
2840
   * @see        UTF8::normalize_encoding()
2841
   *
2842
   * @param string $encoding
2843
   * @param mixed  $fallback
2844
   *
2845
   * @return string
2846
   *
2847
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
2848
   */
2849
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    // Deprecated alias: both arguments are forwarded unchanged to normalize_encoding().
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
2853
2854 24
  /**
2855 2
   * Normalize the encoding-"name" input.
2856
   *
2857
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
2858
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
2859 23
   *
2860 2
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by
2861
   *                default)</p>
2862
   */
2863 23
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    // Memo for this request: encoding name as given => normalized name.
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // Empty (falsy) name: hand back the caller's fallback as it is.
    if (!$encoding) {
      return $fallback;
    }

    // The two plain UTF-8 spellings are answered before the cache is consulted.
    if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    // Lazy-load the list of known encoding names.
    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // A name that is already in the list is returned exactly as given.
    if (\in_array($encoding, self::$ENCODINGS, true)) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;
    }

    // Lookup key: upper-cased name without punctuation (whitespace is kept).
    $upper = \strtoupper($encoding);
    $lookupKey = \preg_replace('/[^a-zA-Z0-9\s]/', '', $upper);

    $equivalences = [
        // ISO-8859-1 / Latin-1: Western European
        'ISO8859'   => 'ISO-8859-1',
        'ISO88591'  => 'ISO-8859-1',
        'ISO'       => 'ISO-8859-1',
        'LATIN'     => 'ISO-8859-1',
        'LATIN1'    => 'ISO-8859-1',
        // ISO-8859-2 / Latin-2: Central European
        'ISO88592'  => 'ISO-8859-2',
        'LATIN2'    => 'ISO-8859-2',
        // ISO-8859-3 / Latin-3: Southern European
        'ISO88593'  => 'ISO-8859-3',
        'LATIN3'    => 'ISO-8859-3',
        // ISO-8859-4 / Latin-4: Northern European
        'ISO88594'  => 'ISO-8859-4',
        'LATIN4'    => 'ISO-8859-4',
        // ISO-8859-5: Cyrillic
        'ISO88595'  => 'ISO-8859-5',
        // ISO-8859-6: Arabic
        'ISO88596'  => 'ISO-8859-6',
        // ISO-8859-7: Greek
        'ISO88597'  => 'ISO-8859-7',
        // ISO-8859-8: Hebrew
        'ISO88598'  => 'ISO-8859-8',
        // ISO-8859-9 / Latin-5: Turkish
        'ISO88599'  => 'ISO-8859-9',
        'LATIN5'    => 'ISO-8859-9',
        // ISO-8859-11 / TIS-620: Thai
        'ISO885911' => 'ISO-8859-11',
        'TIS620'    => 'ISO-8859-11',
        // ISO-8859-10 / Latin-6: Nordic
        'ISO885910' => 'ISO-8859-10',
        'LATIN6'    => 'ISO-8859-10',
        // ISO-8859-13 / Latin-7: Baltic
        'ISO885913' => 'ISO-8859-13',
        'LATIN7'    => 'ISO-8859-13',
        // ISO-8859-14 / Latin-8: Celtic
        'ISO885914' => 'ISO-8859-14',
        'LATIN8'    => 'ISO-8859-14',
        // ISO-8859-15 / Latin-9: Western European (with some extra chars e.g. €)
        'ISO885915' => 'ISO-8859-15',
        'LATIN9'    => 'ISO-8859-15',
        // ISO-8859-16 / Latin-10: Southeast European
        'ISO885916' => 'ISO-8859-16',
        'LATIN10'   => 'ISO-8859-16',
        // Unicode transformation formats
        'UTF16'     => 'UTF-16',
        'UTF32'     => 'UTF-32',
        'UTF8'      => 'UTF-8',
        'UTF'       => 'UTF-8',
        'UTF7'      => 'UTF-7',
        // "raw" byte encodings
        '8BIT'      => 'CP850',
        'BINARY'    => 'CP850',
    ];

    // Windows code pages 1250-1258: "CP…", "WIN…" and "WINDOWS…" are all aliases of "WINDOWS-…".
    foreach (\range(1250, 1258) as $codePage) {
      foreach (['CP', 'WIN', 'WINDOWS'] as $prefix) {
        $equivalences[$prefix . $codePage] = 'WINDOWS-' . $codePage;
      }
    }

    // Unknown names fall through as their upper-cased form (not as $fallback).
    $normalized = !empty($equivalences[$lookupKey]) ? $equivalences[$lookupKey] : $upper;

    $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

    return $normalized;
  }
2971
2972 80
  /**
2973
   * Normalize some MS Word special characters.
2974 80
   *
2975 3
   * @param string $str <p>The string to be normalized.</p>
2976
   *
2977
   * @return string
2978 79
   */
2979 1 View Code Duplication
  public static function normalize_msword(string $str): string
  {
    // Search / replacement lists, derived once per request from the lookup table.
    static $search = null;
    static $replace = null;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      // Lazy-load the table (its keys are replaced by its values below).
      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $search = \array_keys(self::$UTF8_MSWORD);
      $replace = \array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($search, $replace, $str);
  }
3001 5
3002 5
  /**
3003 5
   * Normalize the whitespace.
3004 5
   *
3005 5
   * @param string $str                     <p>The string to be normalized.</p>
3006 5
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
3007 5
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
3008 5
   *                                        bidirectional text chars.</p>
3009 5
   *
3010 5
   * @return string
3011 5
   */
3012 5
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // Search lists are built once per request; the whitespace list has two
    // variants (slot 0: incl. NO-BREAK SPACE, slot 1: without it).
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    if ($str === '') {
      return '';
    }

    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$WHITESPACE_TABLE;

      // Keeping non-breaking spaces simply means: do not search for them.
      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
    }

    // Unless requested otherwise, the bidirectional control chars are removed entirely ...
    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    // ... while every whitespace variant from the table becomes a plain space.
    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3044 5
3045 5
  /**
3046 5
   * Strip all whitespace characters. This includes tabs and newline
3047 5
   * characters, as well as multibyte whitespace such as the thin space
3048 5
   * and ideographic space.
3049 5
   *
3050 5
   * @param string $str
3051 5
   *
3052 5
   * @return string
3053 5
   */
3054 5
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // Remove every run of [[:space:]] characters. A failed replacement
    // (null) is turned into an empty string by the cast.
    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    return (string)$stripped;
  }
3062 5
3063
  /**
3064 5
   * Calculates Unicode code point of the given UTF-8 encoded character.
3065 5
   *
3066 5
   * INFO: opposite to UTF8::chr()
3067
   *
3068 5
   * @param string $chr      <p>The character of which to calculate code point.<p/>
3069
   * @param string $encoding [optional] <p>Default is UTF-8</p>
3070 5
   *
3071
   * @return int <p>
3072
   *             Unicode code point of the given character,<br>
3073
   *             0 on invalid UTF-8 byte sequence.
3074
   *             </p>
3075
   */
3076
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    // Memo of already computed code points, keyed by "<input as given><encoding name>".
    static $CHAR_CACHE = [];

    // The cache key uses the input as it was passed in, i.e. before any conversion.
    $input = $chr;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // Still not UTF-8 after normalizing the name: convert the bytes to UTF-8.
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $input . $encoding;
    if (isset($CHAR_CACHE[$cacheKey])) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred: let IntlChar do the work. A falsy result (null / 0) falls
    // through to the manual decoding below.
    if (self::$SUPPORT['intlChar'] === true) {
      $viaIntl = \IntlChar::ord($chr);
      if ($viaIntl) {
        $CHAR_CACHE[$cacheKey] = $viaIntl;

        return $viaIntl;
      }
    }

    // Manual decoding of (at most) the first four bytes; unpack() returns a 1-based list.
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // 4-byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // 3-byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // 2-byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or not enough bytes for the announced sequence): the first byte's value
      $code = $lead;
    }

    return $CHAR_CACHE[$cacheKey] = $code;
  }
3128 2
3129
  /**
3130 2
   * Parses the string into an array (into the second parameter).
3131
   *
3132 1
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
3133 1
   *          if the second parameter is not set!
3134
   *
3135 2
   * @link http://php.net/manual/en/function.parse-str.php
3136 2
   *
3137
   * @param string $str       <p>The input string.</p>
3138 37
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
3139 37
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3140
   *
3141 37
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
3142 1
   */
3143 1
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // The parsed values are written into the $result reference.
    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    if ($parsed === false) {
      return false;
    }

    // Success only counts when something was actually parsed.
    return !empty($result);
  }
3154
3155
  /**
3156
   * Checks if the /u modifier, which enables Unicode (UTF-8) support in PCRE, is available.
3157
   *
3158
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
3159
   */
3160 12
  public static function pcre_utf8_support(): bool
  {
    // Probe: an empty /u pattern on an empty subject. Errors are silenced on
    // purpose, so a failing call simply ends up as "false".
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3165 1
3166
  /**
3167
   * Create an array containing a range of UTF-8 characters.
3168 11
   *
3169
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
3170
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
3171
   *
3172
   * @return array
3173
   */
3174
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    // Turns one boundary into a code point: decimal digits are taken as a
    // number, hex digits go through hex_to_int(), anything else through ord().
    $toCodePoint = static function ($boundary) {
      if (\ctype_digit((string)$boundary)) {
        return (int)$boundary;
      }

      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    // A boundary that resolves to 0 / false makes the whole range empty.
    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    // Map every code point of the numeric range to its character via chr().
    return \array_map(
        [
            UTF8::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
3212
3213
  /**
   * Decode a raw-url-encoded string, resolving HTML entities and fixing
   * urlencoded-win1252-chars, optionally repeating until nothing changes anymore.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until a pass changes nothing).</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Non-standard "%uXXXX" escapes are rewritten as numeric HTML entities, so the
    // entity decoder in the loop below can resolve them.
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($pattern, $str)) {
      // preg_replace() yields null on a PCRE error; the cast keeps $str a string.
      $str = (string)\preg_replace($pattern, '&#x\\1;', \rawurldecode($str));
    }

    // This file needs PHP 7 (strict_types, scalar and return types), so ENT_HTML5
    // is always available and no runtime version check is required.
    $flags = ENT_QUOTES | ENT_HTML5;

    do {
      $str_compare = $str;

      $str = self::fix_simple_utf8(
          \rawurldecode(
              self::html_entity_decode(
                  // cast: html_entity_decode() only accepts a string
                  (string)self::to_utf8($str),
                  $flags
              )
          )
      );

      // a single pass is enough unless multi-decoding was requested
    } while ($multi_decode === true && $str_compare !== $str);

    return $str;
  }
3261
3262 11
  /**
   * Legacy camelCase alias that simply forwards to "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for the given input.</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3277
3278
  /**
   * Strip every known byte-order-mark found at the very beginning of the string.
   *
   * All entries of the internal BOM table are tried in their declared order, each
   * one against the result of the previous removal.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // only a BOM sitting at byte offset 0 is removed
      if (self::strpos($str, $bomString, 0, '8BIT') !== 0) {
        continue;
      }

      // a failed byte-wise substr() (false) collapses to '' through the cast
      $str = (string)self::substr($str, $bomByteLength, null, '8BIT');
    }

    return $str;
  }
3303 1
3304
  /**
   * Collapse consecutive repetitions of one or more substrings into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>
   *                              Substring(s) whose direct repetitions are collapsed;
   *                              any other type leaves the input untouched.
   *                              </p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    $needles = \is_string($what) === true ? [$what] : $what;

    // neither a string nor an array: nothing to collapse
    if (\is_array($needles) !== true) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      // "(needle)+" matches a run of the needle, which is replaced by one needle
      $str = preg_replace('/(' . preg_quote($needle, '/') . ')+/', $needle, $str);
    }

    return $str;
  }
3327 1
3328 1
  /**
   * Remove invisible (control) characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09) are kept.
   * The removal is repeated until a pass finds nothing, so sequences that only
   * appear after a previous removal are stripped as well.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>
   *                            Also remove the URL-encoded form ("%00" - "%1F", without
   *                            "%09", "%0A" and "%0D"), in upper- and lower-case hex notation.
   *                            </p>
   * @param string $replacement <p>
   *                            Text that is inserted for every removed match. It must not
   *                            contain a removable sequence itself, otherwise the loop
   *                            cannot terminate.
   *                            </p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding hex digits are case-insensitive ("%0b" === "%0B"),
      // hence the "i" modifier: without it the upper-case form is not removed
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3361 1
3362
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>
   *                                   The replacement text; an empty string removes the
   *                                   affected characters.
   *                                   </p>
   * @param bool   $processInvalidUtf8 <p>
   *                                   Also replace byte sequences that are not valid UTF-8;
   *                                   if <strong>false</strong>, only U+FFFD characters that
   *                                   already exist in the string are replaced.
   *                                   </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // mb_substitute_character() only accepts a code point or one of its keywords,
      // not an arbitrary replacement string. Invalid sequences are therefore turned
      // into U+FFFD first and the str_replace() below maps U+FFFD to the requested
      // replacement (which also covers the "remove" case of an empty replacement).
      $save = \mb_substitute_character();
      \mb_substitute_character(0xFFFD);
      try {
        $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      } finally {
        // the substitute character is global mbstring state: always restore it
        \mb_substitute_character($save);
      }

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    return str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        [
            $replacementChar,
            $replacementChar,
        ],
        $str
    );
  }
3411
3412 2
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>
   *                      Optional characters to be stripped. Without a list (the default,
   *                      or any empty value other than the string "0") all trailing
   *                      separator (\pZ) and "other" (\pC) characters are removed.
   *                      </p>
   *
   * @return string <p>
   *                The string with unwanted characters stripped from the right;
   *                the unchanged input if the regular expression fails.
   *                </p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // The string "0" is falsy in PHP but is a perfectly valid character list,
    // so it must not fall back to the default whitespace trimming.
    $useDefaultList = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultList) {
      $pattern = '/[\pZ\pC]+$/u';
    } else {
      $pattern = '/' . self::rxClass($chars) . '+$/u';
    }

    // preg_replace() returns null on failure (e.g. input that is not valid UTF-8);
    // hand back the input instead of violating the declared string return type.
    $trimmed = preg_replace($pattern, '', $str);

    return $trimmed === null ? $str : $trimmed;
  }
3433
3434
  /**
   * Build a regular-expression fragment that matches any single element of $s.
   *
   * The input is split via "UTF8::str_split()". A "-" is moved to the front of the
   * character class, elements of at most two bytes are escaped with preg_quote(),
   * longer elements that count as one character are appended unescaped, and every
   * other element becomes its own alternative next to the character class.
   * Results are memoized per "$s . $class" for the lifetime of the process.
   *
   * @param string $s     <p>The characters that should be matched.</p>
   * @param string $class <p>Initial content of the character class (placed inside "[...]").</p>
   *
   * @return string <p>Either a character class or a "(?:...|...)" group; without delimiters.</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $cache = [];

    $key = $s . $class;

    if (isset($cache[$key])) {
      return $cache[$key];
    }

    $charClass = $class;
    $alternatives = [];

    foreach (self::str_split($s) as $element) {
      if ($element === '-') {
        // a leading "-" is literal inside a character class
        $charClass = '-' . $charClass;
        continue;
      }

      if (!isset($element[2])) {
        // at most two bytes long
        $charClass .= preg_quote($element, '/');
        continue;
      }

      if (self::strlen($element) === 1) {
        $charClass .= $element;
        continue;
      }

      $alternatives[] = $element;
    }

    if ($charClass) {
      $charClass = '[' . $charClass . ']';
    }

    if ($alternatives === []) {
      $pattern = $charClass;
    } else {
      // the character class always stays the first alternative, even if it is empty
      $pattern = '(?:' . implode('|', \array_merge([$charClass], $alternatives)) . ')';
    }

    $cache[$key] = $pattern;

    return $pattern;
  }
3482
3483
  /**
   * WARNING: Prints (echo) the detected native UTF-8 support (libs) as HTML, e.g. for debugging.
   *
   * Every entry of the internal support table is written as "key - value" inside a
   * "<pre>" element; the support detection is triggered first if it has not run yet.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $html = '<pre>';

    foreach (self::$SUPPORT as $name => $state) {
      $html .= $name . ' - ' . print_r($state, true) . "\n<br>";
    }

    $html .= '</pre>';

    echo $html;
  }
3498
3499 62
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to return ASCII input unchanged.</p>
   * @param string $encoding       [optional] <p>
   *                               Default is UTF-8; any other value is normalized before
   *                               it is passed on to "UTF8::ord()".
   *                               </p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($char[0])) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    $effectiveEncoding = $encoding === 'UTF-8'
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return '&#' . self::ord($char, $effectiveEncoding) . ';';
  }
3528 62
3529
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the characters are collected by a regular expression;
   * otherwise the bytes are walked manually and every byte that does not start a
   * complete, well-formed 1-4 byte sequence is skipped.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Run the string through "UTF8::clean()" before splitting.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if (!isset($str[0])) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = [];

    if (self::$SUPPORT['pcre_utf8'] === true) {

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: manual byte-wise decoding

      // (kept from the original code; a no-op once the support table is initialized)
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $byteCount; $i++) {
        $lead = \ord($str[$i]);

        // the lead byte announces how many bytes the sequence needs
        if (($lead & 0x80) === 0x00) {
          $width = 1;
        } elseif (($lead & 0xE0) === 0xC0) {
          $width = 2;
        } elseif (($lead & 0xF0) === 0xE0) {
          $width = 3;
        } elseif (($lead & 0xF8) === 0xF0) {
          $width = 4;
        } else {
          // stray continuation byte or invalid lead byte
          continue;
        }

        // truncated sequence at the end of the string
        if (!isset($str[$i + $width - 1])) {
          continue;
        }

        $sequence = $str[$i];
        for ($offset = 1; $offset < $width; $offset++) {
          $byte = $str[$i + $offset];

          // every following byte must look like 10xxxxxx
          if ((\ord($byte) & 0xC0) !== 0x80) {
            $sequence = null;
            break;
          }

          $sequence .= $byte;
        }

        if ($sequence === null) {
          // only the lead byte is skipped; the next byte is examined on its own
          continue;
        }

        $chars[] = $sequence;
        $i += $width - 1;
      }
    }

    if ($length > 1) {
      return \array_map(
          static function (array $chunk): string {
            return \implode('', $chunk);
          },
          \array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
3646 1
3647 1
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary data (UTF-16 / UTF-32), plain ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed candidate list and finally an "iconv()"
   * round-trip over the encodings from the bundled data file.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // each detector runs once; its result says LE (1) or BE (2)
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // the data loader is not guaranteed to hand back an array
    if (\is_array(self::$ENCODINGS)) {
      foreach (self::$ENCODINGS as $encodingTmp) {
        // INFO: //IGNORE and //TRANSLIT still throw notice
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $converted = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

        // iconv() returns false for unknown encodings; feeding that into md5()
        // would raise a TypeError under strict_types, so the round-trip result
        // is compared directly: an unchanged string means the encoding fits.
        if ($converted === $str) {
          return $encodingTmp;
        }
      }
    }

    return false;
  }
3751 2
3752 2
  /**
   * Check if the string ends with the given substring (byte-wise, case sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> as well if the haystack or the needle is empty.</p>
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    // an empty haystack or an empty needle never counts as a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // take as many trailing bytes as the needle is long and compare them strictly
    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
3772
3773
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * The compared tail is cut by the byte length of the needle and then handed to
   * "UTF8::strcasecmp()".
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> as well if the haystack or the needle is empty.</p>
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    // an empty haystack or an empty needle never counts as a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // NOTE(review): the tail is byte-based; upper- and lower-case forms with
    // different byte lengths may therefore not compare as equal - confirm if relevant.
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
3793 5
3794 5
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search terms are turned into quoted, case-insensitive UTF-8 patterns; the
   * replacement is always inserted literally (no "$1" / "\1" back-references).
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty search term never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement text, or an array of texts that is paired
   *                       with the search array in order.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>
   *               A string or an array of replacements; null if the regular
   *               expression engine fails (e.g. on invalid UTF-8 input).
   *               </p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Build the patterns in a fresh array instead of rewriting $search through a
    // foreach-reference, which would leave a dangling reference behind.
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      $patterns[$key] = $needle === ''
          ? '/^(?<=.)$/' // can never match: str_replace() ignores empty needles as well
          : '/' . preg_quote($needle, '/') . '/ui';
    }

    // preg_replace() interprets "$n" and "\n" in the replacement as back-references.
    // Escaping "\" and "$" keeps the replacement literal, as str_ireplace() does.
    $escape = static function ($text): string {
      return \addcslashes((string)$text, '\\$');
    };
    $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    $result = preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $result;
  }
3837
3838 2
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> as well if the haystack or the needle is empty.</p>
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    // an empty haystack or an empty needle never counts as a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // a match counts only when the first occurrence sits at position 0
    return self::stripos($haystack, $needle) === 0;
  }
3858
3859
  /**
   * Limit the number of characters in a string without cutting the last word in half.
   *
   * Strings that already fit are returned unchanged. Otherwise the string is cut at
   * the limit, the (possibly incomplete) last word is dropped and the add-on is
   * appended; if no complete word is left, the hard cut is used instead.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximum number of characters before the add-on.</p>
   * @param string $strAddOn <p>The text that is appended to a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls exactly behind a word: drop the space and stop there
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length);

    // remove the last, possibly cut-off, word
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // no complete word left: fall back to the hard cut
    return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
  }
3895
3896
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string (in characters).</p>
   * @param string $pad_string [optional] <p>
   *                           String to use for padding the input string; with an
   *                           empty pad string the input is returned unchanged.
   *                           </p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>;
   *                           with STR_PAD_BOTH an odd remainder goes to the right side.
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT): string
  {
    $str_length = self::strlen($str);

    // nothing to pad: non-positive target length or the string is already longer
    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    // an empty pad string would cause a division by zero below
    $ps_length = self::strlen($pad_string);
    if ($ps_length < 1) {
      return $str;
    }

    $diff = (int)($pad_length - $str_length);

    // Builds exactly $chars characters of padding by repeating the pad string
    // often enough and cutting off the overhang.
    $makePadding = static function (int $chars) use ($pad_string, $ps_length): string {
      if ($chars < 1) {
        return '';
      }

      $repeated = \str_repeat($pad_string, (int)\ceil($chars / $ps_length));

      return (string)self::substr($repeated, 0, $chars);
    };

    switch ($pad_type) {
      case STR_PAD_LEFT:
        return $makePadding($diff) . $str;

      case STR_PAD_BOTH:
        // integer halves: "$diff / 2" is a float for odd differences and must not
        // be used as a length
        $left = \intdiv($diff, 2);

        return $makePadding($left) . $str . $makePadding($diff - $left);

      case STR_PAD_RIGHT:
      default:
        return $str . $makePadding($diff);
    }
  }
3950 2
3951 2
  /**
   * Repeat a string.
   *
   * The input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
3975
3976
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string;
   * every argument is handed to the native function unchanged.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4009 2
4010
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The text that takes the place of the first hit.</p>
   * @param string $subject <p>The string that is searched.</p>
   *
   * @return string <p>The subject with the first hit replaced, or the unchanged subject without a hit.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $pos = self::strpos($subject, $search);

    // no occurrence: hand the subject back untouched
    if ($pos === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $pos, self::strlen($search));
  }
4029
4030
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split with "UTF8::split()", the resulting elements are shuffled
   * with the native "shuffle()" and glued together again.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle(string $str): string
  {
    $characters = self::split($str);

    \shuffle($characters);

    return \implode('', $characters);
  }
4045 1
4046
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice keeps exactly one entry per value
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4071
4072
  /**
   * Split a string into an array of grapheme clusters (or groups of them).
   *
   * Arrays are processed entry by entry (recursively, keys are kept). A length
   * below 1 is delegated to the native "str_split()".
   *
   * @param string|string[] $str <p>The string, or an array of strings, to split.</p>
   * @param int             $len <p>Number of grapheme clusters per array element.</p>
   *
   * @return array
   */
  public static function str_split($str, int $len = 1): array
  {
    if (\is_array($str) === true) {
      return \array_map(
          static function ($entry) use ($len) {
            return self::str_split($entry, $len);
          },
          $str
      );
    }

    if (!isset($str[0])) {
      return [];
    }

    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // glue every $len consecutive clusters together
    $chunks = [];
    $current = -1;

    foreach ($clusters as $index => $cluster) {
      if ($index % $len) {
        $chunks[$current] .= $cluster;
      } else {
        // every $len-th cluster opens a new chunk
        $chunks[++$current] = $cluster;
      }
    }

    return $chunks;
  }
4120
4121
  /**
   * Check if the string starts with the given substring.
   *
   * INFO: an empty haystack or an empty needle always results in <strong>false</strong>.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>true</strong> if $haystack begins with $needle, <strong>false</strong> otherwise.</p>
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    // both strings need at least one byte, and the needle must be found at position 0
    return isset($haystack[0], $needle[0]) && strpos($haystack, $needle) === 0;
  }
4141 1
4142
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written in base 2,
   * without leading zeros (e.g. "a" => "1100001"). An empty string, or a string that
   * consists of NUL bytes only, results in "0".
   *
   * INFO: the conversion is done digit by digit, because base_convert() works with a
   *       float / int internally and silently loses precision for longer inputs.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The binary digits ("0" and "1").</p>
   */
  public static function str_to_binary(string $str): string
  {
    // hexadecimal digit => its four binary digits
    static $HEX_TO_BITS = [
        '0' => '0000', '1' => '0001', '2' => '0010', '3' => '0011',
        '4' => '0100', '5' => '0101', '6' => '0110', '7' => '0111',
        '8' => '1000', '9' => '1001', 'a' => '1010', 'b' => '1011',
        'c' => '1100', 'd' => '1101', 'e' => '1110', 'f' => '1111',
    ];

    // "H*" => the whole string as (lowercase) hex digits, high nibble first
    $value = unpack('H*', $str);

    $bits = \strtr((string)$value[1], $HEX_TO_BITS);

    // keep the former output format: no leading zeros, but at least one digit
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4155
4156
  /**
   * Convert a string into an array of words.
   *
   * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result contains the words
   * as well as the text between them.
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove values that are empty after trim().</p>
   * @param null|int $removeShortValues <p>Remove values with a length less than or equal to this number; null
   *                                    disables this filter.</p>
   *
   * @return array
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filter requested: hand back the raw split result
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = [];
    foreach ($parts as $part) {
      $isTooShort = $removeShortValues !== null && self::strlen($part) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues === true && trim($part) === '';
      if ($isBlank) {
        continue;
      }

      $filtered[] = $part;
    }

    return $filtered;
  }
4211 1
4212
  /**
   * Alias of "UTF8::to_ascii()": all arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4227 1
4228
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string, or the words themselves (see $format).</p>
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    // the words are located at the odd indexes, the text between them at the even indexes
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    if ($format === 1) {
      $words = [];
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[] = $parts[$index];
      }

      return $words;
    }

    if ($format === 2) {
      $words = [];
      // the first word starts right after the leading non-word text
      $position = self::strlen($parts[0]);
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[$position] = $parts[$index];
        // skip the word itself and the separator that follows it
        $position += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
      }

      return $words;
    }

    // every other value: only count the words
    return ($partCount - 1) / 2;
  }
4271
4272 1
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both strings are passed through
   *       UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4290
4291
  /**
   * Alias of "UTF8::strstr()": all arguments are passed through unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4308 2
4309
  /**
   * Case-sensitive string comparison.
   *
   * INFO: strings that are not byte-identical are normalized to NFD before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp(string $str1, string $str2): int
  {
    // identical strings: no normalization needed
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    return strcmp(
        \Normalizer::normalize($str1, \Normalizer::NFD),
        \Normalizer::normalize($str2, \Normalizer::NFD)
    );
  }
4329
4330
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   <p>Start position, passed to UTF8::substr().</p>
   * @param int    $length   <p>Length of the examined part, passed to UTF8::substr().</p>
   *
   * @return int|null <p>Length of the segment; null if $charList or the examined string is empty,
   *                  or if UTF8::substr() returned false.</p>
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ($charList === '') {
      return null;
    }

    // only cut the string if the caller asked for a sub-part
    if ($offset !== 0 || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }
      $str = (string)$part;
    }

    if (!isset($str[0])) {
      return null;
    }

    // group 1 lazily captures everything in front of the first char from the mask
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (\preg_match($pattern, $str, $match)) {
      return self::strlen($match[1]);
    }

    // no char from the mask found: the whole string is the segment
    return self::strlen($str);
  }
4364
4365 1
  /**
   * Alias of "UTF8::stristr()": all arguments are passed through unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4382
4383 1
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array): string
  {
    // every code point is converted via UTF8::chr(), the results are joined in array order
    $toChar = [self::class, 'chr'];
    $chars = \array_map($toChar, $array);

    return \implode('', $chars);
  }
4405
4406
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom(string $str): bool
  {
    // the BOM strings are the keys of self::$BOM, the byte-lengths are not needed here
    foreach (\array_keys(self::$BOM) as $bom) {
      if (strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
4423
4424 1
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>The input string.</p>
   * @param string $allowable_tags  [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // optional cleanup, before the native function does the real work
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
4456
4457
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (or if haystack / needle is empty).
   *                   </p>
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4507
4508
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end,
   * case insensitive.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle is returned (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (or if haystack / needle is empty).
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if the cleanup removed everything.
    // INFO: compare with '' explicitly, a loose "!$needle" also matches the valid needle "0"
    if ((string)$needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // pure ASCII: the native byte-function is sufficient
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: group 1 lazily captures everything in front of the needle
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    return self::substr($haystack, self::strlen($match[1]));
  }
4585
4586
  /**
   * Get the string length, not the byte-length!
   *
   * The first available strategy wins, in this order: "mbstring", "iconv", "intl" (UTF-8 only),
   * the native strlen() for pure ASCII, a regex based count and finally the "mb_"-polyfill.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if (!isset($str[0])) {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // these encodings are measured in bytes
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
      case '8BIT':
        if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
          return \strlen($str);
        }

        return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // non UTF-8 encoding and no "mbstring": "iconv" is the only extension left
    if ($encoding !== 'UTF-8' && self::$SUPPORT['iconv'] === true && self::$SUPPORT['mbstring'] === false) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    \preg_match_all('/./us', $str, $chars);
    $length = \count($chars[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
4694
4695
  /**
   * Get string length in byte.
   *
   * INFO: if "mbstring.func_overload" is active, the byte-length is taken from
   *       mb_strlen() with the "8BIT" encoding instead of strlen().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int <p>The number of bytes.</p>
   */
  public static function strlen_in_byte(string $str): int
  {
    // make sure the support flags are filled before they are read (same guard as in e.g. UTF8::strlen())
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      return \mb_strlen($str, '8BIT');
    }

    return \strlen($str);
  }
4712
4713 17
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp(); both strings are passed through
   *       UTF8::strtocasefold() before they are compared via UTF8::strnatcmp().
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4729
4730 1
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp(); strings that are not byte-identical
   *       are passed through UTF8::strtonatfold() before they are compared.
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical strings: no folding needed
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4748
4749
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * INFO: both strings are passed through UTF8::strtocasefold() before they are
   *       compared via UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4766
4767
  /**
   * String comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // cut both strings via UTF8::substr() first, then compare the prefixes
    $prefix1 = (string)self::substr($str1, 0, $len);
    $prefix2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($prefix1, $prefix2);
  }
4787
4788
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>String starting from the character found, or false if it is not found
   *                      (or if haystack / char_list is empty).</p>
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $match)) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched character
    $start = strpos($haystack, $match[0]);

    return substr($haystack, $start);
  }
4810
4811 87
  /**
   * Find position of first occurrence of string in a string.
   *
   * The first available strategy wins, in this order: "mbstring", "intl" (UTF-8 only),
   * "iconv" (no negative offset), the native strpos() for pure ASCII and finally a
   * fallback via vanilla php.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or if haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: $needle is always a string here (type-hint), so there is no integer needle to convert

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // A negative offset counts from the end of the string: translate it into the
      // equivalent start position, so the result stays relative to the whole haystack.
      $offset = \max(0, self::strlen($haystack) + $offset);
    }

    // search only in the part of the haystack that starts at $offset
    if ($haystackIsAscii) {
      $haystackTmp = substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // byte-position of the needle in the remaining haystack
    $pos = strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte-position into a character-position and add the skipped characters
    return $offset + self::strlen(substr($haystack, 0, $pos));
  }
4952
4953
  /**
4954
   * Finds the last occurrence of a character in a string within another.
4955
   *
4956 1
   * @link http://php.net/manual/en/function.mb-strrchr.php
4957
   *
4958 1
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
4959
   * @param string $needle        <p>The string to find in haystack</p>
4960
   * @param bool   $before_needle [optional] <p>
4961
   *                              Determines which portion of haystack
4962
   *                              this function returns.
4963
   *                              If set to true, it returns all of haystack
4964
   *                              from the beginning to the last occurrence of needle.
4965
   *                              If set to false, it returns all of haystack
4966
   *                              from the last occurrence of needle to the end,
4967
   *                              </p>
4968
   * @param string $encoding      [optional] <p>
4969
   *                              Character encoding name to use.
4970
   *                              If it is omitted, internal character encoding is used.
4971
   *                              </p>
4972
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
4973
   *
4974 2
   * @return string|false The portion of haystack or false if needle is not found.
4975
   */
4976 2 View Code Duplication
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Only non-canonical encoding names need to be resolved.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // Run both operands through self::clean() before searching.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Delegate the actual search to "mb_strrchr" (possibly provided by a polyfill).
    return \mb_strrchr($haystack, $needle, $before_needle, $charset);
  }
4992 1
4993
  /**
4994 1
   * Reverses characters order in the string.
4995 1
   *
4996
   * @param string $str The input string
4997 1
   *
4998 1
   * @return string The string with characters in the reverse sequence
4999
   */
5000
  public static function strrev(string $str): string
  {
    // An empty string has nothing to reverse.
    if ($str === '') {
      return '';
    }

    // Split into characters via self::split(), flip the order and glue back together.
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5008
5009
  /**
5010
   * Finds the last occurrence of a character in a string within another, case insensitive.
5011
   *
5012
   * @link http://php.net/manual/en/function.mb-strrichr.php
5013
   *
5014
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
5015
   * @param string $needle         <p>The string to find in haystack.</p>
5016
   * @param bool   $before_needle  [optional] <p>
5017
   *                               Determines which portion of haystack
5018
   *                               this function returns.
5019
   *                               If set to true, it returns all of haystack
5020
   *                               from the beginning to the last occurrence of needle.
5021
   *                               If set to false, it returns all of haystack
5022
   *                               from the last occurrence of needle to the end,
5023
   *                               </p>
5024 56
   * @param string $encoding       [optional] <p>
5025
   *                               Character encoding name to use.
5026 56
   *                               If it is omitted, internal character encoding is used.
5027 56
   *                               </p>
5028
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
5029 56
   *
5030 3
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
5031
   */
5032 View Code Duplication
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Resolve encoding aliases; the canonical "UTF-8" name is used as-is.
    $charset = $encoding;
    if ($charset !== 'UTF-8') {
      $charset = self::normalize_encoding($charset, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // Run both operands through self::clean() before searching.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Case-insensitive counterpart of strrchr(): delegate to "mb_strrichr".
    return \mb_strrichr($haystack, $needle, $before_needle, $charset);
  }
5047 2
5048
  /**
5049
   * Find position of last occurrence of a case-insensitive string.
5050
   *
5051 55
   * @param string $haystack  <p>The string to look in.</p>
5052 42
   * @param string $needle    <p>The string to look for.</p>
5053 55
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
5054 15
   * @param string $encoding  [optional] <p>Set the charset.</p>
5055 15
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5056 41
   *
5057
   * @return int|false <p>
5058
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
5059 55
   *                   not found, it returns false.
5060
   *                   </p>
5061
   */
5062
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // INFO: $needle is declared as "string", so an integer code point can never
    // arrive here and no code-point-to-character conversion is attempted.

    // An empty haystack or an empty needle cannot produce a match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Make sure the extension-support table has been filled.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Only "mbstring" is given the encoding below, so warn when it is missing
    // and a non-UTF-8 encoding was requested.
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" is only used for UTF-8 input
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: upper-case both operands and search case-sensitively

    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
5112 1
5113 1
  /**
5114
   * Find position of last occurrence of a string in a string.
5115
   *
5116
   * @link http://php.net/manual/en/function.mb-strrpos.php
5117
   *
5118
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
5119 1
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
5120 1
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
5121 1
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
5122
   *                              the end of the string.
5123
   *                              </p>
5124
   * @param string     $encoding  [optional] <p>Set the charset.</p>
5125
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5126 1
   *
5127
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
5128
   *                   is not found, it returns false.</p>
5129 1
   */
5130
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // "null" (the default) means "no offset". Normalise it to 0 up front, because
    // the extension functions called below are handed this value as their
    // integer offset argument.
    $offset = $offset ?? 0;

    // A non-negative integer needle is treated as a code point and converted to its character.
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    // An empty haystack or an empty needle cannot produce a match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Make sure the extension-support table has been filled.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Only "mbstring" is given the encoding below, so warn when it is missing
    // and a non-UTF-8 encoding was requested.
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" is only used for UTF-8 input
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // Cut the haystack down to the searched window first:
    // - positive offset: skip that many leading characters
    // - negative offset: drop that many trailing characters
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // Byte-wise search; the byte position is converted into a character
    // position by measuring the prefix with self::strlen().
    $pos = strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    return $offset + self::strlen(substr($haystack, 0, $pos));
  }
5205 4
5206 2
  /**
5207
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5208
   * mask.
5209 3
   *
5210
   * @param string $str    <p>The input string.</p>
5211
   * @param string $mask   <p>The mask of chars</p>
5212
   * @param int    $offset [optional]
5213
   * @param int    $length [optional]
5214
   *
5215
   * @return int
5216
   */
5217
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    // Restrict the subject to the requested window when one was given.
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      $str = ($window === false) ? '' : (string)$window;
    }

    // Nothing can match if either the subject or the mask is empty.
    if ($str === '' || $mask === '') {
      return 0;
    }

    // Match the leading run of mask characters and measure it in characters.
    $matches = [];
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5233
5234
  /**
5235 1
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5236
   *
5237 1
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
5238 1
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
5239 1
   * @param bool   $before_needle  [optional] <p>
5240
   *                               If <b>TRUE</b>, strstr() returns the part of the
5241 1
   *                               haystack before the first occurrence of the needle (excluding the needle).
5242
   *                               </p>
5243
   * @param string $encoding       [optional] <p>Set the charset.</p>
5244 1
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
5245 1
   *
5246 1
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found.
5247
   */
5248 1
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // An empty haystack or an empty needle cannot produce a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // Run both operands through self::clean() before searching.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Make sure the extension-support table has been filled.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Only "mbstring" is given the encoding below.
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // "intl" is only used for UTF-8 input.
    $useIntl = $encoding === 'UTF-8'
               && self::$SUPPORT['intl'] === true
               && Bootup::is_php('5.4') === true;
    if ($useIntl) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // Regex fallback: lazily capture everything in front of the first needle.
    $parts = [];
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $parts);

    if (!isset($parts[1])) {
      return false;
    }

    // Either hand back the captured prefix, or everything after it
    // (which starts with the needle itself).
    return $before_needle
        ? $parts[1]
        : self::substr($haystack, self::strlen($parts[1]));
  }
5303
5304
  /**
5305
   * Unicode transformation for case-less matching.
5306
   *
5307 1
   * @link http://unicode.org/reports/tr21/tr21-5.html
5308
   *
5309 1
   * @param string $str        <p>The input string.</p>
5310
   * @param bool   $full       [optional] <p>
5311
   *                           <b>true</b>, replace full case folding chars (default)<br>
5312
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
5313 1
   *                           </p>
5314 1
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
5315
   *
5316
   * @return string
5317
   */
5318
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    // Lookup tables are built once and kept for later calls.
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ($str === '') {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    // Step 1: the small built-in folding table.
    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    // Step 2 (optional): the full folding table, loaded on first use.
    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      $str = (string)str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // Step 3: finish with a regular lower-casing.
    return self::strtolower($str);
  }
5350
5351 10
  /**
5352 2
   * Make a string lowercase.
5353 2
   *
5354
   * @link http://php.net/manual/en/function.mb-strtolower.php
5355
   *
5356 10
   * @param string      $str       <p>The string being lowercased.</p>
5357 10
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5358 10
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5359
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
5360 10
   *
5361 2
   * @return string str with all alphabetic characters converted to lowercase.
5362
   */
5363 View Code Duplication
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // $str is untyped on purpose: cast whatever was passed to a string first.
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Without a language hint this is a plain "mb_strtolower" call.
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    // Make sure the extension-support table has been filled.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Language-specific lower-casing goes through the "intl" transliterator.
    if (
        self::$SUPPORT['intl'] !== true
        ||
        Bootup::is_php('5.4') !== true
    ) {
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $transliteratorId = $lang . '-Lower';
    if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // Unknown language id: use the generic rule set instead.
      $transliteratorId = 'Any-Lower';
    }

    return transliterator_transliterate($transliteratorId, $str);
  }
5407
5408
  /**
5409
   * Generic case sensitive transformation for collation matching.
5410
   *
5411
   * @param string $str <p>The input string</p>
5412
   *
5413
   * @return string
5414
   */
5415
  private static function strtonatfold(string $str): string
  {
    // Decompose to NFD so the non-spacing marks (\p{Mn}) become separate
    // code points that the regex below can remove.
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if ($decomposed === false) {
      // Normalizer::normalize() signals failure with "false";
      // keep working on the raw input instead of passing a bool on.
      $decomposed = $str;
    }

    $folded = \preg_replace('/\p{Mn}+/u', '', $decomposed);

    // preg_replace() yields "null" on a PCRE error; the declared return type
    // is "string", so fall back to the un-stripped text in that case.
    return $folded ?? $decomposed;
  }
5420
5421
  /**
5422
   * Make a string uppercase.
5423
   *
5424
   * @link http://php.net/manual/en/function.mb-strtoupper.php
5425
   *
5426
   * @param string      $str       <p>The string being uppercased.</p>
5427
   * @param string      $encoding  [optional] <p>Set the charset.</p>
5428
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5429
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
5430
   *
5431
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
5432
   */
5433 View Code Duplication
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // $str is untyped on purpose: cast whatever was passed to a string first.
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars before converting
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      // Make sure the extension-support table has been filled.
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // Language-specific upper-casing goes through the "intl" transliterator.
      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // Unknown language id: use the generic rule set instead.
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // The message names this method (strtoupper), so the warning points at the real caller.
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
5476
5477
  /**
5478 2
   * Translate characters or replace sub-strings.
5479
   *
5480 2
   * @link  http://php.net/manual/en/function.strtr.php
5481 2
   *
5482
   * @param string          $str  <p>The string being translated.</p>
5483 2
   * @param string|string[] $from <p>The string replacing from.</p>
5484 1
   * @param string|string[] $to   <p>The string being translated to.</p>
5485
   *
5486
   * @return string <p>
5487 2
   *                This function returns a copy of str, translating all occurrences of each character in from to the
5488
   *                corresponding character in to.
5489
   *                </p>
5490
   */
5491
  public static function strtr(string $str, $from, $to = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Replacing something with itself is a no-op.
    if ($from === $to) {
      return $str;
    }

    // INF is the "no third argument" marker: with only two arguments, $from is
    // either a replacement map (array) or a string to be removed (see below).
    if (INF !== $to) {
      // Strings are split into single characters; arrays (documented as
      // string[]) are already lists of search / replacement items and are
      // used as they are.
      if (!\is_array($from)) {
        $from = self::str_split($from);
      }
      if (!\is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = \count($from);
      $countTo = \count($to);

      // Trim the longer list so both sides pair up one-to-one.
      if ($countFrom > $countTo) {
        $from = \array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = \array_slice($to, 0, $countFrom);
      }

      // Build the "search => replacement" map for the native strtr().
      $from = \array_combine($from, $to);
    }

    // Two-argument form with a plain string: remove every occurrence of it.
    if (\is_string($from)) {
      return \str_replace($from, '', $str);
    }

    return \strtr($str, $from);
  }
5522
5523
  /**
5524
   * Return the width of a string.
5525
   *
5526
   * @param string $str       <p>The input string.</p>
5527
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
5528
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5529
   *
5530
   * @return int
5531
   */
5532
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // Resolve encoding aliases; the canonical "UTF-8" name is used as-is.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // Optionally clean the input before measuring it.
    $subject = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // Delegate to "mb_strwidth" (possibly provided by a polyfill).
    return \mb_strwidth($subject, $charset);
  }
5547
5548
  /**
5549
   * Changes all keys in an array.
5550
   *
5551 13
   * @param array $array <p>The array to work on</p>
5552
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
5553
   *                     or <strong>CASE_LOWER</strong> (default)</p>
5554 13
   *
5555
   * @return array <p>An array with its keys lower or uppercased.</p>
5556 13
   */
5557 4
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // Every value other than CASE_UPPER is handled like CASE_LOWER.
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $name => $item) {
      $name = $toUpper ? self::strtoupper($name) : self::strtolower($name);

      // Keys that become identical after conversion overwrite each other (last one wins).
      $converted[$name] = $item;
    }

    return $converted;
  }
5580
5581 12
  /**
5582 1
   * Get part of a string.
5583 1
   *
5584
   * @link http://php.net/manual/en/function.mb-substr.php
5585 12
   *
5586
   * @param string $str       <p>The string being checked.</p>
5587
   * @param int    $offset    <p>The first position used in str.</p>
5588
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
5589
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
5590
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5591
   *
5592
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
5593
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
5594
   *                      characters long, <b>FALSE</b> will be returned.</p>
5595
   */
5596
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Empty input
    if (!isset($str[0])) {
      return '';
    }

    // Empty string requested
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if (!$offset && $length === null) {
      return $str;
    }

    // The character count is only needed for the range check and as the
    // default length.
    $str_length = 0;
    if ($offset || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // Impossible: the start lies behind the end of the string
    if ($offset && $offset > $str_length) {
      return false;
    }

    // From here on $length is never null, so the calls below can always pass it.
    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Make sure the extension-support table has been filled.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is cut with the plain substr(), unless mbstring function overloading is active.
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return substr($str, $offset, $length);
    }

    // Only "mbstring" is given the encoding below.
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" is only used for UTF-8 input
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_substr($str, $offset, $length);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // Pure ASCII: the byte-wise substr() is used directly.
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
5697
5698
  /**
5699
   * Binary safe comparison of two strings from an offset, up to length characters.
5700
   *
5701
   * @param string   $str1               <p>The main string being compared.</p>
5702
   * @param string   $str2               <p>The secondary string being compared.</p>
5703
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
5704
   *                                     counting from the end of the string.</p>
5705
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
5706
   *                                     the length of the str compared to the length of main_str less the offset.</p>
5707
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
5708
   *                                     insensitive.</p>
5709
   *
5710
   * @return int <p>
5711
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
5712
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
5713 17
   *             <strong>0</strong> if they are equal.
5714
   *             </p>
5715
   */
5716
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    // Only cut the operands when a window was actually requested.
    if ($offset !== 0 || $length !== null) {
      // Main string: the part selected by $offset / $length.
      $mainPart = self::substr($str1, $offset, $length);
      $str1 = ($mainPart === false) ? '' : (string)$mainPart;

      // Secondary string: at most as many characters as the main part has.
      $otherPart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($otherPart === false) ? '' : (string)$otherPart;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
5742 1
5743 1
  /**
5744 1
   * Count the number of substring occurrences.
5745 1
   *
5746 1
   * @link  http://php.net/manual/en/function.substr-count.php
5747
   *
5748 1
   * @param string $haystack   <p>The string to search in.</p>
5749 1
   * @param string $needle     <p>The substring to search for.</p>
5750 1
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
5751 1
   * @param int    $length     [optional] <p>
5752 1
   *                           The maximum length after the specified offset to search for the
5753
   *                           substring. It outputs a warning if the offset plus the length is
5754 1
   *                           greater than the haystack length.
5755 1
   *                           </p>
5756
   * @param string $encoding   <p>Set the charset.</p>
5757 1
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
5758 1
   *
5759
   * @return int|false <p>This function returns an integer, or false if the haystack or the needle is empty.</p>
5760
   */
5761 1
  public static function substr_count(string $haystack, string $needle, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Nothing can be counted when either operand is empty.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Narrow the haystack first when a window (offset and/or length) was requested.
    if ($offset || $length !== null) {

      $length = $length ?? self::strlen($haystack);

      // output from "substr_count()" have changed in PHP 7.1
      $rejectedBeforePhp71 = $length !== 0
                             && $offset !== 0
                             && $length + $offset <= 0
                             && Bootup::is_php('7.1') === false;
      if ($rejectedBeforePhp71) {
        return false;
      }

      $window = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($window === false) ? '' : (string)$window;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Optionally strip invalid byte sequences from both operands before counting.
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $mbstringAvailable = self::$SUPPORT['mbstring'];

    // The regex fallback below only understands UTF-8, so warn about anything else.
    if ($mbstringAvailable === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($mbstringAvailable === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // Fallback: count literal (quoted) occurrences of the needle with PCRE in UTF-8 mode.
    $pattern = '/' . \preg_quote($needle, '/') . '/us';
    \preg_match_all($pattern, $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
5825
5826
  /**
5827
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
5828
   *
5829
   * @param string $haystack <p>The string to search in.</p>
5830
   * @param string $needle   <p>The substring to search for.</p>
5831
   *
5832
   * @return string <p>Return the sub-string.</p>
5833
   */
5834 View Code Duplication
  public static function substr_ileft(string $haystack, string $needle): string
  {
    // An empty haystack has nothing to strip.
    if ($haystack === '') {
      return '';
    }

    // No needle, or the haystack does not start with it (ignoring case): return it untouched.
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Drop as many leading characters as the needle is long.
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5854
5855
  /**
5856 73
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
5857
   *
5858
   * @param string $haystack <p>The string to search in.</p>
5859 1
   * @param string $needle   <p>The substring to search for.</p>
5860 1
   *
5861
   * @return string <p>Return the sub-string.</p>
5862
   */
5863 73 View Code Duplication
  public static function substr_iright(string $haystack, string $needle): string
  {
    // An empty haystack has nothing to strip.
    if ($haystack === '') {
      return '';
    }

    // No needle, or the haystack does not end with it (ignoring case): return it untouched.
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Keep everything except the last strlen($needle) characters.
    $charsToKeep = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $charsToKeep);

    return ($head === false) ? '' : (string)$head;
  }
5883
5884
  /**
5885 69
   * Removes a prefix ($needle) from the start of the string ($haystack).
5886 25
   *
5887 69
   * @param string $haystack <p>The string to search in.</p>
5888 47
   * @param string $needle   <p>The substring to search for.</p>
5889 47
   *
5890 24
   * @return string <p>Return the sub-string.</p>
5891
   */
5892 View Code Duplication
  public static function substr_left(string $haystack, string $needle): string
  {
    // An empty haystack has nothing to strip.
    if ($haystack === '') {
      return '';
    }

    // No needle, or the haystack does not start with it: return it untouched.
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Drop as many leading characters as the needle is long.
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5912
5913 47
  /**
5914 47
   * Replace text within a portion of a string.
5915
   *
5916
   * source: https://gist.github.com/stemar/8287074
5917
   *
5918
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
5919
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
5920
   * @param int|int[]       $offset           <p>
5921
   *                                          If start is positive, the replacing will begin at the start'th offset
5922
   *                                          into string.
5923
   *                                          <br><br>
5924
   *                                          If start is negative, the replacing will begin at the start'th character
5925
   *                                          from the end of string.
5926
   *                                          </p>
5927
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
5928
   *                                          portion of string which is to be replaced. If it is negative, it
5929
   *                                          represents the number of characters from the end of string at which to
5930
   *                                          stop replacing. If it is not given, then it will default to strlen(
5931
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
5932
   *                                          length is zero then this function will have the effect of inserting
5933
   *                                          replacement into string at the given start offset.</p>
5934
   *
5935
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
5936
   */
5937
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    // Array mode: normalise every argument to a list with one entry per input
    // string, then handle each string through the scalar code path below.
    if (\is_array($str) === true) {
      $num = \count($str);

      // the replacement (missing entries end up as empty strings via the (string) cast below)
      if (\is_array($replacement) === true) {
        $replacement = \array_slice($replacement, 0, $num);
      } else {
        $replacement = \array_pad([$replacement], $num, $replacement);
      }

      // the offset
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);

        // Fewer offsets than strings: start at 0 for the rest. Without the
        // padding "array_map()" would pass null into the typed native calls.
        $offset = \array_pad($offset, $num, 0);
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // the length
      if (null === $length) {
        // "Not given" must stay null for every element, so that each string is
        // replaced up to its own end. Filling with 0 would turn the call into a
        // pure insertion.
        $length = \array_fill(0, $num, null);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (null !== $valueTmpV2) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);

        // Fewer lengths than strings: replace up to the end for the rest.
        $length = \array_pad($length, $num, null);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call
      return \array_map([UTF8::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // Scalar mode with an array replacement: only the first entry is used.
    if (\is_array($replacement) === true) {
      if (\count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // Pure ASCII input can use the native, byte-based function.
    if (self::is_ascii($str)) {
      return ($length === null) ?
          \substr_replace($str, $replacement, $offset) :
          \substr_replace($str, $replacement, $offset, $length);
    }

    // Multi-byte input: split both strings into characters and splice the arrays.
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = self::strlen($str);
    }

    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
  }
6014
6015
  /**
6016 1
   * Removes a suffix ($needle) from the end of the string ($haystack).
6017 1
   *
6018
   * @param string $haystack <p>The string to search in.</p>
6019 1
   * @param string $needle   <p>The substring to search for.</p>
6020 1
   *
6021
   * @return string <p>Return the sub-string.</p>
6022
   */
6023 1 View Code Duplication
  public static function substr_right(string $haystack, string $needle): string
  {
    // An empty haystack has nothing to strip.
    if ($haystack === '') {
      return '';
    }

    // No needle, or the haystack does not end with it: return it untouched.
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Keep everything except the last strlen($needle) characters.
    $charsToKeep = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $charsToKeep);

    return ($head === false) ? '' : (string)$head;
  }
6043 1
6044
  /**
6045
   * Returns a case swapped version of the string.
6046 1
   *
6047 1
   * @param string $str       <p>The input string.</p>
6048
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
6049
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6050 1
   *
6051 1
   * @return string <p>Each character's case swapped.</p>
6052
   */
6053 1
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences first, otherwise the "/u" pattern below cannot run
      $str = self::clean($str);
    }

    // Visit every non-whitespace character: a character that is already equal to
    // its upper-case form is lower-cased, everything else is upper-cased.
    $strSwappedCase = \preg_replace_callback(
        '/[\S]/u',
        static function ($match) use ($encoding) {
          $matchToUpper = UTF8::strtoupper($match[0], $encoding);

          if ($match[0] === $matchToUpper) {
            return UTF8::strtolower($match[0], $encoding);
          }

          return $matchToUpper;
        },
        $str
    );

    // "preg_replace_callback()" returns null on a PCRE error (e.g. invalid UTF-8
    // without $cleanUtf8). Hand back the input unchanged instead of violating the
    // declared string return type.
    return $strSwappedCase ?? $str;
  }
6085
6086
  /**
6087
   * alias for "UTF8::to_ascii()"
6088
   *
6089
   * @see        UTF8::to_ascii()
6090
   *
6091
   * @param string $str
6092
   * @param string $subst_chr
6093 1
   * @param bool   $strict
6094
   *
6095
   * @return string
6096 1
   *
6097 1
   * @deprecated <p>use "UTF8::to_ascii()"</p>
6098
   */
6099 1
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    // Deprecated camelCase alias: forwards all arguments unchanged to "to_ascii()".
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
6103 1
6104 1
  /**
6105
   * alias for "UTF8::to_iso8859()"
6106
   *
6107 1
   * @see        UTF8::to_iso8859()
6108 1
   *
6109 1
   * @param string|string[] $str
6110
   *
6111
   * @return string|string[]
6112 1
   *
6113 1
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
6114
   */
6115 1
  public static function toIso8859($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to "to_iso8859()".
    $converted = self::to_iso8859($str);

    return $converted;
  }
6119
6120
  /**
6121
   * alias for "UTF8::to_latin1()"
6122
   *
6123
   * @see        UTF8::to_latin1()
6124
   *
6125
   * @param string|string[] $str
6126 1
   *
6127
   * @return string|string[]
6128
   *
6129 1
   * @deprecated <p>use "UTF8::to_latin1()"</p>
6130 1
   */
6131
  public static function toLatin1($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to "to_latin1()".
    $converted = self::to_latin1($str);

    return $converted;
  }
6135
6136 1
  /**
6137 1
   * alias for "UTF8::to_utf8()"
6138
   *
6139
   * @see        UTF8::to_utf8()
6140 1
   *
6141 1
   * @param string|string[] $str
6142 1
   *
6143
   * @return string|string[]
6144
   *
6145 1
   * @deprecated <p>use "UTF8::to_utf8()"</p>
6146 1
   */
6147
  public static function toUTF8($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to "to_utf8()".
    $converted = self::to_utf8($str);

    return $converted;
  }
6151
6152
  /**
6153
   * Convert a string into ASCII.
6154
   *
6155
   * @param string $str     <p>The input string.</p>
6156
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
6157
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
6158
   *                        performance</p>
6159 1
   *
6160
   * @return string
6161
   */
6162 1
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // Cache of the transliteration tables already loaded via "getData()",
    // keyed by "bank" (code point >> 8).
    static $UTF8_TO_ASCII;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = \str_replace(
            'ℌ',
            'H',
            $str
        );

        $strTransliterated = \transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure. In that case
        // keep the current string and let the table-based fallback below handle it.
        if ($strTransliterated !== false) {
          $str = $strTransliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // The lead byte decides how many continuation bytes follow and how many
      // payload bits it carries itself. $ord is (re)initialised for every
      // character, so a code point from a previous iteration can never leak.
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $tailBytes = 1;
        $ord = $ordC0 - 192;
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $tailBytes = 2;
        $ord = $ordC0 - 224;
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $tailBytes = 3;
        $ord = $ordC0 - 240;
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $tailBytes = 4;
        $ord = $ordC0 - 248;
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $tailBytes = 5;
        $ord = $ordC0 - 252;
      } else {
        // stray continuation byte (128-191) or 254 / 255: not a valid lead byte
        $c = $unknown;
        continue;
      }

      // Every continuation byte contributes its lower 6 bits.
      for ($i = 1; $i <= $tailBytes; $i++) {
        $ord = $ord * 64 + (self::$ORD[$c[$i]] - 128);
      }

      // Load the table for this block of 256 code points on first use.
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        // no transliteration known for this code point
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference loop
    unset($c);

    return \implode('', $chars);
  }
6313 1
6314
  /**
6315 1
   * Convert a string into "ISO-8859"-encoding (Latin-1).
6316
   *
6317
   * @param string|string[] $str
6318
   *
6319
   * @return string|string[]
6320
   */
6321
  public static function to_iso8859($str)
  {
    // Arrays are converted element by element (keys are kept, nested arrays recurse).
    if (\is_array($str) === true) {
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $input = (string)$str;

    return ($input === '') ? '' : self::utf8_decode($input);
  }
6338
6339 1
  /**
6340
   * alias for "UTF8::to_iso8859()"
6341
   *
6342 1
   * @see UTF8::to_iso8859()
6343 1
   *
6344
   * @param string|string[] $str
6345 1
   *
6346 1
   * @return string|string[]
6347
   */
6348 1
  public static function to_latin1($str)
  {
    // Latin-1 is ISO-8859-1, so this simply forwards to "to_iso8859()".
    $converted = self::to_iso8859($str);

    return $converted;
  }
6352
6353
  /**
6354 1
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
6355 1
   *
6356
   * <ul>
6357 1
   * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
6358
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
6359 1
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
6360
   * case.</li>
6361
   * </ul>
6362
   *
6363
   * @param string|string[] $str                    <p>Any string or array.</p>
6364
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
6365
   *
6366
   * @return string|string[] <p>The UTF-8 encoded string.</p>
6367
   */
6368
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // Arrays are converted element by element, keeping their keys.
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $out = '';
    $pos = 0;

    // Walk the input byte by byte. A lead byte followed by the right number of
    // continuation bytes (0x80-0xBF) is copied as-is; any other high byte is
    // converted on its own.
    while ($pos < $byteCount) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        if (($lead & "\xC0") === "\x80") {
          // lone continuation byte: needs conversion
          $out .= self::to_utf8_convert($lead);
        } else {
          // it doesn't need conversion
          $out .= $lead;
        }
        $pos++;
        continue;
      }

      // How many continuation bytes does this lead byte announce?
      if ($lead <= "\xDF") {
        $tailLength = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $tailLength = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $tailLength = 3; // looks like 4 bytes UTF8
      } else {
        $tailLength = 0; // doesn't look like UTF8, but should be converted
      }

      $sequence = $lead;
      $looksLikeUtf8 = $tailLength > 0;
      for ($k = 1; $k <= $tailLength; $k++) {
        // bytes past the end of the input count as "\x00", which is never a continuation byte
        $next = ($pos + $k >= $byteCount) ? "\x00" : $str[$pos + $k];
        if ($next < "\x80" || $next > "\xBF") {
          $looksLikeUtf8 = false;
          break;
        }
        $sequence .= $next;
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $out .= $sequence;
        $pos += $tailLength + 1;
      } else {
        // not valid UTF8 - convert the lead byte only and re-examine what follows
        $out .= self::to_utf8_convert($lead);
        $pos++;
      }
    }

    // decode unicode escape sequences
    $out = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($hit) {
          return \mb_convert_encoding(\pack('H*', $hit[1]), 'UTF-8', 'UCS-2BE');
        },
        $out
    );

    // decode UTF-8 codepoints
    if ($decodeHtmlEntityToUtf8 === true) {
      $out = self::html_entity_decode($out);
    }

    return $out;
  }
6461 7
6462
  /**
6463
   * @param string $int <p>A single byte, passed as a one-character string (the parameter name is historical).</p>
6464
   *
6465
   * @return string
6466
   */
6467
  private static function to_utf8_convert($int): string
  {
    // Lazy-load the lookup tables: byte => ordinal, ordinal => byte and the
    // Windows-1252 special cases.
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Otherwise build a two-byte sequence: the upper two bits of the input byte
    // go into the lead byte, the lower six bits into the continuation byte.
    // ">> 6" is an integer division by 64. The former "/ 64" produced a float
    // array key, which PHP truncates implicitly (deprecated since PHP 8.1).
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
6495 7
6496
  /**
6497 7
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
6498
   *
6499 7
   * INFO: This is slower than "trim()"
6500 7
   *
6501
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
6502
   * but the check for ASCII (7-Bit) costs more time than we can save here.
6503 7
   *
6504
   * @param string $str   <p>The string to be trimmed</p>
6505
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
6506 7
   *
6507 7
   * @return string <p>The trimmed string.</p>
6508 7
   */
6509
  public static function trim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // No (usable) character list given: strip Unicode separators (\pZ) and
    // "other" characters such as controls (\pC) from both ends.
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || !$chars) {
      $trimmed = \preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);

      // "preg_replace()" returns null on a PCRE error (e.g. invalid UTF-8 input).
      // Return the input untrimmed instead of violating the string return type.
      return $trimmed ?? $str;
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6522 1
6523
  /**
6524 1
   * Makes string's first char uppercase.
6525
   *
6526
   * @param string $str       <p>The input string.</p>
6527
   * @param string $encoding  [optional] <p>Set the charset.</p>
6528
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6529
   *
6530
   * @return string <p>The resulting string</p>
6531
   */
6532
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Optionally strip invalid byte sequences before slicing the string.
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // Everything after the first character stays as it is ...
    $tail = self::substr($str, 1, null, $encoding);
    $tail = ($tail === false) ? '' : $tail;

    // ... and only the first character is upper-cased.
    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
6553 7
6554 3
  /**
6555 3
   * alias for "UTF8::ucfirst()"
6556 1
   *
6557 1
   * @see UTF8::ucfirst()
6558 3
   *
6559
   * @param string $word
6560 7
   * @param string $encoding
6561
   * @param bool   $cleanUtf8
6562 7
   *
6563
   * @return string
6564
   */
6565
  public static function ucword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Alias: a single word is handled exactly like "ucfirst()".
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6569
6570
  /**
6571
   * Uppercase for all words in the string.
6572
   *
6573
   * @param string   $str        <p>The input string.</p>
6574 7
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
6575 7
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
6576
   * @param string   $encoding   [optional] <p>Set the charset.</p>
6577
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
6578
   *
6579
   * @return string
6580
   */
6581
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Strict check: the string "0" is falsy but is not empty and must be kept.
    if ($str === '') {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the string is split into words
      $str = self::clean($str);
    }

    // The native function is only safe when neither a charlist nor any non-empty
    // exception was given (compared as a string, so "0" counts as given).
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // Skip empty chunks only. A loose "!$word" would also drop the chunk "0".
      if ((string)$word === '') {
        continue;
      }

      // Upper-case the first character unless the word is listed as an exception.
      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
6638
6639
  /**
6640
   * Multi decode html entity & fix urlencoded-win1252-chars.
6641
   *
6642
   * e.g:
6643
   * 'test+test'                     => 'test test'
6644
   * 'D&#252;sseldorf'               => 'Düsseldorf'
6645
   * 'D%FCsseldorf'                  => 'Düsseldorf'
6646
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
6647
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
6648
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
6649
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
6650
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
6651
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
6652 20
   *
6653
   * @param string $str          <p>The input string.</p>
6654 20
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
6655
   *
6656 2
   * @return string
6657
   */
6658 View Code Duplication
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Non-standard "%uXXXX" escapes are rewritten into hexadecimal HTML
    // entities, so the entity decoder in the loop below can resolve them.
    $unicodeEscapeRx = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapeRx, $str)) {
      $str = \preg_replace($unicodeEscapeRx, '&#x\\1;', \urldecode($str));
    }

    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // One pass = to_utf8 -> html_entity_decode -> urldecode -> fix_simple_utf8.
    // With $multi_decode the pass is repeated until the string stops changing.
    do {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $entitiesDecoded = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\urldecode($entitiesDecoded));

    } while ($multi_decode === true && $before !== $str);

    return $str;
  }
6687
6688 18
  /**
6689 15
   * Returns an array that maps "urlencoded" win1252 sequences to UTF-8 characters.
6690 15
   *
6691 15
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
6692 8
   *
6693
   * @return array
6694
   */
6695 20
  public static function urldecode_fix_win1252_chars(): array
  {
    // Static lookup table: percent-encoded Windows-1252 byte => UTF-8 character,
    // covering %20 up to %FF.
    //
    // - %80 is the euro sign in Windows-1252 (it was mapped to a backtick before).
    // - %7F (DEL), %A0 (no-break space) and %AD (soft hyphen) are written as
    //   escapes, so the invisible characters cannot get lost in an editor.
    // - %81, %8D, %8F, %90 and %9D are unassigned in Windows-1252 and map to ''.
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => "\x7F", // DEL
        '%80' => '€',
        '%81' => '', // unassigned in Windows-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '', // unassigned in Windows-1252
        '%8E' => 'Ž',
        '%8F' => '', // unassigned in Windows-1252
        '%90' => '', // unassigned in Windows-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '', // unassigned in Windows-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\xC2\xA0", // NO-BREAK SPACE (U+00A0)
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xC2\xAD", // SOFT HYPHEN (U+00AD)
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
6924 7
6925
  /**
6926 7
   * Decodes an UTF-8 string to ISO-8859-1.
6927
   *
6928
   * @param string $str <p>The input string.</p>
6929
   * @param bool   $keepUtf8Chars
6930
   *
6931
   * @return string
6932
   */
6933
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Search / replace lists taken from the "win1252_to_utf8" data set
    // (keys are replaced by values); built once and kept in static locals.
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($searchCache, $replaceCache, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Kept for the $keepUtf8Chars comparison at the end.
    $original = $str;
    $byteCount = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    // Written for every sequence that does not fit into a single byte.
    $placeholder = '?';

    // The string is rewritten in place: $read walks the input bytes,
    // $write marks where the next output byte goes ($write <= $read).
    $read = 0;
    $write = 0;
    while ($read < $byteCount) {
      switch ($str[$read] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // Two-byte sequence: combine the payload bits of both bytes.
          $codePoint = (self::$ORD[$str[$read] & "\x1F"] << 6) | self::$ORD[$str[++$read] & "\x3F"];
          $str[$write] = $codePoint < 256 ? self::$CHR[$codePoint] : $placeholder;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // Four-byte sequence: skip one extra byte, then share the
          // three-byte handling below (intentional fall-through).
          ++$read;
        case "\xE0":
          $str[$write] = $placeholder;
          $read += 2;
          break;

        default:
          // Any other byte is copied through unchanged.
          $str[$write] = $str[$read];
      }

      ++$read;
      ++$write;
    }

    // Only the first $write bytes hold decoded output.
    $decoded = (string)self::substr($str, 0, $write, '8BIT');

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($decoded) >= self::strlen($original)
    ) {
      return $original;
    }

    return $decoded;
  }
7006
7007
  /**
7008
   * Encodes an ISO-8859-1 string to UTF-8.
7009
   *
7010
   * @param string $str <p>The input string.</p>
7011
   *
7012
   * @return string
7013
   */
7014
  public static function utf8_encode(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // A polyfilled \utf8_encode() may signal failure with false.
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // No 0xC2 byte present: return the result as it is and skip the
    // replacement table below.
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // Search / replace lists taken from the "win1252_to_utf8" data set;
    // built once and kept in static locals.
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($searchCache, $replaceCache, $encoded);
  }
7047
7048
  /**
7049
   * fix -> utf8-win1252 chars
7050
   *
7051
   * @param string $str <p>The input string.</p>
7052
   *
7053
   * @return string
7054
   *
7055
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
7056
   */
7057
  public static function utf8_fix_win1252_chars(string $str): string
  {
    // Deprecated alias: the work is done by fix_simple_utf8().
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7061
7062
  /**
7063
   * Returns an array with all utf8 whitespace characters.
7064
   *
7065
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
7066
   *
7067
   * @author: Derek E. [email protected]
7068
   *
7069
   * @return array <p>
7070
   *               An array with all known whitespace characters as values and the type of whitespace as keys
7071
   *               as defined in above URL.
7072
   *               </p>
7073
   */
7074
  public static function whitespace_table(): array
  {
    // Hands out the class-level whitespace table unchanged.
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7078
7079
  /**
7080
   * Limit the number of words in a string.
7081
   *
7082
   * @param string $str      <p>The input string.</p>
7083
   * @param int    $limit    <p>The limit of words as integer.</p>
7084
   * @param string $strAddOn <p>Replacement for the stripped string.</p>
7085
   *
7086
   * @return string
7087
   */
7088
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    // Nothing to return for empty input or a limit below one word.
    if (!isset($str[0]) || $limit < 1) {
      return '';
    }

    // Optional leading whitespace, then 1..$limit runs of
    // "non-whitespace followed by optional whitespace".
    $wordsRx = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($wordsRx, $str, $matches);

    // Only when the match is shorter than the input was something cut off;
    // then the trailing whitespace is trimmed and the add-on appended.
    if (
        isset($matches[0])
        &&
        self::strlen($str) !== self::strlen($matches[0])
    ) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7110
7111
  /**
7112
   * Wraps a string to a given number of characters
7113
   *
7114
   * @link  http://php.net/manual/en/function.wordwrap.php
7115
   *
7116
   * @param string $str   <p>The input string.</p>
7117
   * @param int    $width [optional] <p>The column width.</p>
7118
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
7119
   * @param bool   $cut   [optional] <p>
7120
   *                      If the cut is set to true, the string is
7121
   *                      always wrapped at or before the specified width. So if you have
7122
   *                      a word that is larger than the given width, it is broken apart.
7123
   *                      </p>
7124
   *
7125
   * @return string <p>The given string wrapped at the specified column.</p>
7126
   */
7127
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $chars collects the pieces of the input (characters and existing
    // breaks); $mask holds exactly one byte per entry in $chars:
    // '#' for an existing break, ' ' for a space, '?' for anything else.
    $mask = '';
    $chars = [];

    foreach (\explode($break, $str) as $index => $segment) {

      // Every segment but the first was preceded by a break in the input.
      if ($index) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    // The native function wraps the mask; '#' marks every break position.
    $mask = \wordwrap($mask, $width, '#', $cut);

    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {

      // Copy the pieces that sit in front of this break marker.
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // If the marker stands for a space or an original break, that piece
      // is consumed; otherwise nothing is dropped from the input.
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $wrapped .= $break;
    }

    // Whatever was not consumed above forms the last line.
    return $wrapped . \implode('', $chars);
  }
7175
7176
  /**
7177
   * Returns an array of Unicode White Space characters.
7178
   *
7179
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
7180
   */
7181
  public static function ws(): array
  {
    // Hands out the class-level white space list unchanged.
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7185
7186
}
7187